# Time to learn `.apply` to do something in every row of our dataframe!

You're gonna learn something new. You can ask me to talk about it in class, too.

In [12]:
import pandas as pd

### Let's read in a file...

In [13]:
# Load the borough data from boros.csv into a dataframe
boros_df = pd.read_csv("boros.csv")
# A bare variable on the last line of a cell displays it
boros_df

Unnamed: 0,borough,cats,population
0,Manhattan,107,1400
1,Brooklyn,205,3412
2,Bronx,33,323


## And write some code to do something for every row...

### `.apply` is basically a for loop for pandas

In [14]:
# A FUNCTION takes something in (here: one row of the dataframe)
# and hands something back with "return".
def cats_per_capita(row):
    """Return the number of cats per person for a single row.

    `row` is indexed by column name and must provide 'cats' and
    'population'; the result is cats divided by population.
    """
    return row['cats'] / row['population']

# axis="columns" (the same thing as axis=1) hands the function one row at a time
boros_df.apply(cats_per_capita, axis="columns")

0    0.076429
1    0.060082
2    0.102167
dtype: float64

## Let's save those results back into the dataframe

In [6]:
# Assigning to a new column name stores the per-row results on boros_df
boros_df['cats_per_capita'] = boros_df.apply(cats_per_capita, axis="columns")
boros_df.head()

Unnamed: 0,borough,cats,population,cats_per_capita
0,Manhattan,107,1400,0.076429
1,Brooklyn,205,3412,0.060082
2,Bronx,33,323,0.102167


# What about multiple columns?

In [8]:
def cat_data(row):
    """Describe the cat situation in one row with two values.

    Reads 'cats' and 'population' from `row` and returns a pandas Series
    holding 'ratio' (cats divided by population) and 'amount' ("lots" when
    the ratio is above 0.1, otherwise "few").
    """
    per_person = row['cats'] / row['population']
    label = "lots" if per_person > 0.1 else "few"
    # Returning a Series (not a single number) gives .apply several
    # named values per row, and each name ends up as its own column.
    values = {
        'ratio': per_person,
        'amount': label
    }
    return pd.Series(values)

# Each returned Series becomes one row of a brand-new dataframe
boros_df.apply(cat_data, axis="columns")

Unnamed: 0,amount,ratio
0,few,0.076429
1,few,0.060082
2,lots,0.102167


## Joining the new columns back with the original dataframe

In [9]:
# .join lines the new columns up with the original ones using the index
boros_df.apply(cat_data, axis="columns").join(boros_df)

Unnamed: 0,amount,ratio,borough,cats,population,cats_per_capita
0,few,0.076429,Manhattan,107,1400,0.076429
1,few,0.060082,Brooklyn,205,3412,0.060082
2,lots,0.102167,Bronx,33,323,0.102167


## Saving the result

In [11]:
# .join returns a new dataframe and leaves boros_df untouched,
# so keep the combined result under its own name
merged = boros_df.apply(cat_data, axis="columns").join(boros_df)
merged.head()

Unnamed: 0,amount,ratio,borough,cats,population,cats_per_capita
0,few,0.076429,Manhattan,107,1400,0.076429
1,few,0.060082,Brooklyn,205,3412,0.060082
2,lots,0.102167,Bronx,33,323,0.102167
