# Pandas

## Speed up your Pandas Code

In [1]:
import pandas as pd
import numpy as np

## Create our dataset

In [2]:
df= pd.DataFrame()
size= 10_000

df['age']= np.random.randint(0, 100, size)
df['time_in_bed']= np.random.randint(0, 9, size)
df['pct_sleeping']= np.random.randint(size)
df['Favorite_food']= np.random.choice(['pizza', 'taco', 'ice-cream'], size)
df['hate_food']= np.random.choice(['broccoli', 'candy corn', 'eggs'], size)

In [3]:
df.head()

Unnamed: 0,age,time_in_bed,pct_sleeping,Favorite_food,hate_food
0,19,4,5815,pizza,candy corn
1,85,0,5815,taco,broccoli
2,43,1,5815,pizza,candy corn
3,26,2,5815,pizza,candy corn
4,22,8,5815,taco,broccoli


In [4]:
df.shape

(10000, 5)

In [5]:
def get_data(size= 10_000):
    df= pd.DataFrame()

    df['age']= np.random.randint(0, 100, size)
    df['time_in_bed']= np.random.randint(0, 9, size)
    df['pct_sleeping']= np.random.randint(size)
    df['Favorite_food']= np.random.choice(['pizza', 'taco', 'ice-cream'], size)
    df['hate_food']= np.random.choice(['broccoli', 'candy corn', 'eggs'], size)
    return df

In [6]:
get_data()

Unnamed: 0,age,time_in_bed,pct_sleeping,Favorite_food,hate_food
0,54,1,9573,pizza,eggs
1,84,4,9573,pizza,broccoli
2,91,5,9573,pizza,broccoli
3,68,7,9573,ice-cream,broccoli
4,72,7,9573,pizza,eggs
...,...,...,...,...,...
9995,25,4,9573,taco,broccoli
9996,54,7,9573,ice-cream,broccoli
9997,37,3,9573,pizza,eggs
9998,36,6,9573,taco,broccoli


### The Problem

Rewrd calculation
*  If they were in bed for more than 5 hours & they were sleeping for more than 50% we give them their favorite food.
* Otherwise we give them their hate food
* If they are over 90 years old give their favourite food regardless. 

In [7]:
def reward_cal(row):
    if row['age'] >= 90:
        return row['Favorite_food']
    if (row['time_in_bed'] > 5) & (row['pct_sleeping'] > 0.5):
        return row['Favorite_food']
    return row['hate_food']

## Level 1 - Looop

In [12]:
%%timeit
df= get_data()
for index, row in df.iterrows():
    df.loc[index, 'reward']= reward_cal(row)

1.59 s ± 42.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Level 2 - Apply

In [15]:
%%timeit
df= get_data()
df['reward']= df.apply(reward_cal, axis=1)

165 ms ± 6.13 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Level 3 - Vectorized

In [18]:
((df['pct_sleeping']> 0.5) & (df['time_in_bed']>5)) | (df['age']> 90)

0       False
1       False
2       False
3       False
4        True
        ...  
9995    False
9996     True
9997    False
9998    False
9999     True
Length: 10000, dtype: bool

In [19]:
%%timeit
df= get_data()
df['reward']= df['hate_food']
df.loc[((df['pct_sleeping']> 0.5) & 
        (df['time_in_bed']>5)) | 
       (df['age']> 90), 'reward']= df['Favorite_food']

9.79 ms ± 739 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
