In [1]:
# apply: a DataFrame method
#  Applies a function along an axis of the DataFrame:
#         along columns or along rows
#         a built-in function
#         a custom-defined function (including a lambda)
# Highly flexible, but there are often faster alternatives
# Best kept as a last resort for very complicated operations

In [2]:
import pandas as pd
# Load the air-quality DataFrame from a pickle file in the working directory.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle
# files that come from a trusted source.
airq = pd.read_pickle('air_quality.pkl')

In [4]:
# List the column labels available in airq.
airq.columns

Index(['date_time', 'PM2.5', 'PM10', 'SO2', 'NO2', 'CO', 'O3', 'TEMP', 'PRES',
       'DEWP', 'RAIN', 'wd', 'WSPM', 'station', 'year', 'month', 'day', 'hour',
       'quarter', 'day_of_week_num', 'day_of_week_name', 'time_until_2022',
       'time_until_2022_days', 'time_until_2022_weeks', 'prior_2016_ind',
       'PM2.5_category', 'Tempcat'],
      dtype='object')

In [6]:
# A built-in reduction can be passed to apply by name as a string.
# axis = 0 reduces down each column, so the result has one mean per column.
airq[['PM2.5', 'PM10']].apply('mean',axis = 0)

PM2.5     83.477884
PM10     111.899959
dtype: float64

In [7]:
# axis = 1 reduces across the selected columns instead, so the result has
# one mean per row.
airq[['PM2.5', 'PM10']].apply('mean',axis = 1)

0         9.0
1         4.0
2         4.0
3         5.0
4         4.5
         ... 
95680     9.0
95681    19.5
95682    25.0
95683    28.5
95684    32.5
Length: 95685, dtype: float64

In [8]:
# Display the two source columns so the row-wise means can be compared
# against the values they were computed from.
airq[['PM2.5', 'PM10']]

Unnamed: 0,PM2.5,PM10
0,9.0,9.0
1,4.0,4.0
2,4.0,4.0
3,5.0,5.0
4,3.0,6.0
...,...,...
95680,9.0,9.0
95681,10.0,29.0
95682,18.0,32.0
95683,15.0,42.0


In [9]:
# The dedicated DataFrame.mean method returns the same per-column means as
# apply('mean', axis = 0) without going through apply.
airq[['PM2.5', 'PM10']].mean()

PM2.5     83.477884
PM10     111.899959
dtype: float64

In [10]:
# Likewise, mean(axis = 1) returns the same per-row means as
# apply('mean', axis = 1).
airq[['PM2.5', 'PM10']].mean(axis = 1)

0         9.0
1         4.0
2         4.0
3         5.0
4         4.5
         ... 
95680     9.0
95681    19.5
95682    25.0
95683    28.5
95684    32.5
Length: 95685, dtype: float64

In [11]:
def pmratio(row):
    """Return the 'PM2.5' value of a row divided by its 'PM10' value.

    Parameters
    ----------
    row : mapping-like
        Any object that can be indexed with the keys 'PM2.5' and 'PM10',
        such as the row Series that DataFrame.apply passes with axis = 1.

    Returns
    -------
    The quotient row['PM2.5'] / row['PM10'].
    """
    numerator = row['PM2.5']
    denominator = row['PM10']
    return numerator / denominator

# With axis = 1 each row is passed to pmratio in turn, producing one ratio
# per row.
airq.apply(pmratio,axis = 1)

0        1.000000
1        1.000000
2        1.000000
3        1.000000
4        0.500000
           ...   
95680    1.000000
95681    0.344828
95682    0.562500
95683    0.357143
95684    0.300000
Length: 95685, dtype: float64

In [12]:
# The same row-wise ratio written inline as a lambda instead of a named
# function.
airq.apply(lambda row: row['PM2.5']/row['PM10'], axis = 1)

0        1.000000
1        1.000000
2        1.000000
3        1.000000
4        0.500000
           ...   
95680    1.000000
95681    0.344828
95682    0.562500
95683    0.357143
95684    0.300000
Length: 95685, dtype: float64

In [13]:
# Dividing the two columns directly gives the same per-row ratios without
# using apply at all.
airq['PM2.5']/airq['PM10']

0        1.000000
1        1.000000
2        1.000000
3        1.000000
4        0.500000
           ...   
95680    1.000000
95681    0.344828
95682    0.562500
95683    0.357143
95684    0.300000
Length: 95685, dtype: float64

In [14]:
# Add a new column to airq with the value 'Go Outside' or 'Stay Inside',
# based on PM2.5_category, Tempcat, and RAIN

In [19]:
def decision(pm25cat, temp, rain):
    """Choose between 'Go Outside' and 'Stay Inside' for one observation.

    'Go Outside' is returned only when all three conditions hold:
    pm25cat is 'Good' or 'Moderate', temp is 'Warm' or 'Hot', and rain
    compares equal to 0. Any other combination returns 'Stay Inside'.

    Parameters
    ----------
    pm25cat : value compared against the labels 'Good' and 'Moderate'.
    temp : value compared against the labels 'Warm' and 'Hot'.
    rain : value compared against 0.

    Returns
    -------
    str
        Either 'Go Outside' or 'Stay Inside'.
    """
    acceptable_air = ['Good', 'Moderate']
    comfortable_temps = ['Warm', 'Hot']

    # Guard clauses are checked in the same order as the original
    # short-circuiting `and` chain: air quality, temperature, then rain.
    if pm25cat not in acceptable_air:
        return 'Stay Inside'
    if temp not in comfortable_temps:
        return 'Stay Inside'
    return 'Go Outside' if rain == 0 else 'Stay Inside'
    
# decision takes three separate values, so a lambda unpacks the three
# relevant columns from each row before calling it (axis = 1 means row by row).
airq.apply(lambda row: decision(row['PM2.5_category'], row['Tempcat'], row['RAIN']),axis = 1)

0        Stay Inside
1        Stay Inside
2        Stay Inside
3        Stay Inside
4        Stay Inside
            ...     
95680     Go Outside
95681     Go Outside
95682     Go Outside
95683     Go Outside
95684    Stay Inside
Length: 95685, dtype: object

In [20]:
# Store the per-row decision as a new 'activity' column; this adds the
# column to airq itself.
airq['activity'] = airq.apply(lambda row: decision(row['PM2.5_category'], row['Tempcat'], row['RAIN']),axis = 1)

In [22]:
# Inspect the new column next to the three input columns it was derived from.
airq[['activity','PM2.5_category','Tempcat','RAIN']]

Unnamed: 0,activity,PM2.5_category,Tempcat,RAIN
0,Stay Inside,Good,Very cold,0.0
1,Stay Inside,Good,Very cold,0.0
2,Stay Inside,Good,Very cold,0.0
3,Stay Inside,Good,Very cold,0.0
4,Stay Inside,Good,Very cold,0.0
...,...,...,...,...
95680,Go Outside,Good,Warm,0.0
95681,Go Outside,Good,Warm,0.0
95682,Go Outside,Moderate,Warm,0.0
95683,Go Outside,Moderate,Warm,0.0


In [23]:
# normalize = True reports each label's share of the rows (fractions that
# sum to 1) rather than raw counts.
airq['activity'].value_counts(normalize = True)

Stay Inside    0.874777
Go Outside     0.125223
Name: activity, dtype: float64

In [None]:
# Other uses of apply