In [1]:
import numpy as np
import pandas as pd
import os 
import sys
import sklearn
from sklearn.metrics.pairwise import cosine_similarity 
from datetime import datetime

# Preprocess

In [2]:
# Sanity check: print the working directory, since the data folder used below is a relative path.
!pwd

/Users/neelmehtani/Galvanize/Capstone3


In [3]:
# Folder holding the raw CSV files. The trailing slash matters: file names are
# appended to this value with plain string concatenation.
dp = 'PRSA_Data_20130301-20170228/'

In [4]:
# Collect the data files to load. Hidden entries (names starting with '.',
# e.g. macOS's '.DS_Store') are filtered out instead of being removed with
# list.remove(), which raised ValueError whenever '.DS_Store' was absent.
# Sorting makes the load order -- and therefore the row order after
# concatenation -- independent of the arbitrary order os.listdir() returns.
filelist = sorted(name for name in os.listdir(dp) if not name.startswith('.'))

In [5]:
# Read every listed file into its own frame, then stack them row-wise.
dfs = [pd.read_csv(dp + fname) for fname in filelist]

beijing = pd.concat(dfs, axis=0)
len(beijing)

385704

In [6]:
beijing.isnull().sum()

No             0
year           0
month          0
day            0
hour           0
PM2.5       8043
PM10        5965
SO2         8352
NO2        11362
CO         19404
O3         12199
TEMP         379
PRES         374
DEWP         384
RAIN         371
wd          1743
WSPM         305
station        0
dtype: int64

In [7]:
#Re-index all rows. Drop 'No'.
#Create datetime object for later time series analysis.

# After concatenation the index repeats per file; replace it with 0..n-1.
beijing = beijing.reset_index(drop=True)
# errors='ignore' lets this cell be re-run after 'No' has already been removed.
beijing = beijing.drop(columns='No', errors='ignore')

# Assemble the date directly from the year/month/day columns. This is a
# vectorised replacement for formatting and then parsing one string per row.
beijing['date'] = pd.to_datetime(beijing[['year', 'month', 'day']])


def season_values(month):
    """Return the season label for a month number.

    3-5 -> 'spring', 6-8 -> 'summer', 9-11 -> 'fall'; every other value
    (including 12, 1 and 2) -> 'winter'.
    """
    if 3 <= month <= 5:
        return 'spring'
    if 6 <= month <= 8:
        return 'summer'
    if 9 <= month <= 11:
        return 'fall'
    return 'winter'
    

# Label every row with its season and expand the label into indicator columns,
# which are appended to the frame. The intermediate 'Season' label itself is
# not kept as a column.
szn_dums = pd.get_dummies(beijing['month'].map(season_values).rename('Season'))
beijing = pd.concat([beijing, szn_dums], axis=1)


In [8]:
#Group by year and month to fill in null values based on mean for those dates.
# numeric_only=True keeps the text columns ('wd', 'station') out of the
# aggregation; recent pandas raises a TypeError on them instead of silently
# dropping them.
y = beijing.groupby(['year', 'month']).mean(numeric_only=True)

In [9]:
# Replace each null with the mean of its (year, month) group, taken from `y`.
# 'wd' is excluded and keeps its nulls, exactly as before.
# This is a vectorised equivalent of the earlier row-by-row loop, which did
# several .loc look-ups for each of the frame's rows.
cols_to_fill = [c for c in beijing.columns[beijing.isnull().any()] if c != 'wd']

if cols_to_fill:
    # One row of group means per row of `beijing` (same index, same columns).
    group_means = (
        beijing[['year', 'month']]
        .join(y[cols_to_fill], on=['year', 'month'])[cols_to_fill]
    )
    # fillna with a DataFrame aligns on index and column labels, so only the
    # missing cells are touched.
    beijing[cols_to_fill] = beijing[cols_to_fill].fillna(group_means)



# Beijing Copy

In [10]:
# Work on a separate copy so the wind-direction encoding below leaves `beijing` untouched.
beijing_copy = beijing.copy()

In [11]:
# Rows whose wind direction is still missing.
empty_wd = beijing.loc[beijing['wd'].isna()]

In [12]:
# def feature_replace_n_neighbors(df, index, feature, top_n=2):
    
#     ratings = R.loc[user].dropna()
#     neighbors = ratings.index
#     sims = similarity_matrix.loc[ratings.index, item].nlargest(top_n)
#     sims_top_idx = sims[
    
#     return sims

In [13]:
# Remove the 'station' column from the working copy.
beijing_copy.drop(columns=['station'], inplace=True)

In [14]:
# Fill every remaining null in the copy with 0. In 'wd' this puts the integer 0
# alongside the direction strings, so 0 acts as the "missing" category.
beijing_copy.fillna(0, inplace = True)

In [15]:
# Distinct wind-direction values, in order of first appearance.
beijing_copy['wd'].unique()

array(['NW', 'WNW', 'W', 'E', 'ENE', 'NE', 'ESE', 'SSE', 'NNE', 'SW',
       'SSW', 'S', 'SE', 'NNW', 'N', 'WSW', 0], dtype=object)

In [16]:
# Integer code for each distinct wind-direction value, numbered in order of first appearance.
wd_dict = {label: code for code, label in enumerate(beijing_copy['wd'].unique())}

In [17]:
# Swap each wind-direction value for its integer code.
beijing_copy['wd'] = beijing_copy['wd'].map(wd_dict)

In [18]:
# cosine_similarity(beijing_copy)

# #for the null wind entries use the majority label from top 1-3 closest entries
# # use argsort to grab the index values of those top labels 
# #from the index grab its wind value and insert into the null row 


# for i in empty_wd.index.tolist():
    
#     row = cosine_sim.iloc[i]
    
#     closest_idx = np.argsort(row)[::-1][0]
    
#     beijing_copy.loc[i, 'wd'] = beijing_copy.loc[closest_idx, 'wd']


# Pollutant Unit Conversions

In [19]:
# Molecular weight used for each gaseous pollutant in the conversion below.
mw = {'CO': 28, 'O3': 48, 'NO2': 46, 'SO2': 64}

# Convert each gas from ug/m3 to a mixing ratio using the row's temperature:
#     ppb = ug/m3 * (TEMP + 273.15) / (12.187 * molecular_weight)
# NO2 and SO2 stay in ppb; CO and O3 get an extra divisor of 1000 to give ppm.
# NOTE(review): 12.187 is roughly 1/0.08206, so this presumably assumes a
# pressure of 1 atm rather than the measured 'PRES' column -- confirm.
# The columns are overwritten, so running this cell twice converts twice.
kelvin = beijing['TEMP'] + 273.15

for gas, ppb_per_unit in (('NO2', 1), ('SO2', 1), ('CO', 1000), ('O3', 1000)):
    beijing[gas] = kelvin * beijing[gas] / (12.187 * mw[gas] * ppb_per_unit)




In [20]:
# Breakpoint tables used by the compute_aqi* functions. Each entry maps
# '<prefix>_<level>' to a (low, high) pair that is treated as inclusive at both
# ends. Levels run good -> moderate -> usg -> unhealthy -> veryunhealthy ->
# hazardous. Adjacent bands do not touch (e.g. 15.4 and 15.5), so a value can
# lie between two bands.

pm2 = {
    'pm2_good': (0, 15.4),
    'pm2_moderate': (15.5, 40.4),
    'pm2_usg': (40.5, 65.4),
    'pm2_unhealthy': (65.5, 150.4),
    'pm2_veryunhealthy': (150.5, 250.4),
    'pm2_hazardous': (250.5, 500.4),
}

pm10 = {
    'pm10_good': (0, 54),
    'pm10_moderate': (55, 154),
    'pm10_usg': (155, 254),
    'pm10_unhealthy': (255, 354),
    'pm10_veryunhealthy': (355, 424),
    'pm10_hazardous': (425, 604),
}

o3 = {
    'o3_good': (0, 0.054),
    'o3_moderate': (0.055, 0.070),
    'o3_usg': (0.071, 0.085),
    'o3_unhealthy': (0.086, 0.105),
    'o3_veryunhealthy': (0.106, 0.200),
    'o3_hazardous': (0.201, 2.00),
}

co = {
    'co_good': (0, 4.4),
    'co_moderate': (4.5, 9.4),
    'co_usg': (9.5, 12.4),
    'co_unhealthy': (12.5, 15.4),
    'co_veryunhealthy': (15.5, 30.4),
    'co_hazardous': (30.5, 50.4),
}

no = {
    'no_good': (0, 53),
    'no_moderate': (54, 100),
    'no_usg': (101, 360),
    'no_unhealthy': (361, 649),
    'no_veryunhealthy': (650, 1249),
    'no_hazardous': (1250, 2049),
}

so = {
    'so_good': (0, 35),
    'so_moderate': (36, 75),
    'so_usg': (76, 185),
    'so_unhealthy': (186, 304),
    'so_veryunhealthy': (305, 604),
    'so_hazardous': (605, 1004),
}

# AQI value range that each level's concentration band is mapped onto.
aqis = {
    'aqi_good': (0, 50),
    'aqi_moderate': (51, 100),
    'aqi_usg': (101, 150),
    'aqi_unhealthy': (151, 200),
    'aqi_veryunhealthy': (201, 300),
    'aqi_hazardous': (301, 500),
}

In [21]:
# Collapse the hourly rows of all stations into one row per calendar day by
# averaging every numeric column. The calendar-part columns are dropped first
# because 'date' already identifies the day.
# numeric_only=True leaves the text columns ('wd', 'station') out of the mean;
# recent pandas raises a TypeError on them instead of silently dropping them.
beijing_daily = (
    beijing
    .drop(['year', 'month', 'day', 'hour'], axis=1)
    .groupby('date')
    .mean(numeric_only=True)
)

In [22]:
# def compute_aqi(pollutant, aqi_dict): 
    
    
#     if (pollutant >= aqi_dict['{}_good'.format(aqi_dict)][0]) & (pollutant <= aqi_dict['{}_good'.format(aqi_dict)][1]):
#         aqi = ((pollutant - aqi_dict['{}_good'.format(aqi_dict)][0])*(aqis['aqi_good'][1] - aqis['aqi_good'][0])/(aqi_dict['{}_good'.format(aqi_dict)][1] - aqi_dict['{}_good'.format(aqi_dict)][0])) + aqis['aqi_good'][0]
#     elif (pollutant >= aqi_dict['{}_moderate'.format(aqi_dict)][0] ) & (pollutant <= aqi_dict['{}_moderate'.format(aqi_dict)][1]):
#         aqi = ((pollutant - aqi_dict['{}_moderate'.format(aqi_dict)][0])*(aqis['aqi_moderate'][1] - aqis['aqi_moderate'][0])/(aqi_dict['{}_moderate'.format(aqi_dict)][1] - aqi_dict['{}_moderate'.format(aqi_dict)][0])) + aqis['aqi_moderate'][0]
#     elif (pollutant >= aqi_dict['{}_usg'.format(aqi_dict)][0] ) & (pollutant <= aqi_dict['{}_usg'.format(aqi_dict)][1]):
#         aqi = ((pollutant - aqi_dict['{}_usg'.format(aqi_dict)][0])*(aqis['aqi_usg'][1] - aqis['aqi_usg'][0])/(aqi_dict['{}_usg'.format(aqi_dict)][1] - aqi_dict['{}_usg'.format(aqi_dict)][0])) + aqis['aqi_usg'][0]
#     elif (pollutant >= aqi_dict['{}_unhealthy'.format(aqi_dict)][0] ) & (pollutant <= aqi_dict['{}_unhealthy'.format(aqi_dict)][1]):
#         aqi = ((pollutant - aqi_dict['{}_unhealthy'.format(aqi_dict)][0])*(aqis['aqi_unhealthy'][1] - aqis['aqi_unhealthy'][0])/(aqi_dict['{}_unhealthy'.format(aqi_dict)][1] - aqi_dict['{}_unhealthy'.format(aqi_dict)][0])) + aqis['aqi_unhealthy'][0]
#     elif (pollutant >= aqi_dict['{}_veryunhealthy'.format(aqi_dict)][0] ) & (pollutant <= aqi_dict['{}_veryunhealthy'.format(aqi_dict)][1]):
#         aqi = ((pollutant - aqi_dict['{}_veryunhealthy'.format(aqi_dict)][0])*(aqis['aqi_veryunhealthy'][1] - aqis['aqi_veryunhealthy'][0])/(aqi_dict['{}_veryunhealthy'.format(aqi_dict)][1] - aqi_dict['{}_veryunhealthy'.format(aqi_dict)][0])) + aqis['aqi_veryunhealthy'][0]
#     else: 
#         aqi = ((pollutant - aqi_dict['{}_hazardous'.format(aqi_dict)][0])*(aqis['aqi_hazardous'][1] - aqis['aqi_hazardous'][0])/(aqi_dict['{}_hazardous'.format(aqi_dict)][1] - aqi_dict['{}_hazardous'.format(aqi_dict)][0])) + aqis['aqi_hazardous'][0]

#     return aqi
            
            

# Beijing Daily: AQI Daily Calculation 

In [23]:
def compute_aqi2(pm_data, breakpoints=None, index_ranges=None):
    """Convert a PM2.5 concentration into its AQI sub-index.

    The concentration is located in one of the six bands of the breakpoint
    table and linearly mapped onto the matching AQI range:

        aqi = (C - C_lo) * (I_hi - I_lo) / (C_hi - C_lo) + I_lo

    Fix over the previous version: the bands have gaps between them
    (15.4 -> 15.5, 40.4 -> 40.5, ...). A value inside a gap, e.g. 15.45,
    matched no band and fell through to the hazardous formula, giving a
    meaningless result. Such a value is now assigned to the band below the gap
    and capped at that band's upper bound, so 15.45 yields 50. Values lying
    inside a band give exactly the same result as before.

    Parameters
    ----------
    pm_data : float
        PM2.5 concentration, in the units of the breakpoint table.
    breakpoints : dict, optional
        Table keyed 'pm2_<level>' -> (low, high). Defaults to the module-level
        ``pm2``.
    index_ranges : dict, optional
        Table keyed 'aqi_<level>' -> (low, high). Defaults to the module-level
        ``aqis``.

    Returns
    -------
    float
        The sub-index. Values above the hazardous band are extrapolated along
        the hazardous line, negative values along the good line, and NaN
        stays NaN.
    """
    if breakpoints is None:
        breakpoints = pm2
    if index_ranges is None:
        index_ranges = aqis

    levels = ('good', 'moderate', 'usg', 'unhealthy', 'veryunhealthy', 'hazardous')

    # Take the first band whose successor starts above the value; this places
    # gap values in the lower band. NaN fails every comparison and so reaches
    # the last band, where the arithmetic propagates it.
    level = levels[-1]
    for current, following in zip(levels, levels[1:]):
        if pm_data < breakpoints['pm2_' + following][0]:
            level = current
            break

    c_lo, c_hi = breakpoints['pm2_' + level]
    i_lo, i_hi = index_ranges['aqi_' + level]

    # Cap gap values at the band's top; the open-ended last band is left alone.
    if level != levels[-1] and pm_data > c_hi:
        pm_data = c_hi

    return ((pm_data - c_lo) * (i_hi - i_lo) / (c_hi - c_lo)) + i_lo
            
            


In [24]:
def compute_aqi10(pollutant, breakpoints=None, index_ranges=None):
    """Convert a PM10 concentration into its AQI sub-index.

    The concentration is located in one of the six bands of the breakpoint
    table and linearly mapped onto the matching AQI range:

        aqi = (C - C_lo) * (I_hi - I_lo) / (C_hi - C_lo) + I_lo

    Fix over the previous version: the bands have gaps between them
    (54 -> 55, 154 -> 155, ...). A value inside a gap, e.g. 54.5, matched no
    band and fell through to the hazardous formula, giving a meaningless
    result. Such a value is now assigned to the band below the gap and capped
    at that band's upper bound, so 54.5 yields 50. Values lying inside a band
    give exactly the same result as before.

    Parameters
    ----------
    pollutant : float
        PM10 concentration, in the units of the breakpoint table.
    breakpoints : dict, optional
        Table keyed 'pm10_<level>' -> (low, high). Defaults to the
        module-level ``pm10``.
    index_ranges : dict, optional
        Table keyed 'aqi_<level>' -> (low, high). Defaults to the module-level
        ``aqis``.

    Returns
    -------
    float
        The sub-index. Values above the hazardous band are extrapolated along
        the hazardous line, negative values along the good line, and NaN
        stays NaN.
    """
    if breakpoints is None:
        breakpoints = pm10
    if index_ranges is None:
        index_ranges = aqis

    levels = ('good', 'moderate', 'usg', 'unhealthy', 'veryunhealthy', 'hazardous')

    # Take the first band whose successor starts above the value; this places
    # gap values in the lower band. NaN fails every comparison and so reaches
    # the last band, where the arithmetic propagates it.
    level = levels[-1]
    for current, following in zip(levels, levels[1:]):
        if pollutant < breakpoints['pm10_' + following][0]:
            level = current
            break

    c_lo, c_hi = breakpoints['pm10_' + level]
    i_lo, i_hi = index_ranges['aqi_' + level]

    # Cap gap values at the band's top; the open-ended last band is left alone.
    if level != levels[-1] and pollutant > c_hi:
        pollutant = c_hi

    return ((pollutant - c_lo) * (i_hi - i_lo) / (c_hi - c_lo)) + i_lo
            
            

In [25]:
def compute_aqico(pollutant, breakpoints=None, index_ranges=None):
    """Convert a CO concentration into its AQI sub-index.

    The concentration is located in one of the six bands of the breakpoint
    table and linearly mapped onto the matching AQI range:

        aqi = (C - C_lo) * (I_hi - I_lo) / (C_hi - C_lo) + I_lo

    Fix over the previous version: the bands have gaps between them
    (4.4 -> 4.5, 9.4 -> 9.5, ...). A value inside a gap, e.g. 4.45, matched no
    band and fell through to the hazardous formula, giving a meaningless
    result. Such a value is now assigned to the band below the gap and capped
    at that band's upper bound, so 4.45 yields 50. Values lying inside a band
    give exactly the same result as before.

    Parameters
    ----------
    pollutant : float
        CO concentration, in the units of the breakpoint table.
    breakpoints : dict, optional
        Table keyed 'co_<level>' -> (low, high). Defaults to the module-level
        ``co``.
    index_ranges : dict, optional
        Table keyed 'aqi_<level>' -> (low, high). Defaults to the module-level
        ``aqis``.

    Returns
    -------
    float
        The sub-index. Values above the hazardous band are extrapolated along
        the hazardous line, negative values along the good line, and NaN
        stays NaN.
    """
    if breakpoints is None:
        breakpoints = co
    if index_ranges is None:
        index_ranges = aqis

    levels = ('good', 'moderate', 'usg', 'unhealthy', 'veryunhealthy', 'hazardous')

    # Take the first band whose successor starts above the value; this places
    # gap values in the lower band. NaN fails every comparison and so reaches
    # the last band, where the arithmetic propagates it.
    level = levels[-1]
    for current, following in zip(levels, levels[1:]):
        if pollutant < breakpoints['co_' + following][0]:
            level = current
            break

    c_lo, c_hi = breakpoints['co_' + level]
    i_lo, i_hi = index_ranges['aqi_' + level]

    # Cap gap values at the band's top; the open-ended last band is left alone.
    if level != levels[-1] and pollutant > c_hi:
        pollutant = c_hi

    return ((pollutant - c_lo) * (i_hi - i_lo) / (c_hi - c_lo)) + i_lo
            
            

In [26]:
def compute_aqio3(pollutant, breakpoints=None, index_ranges=None):
    """Convert an O3 concentration into its AQI sub-index.

    The concentration is located in one of the six bands of the breakpoint
    table and linearly mapped onto the matching AQI range:

        aqi = (C - C_lo) * (I_hi - I_lo) / (C_hi - C_lo) + I_lo

    Fix over the previous version: the bands have gaps between them
    (0.054 -> 0.055, 0.070 -> 0.071, ...). A value inside a gap, e.g. 0.0545,
    matched no band and fell through to the hazardous formula, giving a
    meaningless result. Such a value is now assigned to the band below the gap
    and capped at that band's upper bound, so 0.0545 yields 50. Values lying
    inside a band give exactly the same result as before.

    Parameters
    ----------
    pollutant : float
        O3 concentration, in the units of the breakpoint table.
    breakpoints : dict, optional
        Table keyed 'o3_<level>' -> (low, high). Defaults to the module-level
        ``o3``.
    index_ranges : dict, optional
        Table keyed 'aqi_<level>' -> (low, high). Defaults to the module-level
        ``aqis``.

    Returns
    -------
    float
        The sub-index. Values above the hazardous band are extrapolated along
        the hazardous line, negative values along the good line, and NaN
        stays NaN.
    """
    if breakpoints is None:
        breakpoints = o3
    if index_ranges is None:
        index_ranges = aqis

    levels = ('good', 'moderate', 'usg', 'unhealthy', 'veryunhealthy', 'hazardous')

    # Take the first band whose successor starts above the value; this places
    # gap values in the lower band. NaN fails every comparison and so reaches
    # the last band, where the arithmetic propagates it.
    level = levels[-1]
    for current, following in zip(levels, levels[1:]):
        if pollutant < breakpoints['o3_' + following][0]:
            level = current
            break

    c_lo, c_hi = breakpoints['o3_' + level]
    i_lo, i_hi = index_ranges['aqi_' + level]

    # Cap gap values at the band's top; the open-ended last band is left alone.
    if level != levels[-1] and pollutant > c_hi:
        pollutant = c_hi

    return ((pollutant - c_lo) * (i_hi - i_lo) / (c_hi - c_lo)) + i_lo

In [27]:
def compute_aqino(pollutant, breakpoints=None, index_ranges=None):
    """Convert an NO2 concentration into its AQI sub-index.

    The concentration is located in one of the six bands of the breakpoint
    table and linearly mapped onto the matching AQI range:

        aqi = (C - C_lo) * (I_hi - I_lo) / (C_hi - C_lo) + I_lo

    Fix over the previous version: the bands have gaps between them
    (53 -> 54, 100 -> 101, ...). A value inside a gap, e.g. 53.5, matched no
    band and fell through to the hazardous formula, giving a meaningless
    result. Such a value is now assigned to the band below the gap and capped
    at that band's upper bound, so 53.5 yields 50. Values lying inside a band
    give exactly the same result as before.

    Parameters
    ----------
    pollutant : float
        NO2 concentration, in the units of the breakpoint table.
    breakpoints : dict, optional
        Table keyed 'no_<level>' -> (low, high). Defaults to the module-level
        ``no``.
    index_ranges : dict, optional
        Table keyed 'aqi_<level>' -> (low, high). Defaults to the module-level
        ``aqis``.

    Returns
    -------
    float
        The sub-index. Values above the hazardous band are extrapolated along
        the hazardous line, negative values along the good line, and NaN
        stays NaN.
    """
    if breakpoints is None:
        breakpoints = no
    if index_ranges is None:
        index_ranges = aqis

    levels = ('good', 'moderate', 'usg', 'unhealthy', 'veryunhealthy', 'hazardous')

    # Take the first band whose successor starts above the value; this places
    # gap values in the lower band. NaN fails every comparison and so reaches
    # the last band, where the arithmetic propagates it.
    level = levels[-1]
    for current, following in zip(levels, levels[1:]):
        if pollutant < breakpoints['no_' + following][0]:
            level = current
            break

    c_lo, c_hi = breakpoints['no_' + level]
    i_lo, i_hi = index_ranges['aqi_' + level]

    # Cap gap values at the band's top; the open-ended last band is left alone.
    if level != levels[-1] and pollutant > c_hi:
        pollutant = c_hi

    return ((pollutant - c_lo) * (i_hi - i_lo) / (c_hi - c_lo)) + i_lo
            

In [28]:
def compute_aqiso(pollutant, breakpoints=None, index_ranges=None):
    """Convert an SO2 concentration into its AQI sub-index.

    The concentration is located in one of the six bands of the breakpoint
    table and linearly mapped onto the matching AQI range:

        aqi = (C - C_lo) * (I_hi - I_lo) / (C_hi - C_lo) + I_lo

    Fix over the previous version: the bands have gaps between them
    (35 -> 36, 75 -> 76, ...). A value inside a gap, e.g. 35.5, matched no
    band and fell through to the hazardous formula, giving a meaningless
    result. Such a value is now assigned to the band below the gap and capped
    at that band's upper bound, so 35.5 yields 50. Values lying inside a band
    give exactly the same result as before.

    Parameters
    ----------
    pollutant : float
        SO2 concentration, in the units of the breakpoint table.
    breakpoints : dict, optional
        Table keyed 'so_<level>' -> (low, high). Defaults to the module-level
        ``so``.
    index_ranges : dict, optional
        Table keyed 'aqi_<level>' -> (low, high). Defaults to the module-level
        ``aqis``.

    Returns
    -------
    float
        The sub-index. Values above the hazardous band are extrapolated along
        the hazardous line, negative values along the good line, and NaN
        stays NaN.
    """
    if breakpoints is None:
        breakpoints = so
    if index_ranges is None:
        index_ranges = aqis

    levels = ('good', 'moderate', 'usg', 'unhealthy', 'veryunhealthy', 'hazardous')

    # Take the first band whose successor starts above the value; this places
    # gap values in the lower band. NaN fails every comparison and so reaches
    # the last band, where the arithmetic propagates it.
    level = levels[-1]
    for current, following in zip(levels, levels[1:]):
        if pollutant < breakpoints['so_' + following][0]:
            level = current
            break

    c_lo, c_hi = breakpoints['so_' + level]
    i_lo, i_hi = index_ranges['aqi_' + level]

    # Cap gap values at the band's top; the open-ended last band is left alone.
    if level != levels[-1] and pollutant > c_hi:
        pollutant = c_hi

    return ((pollutant - c_lo) * (i_hi - i_lo) / (c_hi - c_lo)) + i_lo
            

In [29]:
# Derive one AQI sub-index column per pollutant from the daily means.
# Columns are added in the order PM2.5, PM10, CO, NO2, SO2, O3.
aqi_sources = [
    ('AQI_PM2.5', 'PM2.5', compute_aqi2),
    ('AQI_PM10', 'PM10', compute_aqi10),
    ('AQI_CO', 'CO', compute_aqico),
    ('AQI_NO2', 'NO2', compute_aqino),
    ('AQI_SO2', 'SO2', compute_aqiso),
    ('AQI_O3', 'O3', compute_aqio3),
]

for aqi_col, source_col, to_aqi in aqi_sources:
    beijing_daily[aqi_col] = beijing_daily[source_col].apply(to_aqi)

In [30]:
# Overall AQI per day = the largest of the six pollutant sub-indices.
# The columns are selected by name: the previous positional slice
# iloc[:, -7:] reached one column too far and pulled the 'winter' indicator
# into the maximum, and it silently changed meaning once any further column
# was appended to the frame.
aqi_columns = ['AQI_PM2.5', 'AQI_PM10', 'AQI_CO', 'AQI_NO2', 'AQI_SO2', 'AQI_O3']
lst = list(beijing_daily[aqi_columns].max(axis=1))

In [31]:
# Store the per-day row-wise maximum held in `lst` as the 'aqi' column.
beijing_daily['aqi'] = lst

In [34]:
# Display the finished daily table.
beijing_daily

Unnamed: 0_level_0,PM2.5,PM10,SO2,NO2,CO,O3,TEMP,PRES,DEWP,RAIN,...,spring,summer,winter,AQI_PM2.5,AQI_PM10,AQI_CO,AQI_NO2,AQI_SO2,AQI_O3,aqi
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-03-01,7.261364,13.275486,3.505375,14.341677,0.348265,0.032490,1.093182,1026.630303,-20.188636,0.0,...,1,0,0,23.575856,12.292116,3.957552,13.529884,5.007678,30.083737,30.083737
2013-03-02,31.526515,40.919940,11.294616,28.053480,0.772689,0.015471,0.222348,1026.601136,-16.344318,0.0,...,1,0,0,82.538122,37.888833,8.780553,26.465547,16.135166,14.324617,82.538122
2013-03-03,77.772727,109.344697,17.254908,37.503760,1.419104,0.011585,5.256818,1014.256061,-12.375000,0.0,...,1,0,0,158.083199,77.897880,16.126176,35.380905,24.649868,10.727299,158.083199
2013-03-04,23.176567,41.445940,6.819097,22.596189,0.604732,0.028184,9.626515,1017.303030,-12.860606,0.0,...,1,0,0,66.106498,38.375871,6.871956,21.317160,9.741567,26.096449,66.106498
2013-03-05,129.179116,158.106061,25.174703,51.026083,1.649297,0.037918,6.574621,1010.608712,-7.957576,0.0,...,1,0,0,187.752376,102.537343,18.742008,48.137814,35.963861,35.109718,187.752376
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-02-24,24.279938,37.878400,3.418476,21.716438,0.569965,0.022355,4.850379,1019.198106,-11.370833,0.0,...,0,0,1,68.277790,35.072593,6.476874,20.487205,4.883537,20.698901,68.277790
2017-02-25,11.770651,22.266852,2.082350,15.123112,0.390930,0.028563,7.021591,1017.010227,-10.570833,0.0,...,0,0,1,38.216399,20.617455,4.442383,14.267087,2.974786,26.447192,38.216399
2017-02-26,27.816106,45.149427,3.643582,24.807694,0.587603,0.020739,6.729167,1018.506818,-7.982576,0.0,...,0,0,1,75.236513,41.805025,6.677308,23.403485,5.205117,19.202728,75.236513
2017-02-27,66.816611,97.045753,6.062260,36.852067,1.073475,0.013933,7.699621,1015.343182,-7.103788,0.0,...,0,0,1,151.759881,71.810524,12.198582,34.766101,8.660372,12.901127,151.759881


# Save Beijing & Beijing Daily

In [35]:
# Write the daily table; the 'date' index is written out as the first column.
beijing_daily.to_csv('beijing_grouped_daily.csv')

In [33]:
# Write the hourly table without its integer index.
# NOTE(review): despite the file name, `beijing` carries no AQI columns -- in this
# notebook they are only added to `beijing_daily`. Confirm the name is intended.
beijing.to_csv('beijing_ungrouped_aqis.csv', index = False)