In [47]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import json
import requests
from datetime import datetime
%matplotlib inline

In [48]:
# Monitoring channels: one dict per device holding its name, location label,
# channel id, API key and coordinates (lat/long).
# NOTE(review): the API keys below are committed in plain text; consider
# loading them from the environment or a secrets store instead.
channel_dict = [
    dict(zip(('name', 'location', 'id', 'api_key', 'lat', 'long'), row))
    for row in (
        ('aq_66', 'Nakasero II(2)', 912221, 'WNIR3I3JT61XARH2', 0.32232, 32.5757),
        ('aq_46', 'Kansanga', 782719, 'QTNAHITKHEJ4C9I5', 0.29875, 32.615),
        ('aq_58', 'Nansana East', 870142, 'RKD5ITYJH4PPEOYX', 0.3759, 32.528),
        ('aq_39', 'Lubaga', 737276, 'VHE2PZCJLMYHB7ZI', 0.295314, 32.553682),
        ('aq_63', 'Nansana West', 870147, 'QKS54M3HPNBOY004', 0.363, 32.529),
        ('aq_49', 'Lukuli(2)', 782722, 'OGG3UX99KTA41C1K', 0.2836, 32.6),
        ('aq_29', 'Bugolobi', 718028, 'HNTV5QEJTD8RTG2H', 0.3075, 32.6206),
        ('aq_59', 'Kyaliwajjala', 870143, '7OA7SZN6FI9GJDQ5', 0.381576, 32.647109),
        ('aq_32', 'Seguku', 730014, '8A9OOESUKHXIE80Z', 0.235668, 32.55764),
        ('aq_30', 'Kiwafu', 718029, 'TRF8VHH9DWUKBT59', 0.059604, 32.46032),
        ('aq_61', 'Kiwatule', 870145, '800XZ9WZSCA6PKQY', 0.373078, 32.628226),
        ('aq_26', 'Civic Centre', 689761, 'YEGZIVUS16X2R4BO', 0.314, 32.59),
        ('aq_43', 'Makindye I', 755612, 'AZ3Q42XK8FOUF8P7', 0.289, 32.589),
    )
]

In [49]:
def preprocessing(df):
    '''
    Aggregates the readings of one location into hourly means.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime 'created_at' column; all remaining columns
        are averaged per hour. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'created_at' (one row per hour, ascending), holding the
        hourly mean of every other column. Rows containing any NaN are
        dropped.
    '''
    hourly_df = (
        df.sort_values(by='created_at', ascending=True)
        .set_index('created_at')
        # Lowercase 'h': the uppercase 'H' hourly alias is deprecated
        # since pandas 2.2.
        .resample('h')
        .mean()
        # Hours with no readings show up as NaN rows after resampling.
        .dropna()
    )
    return hourly_df

In [50]:
def generate_stats(df, column_name):
    '''
    Summarises one column of a DataFrame.

    Returns a 6-tuple, each value rounded to one decimal place:
    (minimum, maximum, mean, median, 25th percentile, 75th percentile).
    '''
    values = df[column_name]
    lower_quartile, upper_quartile = (values.quantile(q) for q in (0.25, 0.75))
    summary = (
        values.min(),
        values.max(),
        values.mean(),
        values.median(),
        lower_quartile,
        upper_quartile,
    )
    return tuple(round(statistic, 1) for statistic in summary)

In [51]:
#Local sources
# Builds one summary row per channel from the 'local_sources' column of that
# location's CSV and writes the resulting table to disk.
columns = ['Parish', 'Latitude', 'Longitude', 'Minimum', 'Maximum', 'Mean PM 2.5', 'Median PM 2.5', '25th Percentile', '75th Percentile']
local_sources_rows = []
for location in channel_dict:
    df = pd.read_csv('./background vs local/'+location['location']+'.csv', usecols = ['created_at', 'local_sources'],
                     parse_dates =['created_at'])
    preprocessed_df = preprocessing(df)
    # generate_stats returns (min, max, mean, median, 25th pct, 75th pct),
    # which lines up with the last six entries of `columns`.
    stats = generate_stats(preprocessed_df, 'local_sources')
    local_sources_rows.append([location['location'], location['lat'], location['long'], *stats])
    print(location['location']+' done')
# Create the frame once from the collected rows instead of concatenating
# inside the loop: rows stay in channel_dict order and get a 0..n-1 index.
# The CSV is written with index=False, so its contents are unaffected.
final_local_sources_df = pd.DataFrame(local_sources_rows, columns=columns)
final_local_sources_df.to_csv('./background vs local/stats_table_local_sources_30_09_2020.csv', index=False)

Nakasero II(2) done
Kansanga done
Nansana East done
Lubaga done
Nansana West done
Lukuli(2) done
Bugolobi done
Kyaliwajjala done
Seguku done
Kiwafu done
Kiwatule done
Civic Centre done
Makindye I done


In [52]:
#Urban background
# Builds one summary row per channel from the 'urban_background' column of
# that location's CSV and writes the resulting table to disk.
columns = ['Parish', 'Latitude', 'Longitude', 'Minimum', 'Maximum', 'Mean PM 2.5', 'Median PM 2.5', '25th Percentile', '75th Percentile']
background_rows = []
for location in channel_dict:
    df = pd.read_csv('./background vs local/'+location['location']+'.csv', usecols = ['created_at', 'urban_background'],
                     parse_dates =['created_at'])
    preprocessed_df = preprocessing(df)
    # generate_stats returns (min, max, mean, median, 25th pct, 75th pct),
    # which lines up with the last six entries of `columns`.
    stats = generate_stats(preprocessed_df, 'urban_background')
    background_rows.append([location['location'], location['lat'], location['long'], *stats])
    print(location['location']+' done')
# Create the frame once from the collected rows instead of concatenating
# inside the loop: rows stay in channel_dict order and get a 0..n-1 index.
# The CSV is written with index=False, so its contents are unaffected.
final_background_df = pd.DataFrame(background_rows, columns=columns)
final_background_df.to_csv('./background vs local/stats_table_urban_background_30_09_2020.csv', index=False)

Nakasero II(2) done
Kansanga done
Nansana East done
Lubaga done
Nansana West done
Lukuli(2) done
Bugolobi done
Kyaliwajjala done
Seguku done
Kiwafu done
Kiwatule done
Civic Centre done
Makindye I done


In [53]:
# Display the urban-background summary table (one row per location).
final_background_df

Unnamed: 0,Parish,Latitude,Longitude,Minimum,Maximum,Mean PM 2.5,Median PM 2.5,25th Percentile,75th Percentile
0,Nakasero II(2),0.32232,32.5757,0.0,134.3,29.0,28.5,12.8,40.8
0,Kansanga,0.29875,32.615,0.0,121.1,25.8,23.1,10.3,37.5
0,Nansana East,0.3759,32.528,0.0,187.9,45.1,40.4,24.7,58.0
0,Lubaga,0.295314,32.553682,0.0,127.0,33.4,32.5,17.9,44.7
0,Nansana West,0.363,32.529,4.8,156.3,40.6,38.4,26.8,49.1
0,Lukuli(2),0.2836,32.6,0.0,176.5,30.9,27.6,11.2,43.2
0,Bugolobi,0.3075,32.6206,0.0,178.8,30.0,27.6,11.1,42.8
0,Kyaliwajjala,0.381576,32.647109,0.0,141.3,39.1,37.0,20.9,50.1
0,Seguku,0.235668,32.55764,0.0,140.8,31.4,30.5,16.0,42.9
0,Kiwafu,0.059604,32.46032,0.0,120.5,21.2,17.0,6.8,33.3


In [54]:
# Preview the first rows of the local-sources summary table.
final_local_sources_df.head()

Unnamed: 0,Parish,Latitude,Longitude,Minimum,Maximum,Mean PM 2.5,Median PM 2.5,25th Percentile,75th Percentile
0,Nakasero II(2),0.32232,32.5757,0.2,153.7,8.6,6.3,4.2,10.1
0,Kansanga,0.29875,32.615,0.2,108.9,6.8,5.1,3.3,8.3
0,Nansana East,0.3759,32.528,0.0,242.3,17.9,11.9,7.5,20.5
0,Lubaga,0.295314,32.553682,0.2,652.8,18.7,13.2,8.7,21.5
0,Nansana West,0.363,32.529,1.4,219.6,16.3,10.2,6.7,19.0
