In [1]:
import pandas as pd
import pickle as p
import os
import re
import glob

In [2]:
def r_csv(dataset):
    '''
    load the csv file of a dataset

    input string = dataset name; the file read is
                   Datasets/<dataset>/data/<dataset>.csv
    return dataframe as parsed by pandas.read_csv with default options
    '''
    data_dir = os.path.join('Datasets', dataset, 'data')
    return pd.read_csv(os.path.join(data_dir, dataset + '.csv'))

def r_df(dataset):
    '''
    load the pickled object of a dataset

    input string = dataset name; the file read is
                   Datasets/<dataset>/data/<dataset> (no extension)
    return whatever object was pickled there (a dataframe when the file
           was produced by w_df with a dataframe)

    note: unpickling can execute arbitrary code, only use on trusted files
    '''
    data_dir = os.path.join('Datasets', dataset, 'data')
    with open(os.path.join(data_dir, dataset), 'rb') as handle:
        return p.load(handle)

def w_csv(dataset, df):
    '''
    save a dataframe as the csv file of a dataset

    input string = dataset name; the file written is
                   Datasets/<dataset>/data/<dataset>.csv
    input df = dataframe, written with DataFrame.to_csv default options
               (so the index is written as the first column)
    '''
    data_dir = os.path.join('Datasets', dataset, 'data')
    df.to_csv(os.path.join(data_dir, dataset + '.csv'))
    
def w_df(dataset, df):
    '''
    save an object as the pickle file of a dataset

    input string = dataset name; the file written is
                   Datasets/<dataset>/data/<dataset> (no extension)
    input df = object to pickle (a dataframe in normal use)
    '''
    data_dir = os.path.join('Datasets', dataset, 'data')
    with open(os.path.join(data_dir, dataset), "wb") as sink:
        p.dump(df, sink)

In [3]:
def _pollutant_level(value, thresholds):
  """
  return the 1-based position of the first band whose upper bound is
  >= value; a value above every bound gets len(thresholds) + 1
  (NaN fails every comparison, so it also lands in that last level)
  """
  for level, upper in enumerate(thresholds, start=1):
    if value <= upper:
      return level
  return len(thresholds) + 1


def _AQI(row):
  """
  return Air Quality Index

  input row = mapping / Series with 'DUSMASS25' and 'SO2SMASS' entries
  return int between 1 and 5: floor of the mean of the two
         single-pollutant levels
  """
  # pollutants AQIs EU: upper bounds of levels 1-4, anything above the
  # last bound is level 5.
  # The previous lists were written with decimal commas ("10,0"), which
  # Python reads as two separate elements (10 and 0), not as 10.0.
  # NOTE(review): the row values are assumed to be in the same units as
  # these bounds - confirm against the data source
  pm25 = (10.0, 20.0, 25.0, 50.0)
  so2 = (100.0, 200.0, 350.0, 500.0)

  # exactly one level per pollutant (the first band that contains the value)
  levels = [
    _pollutant_level(row['DUSMASS25'], pm25),
    _pollutant_level(row['SO2SMASS'], so2),
  ]

  #  final AQI is the average of single pollutant AQIs (floored)
  return sum(levels) // len(levels)

In [4]:
def AOD_merge(path):
    '''
    create unique dataframe for satellite dataset

    input string = directory holding the csv files (every <path>/*.csv
                   is read)
    return dataframe = all files stacked with a fresh 0..n-1 index; each
                       row also carries Country / City / Location / Time
                       values taken from the path of the file it came from

    raises ValueError when no csv file is found in the directory
    '''
    # glob returns files in filesystem-dependent order; sorting keeps the
    # row order of the result reproducible between runs and machines
    files = sorted(glob.glob(path + "/*.csv"))
    if not files:
        # pd.concat would raise a ValueError on an empty list anyway;
        # raise the same type with a message that names the directory
        raise ValueError('No csv files found in {!r}'.format(path))

    aod = []
    for ff in files:
        df = pd.read_csv(ff)
        # 'Unnamed: 8' is the name pandas gives to a ninth column with an
        # empty header (presumably a trailing delimiter in the files)
        df = df.drop(labels='Unnamed: 8', axis=1)
        df.columns = ['YYYY', 'MM', 'DD', 'Latitude', 'Longitude', 'AOD1', 'AOD3', 'STD3']

        # NOTE(review): the metadata tokens are taken by absolute position
        # (index 7 onward, minus the extension) from the '/', '_', '.'
        # split of the WHOLE path, so the result depends on how deep `path`
        # is and on '_' or '.' in directory names - confirm with callers
        f1 = re.split('[/_.]', ff)
        f1 = f1[7:len(f1)-1]

        # a scalar assignment broadcasts to every row; a row-by-row apply
        # returning a constant yields the same values much more slowly
        df['Country'] = f1[0]
        df['City'] = f1[1]
        df['Location'] = f1[2]
        df['Time'] = f1[3]
        aod.append(df)

    AOD = pd.concat(aod, ignore_index = True, sort = True)
    return AOD