In [0]:
# Mount Google Drive into the Colab runtime at /content/drive; every data path
# used further down in this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import pandas as pd
import os

In [0]:
def _AQI(row):
  """
  return Air Quality Index
  """
  # pollutants AQIs EU
  pm25 = [10,0, 20,0, 25,0, 50,0]
  so2 = [100,0, 200,0, 350,0, 500,0]
  
  aqi = []
  for i in range(0,5):
    if row['DUSMASS25'] <= pm25[i]:
      aqi.append(i+1)
    if row['SO2SMASS'] <= so2[i]:
      aqi.append(i+1) 
  while len(aqi) < 2:
    aqi.append(5)

  #  final aqi is the average of single pollutant AQIs 
  AQI = ( sum(aqi) // len(aqi) )
  
  return AQI


In [0]:
# load the MERRA2 table and derive one AQI value per record (row-wise)
merra = pd.read_csv("/content/drive/My Drive/NASA/MERRA2/20181231.state.slv.aer.csv")
merra['AQI'] = merra.apply(_AQI, axis=1)


In [0]:
# create unique satellite dataset in AOD variable:
# every CSV in the AOD_DATA folder is loaded, tagged with the metadata tokens
# taken from its file name, and all of them are stacked into one DataFrame.

import re
import glob

path = "/content/drive/My Drive/NASA/AOD_DATA"

# sorted() makes the row order of AOD reproducible; glob's own order is arbitrary
files = sorted(glob.glob(path + "/*.csv"))
if not files:
  # pd.concat would raise ValueError on an empty list anyway; fail early with
  # a message that points at the actual cause (wrong path / Drive not mounted)
  raise ValueError("No CSV files found in " + path)

aod = []
for ff in files:
  df = pd.read_csv(ff)
  # drop the extra unnamed 9th column when the parser produced one
  df = df.drop(columns='Unnamed: 8', errors='ignore')
  df.columns = ['YYYY', 'MM', 'DD', 'Latitude', 'Longitude', 'AOD1', 'AOD3', 'STD3']
  # Split only the base name (not the full path) so the token positions do not
  # depend on how deep the data folder is; the trailing extension token is
  # discarded. Presumably files are named Country_City_Location_Time.csv.
  f1 = re.split('[/_.]', os.path.basename(ff))[:-1]
  # scalar assignment broadcasts the constant to every row
  df['Country'] = f1[0]
  df['City'] = f1[1]
  df['Location'] = f1[2]
  df['Time'] = f1[3]
  aod.append(df)


AOD = pd.concat(aod, ignore_index = True, sort = True)


In [0]:
# persist both datasets: first as pickled DataFrames, then as CSV exports
import pickle as p

for frame, target in (
  (AOD, "/content/drive/My Drive/NASA/aod"),
  (merra, "/content/drive/My Drive/NASA/merra"),
):
  with open(target, "wb") as sink:
    p.dump(frame, sink)

# CSV copies (written with pandas defaults, i.e. including the index column)
AOD.to_csv("/content/drive/My Drive/NASA/AOD.csv")
merra.to_csv("/content/drive/My Drive/NASA/MERRA2.csv")