# Script for deriving Canada LUR three year mean variables from one year mean variables #

**Author:** Andrew Larkin. Developed for Perry Hystad, Oregon State University <br>
**Date created:** September 17, 2018

### Summary ###

The desired Canada LUR NO2 model will estimate mean near-surface NO2 concentrations for the three-year period 2014-2016.  This script takes one-year estimates of buffer variables and creates three-year averages.  The averaging is scripted because each buffer-based exposure has many buffer variables (one per buffer distance and year), which makes computing the averages by hand impractical.

### Setup ###

Load libraries, define filepaths and constants

In [9]:
import pandas as ps
import os
import math
# Folder that holds the input and output predictor tables.
workFolder = "C:/users/larkinan/desktop/CanadaLUR/"

# Input table of one-year buffer variables, and the output table that also
# carries the derived three-year averages.
inputEnvCSV = "".join((workFolder, "Canada_LUR_Vars_Sep17_18.csv"))
outputEnvCSV = "".join((workFolder, "Canada_LUR_Varsv2_Sep17_18.csv"))

# NOTE(review): startYear/endYear are not referenced by the code below;
# presumably they describe the years covered by varNames -- confirm.
startYear, endYear = 2013, 2017

# Column-name prefixes of the one-year variables. A full column name is
# '<prefix><buffer distance>m', e.g. 'N350m' for prefix 'N3' and distance 50.
varNames = ["N3", "N4", "N5", "N6", "N7"]

# Buffer distances used to build the column names above.
bufferDists = [
    50, 100, 250, 500, 750,
    1000, 2000, 3000, 4000, 5000,
    10000, 15000, 20000,
]

### Helper functions ###

In [2]:
# load predictor variables from csv file 
def loadEnvInputs(inputCSV):
    """Read the predictor-variable table from a csv file.

    INPUTS:
        inputCSV - path (or file-like object) handed straight to pandas.read_csv
    OUTPUTS:
        pandas DataFrame with the parsed contents of inputCSV
    """
    return ps.read_csv(inputCSV)

In [25]:
# calculate three year average for one variable
def calcOneAverage(inData, varNames, newVarname):
    """Store the row-wise mean of several columns in a new column, in place.

    The columns are summed with ordinary '+', so a missing value in any of
    them makes the resulting average missing for that row.

    INPUTS:
        inData (DataFrame) - table holding the columns to average; modified in place
        varNames (list of str) - names of the columns to average
        newVarname (str) - name of the column that receives the average
    """
    numVars = len(varNames) * 1.0
    # start from a column of zeros and accumulate one source column at a time
    inData[newVarname] = 0
    for colName in varNames:
        inData[newVarname] = inData[newVarname] + inData[colName]
    inData[newVarname] = inData[newVarname] / numVars

In [40]:
# calculate three year averages for all predictor variables
def calcEnvThreeYearAvgs(inData):
    """Add all rolling three-variable average columns to inData, in place.

    For every distance in the module-level bufferDists and every run of three
    consecutive entries in the module-level varNames, the three columns named
    '<var><dist>m' are averaged by calcOneAverage into a column named
    'NDVI_<a>_<b>_<dist>m', where a = 13 + index of the first variable in the
    run and b = a + 2.

    INPUTS:
        inData (DataFrame) - table containing the '<var><dist>m' columns
    """
    # number of three-wide windows that fit in varNames (none if fewer than 3)
    numWindows = len(varNames) - 2
    for bufferDist in bufferDists:
        distSuffix = str(bufferDist) + 'm'
        for windowStart in range(numWindows):
            windowCols = [prefix + distSuffix
                          for prefix in varNames[windowStart:windowStart + 3]]
            avgName = ('NDVI_' + str(13 + windowStart) + '_'
                       + str(13 + windowStart + 2) + '_' + distSuffix)
            calcOneAverage(inData, windowCols, avgName)

In [None]:
# load data from air monitor csv files and merge
def loadAirMonitorCSVFiles(dataFolder):
    """Load every csv file in a folder and outer-merge them on 'NAPS ID'.

    INPUTS:
        dataFolder (str) - folder containing the air monitor csv files
    OUTPUTS:
        pandas DataFrame with one row per 'NAPS ID' found in any file and the
        columns of all files side by side
    RAISES:
        ValueError - if dataFolder contains no file ending in '.csv'
    """
    # os.listdir order is unspecified; sort so the merged column order is
    # reproducible from run to run
    filesToProcess = [
        os.path.join(dataFolder, candidateFile)
        for candidateFile in sorted(os.listdir(dataFolder))
        if candidateFile.endswith('.csv')
    ]
    if not filesToProcess:
        raise ValueError("no .csv files found in " + str(dataFolder))

    mergedAvgs = ps.read_csv(filesToProcess[0])
    for csvPath in filesToProcess[1:]:
        tempFile = ps.read_csv(csvPath)
        # outer join keeps monitors that are missing from some of the files
        mergedAvgs = ps.merge(mergedAvgs, tempFile, on='NAPS ID', how='outer')
    return mergedAvgs

In [None]:
# calculate three year averages for air monitor records
def calcThreeYearNO2Avg(inData):
    """Add the 2014-2016 mean NO2 and its observation count to inData.

    Two columns are written to inData in place:
        'numObs' - how many of 'Mean_2014', 'Mean_2015', 'Mean_2016' are
                   non-missing for the row
        'meanNO2_2014_2016' - mean of the non-missing annual values; missing
                              (NaN) when all three years are missing

    INPUTS:
        inData (DataFrame) - table with the three 'Mean_<year>' columns
    OUTPUTS:
        the same DataFrame, with the two columns added
    """
    yearCols = ['Mean_2014', 'Mean_2015', 'Mean_2016']
    annualMeans = inData[yearCols]
    # count each of the three years exactly once
    inData['numObs'] = annualMeans.notnull().sum(axis=1)
    # missing years contribute 0 to the sum and are excluded from the divisor
    inData['meanNO2_2014_2016'] = (annualMeans.fillna(0).sum(axis=1)
                                   / (inData['numObs'] * 1.0))
    return inData

In [None]:
# load and preprocess air monitor records
def processNO2MonitorData(annualNO2Folder):
    """Load the air monitor records and keep monitors with usable averages.

    INPUTS:
        annualNO2Folder (str) - folder containing the annual NO2 csv files
    OUTPUTS:
        DataFrame of monitor records with the three-year NO2 average, limited
        to rows whose 'numObs' is greater than zero
    """
    # the loader defined in this file is loadAirMonitorCSVFiles
    rawData = loadAirMonitorCSVFiles(annualNO2Folder)
    threeYearAvg = calcThreeYearNO2Avg(rawData)
    # drop monitors that have no annual mean in any of the years
    screenedData = threeYearAvg[threeYearAvg['numObs'] > 0]
    return screenedData

In [None]:
# load and preprocess predictor variables
def processEnvExposureData(inputCSV, outputCSV):
    """Return the predictor table with three-year averages, computing it once.

    If outputCSV does not exist yet, the one-year variables are read from
    inputCSV, the three-year averages are added and the result is written to
    outputCSV. In either case the table is then read back from outputCSV.

    INPUTS:
        inputCSV (str) - path to the csv file of one-year predictor variables
        outputCSV (str) - path to the csv file holding the derived averages
    OUTPUTS:
        DataFrame read from outputCSV
    """
    # outputCSV acts as a cache: only recompute when it is missing
    if not os.path.exists(outputCSV):
        inputData = loadEnvInputs(inputCSV)
        calcEnvThreeYearAvgs(inputData)
        # NOTE(review): to_csv also writes the row index, which comes back as
        # an extra unnamed column when the file is re-read below -- confirm
        # whether that column is wanted.
        inputData.to_csv(outputCSV)
    envExposures = ps.read_csv(outputCSV)
    return envExposures

### Main function ###

In [43]:
def main():
    """Build the modelling dataset: monitor NO2 averages joined to predictors.

    The air monitor records and the predictor variables are each processed,
    inner-merged on 'NAPS ID', and the merged table is written to resultsFile.
    """
    # NOTE(review): annualNO2Folder and resultsFile are not assigned in the
    # setup cell shown in this file -- confirm they are defined before running.
    monitorRecords = processNO2MonitorData(annualNO2Folder)
    predictorVars = processEnvExposureData(inputEnvCSV, outputEnvCSV)
    ps.merge(monitorRecords, predictorVars, on='NAPS ID').to_csv(resultsFile)

### Call main function ###

In [44]:
# Run the whole pipeline: process the monitor and predictor data, merge them
# on 'NAPS ID' and write the merged table to disk.
main()