#### Notebook for cleaning raw data

In [2]:
import json
import re
import os

In [3]:
def process_file(filename, metric):
    """Flatten one metric of a raw measurements file into per-date rows.

    Reads ``data/raw/<filename>``, which must hold a JSON object shaped like
    ``{"pollutionMeasurements": {"date": {<date>: {<metric>: {<station>:
    <value>}}}}}``, and writes ``data/cleaned/<stem>_sm_<metric>.json``, where
    ``<stem>`` is ``filename`` without its extension. The output is a JSON
    list with one object per date: a ``"date"`` key followed by one key per
    station that has a value for ``metric`` on that date.

    Args:
        filename: Name of the raw JSON file inside ``data/raw``.
        metric: Measurement key to extract for every date (e.g. ``"O3"``).

    Raises:
        FileNotFoundError: If the raw file does not exist.
        KeyError: If the file lacks the expected structure, or if any date
            has no entry for ``metric``. Nothing is written in that case.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(os.path.join("data", "raw", filename), "r", encoding="utf-8") as f:
        base = json.load(f)

    by_date = base['pollutionMeasurements']['date']

    data_list = []
    for d, measurements in by_date.items():
        readings = measurements[metric]
        data = {'date': d}
        # Emit stations in sorted order so the output file is identical from
        # run to run (iterating a set of strings is not, because of hash
        # randomization) and keys line up consistently across rows.
        for s in sorted(readings):
            data[s] = readings[s]
        data_list.append(data)

    # splitext handles any extension, instead of indexing into a regex result
    # that is empty (IndexError) when the name does not contain ".json".
    stem = os.path.splitext(filename)[0]
    output_name = stem + "_sm_" + metric + ".json"

    # Create the destination on first use so a fresh checkout does not fail.
    out_dir = os.path.join("data", "cleaned")
    os.makedirs(out_dir, exist_ok=True)

    with open(os.path.join(out_dir, output_name), "w", encoding="utf-8") as f:
        json.dump(data_list, f)

#### Contamination data

In [4]:
# Extract the O3 series from the 2015 contaminant file.
process_file(filename="contaminantes_2015.json", metric="O3")

In [5]:
# Extract the O3 series from the 2016 contaminant file.
process_file(filename="contaminantes_2016.json", metric="O3")

#### Air pressure data

In [5]:
# Extract the PA series from the 2015 air-pressure file.
process_file(filename="PA_2015.json", metric="PA")

In [6]:
# Extract the PA series from the 2016 air-pressure file.
process_file(filename="PA_2016.json", metric="PA")

#### Other meteorological data

In [7]:
# RH = Humidity
# TMP = Temperature
# WSP = Wind Speed
# WDR = Wind Direction
# PBa = Pressure?

In [8]:
# One cleaned file per metric, processed in the order RH, TMP, WSP.
for metric in ("RH", "TMP", "WSP"):
    process_file("meteorología_2015.json", metric)

In [9]:
# One cleaned file per metric, processed in the order RH, TMP, WSP.
for metric in ("RH", "TMP", "WSP"):
    process_file("meteorología_2016.json", metric)