In [9]:
#Install tensorflow in jupyter notebook if needed
import sys
!{sys.executable} -m pip install --user tensorflow



In [1]:
import tensorflow as tf
import requests
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt  # Python standard library datetime  module
import csv
from plotnine import *
from IPython.display import display, HTML

In [3]:
def _stats(a):
    """ calc_stats calculates the mean and standard deviation of numpy array a."""
    a_std = a.std(axis=0)
    # We don't want to be solving for numerical noise.
    a_std[a_std < 1.e-6] = 1
    return _twod(a.mean(axis=0)), _twod(a_std)

def _twod(a):
    """ twod converts a 1-d array to 2-d """
    if len(a.shape) == 1:
        a = a.reshape((1, a.shape[0]))
    return a

def decode_func(num_conc, num_met):
    """Build a decoder for a single serialized TFRecord example.

    Each example holds input concentration and meteorology vectors and an
    output delta-concentration vector.

    Args:
        num_conc: length of the 'conc' and 'delta' float32 features.
        num_met: length of the 'met' float32 feature.

    Returns:
        A function that takes one serialized example and returns
        ``({"conc": conc, "met": met}, delta)``.
    """
    def _decode(serialized_example):
        # Use the tf.io names: the top-level tf.parse_single_example and
        # tf.FixedLenFeature aliases no longer exist in TensorFlow 2.x.
        features = tf.io.parse_single_example(
            serialized_example,
            features={
                'conc': tf.io.FixedLenFeature([num_conc], tf.float32),
                'met': tf.io.FixedLenFeature([num_met], tf.float32),
                'delta': tf.io.FixedLenFeature([num_conc], tf.float32),
            })
        return {"conc":features["conc"], "met": features["met"]}, features["delta"]
    return _decode


# Lengths of the 'conc'/'delta' and 'met' feature vectors handed to
# decode_func, and the batch size used when reading the records.
n_conc_vars = 77
n_met_vars = 4
n_samples = 100000

# Read the TFRecord file, decode every example and group the decoded examples
# into batches of n_samples.
dataset = tf.data.TFRecordDataset("data/dt60_diurnal_10000080r_t0.tfrecords")
dataset = dataset.apply(tf.contrib.data.map_and_batch(
    decode_func(n_conc_vars, n_met_vars),n_samples))
# NOTE(review): tf.contrib, make_one_shot_iterator() and tf.Session are
# TensorFlow 1.x graph-mode APIs; this cell will not run on TensorFlow 2.x.
inputs, input_delta = dataset.make_one_shot_iterator().get_next()
sess = tf.Session()
# NOTE(review): as_default() only installs sess as the default session; it
# does not close it, so sess stays open after this block. Confirm whether
# later cells rely on that before switching to `with tf.Session() as sess`.
with sess.as_default():
    # Pull one batch into numpy arrays, then compute the axis-0 mean and
    # standard deviation of each array.
    conc_, met_, delta_ = sess.run([inputs["conc"], inputs["met"], input_delta])
    conc_mean, conc_std = _stats(conc_)
    met_mean, met_std = _stats(met_)
    delta_mean, delta_std = _stats(delta_)

# Column names of the initial-concentration table; only the header row is
# needed, so a single data row is read.
species_titles = pd.read_csv('data/UB_initial_conc.csv', nrows=1).columns.tolist()
print(species_titles)
# Keep the leading n_conc_vars columns of the range table (previously a
# hard-coded 77 that duplicated n_conc_vars).
# NOTE(review): the rendered table further down shows an 'Unnamed: 0' column.
# If that is column 0 of the CSV, this positional slice keeps it and drops the
# last species column - confirm whether read_csv(..., index_col=0) is intended.
ranges = pd.read_csv('data/conc_range.csv')
ranges = ranges.iloc[:, 0:n_conc_vars]

['H2SO4', 'HNO3', 'HCl', 'NH3', 'NO', 'NO2', 'NO3', 'N2O5', 'HONO', 'HNO4', 'O3', 'O1D', 'O3P', 'OH', 'HO2', 'H2O2', 'CO', 'SO2', 'CH4', 'C2H6', 'CH3O2', 'ETHP', 'HCHO', 'CH3OH', 'ANOL', 'CH3OOH', 'ETHOOH', 'ALD2', 'HCOOH', 'RCOOH', 'C2O3', 'PAN', 'ARO1', 'ARO2', 'ALK1', 'OLE1', 'API1', 'API2', 'LIM1', 'LIM2', 'PAR', 'AONE', 'MGLY', 'ETH', 'OLET', 'OLEI', 'TOL', 'XYL', 'CRES', 'TO2', 'CRO', 'OPEN', 'ONIT', 'ROOH', 'RO2', 'ANO2', 'NAP', 'XO2', 'XPAR', 'ISOP', 'ISOPRD', 'ISOPP', 'ISOPN', 'ISOPO2', 'API', 'LIM', 'DMS', 'MSA', 'DMSO', 'DMSO2', 'CH3SO2H', 'CH3SCH2OO', 'CH3SO2', 'CH3SO3', 'CH3SO2OO', 'CH3SO2CH2OO', 'SULFHOX', 'Num', 'DpgN', 'Sigmag', 'JHyst', 'Water', 'SO4', 'PNO3', 'Cl', 'NH4', 'PMSA', 'Aro1', 'Aro2', 'Alk1', 'Ole1', 'PApi1', 'PApi2', 'Lim1', 'Lim2', 'CO3', 'Na', 'Ca', 'Oin', 'OC', 'BC']


In [5]:
# conc_ is indexed as conc_[simulation number, species]; with the settings
# above that is a (100000, 77) array.

display(HTML(ranges.to_html()))

# Label the gas-phase columns with the leading n_conc_vars species names
# instead of repeating the literal 77, so the labels follow the same constant
# that sized the 'conc' and 'delta' features.
conc_columns = species_titles[:n_conc_vars]
concentration = pd.DataFrame(conc_, columns=conc_columns)
Delta = pd.DataFrame(delta_, columns=conc_columns)
met = pd.DataFrame(met_, columns=['Temp', 'Pres', 'RH', 'SZA'])

print(concentration.shape)
#ggplot(concentration , aes(x=met.SZA, y='O3')) + geom_point()

Unnamed: 0,H2SO4,HNO3,HCl,NH3,NO,NO2,NO3,N2O5,HONO,HNO4,O3,O1D,O3P,OH,HO2,H2O2,CO,SO2,CH4,C2H6,CH3O2,ETHP,HCHO,CH3OH,ANOL,CH3OOH,ETHOOH,ALD2,HCOOH,RCOOH,C2O3,PAN,ARO1,ARO2,ALK1,OLE1,API1,API2,LIM1,LIM2,PAR,AONE,MGLY,ETH,OLET,OLEI,TOL,XYL,CRES,TO2,CRO,OPEN,ONIT,ROOH,RO2,ANO2,NAP,XO2,XPAR,ISOP,ISOPRD,ISOPP,ISOPN,ISOPO2,API,LIM,DMS,MSA,DMSO,DMSO2,CH3SO2H,CH3SCH2OO,CH3SO2,CH3SO3,CH3SO2OO,CH3SO2CH2OO,SULFHOX
0,0,0,0,0,0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,30,30,50,20,20,0.1,0.1,0.2,2,300,0.1,0.1,0.1,0.5,50,500,20,1900,2,2,2,50,20,20,30,5,50,5,20,10,30,50,50,50,50,10,10,10,10,50,50,50,50,50,50,100,100,1,10,10,20,50,20,10,10,10,10,30,10,20,10,10,10,10,10,0.1,0.5,0.1,0.2,0.1,0.1,0.1,0.1,0.1,0.1,0.1


(100000, 77)
