# Tensorflow Model

In [22]:
from os import path
from random import sample

import tensorflow as tf

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd


In [2]:
mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False

### Load, split, and normalize the training data

In [66]:
def load_raw_evalmetrics_df():
    return pd.read_pickle(path.join('.', 'raw_evalmetrics_df.pkl'))

def prep_multivarate(df):
    """ Calculates density and removes unnecessary columns """
    new_df = df.copy()
    new_df['density'] = df.latestTotalPopulation / df.LND110210
    new_df = new_df.drop(columns=['latestTotalPopulation','fips','LND110210'])
    return new_df

def split_sample(df, percent_train=.80):
    county = set(df.county)
    train_county = sample(county, int(len(county) * percent_train))
    val_county = county - set(train_county)
    train_filter = [c in train_county for c in df.county]
    val_filter = [c in val_county for c in df.county]
    return df[train_filter].fillna(0), df[val_filter].fillna(0)

def normalize_df(df):
    new_df = df.copy()
    cols = ['confirmed_cases','confirmed_deaths', 'confirmed_recoveries','hospitalIcuBeds','hospitalStaffedBeds','hospitalLicensedBeds', 'density']
    for col in cols:
        data = new_df[col].astype('float')
        data_mean = data.mean(axis=0)
        data_std = data.std(axis=0)
        new_df[col] = (data-data_mean)/data_std
    return new_df.fillna(0)

def get_data():
    evalmetric_df = load_raw_evalmetrics_df()
    prepped_df = prep_multivarate(evalmetric_df)
    train_df, val_df = split_sample(prepped_df)
    return normalize_df(train_df), val_df



### Prep the data for RNN

In [67]:
# df = load_raw_evalmetrics_df()
# df = prep_multivarate_df_predict_death(df)
# train_df, val_df = split_sample(df)
# normalized_train_df = normalize_df(train_df)
# normalized_train_df
get_data()


(                                      county        date  confirmed_cases  \
 0       Abbeville_SouthCarolina_UnitedStates  2020-01-01        -0.067146   
 144            Acadia_Louisiana_UnitedStates  2020-01-01        -0.067146   
 288           Accomack_Virginia_UnitedStates  2020-01-01        -0.067146   
 432                   Ada_Idaho_UnitedStates  2020-01-01        -0.067146   
 576                  Adair_Iowa_UnitedStates  2020-01-01        -0.067146   
 ...                                      ...         ...              ...   
 466415          Yuba_California_UnitedStates  2020-05-23        -0.062721   
 466703             Yuma_Arizona_UnitedStates  2020-05-23         0.013137   
 466991             Zapata_Texas_UnitedStates  2020-05-23        -0.065882   
 467135             Zavala_Texas_UnitedStates  2020-05-23        -0.065408   
 467279      Ziebach_SouthDakota_UnitedStates  2020-05-23        -0.066988   
 
         confirmed_deaths  confirmed_recoveries  hospitalIcuBe

In [60]:
normalized_train_df.fillna(0)

Unnamed: 0,county,date,confirmed_cases,confirmed_deaths,confirmed_recoveries,hospitalIcuBeds,hospitalStaffedBeds,hospitalLicensedBeds,density
0,Abbeville_SouthCarolina_UnitedStates,2020-01-01,-0.066701,-0.044823,-0.029246,-0.288129,-0.340598,-0.346901,-0.207182
144,Acadia_Louisiana_UnitedStates,2020-01-01,-0.066701,-0.044823,-0.029246,-0.278693,-0.170109,-0.186308,-0.154326
288,Accomack_Virginia_UnitedStates,2020-01-01,-0.066701,-0.044823,-0.029246,-0.250386,-0.316076,-0.319969,-0.181302
576,Adair_Iowa_UnitedStates,2020-01-01,-0.066701,-0.044823,-0.029246,-0.307000,-0.340598,-0.346901,-0.251456
720,Adair_Kentucky_UnitedStates,2020-01-01,-0.066701,-0.044823,-0.029246,0.000000,0.000000,0.000000,-0.210288
...,...,...,...,...,...,...,...,...,...
466271,Young_Texas_UnitedStates,2020-05-23,-0.066066,-0.042686,-0.029246,-0.269258,-0.311405,-0.321964,-0.243024
466415,Yuba_California_UnitedStates,2020-05-23,-0.062253,-0.042686,-0.029246,-0.118287,-0.195799,-0.151397,-0.119091
466847,Yuma_Colorado_UnitedStates,2020-05-23,-0.064795,-0.044823,-0.029246,-0.288129,-0.334759,-0.341913,-0.261301
467135,Zavala_Texas_UnitedStates,2020-05-23,-0.064954,-0.044823,-0.029246,0.000000,0.000000,0.000000,-0.255520


In [55]:
data = train_df.confirmed_cases.astype('float')

data_mean = data.mean(axis=0)
data_std = data.std(axis=0)
data = (data-data_mean)/data_std
print(round(data.mean(axis=0),10))
print(round(data.std(axis=0),10))

0.0
1.0
