In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import model_selection
from sklearn import metrics
from sklearn.preprocessing import StandardScaler, normalize
import sys
import sklearn.gaussian_process as gp
import matplotlib.pyplot as plt

In [2]:
def getData(fips):
    '''
    Load the mobility, Google Health Trends and confirmed-case rows of a county.

    Every CSV is read from a path relative to the working directory, reduced
    to the rows whose 'fips' column equals float(fips), and then has some
    leading columns removed by position.

    Loaders for the Google/Facebook symptom surveys, testing rates and county
    demographics are not part of this function.

    Parameters
    ----------
    fips : anything float() accepts
        County FIPS code, e.g. '36061'.

    Returns
    -------
    (mob, ght, cas) : tuple of DataFrames, or None
        None is returned, after printing a notice, when any of the three
        filtered tables has no rows.
    '''
    def county_rows(csv_path, dropped_positions):
        # Read one dataset, keep this county's rows only, then discard the
        # columns sitting at the given positions.
        table = pd.read_csv(csv_path)
        table = table[table['fips'] == float(fips)]
        return table.drop(table.columns[dropped_positions], axis=1)

    # Mobility and GHT files lose columns 0, 2 and 3; the case file only column 0.
    mob = county_rows('applemobilitydata/applemobilitycleaned_county.csv', [0, 2, 3])
    ght = county_rows('covidsymptoms/googlehealthtrends.csv', [0, 2, 3])
    cas = county_rows('covidstats/confirmed_cases_county.csv', [0])

    if any(table.shape[0] == 0 for table in (mob, ght, cas)):
        print('Missing one or more data for county, please use another county.')
        return None

    return mob, ght, cas

In [3]:
def prepareData(mob, ght, cas, window=7, mob_lag=4, ght_lag=12):
    '''
    Turn the per-county tables into standardised feature/target arrays.

    The mobility and trends tables are stacked, their first two columns after
    reset_index (the former index and the leading data column) are dropped,
    and the result is transposed so that each remaining input column becomes
    one row. The case table gets the same treatment and supplies the target.

    Feature column 0 is smoothed with a rolling mean and lagged; feature
    column 1 is lagged only. Rows left undefined by the rolling window or the
    lags are filled with 0 before scaling.

    NOTE(review): assumes `mob` and `ght` contribute exactly one row each, so
    that transposed columns 0 and 1 are mobility and trends respectively -
    confirm against the input files.

    Parameters
    ----------
    mob, ght, cas : DataFrame
        Tables as returned by getData.
    window : int, default 7
        Rolling-mean window applied to feature column 0.
    mob_lag : int, default 4
        Number of rows feature column 0 is shifted by after smoothing.
    ght_lag : int, default 12
        Number of rows feature column 1 is shifted by.

    Returns
    -------
    X, y : arrays produced by the fitted scalers' transform()
    scaleX, scaley : the StandardScaler instances fitted on X and y
    '''
    features = pd.concat([mob, ght]).reset_index()
    features = features.drop(features.columns[[0, 1]], axis=1).T.fillna(0)

    target = cas.reset_index()
    target = target.drop(target.columns[[0, 1]], axis=1).T.fillna(0)

    # Explicit copy: the column assignments below must act on an independent
    # frame, not on a selection of `features` (avoids SettingWithCopyWarning).
    X = features[[0, 1]].copy()
    y = target[[0]]

    # Smoothing and lagging leave NaN in the leading rows; these are zeroed
    # by the fillna below.
    X[0] = X[0].rolling(window=window).mean().shift(mob_lag)
    X[1] = X[1].shift(ght_lag)

    # Keep only the rows present in both features and target.
    X = X[X.index.isin(y.index)]
    y = y[y.index.isin(X.index)]
    X = X.fillna(0)

    scaleX = StandardScaler().fit(X)
    scaley = StandardScaler().fit(y)

    return scaleX.transform(X), scaley.transform(y), scaleX, scaley

In [None]:
def _plot_actual_vs_predicted(actual, predicted, title, xlabel, ylabel):
    '''Draw one new figure with the actual and predicted series overlaid.'''
    fig = plt.figure() # figsize=(10,10)
    ax = fig.add_subplot(111)
    ax.plot(actual.flatten(), label='Actual')
    ax.plot(predicted.flatten(), label='Predicted')
    ax.set_title(title)
    ax.legend()
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)


def plot(fips, county):
    '''
    Fit a Gaussian-process regressor for one county and plot its predictions.

    The prepared data is split 70/30 without shuffling, the regressor is
    fitted on the first part, and two figures are drawn: actual vs. predicted
    on the training part and on the testing part. Values are mapped back
    through the target scaler and predictions are clipped at zero.

    Parameters
    ----------
    fips : county FIPS code passed on to getData.
    county : str
        Label used in the figure titles.

    Returns
    -------
    None. Nothing is drawn when getData reports missing data for the county.
    '''
    data = getData(fips)
    if data is None:
        # getData has already printed the reason; unpacking None would raise.
        return
    X, y, scaleX, scaley = prepareData(*data)

    kernel = gp.kernels.ConstantKernel(1.0, (1e-1, 1e3)) * gp.kernels.RBF(10.0, (1e-3, 1e3))
    regressor = gp.GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10, alpha=0.1, normalize_y=False)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=False)
    regressor.fit(X_train, y_train)

    def predict_cases(features):
        # inverse_transform needs a 2-D array; predict may hand back a 1-D one
        # for a single target (depends on the scikit-learn version), so force
        # a column shape first. A no-op when the result is already (n, 1).
        scaled = np.asarray(regressor.predict(features)).reshape(-1, 1)
        return scaley.inverse_transform(scaled).clip(min=0)

    _plot_actual_vs_predicted(
        scaley.inverse_transform(y_train),
        predict_cases(X_train),
        f'{county} [TRAINING DATA]',
        "2020-01-22 to 2020-04-19",
        "Number of daily cases",
    )

    _plot_actual_vs_predicted(
        scaley.inverse_transform(y_test),
        predict_cases(X_test),
        f'{county} [TESTING DATA]',
        "2020-04-20 to 2020-05-27",
        "Number of cases",
    )

In [None]:
plot(fips='36061', county='New York County')

In [None]:
plot(fips='22071', county='Orleans, Louisiana')

In [None]:
plot(fips='36029', county='Erie, New York')

In [4]:
# FIPS code -> prediction gradient, filled in by mapping().
mappings = {}
def mapping(fips):
    '''
    Fit a Gaussian-process regressor for one county and record its trend.

    The prepared data is split 90/10 without shuffling. The regressor is
    fitted on the first part and used to predict the last part. The stored
    value is (last prediction - first prediction) / number of predictions,
    computed on un-scaled predictions clipped at zero.

    Parameters
    ----------
    fips : county FIPS code passed on to getData; also the key written
        into the module-level `mappings` dictionary.

    Returns
    -------
    None. Counties whose data cannot be loaded or is missing are skipped.
    '''
    try:
        passed = getData(fips)
    except Exception:
        # Best-effort batch run: a county that fails to load is skipped.
        # Catching Exception rather than everything keeps Ctrl-C working
        # while looping over all counties.
        return None

    if passed is None:
        return None

    # Reuse the tables already loaded instead of reading every CSV again.
    X, y, scaleX, scaley = prepareData(*passed)

    kernel = gp.kernels.ConstantKernel(1.0, (1e-1, 1e3)) * gp.kernels.RBF(10.0, (1e-3, 1e3))
    regressor = gp.GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10, alpha=0.1, normalize_y=False)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, shuffle=False)
    regressor.fit(X_train, y_train)

    # inverse_transform needs a 2-D array; predict may hand back a 1-D one for
    # a single target (depends on the scikit-learn version), so force a column
    # shape first. Flatten afterwards so the endpoints are plain scalars.
    y_pred = np.asarray(regressor.predict(X_test)).reshape(-1, 1)
    y_pred = scaley.inverse_transform(y_pred).clip(min=0).ravel()

    gradient = float((y_pred[-1] - y_pred[0]) / len(y_pred))
    mappings[fips] = gradient

In [5]:
# Run mapping() once for every code in the 'fips' column of the lookup file,
# passing each code as a string.
fips = pd.read_csv('county_fips_codes.csv')['fips']
for fip in fips.tolist():
    mapping(str(fip))

Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, ple

Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, ple

Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, ple

Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, ple

Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, ple

Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, ple

Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, ple

Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, ple

Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, ple

Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, ple

Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, ple

Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, ple

Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, ple

Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, ple

Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.
Missing one or more data for county, please use another county.


In [6]:
# Display the FIPS -> gradient dictionary filled in by mapping().
mappings

{'1001': 0.11122933474238608,
 '1003': 0.08670510333451796,
 '1005': 0.07874015748031496,
 '1007': 0.043355544204081324,
 '1009': 5.97069382035204e-06,
 '1013': 0.21470561964173315,
 '1015': 1.0400826191175456e-05,
 '1017': 0.01935118661285655,
 '1019': 5.260879564645143e-05,
 '1021': 0.0,
 '1025': 0.07694521134636688,
 '1027': 0.0,
 '1029': 0.0,
 '1031': 0.10500889216507482,
 '1033': 1.9605414611015923e-05,
 '1035': 0.010947505511996273,
 '1039': 0.0,
 '1043': 6.542950455195723e-07,
 '1045': 0.056763167943813295,
 '1047': -0.04593933904622013,
 '1049': 0.07669504606837074,
 '1051': 0.0,
 '1053': 1.0120109878313928e-14,
 '1055': 0.00018554409189500771,
 '1057': 0.008819230812528108,
 '1059': 0.3022410660205936,
 '1061': 0.010243266726203657,
 '1063': 5.520434729911782e-05,
 '1065': -0.007400191443324642,
 '1067': 0.037256337175933615,
 '1069': 0.0671993703741561,
 '1071': 0.00419991960006686,
 '1073': 1.0133252574197456,
 '1075': 0.005974860336149976,
 '1077': 0.048533568569728694,
 '1

In [78]:
# One row per dictionary entry; the two columns are labelled 0 (key) and 1 (value).
df = pd.DataFrame(list(mappings.items()))

In [79]:
def tanhFunc(x):
    '''Return tanh of the second value of row `x`, converted to float first.'''
    row_values = x.values
    second_value = float(row_values[1])
    return np.tanh(second_value)

In [80]:
# Replace column 1 with tanhFunc applied row by row, then give both columns names.
# Run once per freshly built df: the column label 1 is gone after the rename,
# so re-running this cell on the same df does not repeat the same operation.
df[1] = df.apply(tanhFunc, axis=1)
df.columns = ['fips', 'mapping']

In [82]:
# Persist the table; the row index is written as the first CSV column.
df.to_csv('county_prediction_mapping.csv', index=True)