In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sqlalchemy import create_engine
from config import DB_NAME, DB_URL, DB_PORT, DB_NAME, USERNAME, PASSWORD

# Import Data

In [2]:
engine_string = "postgresql+psycopg2://{user}:{password}@{host}:{port}/{database}".format(
    user = USERNAME,
    password = PASSWORD,
    host = DB_URL,
    port = DB_PORT,
    database = DB_NAME
)

engine = create_engine(engine_string)

In [3]:
# Pull tables from database.
# !!! Note read_sql_table() requires sqlalchemy v1.4
sparse_covid_df = pd.read_sql_table('sparse_county_covid',engine).drop(columns=['index'])
dense_covid_df = pd.read_sql_table('dense_county_covid',engine).drop(columns=['index'])
weather_df = pd.read_sql_table('county_weather',engine).drop(columns=['index'])
county_df = pd.read_sql_table('county_pop',engine)

In [4]:
#####################################################################
### !!! To be removed once database is updated
#####################################################################
# Drop harrisonburg county
dense_covid_df = dense_covid_df[dense_covid_df['county'] != 'harrisonburg']

# Transform Data

In [5]:
# Add past_delta14 columns to sparse and dense counties
sparse_covid_df['past_delta14'] = pd.Series(dtype=int)
dense_covid_df['past_delta14'] = pd.Series(dtype=int)

# Sparse county past_delta_14
for i in range(14,len(sparse_covid_df.index)):

    # Set past_delta_14
    past_cases = sparse_covid_df.iloc[i-14,3]
    present_cases = sparse_covid_df.iloc[i,3]
    delta14 = present_cases - past_cases

    sparse_covid_df.iloc[i,7] = delta14

# Dense county past delta14
for i in range(14,len(dense_covid_df.index)):

    # Set past_delta_14
    past_cases = dense_covid_df.iloc[i-14,3]
    present_cases = dense_covid_df.iloc[i,3]
    delta14 = present_cases - past_cases

    dense_covid_df.iloc[i,7] = delta14

In [6]:
# Sort weather data according to the sparse or dense county sets
sparse_counties = sparse_covid_df['county'].unique()
dense_counties = dense_covid_df['county'].unique()

# Sort weather for each set
sparse_weather_df = weather_df[weather_df['county'].isin(sparse_counties)]
dense_weather_df = weather_df[weather_df['county'].isin(dense_counties)]

# Sort county data for each set
sparse_county_df = county_df[county_df['county'].isin(sparse_counties)]
dense_county_df = county_df[county_df['county'].isin(dense_counties)]

In [7]:
# Merge weather data on sparse and dense data
sparse_weather_covid_df = sparse_covid_df.merge(sparse_weather_df,on=['date','county'])
dense_weather_covid_df = dense_covid_df.merge(dense_weather_df,on=['date','county'])

In [8]:
# Verify Sparse Data
sparse_weather_covid_df.head()

Unnamed: 0,date,county,state_x,total_cases,new_cases,future_delta7,future_delta14,past_delta14,state_y,temp_mean(c),precip_sum(mm),wind_max(km/h),min_humidity(%),max_humidity(%),mean_humidity(%)
0,2020-03-07,fairfax,virginia,1,1,9.0,21.0,,virginia,5.0,0.0,27.6,30.0,65.0,48.0
1,2020-03-08,fairfax,virginia,2,1,8.0,29.0,,virginia,6.8,0.0,16.7,24.0,67.0,47.0
2,2020-03-09,fairfax,virginia,4,2,6.0,39.0,,virginia,11.7,0.0,18.8,30.0,69.0,50.0
3,2020-03-10,fairfax,virginia,4,0,8.0,42.0,,virginia,14.6,2.1,29.8,49.0,95.0,69.0
4,2020-03-11,fairfax,virginia,4,0,10.0,72.0,,virginia,9.1,0.6,15.7,63.0,91.0,73.0


In [9]:
# Verify Dense Data
dense_weather_covid_df.head()

Unnamed: 0,date,county,state_x,total_cases,new_cases,future_delta7,future_delta14,past_delta14,state_y,temp_mean(c),precip_sum(mm),wind_max(km/h),min_humidity(%),max_humidity(%),mean_humidity(%)
0,2020-03-11,baltimore,maryland,1,1,6.0,50.0,,maryland,9.1,0.2,20.9,54.0,88.0,72.0
1,2020-03-12,baltimore,maryland,1,0,12.0,80.0,,maryland,9.9,0.0,14.8,57.0,93.0,77.0
2,2020-03-13,baltimore,maryland,2,1,11.0,101.0,,maryland,14.5,7.6,24.7,34.0,98.0,69.0
3,2020-03-14,baltimore,maryland,3,1,16.0,138.0,,maryland,9.4,2.0,20.1,32.0,81.0,45.0
4,2020-03-15,baltimore,maryland,3,0,25.0,159.0,,maryland,7.1,6.9,14.7,46.0,92.0,75.0


In [10]:
# Merge county data on sparse and dense data
sparse_weather_covid_county_df = sparse_weather_covid_df.merge(county_df,on=['county'])
dense_weather_covid_county_df = dense_weather_covid_df.merge(county_df,on=['county'])

In [11]:
# Set rows to keep
# Set Feature Categories
weather_features = ['temp_mean(c)','precip_sum(mm)','wind_max(km/h)','min_humidity(%)','max_humidity(%)','mean_humidity(%)']
county_features = ['total_pop','pop_dens(/sqmi)']
covid_features = ['new_cases','past_delta14']
target = ['future_delta14']

In [12]:
# Select only those columns to keep
sparse_weather_covid_county_df = sparse_weather_covid_county_df[target + weather_features + county_features + covid_features]
dense_weather_covid_county_df = dense_weather_covid_county_df[target + weather_features + county_features + covid_features]

In [13]:
# Ensure columns are of string type
sparse_weather_covid_county_df.columns = sparse_weather_covid_county_df.columns.astype(str)
dense_weather_covid_county_df.columns = dense_weather_covid_county_df.columns.astype(str)

In [14]:
# Drop rows with null data
sparse = sparse_weather_covid_county_df.dropna()
dense = dense_weather_covid_county_df.dropna()

In [15]:
# Verify Sparse Data
sparse_weather_covid_county_df.head()

Unnamed: 0,future_delta14,temp_mean(c),precip_sum(mm),wind_max(km/h),min_humidity(%),max_humidity(%),mean_humidity(%),total_pop,pop_dens(/sqmi),new_cases,past_delta14
0,21.0,5.0,0.0,27.6,30.0,65.0,48.0,1143529,1129,1,
1,29.0,6.8,0.0,16.7,24.0,67.0,47.0,1143529,1129,1,
2,39.0,11.7,0.0,18.8,30.0,69.0,50.0,1143529,1129,2,
3,42.0,14.6,2.1,29.8,49.0,95.0,69.0,1143529,1129,0,
4,72.0,9.1,0.6,15.7,63.0,91.0,73.0,1143529,1129,0,


In [16]:
# Verify Dense Data
dense_weather_covid_county_df.head()

Unnamed: 0,future_delta14,temp_mean(c),precip_sum(mm),wind_max(km/h),min_humidity(%),max_humidity(%),mean_humidity(%),total_pop,pop_dens(/sqmi),new_cases,past_delta14
0,50.0,9.1,0.2,20.9,54.0,88.0,72.0,614700,2932,1,
1,80.0,9.9,0.0,14.8,57.0,93.0,77.0,614700,2932,0,
2,101.0,14.5,7.6,24.7,34.0,98.0,69.0,614700,2932,1,
3,138.0,9.4,2.0,20.1,32.0,81.0,45.0,614700,2932,1,
4,159.0,7.1,6.9,14.7,46.0,92.0,75.0,614700,2932,0,


In [17]:
print(f"Sparse County Rows: {len(sparse_weather_covid_county_df.index)}")
print(f"Dense County Rows: {len(dense_weather_covid_county_df.index)}")

Sparse County Rows: 2484
Dense County Rows: 1665


# Run Models

## Sparse Data Model
### All Features

In [18]:
# Split data into Training and Testing Sets

# Use All Features
X = sparse[weather_features + covid_features + county_features]
y = sparse[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
sparse_model = LinearRegression()

# Sparse 14-day new case prediction model
sparse_model.fit(X_train,y_train)
training_score = sparse_model.score(X_train,y_train)
testing_score = sparse_model.score(X_test,y_test)

# View performance, coefficients and y-intercept
print("------- Dense 14-day New Case Prediction --------")
print()
print("---------------- All Features -------------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{sparse_model.feature_names_in_[i]} : {sparse_model.coef_[0][i]}")
print(f"y-intercept : {sparse_model.intercept_[0]}")


------- Dense 14-day New Case Prediction --------

---------------- All Features -------------------
Training Score: 0.5337930881238495
Testing Score: 0.5534654648393535

----------------- Coefficients ------------------
temp_mean(c) : 0.06807328603708902
precip_sum(mm) : -8.334512673381317
wind_max(km/h) : -21.06507377306915
min_humidity(%) : 7.101118832027623
max_humidity(%) : -21.920519491363073
mean_humidity(%) : 1.9004357321938128
new_cases : 2.8269759543349906
past_delta14 : 0.027736202161525187
total_pop : 0.0016368991453624242
pop_dens(/sqmi) : -0.28401780053805276
y-intercept : 2245.5458348968086


### Only Weather Features

In [19]:
# Split data into Training and Testing Sets

# Use Only Weather Features
X = sparse[weather_features]
y = sparse[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
sparse_model = LinearRegression()

# Sparse 14-day new case 
sparse_model.fit(X_train,y_train)
training_score = sparse_model.score(X_train,y_train)
testing_score = sparse_model.score(X_test,y_test)

print("------- Sparse 14-day New Case Prediction -------")
print()
print("-------------- Weather Features -----------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{sparse_model.feature_names_in_[i]} : {sparse_model.coef_[0][i]}")
print(f"y-intercept : {sparse_model.intercept_[0]}")

------- Sparse 14-day New Case Prediction -------

-------------- Weather Features -----------------
Training Score: 0.053233426035620535
Testing Score: 0.05667275823849516

----------------- Coefficients ------------------
temp_mean(c) : 143.55532799658616
precip_sum(mm) : -27.443770387622713
wind_max(km/h) : 62.50516672046642
min_humidity(%) : -2.6426109849093815
max_humidity(%) : -48.46309847041625
mean_humidity(%) : 39.53675575810575
y-intercept : 817.7735508205924


### Only Covid Features

In [20]:
# Split data into Training and Testing Sets

# Use Only Covid Features
X = sparse[covid_features]
y = sparse[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
sparse_model = LinearRegression()

# Sparse 14-day new case prediction model
sparse_model.fit(X_train,y_train)
training_score = sparse_model.score(X_train,y_train)
testing_score = sparse_model.score(X_test,y_test)

print("------- Sparse 14-day New Case Prediction -------")
print()
print("---------------- Covid Features -----------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{sparse_model.feature_names_in_[i]} : {sparse_model.coef_[0][i]}")
print(f"y-intercept : {sparse_model.intercept_[0]}")

------- Sparse 14-day New Case Prediction -------

---------------- Covid Features -----------------
Training Score: 0.33405661628074945
Testing Score: 0.41414789157162235

----------------- Coefficients ------------------
new_cases : 4.400572663571119
past_delta14 : 0.036949366969927566
y-intercept : 2148.452629155905


### Only County Features

In [21]:
# Split data into Training and Testing Sets

# Use County Features
X = sparse[county_features]
y = sparse[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
sparse_model = LinearRegression()

# Sparse 14-day new case prediction model
sparse_model.fit(X_train,y_train)
training_score = sparse_model.score(X_train,y_train)
testing_score = sparse_model.score(X_test,y_test)

print("------- Sparse 14-day New Case Prediction -------")
print()
print("--------------- County Features -----------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{sparse_model.feature_names_in_[i]} : {sparse_model.coef_[0][i]}")
print(f"y-intercept : {sparse_model.intercept_[0]}")

------- Sparse 14-day New Case Prediction -------

--------------- County Features -----------------
Training Score: 0.40980470943809233
Testing Score: 0.38856731917108456

----------------- Coefficients ------------------
total_pop : 0.0021467062104496637
pop_dens(/sqmi) : -0.22164911467450482
y-intercept : 170.1751096252351


### Only County / Weather Features

In [22]:
# Split data into Training and Testing Sets

# Use Only County / Weather Features
X = sparse[weather_features + county_features]
y = sparse[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
sparse_model = LinearRegression()

# Sparse 14-day new case prediction model
sparse_model.fit(X_train,y_train)
training_score = sparse_model.score(X_train,y_train)
testing_score = sparse_model.score(X_test,y_test)

print("------- Sparse 14-day New Case Prediction -------")
print()
print("---------- County / Weather Features ------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{sparse_model.feature_names_in_[i]} : {sparse_model.coef_[0][i]}")
print(f"y-intercept : {sparse_model.intercept_[0]}")

------- Sparse 14-day New Case Prediction -------

---------- County / Weather Features ------------
Training Score: 0.41651945931639556
Testing Score: 0.3926342642500774

----------------- Coefficients ------------------
temp_mean(c) : 39.08330587500373
precip_sum(mm) : -0.20935980578658556
wind_max(km/h) : -14.219504932750578
min_humidity(%) : 16.181239633015487
max_humidity(%) : -28.845289460354355
mean_humidity(%) : -7.948548921163592
total_pop : 0.002119186276502852
pop_dens(/sqmi) : -0.21718175575029056
y-intercept : 1953.0739977333121


### Only Covid / Weather Features

In [23]:
# Split data into Training and Testing Sets

# Use Only Covid / Weather Features
X = sparse[weather_features + covid_features]
y = sparse[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
sparse_model = LinearRegression()

# Sparse 14-day new case prediction model
sparse_model.fit(X_train,y_train)
training_score = sparse_model.score(X_train,y_train)
testing_score = sparse_model.score(X_test,y_test)

print("------- Sparse 14-day New Case Prediction -------")
print()
print("---------- Covid / Weather Features -------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{sparse_model.feature_names_in_[i]} : {sparse_model.coef_[0][i]}")
print(f"y-intercept : {sparse_model.intercept_[0]}")

------- Sparse 14-day New Case Prediction -------

---------- Covid / Weather Features -------------
Training Score: 0.3445099731572373
Testing Score: 0.4221252207183508

----------------- Coefficients ------------------
temp_mean(c) : 57.786219985685015
precip_sum(mm) : -30.11079372695609
wind_max(km/h) : 26.491344801274934
min_humidity(%) : -9.218454339999685
max_humidity(%) : -32.49547939917074
mean_humidity(%) : 36.769307698915135
new_cases : 4.307830660165947
past_delta14 : 0.031763466965416876
y-intercept : 1397.987654715214


### Only County / Covid Features

In [24]:
# Split data into Training and Testing Sets

# Use Only County / Covid Features
X = sparse[covid_features + county_features]
y = sparse[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
sparse_model = LinearRegression()

# Sparse 14-day new case 
sparse_model.fit(X_train,y_train)
training_score = sparse_model.score(X_train,y_train)
testing_score = sparse_model.score(X_test,y_test)

print("------- Sparse 14-day New Case Prediction -------")
print()
print("----------- County / Covid Features -------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{sparse_model.feature_names_in_[i]} : {sparse_model.coef_[0][i]}")
print(f"y-intercept : {sparse_model.intercept_[0]}")

------- Sparse 14-day New Case Prediction -------

----------- County / Covid Features -------------
Training Score: 0.5308753534993098
Testing Score: 0.5552996724808096

----------------- Coefficients ------------------
new_cases : 2.821145649527326
past_delta14 : 0.028133989393608054
total_pop : 0.0016139738709402202
pop_dens(/sqmi) : -0.32679517226466065
y-intercept : 503.09950354241846


## Dense Data Model
### All Features

In [25]:
# Split data into Training and Testing Sets

# Use All features
X = dense[weather_features + covid_features + county_features]
y = dense[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
dense_model = LinearRegression()

# Dense 14-day new case prediction model
dense_model.fit(X_train,y_train)
training_score = dense_model.score(X_train,y_train)
testing_score = dense_model.score(X_test,y_test)

# View performance, coefficients and y-intercept
print("------- Dense 14-day New Case Prediction --------")
print("---------------- All Features -------------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{dense_model.feature_names_in_[i]} : {dense_model.coef_[0][i]}")
print(f"y-intercept : {dense_model.intercept_[0]}")


------- Dense 14-day New Case Prediction --------
---------------- All Features -------------------
Training Score: 0.8419322291651712
Testing Score: 0.8786911179285752

----------------- Coefficients ------------------
temp_mean(c) : -50.975321991148434
precip_sum(mm) : -17.019859341484334
wind_max(km/h) : 35.24487261573176
min_humidity(%) : -56.37402482010014
max_humidity(%) : -67.96644748306957
mean_humidity(%) : 117.32166080426897
new_cases : 11.624866651611374
past_delta14 : 0.0033197996094041916
total_pop : 0.0005725588582330512
pop_dens(/sqmi) : 0.1961636158301347
y-intercept : 446.51211754122505


### Only Weather Features

In [26]:
# Split data into Training and Testing Sets

# Use Only Weather Features
X = dense[weather_features]
y = dense[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
dense_model = LinearRegression()

# Dense 14-day new case prediction model
dense_model.fit(X_train,y_train)
training_score = dense_model.score(X_train,y_train)
testing_score = dense_model.score(X_test,y_test)

# View performance, coefficients and y-intercept
print("------- Dense 14-day New Case Prediction --------")
print()
print("-------------- Weather Features -----------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{dense_model.feature_names_in_[i]} : {dense_model.coef_[0][i]}")
print(f"y-intercept : {dense_model.intercept_[0]}")


------- Dense 14-day New Case Prediction --------

-------------- Weather Features -----------------
Training Score: 0.12546983980134208
Testing Score: 0.11523621918497562

----------------- Coefficients ------------------
temp_mean(c) : -255.59955645680256
precip_sum(mm) : -88.03132946729707
wind_max(km/h) : 165.07580933902227
min_humidity(%) : 28.49008219175401
max_humidity(%) : -194.7587184553073
mean_humidity(%) : 114.37480578345526
y-intercept : 12754.986876671534


### Only Covid Features

In [27]:
# Split data into Training and Testing Sets

# Use Only Covid Features
X = dense[covid_features]
y = dense[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
dense_model = LinearRegression()

# Dense 14-day new case prediction model
dense_model.fit(X_train,y_train)
training_score = dense_model.score(X_train,y_train)
testing_score = dense_model.score(X_test,y_test)

# View performance, coefficients and y-intercept
print("------- Dense 14-day New Case Prediction --------")
print()
print("---------------- Covid Features -----------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{dense_model.feature_names_in_[i]} : {dense_model.coef_[0][i]}")
print(f"y-intercept : {dense_model.intercept_[0]}")


------- Dense 14-day New Case Prediction --------

---------------- Covid Features -----------------
Training Score: 0.8265027707750646
Testing Score: 0.868171802107904

----------------- Coefficients ------------------
new_cases : 12.752543932685171
past_delta14 : 0.002334921297737128
y-intercept : 651.6217496708928


### Only County Features

In [28]:
# Split data into Training and Testing Sets

# Use Only County Features
X = dense[county_features]
y = dense[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
dense_model = LinearRegression()

# Dense 14-day new case prediction model
dense_model.fit(X_train,y_train)
training_score = dense_model.score(X_train,y_train)
testing_score = dense_model.score(X_test,y_test)

# View performance, coefficients and y-intercept
print("------- Dense 14-day New Case Prediction --------")
print()
print("--------------- County Features -----------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{dense_model.feature_names_in_[i]} : {dense_model.coef_[0][i]}")
print(f"y-intercept : {dense_model.intercept_[0]}")


------- Dense 14-day New Case Prediction --------

--------------- County Features -----------------
Training Score: 0.33010078487171335
Testing Score: 0.32885767148258205

----------------- Coefficients ------------------
total_pop : 0.002750762127948149
pop_dens(/sqmi) : 0.40296150566147054
y-intercept : -1182.584527434391


### Only County / Weather Features

In [29]:
# Split data into Training and Testing Sets

# Use Only County / Weather Features
X = dense[weather_features + county_features]
y = dense[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
dense_model = LinearRegression()

# Dense 14-day new case prediction model
dense_model.fit(X_train,y_train)
training_score = dense_model.score(X_train,y_train)
testing_score = dense_model.score(X_test,y_test)

# View performance, coefficients and y-intercept
print("------- Dense 14-day New Case Prediction --------")
print()
print("---------- County / Weather Features ------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{dense_model.feature_names_in_[i]} : {dense_model.coef_[0][i]}")
print(f"y-intercept : {dense_model.intercept_[0]}")


------- Dense 14-day New Case Prediction --------

---------- County / Weather Features ------------
Training Score: 0.3645004623974457
Testing Score: 0.36541143690113176

----------------- Coefficients ------------------
temp_mean(c) : -112.58476903442364
precip_sum(mm) : -22.44158042350348
wind_max(km/h) : 128.32546903524775
min_humidity(%) : -133.16263688696728
max_humidity(%) : -178.90078658304577
mean_humidity(%) : 248.03937497521628
total_pop : 0.00256024746459854
pop_dens(/sqmi) : 0.5503178805165173
y-intercept : 3473.9861710443465


### Only Covid / Weather Features

In [30]:
# Split data into Training and Testing Sets

# Use Only Covid / Weather Features
X = dense[weather_features + covid_features]
y = dense[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
dense_model = LinearRegression()

# Dense 14-day new case prediction model
dense_model.fit(X_train,y_train)
training_score = dense_model.score(X_train,y_train)
testing_score = dense_model.score(X_test,y_test)

# View performance, coefficients and y-intercept
print("------- Dense 14-day New Case Prediction --------")
print()
print("---------- Covid / Weather Features -------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{dense_model.feature_names_in_[i]} : {dense_model.coef_[0][i]}")
print(f"y-intercept : {dense_model.intercept_[0]}")


------- Dense 14-day New Case Prediction --------

---------- Covid / Weather Features -------------
Training Score: 0.832760687147398
Testing Score: 0.8710100691042756

----------------- Coefficients ------------------
temp_mean(c) : -72.68630716901536
precip_sum(mm) : -27.450302158560735
wind_max(km/h) : 34.60208145354717
min_humidity(%) : -24.344137907644075
max_humidity(%) : -63.16261077787307
mean_humidity(%) : 86.14697125793734
new_cases : 12.374542590609337
past_delta14 : 0.005280364072631712
y-intercept : 2051.6208425198583


### Only County / Covid Features

In [31]:
# Split data into Training and Testing Sets

# Use Only County / Covid Features
X = dense[covid_features + county_features]
y = dense[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
dense_model = LinearRegression()

# Dense 14-day new case prediction model
dense_model.fit(X_train,y_train)
training_score = dense_model.score(X_train,y_train)
testing_score = dense_model.score(X_test,y_test)

# View performance, coefficients and y-intercept
print("------- Dense 14-day New Case Prediction --------")
print()
print("----------- County / Covid Features -------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{dense_model.feature_names_in_[i]} : {dense_model.coef_[0][i]}")
print(f"y-intercept : {dense_model.intercept_[0]}")


------- Dense 14-day New Case Prediction --------

----------- County / Covid Features -------------
Training Score: 0.8382434888935428
Testing Score: 0.8787693620711217

----------------- Coefficients ------------------
new_cases : 11.81515591283089
past_delta14 : 0.0010304197895450158
total_pop : 0.0006132714457706965
pop_dens(/sqmi) : 0.15488534004541632
y-intercept : -412.0330313840068
