In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sqlalchemy import create_engine
from config import DB_NAME, DB_URL, DB_PORT, DB_NAME, USERNAME, PASSWORD

# Import Data

In [2]:
#####################################################################
### !!! This csv import to be replaced by import from DB instance !!!
### !!! For model design only !!!
#####################################################################

# Read CSV data and verify
sparse_covid_df = pd.read_csv('../data/covid/sparse_county_covid.csv',index_col=0)
dense_covid_df = pd.read_csv('../data/covid/dense_county_covid.csv',index_col=0)
weather_df = pd.read_csv('../data/weather/county_weather.csv',index_col=0)

# Transform Data

In [3]:
# Add population density and total population values
density = {
    'baltimore': 6866,
    'essex': 6168,
    'cook': 5301,
    'union': 5150,
    'norfolk': 5026,
    'nassau': 4954,
    'harrisonburg':4765,
    'fairfax':2454,
    'camden':2289,
    'harris':2700,
    'franklin':2186,
    'marion':2466,
    'dekalb':2482,
    'duval':1305,
    'wake':1377,
    'bexar':1620
}

population = {
    'baltimore': 621342,
    'essex': 755618,
    'cook': 5231351,
    'union': 543976,
    'norfolk': 245782,
    'nassau': 74629,
    'harrisonburg':50981,
    'fairfax':1118602,
    'camden':513539,
    'harris':4253700,
    'franklin':1195537,
    'marion':918977,
    'dekalb':707089,
    'duval':879602,
    'wake':952151,
    'bexar':1785704
}


In [4]:
# Add pop_density and population columns to covid data
sparse_covid_df['population'] = pd.Series(dtype=int)
sparse_covid_df['pop_density'] = pd.Series(dtype=int)

dense_covid_df['population'] = pd.Series(dtype=int)
dense_covid_df['pop_density'] = pd.Series(dtype=int)

# Set population and density columns for sparse counties
for i in range(len(sparse_covid_df.index)):
    county = sparse_covid_df.iloc[i,1]

    sparse_covid_df.iloc[i,7] = population[county]
    sparse_covid_df.iloc[i,8] = density[county]

# Set population and density columns for dense counties
for i in range(len(dense_covid_df.index)):
    county = dense_covid_df.iloc[i,1]

    dense_covid_df.iloc[i,7] = population[county]
    dense_covid_df.iloc[i,8] = density[county]

In [5]:
# Add past_delta14 columns to sparse and dense counties
sparse_covid_df['past_delta14'] = pd.Series(dtype=int)
dense_covid_df['past_delta14'] = pd.Series(dtype=int)

# Sparse county past_delta_14
for i in range(14,len(sparse_covid_df.index)):

    # Set past_delta_14
    past_cases = sparse_covid_df.iloc[i-14,3]
    present_cases = sparse_covid_df.iloc[i,3]
    delta14 = present_cases - past_cases

    sparse_covid_df.iloc[i,9] = delta14

# Dense county past delta14
for i in range(14,len(dense_covid_df.index)):

    # Set past_delta_14
    past_cases = dense_covid_df.iloc[i-14,3]
    present_cases = dense_covid_df.iloc[i,3]
    delta14 = present_cases - past_cases

    dense_covid_df.iloc[i,9] = delta14

In [6]:
# Sort weather data according to the sparse or dense county sets
sparse_counties = sparse_covid_df['county'].unique()
dense_counties = dense_covid_df['county'].unique()

# Sort weather for each set
sparse_weather_df = weather_df[weather_df['county'].isin(sparse_counties)]
dense_weather_df = weather_df[weather_df['county'].isin(dense_counties)]

In [7]:
# Merge weather data on sparse and dense covid data
sparse_weather_covid_df = sparse_covid_df.merge(sparse_weather_df,on=['date','county'])
dense_weather_covid_df = dense_covid_df.merge(dense_weather_df,on=['date','county'])

In [8]:
# Verify Sparse Data
sparse_weather_covid_df.head()

Unnamed: 0,date,county,state_x,total_cases,new_cases,future_delta7,future_delta14,population,pop_density,past_delta14,state_y,temp_mean(C),precip_sum(mm),wind_max(km/h),min_humidity(%),max_humidity(%),mean_humidity(%)
0,2020-03-07,fairfax,virginia,1,1,9.0,21.0,1118602.0,2454.0,,virginia,5.0,0.0,27.6,30,65,48.0
1,2020-03-08,fairfax,virginia,2,1,8.0,29.0,1118602.0,2454.0,,virginia,6.8,0.0,16.7,24,67,47.0
2,2020-03-09,fairfax,virginia,4,2,6.0,39.0,1118602.0,2454.0,,virginia,11.7,0.0,18.8,30,69,50.0
3,2020-03-10,fairfax,virginia,4,0,8.0,42.0,1118602.0,2454.0,,virginia,14.6,2.1,29.8,49,95,69.0
4,2020-03-11,fairfax,virginia,4,0,10.0,72.0,1118602.0,2454.0,,virginia,9.1,0.6,15.7,63,91,73.0


In [9]:
# Verify Dense Data
dense_weather_covid_df.head()

Unnamed: 0,date,county,state_x,total_cases,new_cases,future_delta7,future_delta14,population,pop_density,past_delta14,state_y,temp_mean(C),precip_sum(mm),wind_max(km/h),min_humidity(%),max_humidity(%),mean_humidity(%)
0,2020-03-11,baltimore,maryland,1,1,6.0,50.0,621342.0,6866.0,,maryland,9.1,0.2,20.9,54,88,72.0
1,2020-03-12,baltimore,maryland,1,0,12.0,80.0,621342.0,6866.0,,maryland,9.9,0.0,14.8,57,93,77.0
2,2020-03-13,baltimore,maryland,2,1,11.0,101.0,621342.0,6866.0,,maryland,14.5,7.6,24.7,34,98,69.0
3,2020-03-14,baltimore,maryland,3,1,16.0,138.0,621342.0,6866.0,,maryland,9.4,2.0,20.1,32,81,45.0
4,2020-03-15,baltimore,maryland,3,0,25.0,159.0,621342.0,6866.0,,maryland,7.1,6.9,14.7,46,92,75.0


# Run Models

In [10]:
# Set Feature categories
weather_features = ['temp_mean(C)','precip_sum(mm)','wind_max(km/h)','min_humidity(%)','max_humidity(%)','mean_humidity(%)']
county_features = ['population','pop_density']
covid_features = ['new_cases','past_delta14']
target = ['future_delta14']

## Sparse Data Model
### All Features

In [11]:
# Split data into Training and Testing Sets
# Drop rows with null data
sparse = sparse_weather_covid_df.dropna()

# Use All Features
X = sparse[weather_features + covid_features + county_features]
y = sparse[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
sparse_model = LinearRegression()

# Sparse 14-day new case prediction model
sparse_model.fit(X_train,y_train)
training_score = sparse_model.score(X_train,y_train)
testing_score = sparse_model.score(X_test,y_test)

# View performance, coefficients and y-intercept
print("------- Dense 14-day New Case Prediction --------")
print()
print("---------------- All Features -------------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{sparse_model.feature_names_in_[i]} : {sparse_model.coef_[0][i]}")
print(f"y-intercept : {sparse_model.intercept_[0]}")


------- Dense 14-day New Case Prediction --------

---------------- All Features -------------------
Training Score: 0.5333855574734403
Testing Score: 0.5533243754457358

----------------- Coefficients ------------------
temp_mean(C) : 1.7869059274487162
precip_sum(mm) : -8.753470078224304
wind_max(km/h) : -21.220775885799252
min_humidity(%) : 9.025257954545006
max_humidity(%) : -20.954526510954985
mean_humidity(%) : -0.8051576680887356
new_cases : 2.828587393688116
past_delta14 : 0.027130071674316508
population : 0.0018072587937380648
pop_density : 0.0504653469046505
y-intercept : 1678.444797353632


### Only Weather Features

In [12]:
# Split data into Training and Testing Sets
# Drop rows with null data
sparse = sparse_weather_covid_df.dropna()

# Use Only Weather Features
X = sparse[weather_features]
y = sparse[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
sparse_model = LinearRegression()

# Sparse 14-day new case 
sparse_model.fit(X_train,y_train)
training_score = sparse_model.score(X_train,y_train)
testing_score = sparse_model.score(X_test,y_test)

print("------- Sparse 14-day New Case Prediction -------")
print()
print("-------------- Weather Features -----------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{sparse_model.feature_names_in_[i]} : {sparse_model.coef_[0][i]}")
print(f"y-intercept : {sparse_model.intercept_[0]}")

------- Sparse 14-day New Case Prediction -------

-------------- Weather Features -----------------
Training Score: 0.053233426035620535
Testing Score: 0.05667275823849516

----------------- Coefficients ------------------
temp_mean(C) : 143.55532799658616
precip_sum(mm) : -27.443770387622713
wind_max(km/h) : 62.50516672046642
min_humidity(%) : -2.6426109849093815
max_humidity(%) : -48.46309847041625
mean_humidity(%) : 39.53675575810575
y-intercept : 817.7735508205924


### Only Covid Features

In [13]:
# Split data into Training and Testing Sets
# Drop rows with null data
sparse = sparse_weather_covid_df.dropna()

# Use Only Covid Features
X = sparse[covid_features]
y = sparse[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
sparse_model = LinearRegression()

# Sparse 14-day new case prediction model
sparse_model.fit(X_train,y_train)
training_score = sparse_model.score(X_train,y_train)
testing_score = sparse_model.score(X_test,y_test)

print("------- Sparse 14-day New Case Prediction -------")
print()
print("---------------- Covid Features -----------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{sparse_model.feature_names_in_[i]} : {sparse_model.coef_[0][i]}")
print(f"y-intercept : {sparse_model.intercept_[0]}")

------- Sparse 14-day New Case Prediction -------

---------------- Covid Features -----------------
Training Score: 0.33405435233698333
Testing Score: 0.41407684650497334

----------------- Coefficients ------------------
new_cases : 4.400591045022983
past_delta14 : 0.03694778491384128
y-intercept : 2148.4325167726474


### Only County Features

In [14]:
# Split data into Training and Testing Sets
# Drop rows with null data
sparse = sparse_weather_covid_df.dropna()

# Use County Features
X = sparse[county_features]
y = sparse[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
sparse_model = LinearRegression()

# Sparse 14-day new case prediction model
sparse_model.fit(X_train,y_train)
training_score = sparse_model.score(X_train,y_train)
testing_score = sparse_model.score(X_test,y_test)

print("------- Sparse 14-day New Case Prediction -------")
print()
print("--------------- County Features -----------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{sparse_model.feature_names_in_[i]} : {sparse_model.coef_[0][i]}")
print(f"y-intercept : {sparse_model.intercept_[0]}")

------- Sparse 14-day New Case Prediction -------

--------------- County Features -----------------
Training Score: 0.40988114934456243
Testing Score: 0.3882990381464737

----------------- Coefficients ------------------
population : 0.00235722047264775
pop_density : 0.06270177284217926
y-intercept : -332.59477153692296


### Only County / Weather Features

In [15]:
# Split data into Training and Testing Sets
# Drop rows with null data
sparse = sparse_weather_covid_df.dropna()

# Use Only County / Weather Features
X = sparse[weather_features + county_features]
y = sparse[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
sparse_model = LinearRegression()

# Sparse 14-day new case prediction model
sparse_model.fit(X_train,y_train)
training_score = sparse_model.score(X_train,y_train)
testing_score = sparse_model.score(X_test,y_test)

print("------- Sparse 14-day New Case Prediction -------")
print()
print("---------- County / Weather Features ------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{sparse_model.feature_names_in_[i]} : {sparse_model.coef_[0][i]}")
print(f"y-intercept : {sparse_model.intercept_[0]}")

------- Sparse 14-day New Case Prediction -------

---------- County / Weather Features ------------
Training Score: 0.4172084750748948
Testing Score: 0.39313946705464387

----------------- Coefficients ------------------
temp_mean(C) : 43.97160653297896
precip_sum(mm) : -1.0678015021715286
wind_max(km/h) : -13.416924579178513
min_humidity(%) : 18.158383817664518
max_humidity(%) : -28.035906009629873
mean_humidity(%) : -10.30782922363045
population : 0.002299771879762929
pop_density : 0.19810527416308546
y-intercept : 1095.816924973717


### Only Covid / Weather Features

In [16]:
# Split data into Training and Testing Sets
# Drop rows with null data
sparse = sparse_weather_covid_df.dropna()

# Use Only Covid / Weather Features
X = sparse[weather_features + covid_features]
y = sparse[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
sparse_model = LinearRegression()

# Sparse 14-day new case prediction model
sparse_model.fit(X_train,y_train)
training_score = sparse_model.score(X_train,y_train)
testing_score = sparse_model.score(X_test,y_test)

print("------- Sparse 14-day New Case Prediction -------")
print()
print("---------- Covid / Weather Features -------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{sparse_model.feature_names_in_[i]} : {sparse_model.coef_[0][i]}")
print(f"y-intercept : {sparse_model.intercept_[0]}")

------- Sparse 14-day New Case Prediction -------

---------- Covid / Weather Features -------------
Training Score: 0.3445088362364712
Testing Score: 0.4221152016643346

----------------- Coefficients ------------------
temp_mean(C) : 57.789110475195905
precip_sum(mm) : -30.11114855932623
wind_max(km/h) : 26.48964544573924
min_humidity(%) : -9.221217929147134
max_humidity(%) : -32.500022060199896
mean_humidity(%) : 36.774620310402604
new_cases : 4.307841259493756
past_delta14 : 0.031762171949477995
y-intercept : 1398.1095150841447


### Only County / Covid Features

In [17]:
# Split data into Training and Testing Sets
# Drop rows with null data
sparse = sparse_weather_covid_df.dropna()

# Use Only County / Covid Features
X = sparse[covid_features + county_features]
y = sparse[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
sparse_model = LinearRegression()

# Sparse 14-day new case 
sparse_model.fit(X_train,y_train)
training_score = sparse_model.score(X_train,y_train)
testing_score = sparse_model.score(X_test,y_test)

print("------- Sparse 14-day New Case Prediction -------")
print()
print("----------- County / Covid Features -------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{sparse_model.feature_names_in_[i]} : {sparse_model.coef_[0][i]}")
print(f"y-intercept : {sparse_model.intercept_[0]}")

------- Sparse 14-day New Case Prediction -------

----------- County / Covid Features -------------
Training Score: 0.5303413451153369
Testing Score: 0.554892954163845

----------------- Coefficients ------------------
new_cases : 2.824077058433859
past_delta14 : 0.02761279525529619
population : 0.0017869456856691162
pop_density : 0.05630334211551039
y-intercept : -111.64407453486501


## Dense Data Model
### All Features

In [18]:
# Split data into Training and Testing Sets
# Drop rows with null data
dense = dense_weather_covid_df.dropna()

# Use All features
X = dense[weather_features + covid_features + county_features]
y = dense[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
dense_model = LinearRegression()

# Dense 14-day new case prediction model
dense_model.fit(X_train,y_train)
training_score = dense_model.score(X_train,y_train)
testing_score = dense_model.score(X_test,y_test)

# View performance, coefficients and y-intercept
print("------- Dense 14-day New Case Prediction --------")
print("---------------- All Features -------------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{dense_model.feature_names_in_[i]} : {dense_model.coef_[0][i]}")
print(f"y-intercept : {dense_model.intercept_[0]}")


------- Dense 14-day New Case Prediction --------
---------------- All Features -------------------
Training Score: 0.8519537925041163
Testing Score: 0.8600310959306668

----------------- Coefficients ------------------
temp_mean(C) : -46.160352654614904
precip_sum(mm) : -13.793491573260745
wind_max(km/h) : 33.62278147108551
min_humidity(%) : -33.0451106090321
max_humidity(%) : -46.36913179393523
mean_humidity(%) : 78.22757768242289
new_cases : 11.44455391209538
past_delta14 : 0.003843602929943127
population : 0.0005700809018952879
pop_density : -0.0652452064014548
y-intercept : 970.61248072421


### Only Weather Features

In [19]:
# Split data into Training and Testing Sets
# Drop rows with null data
dense = dense_weather_covid_df.dropna()

# Use Only Weather Features
X = dense[weather_features]
y = dense[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
dense_model = LinearRegression()

# Dense 14-day new case prediction model
dense_model.fit(X_train,y_train)
training_score = dense_model.score(X_train,y_train)
testing_score = dense_model.score(X_test,y_test)

# View performance, coefficients and y-intercept
print("------- Dense 14-day New Case Prediction --------")
print()
print("-------------- Weather Features -----------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{dense_model.feature_names_in_[i]} : {dense_model.coef_[0][i]}")
print(f"y-intercept : {dense_model.intercept_[0]}")


------- Dense 14-day New Case Prediction --------

-------------- Weather Features -----------------
Training Score: 0.1279813679600208
Testing Score: 0.1399736671399645

----------------- Coefficients ------------------
temp_mean(C) : -202.43373141036056
precip_sum(mm) : -94.19708401596651
wind_max(km/h) : 205.77857379492747
min_humidity(%) : 50.74608926295998
max_humidity(%) : -141.04860994630897
mean_humidity(%) : 63.9292697635151
y-intercept : 8564.806146006667


### Only Covid Features

In [20]:
# Split data into Training and Testing Sets
# Drop rows with null data
dense = dense_weather_covid_df.dropna()

# Use Only Covid Features
X = dense[covid_features]
y = dense[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
dense_model = LinearRegression()

# Dense 14-day new case prediction model
dense_model.fit(X_train,y_train)
training_score = dense_model.score(X_train,y_train)
testing_score = dense_model.score(X_test,y_test)

# View performance, coefficients and y-intercept
print("------- Dense 14-day New Case Prediction --------")
print()
print("---------------- Covid Features -----------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{dense_model.feature_names_in_[i]} : {dense_model.coef_[0][i]}")
print(f"y-intercept : {dense_model.intercept_[0]}")


------- Dense 14-day New Case Prediction --------

---------------- Covid Features -----------------
Training Score: 0.8350199383214991
Testing Score: 0.8536315525461682

----------------- Coefficients ------------------
new_cases : 12.63056829539692
past_delta14 : 0.0020697214307039543
y-intercept : 583.6365746581391


### Only County Features

In [21]:
# Split data into Training and Testing Sets
# Drop rows with null data
dense = dense_weather_covid_df.dropna()

# Use Only County Features
X = dense[county_features]
y = dense[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
dense_model = LinearRegression()

# Dense 14-day new case prediction model
dense_model.fit(X_train,y_train)
training_score = dense_model.score(X_train,y_train)
testing_score = dense_model.score(X_test,y_test)

# View performance, coefficients and y-intercept
print("------- Dense 14-day New Case Prediction --------")
print()
print("--------------- County Features -----------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{dense_model.feature_names_in_[i]} : {dense_model.coef_[0][i]}")
print(f"y-intercept : {dense_model.intercept_[0]}")


------- Dense 14-day New Case Prediction --------

--------------- County Features -----------------
Training Score: 0.3313553327652058
Testing Score: 0.3413832229944833

----------------- Coefficients ------------------
population : 0.002521818311684598
pop_density : -0.3993297444300957
y-intercept : 2697.0389771147484


### Only County / Weather Features

In [22]:
# Split data into Training and Testing Sets
# Drop rows with null data
dense = dense_weather_covid_df.dropna()

# Use Only County / Weather Features
X = dense[weather_features + county_features]
y = dense[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
dense_model = LinearRegression()

# Dense 14-day new case prediction model
dense_model.fit(X_train,y_train)
training_score = dense_model.score(X_train,y_train)
testing_score = dense_model.score(X_test,y_test)

# View performance, coefficients and y-intercept
print("------- Dense 14-day New Case Prediction --------")
print()
print("---------- County / Weather Features ------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{dense_model.feature_names_in_[i]} : {dense_model.coef_[0][i]}")
print(f"y-intercept : {dense_model.intercept_[0]}")


------- Dense 14-day New Case Prediction --------

---------- County / Weather Features ------------
Training Score: 0.37526815831106397
Testing Score: 0.3764447037843972

----------------- Coefficients ------------------
temp_mean(C) : -96.10413791773452
precip_sum(mm) : -42.11401821033905
wind_max(km/h) : 177.9308104449363
min_humidity(%) : -89.67552680672466
max_humidity(%) : -129.6153752546824
mean_humidity(%) : 180.49928548808427
population : 0.0023347032467475955
pop_density : -0.3756697049247753
y-intercept : 4578.639884924242


### Only Covid / Weather Features

In [23]:
# Split data into Training and Testing Sets
# Drop rows with null data
dense = dense_weather_covid_df.dropna()

# Use Only Covid / Weather Features
X = dense[weather_features + covid_features]
y = dense[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
dense_model = LinearRegression()

# Dense 14-day new case prediction model
dense_model.fit(X_train,y_train)
training_score = dense_model.score(X_train,y_train)
testing_score = dense_model.score(X_test,y_test)

# View performance, coefficients and y-intercept
print("------- Dense 14-day New Case Prediction --------")
print()
print("---------- Covid / Weather Features -------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{dense_model.feature_names_in_[i]} : {dense_model.coef_[0][i]}")
print(f"y-intercept : {dense_model.intercept_[0]}")


------- Dense 14-day New Case Prediction --------

---------- Covid / Weather Features -------------
Training Score: 0.8406009914594911
Testing Score: 0.8581097607405397

----------------- Coefficients ------------------
temp_mean(C) : -62.93607163932601
precip_sum(mm) : -21.26583262532711
wind_max(km/h) : 28.123304438410706
min_humidity(%) : -2.540061038948038
max_humidity(%) : -42.580192128910944
mean_humidity(%) : 48.84452381768421
new_cases : 12.274757315926115
past_delta14 : 0.0050612119694376885
y-intercept : 1638.3420160535347


### Only County / Covid Features

In [24]:
# Split data into Training and Testing Sets
# Drop rows with null data
dense = dense_weather_covid_df.dropna()

# Use Only County / Covid Features
X = dense[covid_features + county_features]
y = dense[target]

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

# Use linear regression model
dense_model = LinearRegression()

# Dense 14-day new case prediction model
dense_model.fit(X_train,y_train)
training_score = dense_model.score(X_train,y_train)
testing_score = dense_model.score(X_test,y_test)

# View performance, coefficients and y-intercept
print("------- Dense 14-day New Case Prediction --------")
print()
print("----------- County / Covid Features -------------")
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print()
print('----------------- Coefficients ------------------')
for i in range(0,len(X.columns)):
    print(f"{dense_model.feature_names_in_[i]} : {dense_model.coef_[0][i]}")
print(f"y-intercept : {dense_model.intercept_[0]}")


------- Dense 14-day New Case Prediction --------

----------- County / Covid Features -------------
Training Score: 0.8487196885598844
Testing Score: 0.8573535120476613

----------------- Coefficients ------------------
new_cases : 11.648398390467086
past_delta14 : 0.0015935571950835114
population : 0.0006007766341705575
pop_density : -0.08430480169813748
y-intercept : 552.7905682368864
