In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# sklearn utilities
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, classification_report, precision_score, recall_score, accuracy_score

# sklearn models
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA

# Inputting Files

In [28]:
# Load the county area table, discard the "Areaname" column, and rename
# "STCOU" to "fips" so this frame shares its join key with the other tables.
county_area = (
    pd.read_csv('county_area.csv')
    .drop(columns=["Areaname"])
    .rename(columns={"STCOU": "fips"})
)
county_area

Unnamed: 0,fips,AREA
0,1001,604.49
1,1003,2027.08
2,1005,904.59
3,1007,625.50
4,1009,650.65
...,...,...
3123,56037,10491.73
3124,56039,4221.96
3125,56041,2087.66
3126,56043,2242.85


In [29]:
# Load the mask-use table and rename "COUNTYFP" to "fips", the column the
# later merge step joins on.
county_mask_use = (
    pd.read_csv('mask_use.csv')
    .rename(columns={"COUNTYFP": "fips"})
)
county_mask_use

Unnamed: 0,fips,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS
0,1001,0.053,0.074,0.134,0.295,0.444
1,1003,0.083,0.059,0.098,0.323,0.436
2,1005,0.067,0.121,0.120,0.201,0.491
3,1007,0.020,0.034,0.096,0.278,0.572
4,1009,0.053,0.114,0.180,0.194,0.459
...,...,...,...,...,...,...
3123,56037,0.061,0.295,0.230,0.146,0.268
3124,56039,0.095,0.157,0.160,0.247,0.340
3125,56041,0.098,0.278,0.154,0.207,0.264
3126,56043,0.204,0.155,0.069,0.285,0.287


In [23]:
# Load the county case table and drop the "county" and "state" label columns;
# the remaining columns are kept unchanged.
county_cases = (
    pd.read_csv('county_cases.csv')
    .drop(columns=["county", "state"])
)
county_cases

Unnamed: 0,fips,cases,deaths
0,1001,2059,31
1,1003,6658,69
2,1005,1033,9
3,1007,840,14
4,1009,1932,25
...,...,...,...
3123,56037,462,2
3124,56039,747,1
3125,56041,431,3
3126,56043,144,7


In [30]:
# Load the county population table (the file is read as latin-1) and rename
# "COUNTY" to "fips" so it can serve as the join key.
county_pop = (
    pd.read_csv('county_pop.csv', encoding='latin-1')
    .rename(columns={"COUNTY": "fips"})
)
county_pop

Unnamed: 0,fips,STNAME,CTYNAME,POPESTIMATE2019
0,1001,Alabama,Autauga,55869
1,1003,Alabama,Baldwin,223234
2,1005,Alabama,Barbour,24686
3,1007,Alabama,Bibb,22394
4,1009,Alabama,Blount,57826
...,...,...,...,...
3123,56037,Wyoming,Sweetwater,42343
3124,56039,Wyoming,Teton,23464
3125,56041,Wyoming,Uinta,20226
3126,56043,Wyoming,Washakie,7805


## Merging Dataframes Together

In [32]:
# Build the combined frame: start from the population table and left-join the
# cases, mask-use, and area tables onto it in turn, always keyed on "fips".
# Left joins keep every row of county_pop even when a right-hand table has no
# matching key.
df = (
    county_pop
    .merge(county_cases, how='left', on="fips")
    .merge(county_mask_use, how='left', on="fips")
    .merge(county_area, how="left", on="fips")
)
df

Unnamed: 0,fips,STNAME,CTYNAME,POPESTIMATE2019,cases,deaths,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS,AREA
0,1001,Alabama,Autauga,55869,2059,31,0.053,0.074,0.134,0.295,0.444,604.49
1,1003,Alabama,Baldwin,223234,6658,69,0.083,0.059,0.098,0.323,0.436,2027.08
2,1005,Alabama,Barbour,24686,1033,9,0.067,0.121,0.120,0.201,0.491,904.59
3,1007,Alabama,Bibb,22394,840,14,0.020,0.034,0.096,0.278,0.572,625.50
4,1009,Alabama,Blount,57826,1932,25,0.053,0.114,0.180,0.194,0.459,650.65
...,...,...,...,...,...,...,...,...,...,...,...,...
3123,56037,Wyoming,Sweetwater,42343,462,2,0.061,0.295,0.230,0.146,0.268,10491.73
3124,56039,Wyoming,Teton,23464,747,1,0.095,0.157,0.160,0.247,0.340,4221.96
3125,56041,Wyoming,Uinta,20226,431,3,0.098,0.278,0.154,0.207,0.264,2087.66
3126,56043,Wyoming,Washakie,7805,144,7,0.204,0.155,0.069,0.285,0.287,2242.85
