In [None]:
# Import dependencies
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Read the CSV snapshot into a DataFrame and preview the first rows
csv_path = 'enigma_Aug_8_2020.csv'
usa = pd.read_csv(csv_path)
usa.head()

In [None]:
# Remove columns this analysis does not use.
# errors='ignore' keeps the cell re-runnable: because `usa` is reassigned,
# a second run would otherwise raise KeyError for the already-dropped columns.
usa = usa.drop(columns=['state_fips', 'lat', 'long'], errors='ignore')
usa.head()

In [None]:
# Parse the 'date' column into a new 'datetime' column and preview the last rows
date_values = usa.loc[:, 'date']
usa['datetime'] = pd.to_datetime(date_values)
usa.tail()

### West North Central Division 4 (Iowa, Kansas, Minnesota, Missouri, Nebraska, North Dakota, and South Dakota)

In [None]:
# Create a DataFrame for just the West North Central states.
# Nebraska belongs to this division (it is listed in the section heading)
# but was missing from the original filter, so it is included here.
wnc_states = ['Iowa', 'Kansas', 'Minnesota', 'Missouri', 'Nebraska',
              'North Dakota', 'South Dakota']
west_northern_central = usa[usa['state_name'].isin(wnc_states)]

west_northern_central.head()

In [None]:
# Sum the counts across the West North Central states, one row per date.
# numeric_only=True restricts the sum to numeric columns; pandas >= 2.0
# otherwise also tries to "sum" text columns such as state_name.

wnc_group = west_northern_central.groupby("datetime").sum(numeric_only=True)
wnc_group.tail()

In [None]:
# Sort by datetime and reset the index.
# sort_values returns a new DataFrame, so its result has to be assigned;
# the original call discarded it and the sort never took effect.
wnc_group = wnc_group.sort_values(by='datetime').reset_index()
wnc_group.tail()

In [None]:
# Make sure 'datetime' has a datetime dtype, then split it into calendar parts.
# The .dt accessor is vectorized and replaces the per-row lambda calls.
wnc_group['datetime'] = pd.to_datetime(wnc_group['datetime'])
wnc_group['day'] = wnc_group['datetime'].dt.day
wnc_group['month'] = wnc_group['datetime'].dt.month
wnc_group['year'] = wnc_group['datetime'].dt.year

# Growth rate = fractional change in cases relative to the previous row.
# A previous value of zero gives +/-inf; those are marked as missing so they
# do not distort the summary statistics or reach the regression.
wnc_group['growth_rate'] = (
    wnc_group['cases']
    .pct_change()
    .replace([float('inf'), float('-inf')], float('nan'))
)
wnc_group.tail()

In [None]:
# Summary statistics for the West North Central growth rate
wnc_group.loc[:, 'growth_rate'].describe()

In [None]:
# Number of missing values in each column
wnc_group.isna().sum()

In [None]:
# Keep only the rows that have no missing value in any column
wnc_group = wnc_group.dropna(axis=0, how='any')

In [None]:
# Linear regression setup for the West North Central data.
# LinearRegression is the estimator fitted in a later cell; train_test_split
# is used in a later cell to hold out part of the rows for scoring.
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
# Unfitted model created with scikit-learn's default settings
wnc_model = LinearRegression()

In [None]:
# Features are the calendar parts of the date; targets are the case count and
# the growth rate (two target columns, so the model is fitted with two outputs)
feature_cols = ["day", "month", "year"]
target_cols = ["cases", "growth_rate"]
X_wnc = wnc_group[feature_cols]
y_wnc = wnc_group[target_cols]

In [None]:
# Split into train and test parts; the fixed random_state makes the split repeatable
wnc_split = train_test_split(X_wnc, y_wnc, random_state=42)
X_train_wnc, X_test_wnc, y_train_wnc, y_test_wnc = wnc_split

In [None]:
# Fit on the training split and display the score on the held-out split.
# fit() returns the estimator itself, so the two calls can be chained.
wnc_model.fit(X_train_wnc, y_train_wnc).score(X_test_wnc, y_test_wnc)

In [None]:
import numpy as np  # kept so `np` stays available to any later cell

# Predict [cases, growth_rate] for day=4, month=11, year=2020.
# The model was fitted on a DataFrame, so the query is passed as a DataFrame
# with the same column names and order. A bare array makes scikit-learn warn
# that X has no valid feature names, and hides which value is which.
query_wnc = pd.DataFrame([[4, 11, 2020]], columns=X_wnc.columns)
wnc_model.predict(query_wnc)

### Visualize Growth Rate and Cases

In [None]:
# West North Central: growth rate against date (scatter)

fig, ax = plt.subplots(figsize=(15, 6))
ax.scatter(wnc_group['datetime'], wnc_group['growth_rate'], c='m')
ax.grid(True)
ax.set_title("West Northern Central Division Growth Rate", fontsize=20)
ax.set_ylabel('Growth Rate', fontsize=20)
plt.show()

In [None]:
# West North Central: case count against date (scatter)

fig, ax = plt.subplots(figsize=(15, 6))
ax.scatter(wnc_group['datetime'], wnc_group['cases'], c='m')
ax.grid(True)
ax.set_title('West Northern Central Division Cases Over Time', fontsize=20)
ax.set_ylabel('Cases', fontsize=20)
plt.show()

### South Division 5: South Atlantic (Delaware, Florida, Georgia, Maryland, North Carolina, South Carolina, Virginia, District of Columbia, and West Virginia)

In [None]:
# Make a DataFrame for just the South Atlantic division.
# The original filter compared against 'Flordia', which matches nothing under
# the standard spelling, so Florida's rows were silently left out.

sa_states = ['Delaware', 'Florida', 'Georgia', 'Maryland', 'North Carolina',
             'South Carolina', 'Virginia', 'District of Columbia',
             'West Virginia']
south_atlantic = usa[usa['state_name'].isin(sa_states)]
south_atlantic.head()

In [None]:
# Sum the counts across the South Atlantic states, one row per date.
# numeric_only=True restricts the sum to numeric columns; pandas >= 2.0
# otherwise also tries to "sum" text columns such as state_name.

sa_group = south_atlantic.groupby("datetime").sum(numeric_only=True)
sa_group.tail()

In [None]:
# Sort by datetime and reset the index.
# sort_values returns a new DataFrame, so its result has to be assigned;
# the original call discarded it and the sort never took effect.
sa_group = sa_group.sort_values(by='datetime').reset_index()
sa_group.tail()

In [None]:
# Make sure 'datetime' has a datetime dtype, then split it into calendar parts.
# The .dt accessor is vectorized and replaces the per-row lambda calls.
sa_group['datetime'] = pd.to_datetime(sa_group['datetime'])
sa_group['day'] = sa_group['datetime'].dt.day
sa_group['month'] = sa_group['datetime'].dt.month
sa_group['year'] = sa_group['datetime'].dt.year

# Growth rate = fractional change in cases relative to the previous row.
# A previous value of zero gives +/-inf; those are marked as missing so they
# do not distort the summary statistics or the plot.
sa_group['growth_rate'] = (
    sa_group['cases']
    .pct_change()
    .replace([float('inf'), float('-inf')], float('nan'))
)
sa_group.tail()

In [None]:
# Summary statistics for the South Atlantic growth rate
sa_group.loc[:, 'growth_rate'].describe()

### Visualize Growth Rate and Cases

In [None]:
# South Atlantic: growth rate against date (scatter)

fig, ax = plt.subplots(figsize=(15, 6))
ax.scatter(sa_group['datetime'], sa_group['growth_rate'], c='c')
ax.grid(True)
ax.set_title("South Atlantic Division Growth Rate", fontsize=20)
ax.set_ylabel('Growth Rate', fontsize=20)
plt.show()

In [None]:
# South Atlantic: case count against date (scatter)

fig, ax = plt.subplots(figsize=(15, 6))
ax.scatter(sa_group['datetime'], sa_group['cases'], c='c')
ax.grid(True)
ax.set_title('South Atlantic Division Cases Over Time', fontsize=20)
ax.set_ylabel('Cases', fontsize=20)
plt.show()

### South Division 6: East South Central (Alabama, Kentucky, Mississippi, and Tennessee)

In [None]:
# Keep only the rows for the East South Central states
esc_states = ['Alabama', 'Kentucky', 'Mississippi', 'Tennessee']
east_south_central = usa[usa['state_name'].isin(esc_states)]

east_south_central.head()

In [None]:
# Sum the counts across the East South Central states, one row per date.
# numeric_only=True restricts the sum to numeric columns; pandas >= 2.0
# otherwise also tries to "sum" text columns such as state_name.

esc_group = east_south_central.groupby("datetime").sum(numeric_only=True)
esc_group.tail()

In [None]:
# Sort by datetime and reset the index.
# sort_values returns a new DataFrame, so its result has to be assigned;
# the original call discarded it and the sort never took effect.
esc_group = esc_group.sort_values(by='datetime').reset_index()
esc_group.tail()

In [None]:
# Make sure 'datetime' has a datetime dtype, then split it into calendar parts.
# The .dt accessor is vectorized and replaces the per-row lambda calls.
esc_group['datetime'] = pd.to_datetime(esc_group['datetime'])
esc_group['day'] = esc_group['datetime'].dt.day
esc_group['month'] = esc_group['datetime'].dt.month
esc_group['year'] = esc_group['datetime'].dt.year

# Growth rate = fractional change in cases relative to the previous row.
# A previous value of zero gives +/-inf; those are marked as missing so they
# do not distort the summary statistics or the plot.
esc_group['growth_rate'] = (
    esc_group['cases']
    .pct_change()
    .replace([float('inf'), float('-inf')], float('nan'))
)
esc_group.tail()

In [None]:
# Summary statistics for the East South Central growth rate
esc_group.loc[:, 'growth_rate'].describe()

### Visualize Growth Rate and Cases

In [None]:
# East South Central: growth rate against date (scatter)

fig, ax = plt.subplots(figsize=(15, 6))
ax.scatter(esc_group['datetime'], esc_group['growth_rate'], c='b')
ax.grid(True)
ax.set_title("East South Central Division Growth Rate", fontsize=20)
ax.set_ylabel('Growth Rate', fontsize=20)
plt.show()

In [None]:
# East South Central: case count against date (scatter)

fig, ax = plt.subplots(figsize=(15, 6))
ax.scatter(esc_group['datetime'], esc_group['cases'], c='b')
ax.grid(True)
ax.set_title('East South Central Division Cases Over Time', fontsize=20)
ax.set_ylabel('Cases', fontsize=20)
plt.show()

### South Division 7: West South Central (Arkansas, Louisiana, Oklahoma, and Texas)

In [None]:
# Keep only the rows for the West South Central states
wsc_states = ['Arkansas', 'Louisiana', 'Oklahoma', 'Texas']
west_south_central = usa[usa['state_name'].isin(wsc_states)]

west_south_central.head()

In [None]:
# Sum the counts across the West South Central states, one row per date.
# numeric_only=True restricts the sum to numeric columns; pandas >= 2.0
# otherwise also tries to "sum" text columns such as state_name.

wsc_group = west_south_central.groupby("datetime").sum(numeric_only=True)
wsc_group.tail()

In [None]:
# Sort by datetime and reset the index.
# sort_values returns a new DataFrame, so its result has to be assigned;
# the original call discarded it and the sort never took effect.
wsc_group = wsc_group.sort_values(by='datetime').reset_index()
wsc_group.tail()

In [None]:
# Make sure 'datetime' has a datetime dtype, then split it into calendar parts.
# The .dt accessor is vectorized and replaces the per-row lambda calls.
wsc_group['datetime'] = pd.to_datetime(wsc_group['datetime'])
wsc_group['day'] = wsc_group['datetime'].dt.day
wsc_group['month'] = wsc_group['datetime'].dt.month
wsc_group['year'] = wsc_group['datetime'].dt.year

# Growth rate = fractional change in cases relative to the previous row.
# A previous value of zero gives +/-inf; those are marked as missing so they
# do not distort the summary statistics or the plot.
wsc_group['growth_rate'] = (
    wsc_group['cases']
    .pct_change()
    .replace([float('inf'), float('-inf')], float('nan'))
)
wsc_group.tail()

In [None]:
# Summary statistics for the West South Central growth rate
wsc_group.loc[:, 'growth_rate'].describe()

### Visualize Growth Rate and Cases

In [None]:
# West South Central: growth rate against date (scatter)

fig, ax = plt.subplots(figsize=(15, 6))
ax.scatter(wsc_group['datetime'], wsc_group['growth_rate'], c='xkcd:magenta')
ax.grid(True)
ax.set_title("West South Central Division Growth Rate", fontsize=20)
ax.set_ylabel('Growth Rate', fontsize=20)
plt.show()

In [None]:
# West South Central: case count against date (scatter)

fig, ax = plt.subplots(figsize=(15, 6))
ax.scatter(wsc_group['datetime'], wsc_group['cases'], c='xkcd:magenta')
ax.grid(True)
ax.set_title('West South Central Division Cases Over Time', fontsize=20)
ax.set_ylabel('Cases', fontsize=20)
plt.show()

### West Mountain Division 8 (Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, and Wyoming)

In [None]:
# Keep only the rows for the Mountain division states
mountain_states = ['Arizona', 'Colorado', 'Idaho', 'Montana',
                   'Nevada', 'New Mexico', 'Utah', 'Wyoming']
mountain = usa[usa['state_name'].isin(mountain_states)]

mountain.head()

In [None]:
# Sum the counts across the Mountain division states, one row per date.
# numeric_only=True restricts the sum to numeric columns; pandas >= 2.0
# otherwise also tries to "sum" text columns such as state_name.

md_group = mountain.groupby("datetime").sum(numeric_only=True)
md_group.tail()

In [None]:
# Sort by datetime and reset the index.
# sort_values returns a new DataFrame, so its result has to be assigned;
# the original call discarded it and the sort never took effect.
md_group = md_group.sort_values(by='datetime').reset_index()
md_group.tail()

In [None]:
# Make sure 'datetime' has a datetime dtype, then split it into calendar parts.
# The .dt accessor is vectorized and replaces the per-row lambda calls.
md_group['datetime'] = pd.to_datetime(md_group['datetime'])
md_group['day'] = md_group['datetime'].dt.day
md_group['month'] = md_group['datetime'].dt.month
md_group['year'] = md_group['datetime'].dt.year

# Growth rate = fractional change in cases relative to the previous row.
# A previous value of zero gives +/-inf; those are marked as missing so they
# do not distort the summary statistics or the plot.
md_group['growth_rate'] = (
    md_group['cases']
    .pct_change()
    .replace([float('inf'), float('-inf')], float('nan'))
)
md_group.tail()

In [None]:
# Summary statistics for the Mountain division growth rate
md_group.loc[:, 'growth_rate'].describe()

### Visualize Growth Rate and Cases

In [None]:
# Mountain division: growth rate against date (scatter)

fig, ax = plt.subplots(figsize=(15, 6))
ax.scatter(md_group['datetime'], md_group['growth_rate'], c='xkcd:sky blue')
ax.grid(True)
ax.set_title("Mountain Division Growth Rate", fontsize=20)
ax.set_ylabel('Growth Rate', fontsize=20)
plt.show()

In [None]:
# Mountain division: case count against date (scatter)

fig, ax = plt.subplots(figsize=(15, 6))
ax.scatter(md_group['datetime'], md_group['cases'], c='xkcd:sky blue')
ax.grid(True)
ax.set_title('Mountain Division Cases Over Time', fontsize=20)
ax.set_ylabel('Cases', fontsize=20)
plt.show()

### West Pacific (Alaska, California, Hawaii, Oregon, and Washington)

In [None]:
# Keep only the rows for the Pacific division states
pacific_states = ['California', 'Hawaii', 'Oregon', 'Washington', 'Alaska']
pacific = usa[usa['state_name'].isin(pacific_states)]

pacific.head()

In [None]:
# Sum the counts across the Pacific division states, one row per date.
# numeric_only=True restricts the sum to numeric columns; pandas >= 2.0
# otherwise also tries to "sum" text columns such as state_name.

p_group = pacific.groupby("datetime").sum(numeric_only=True)
p_group.tail(20)

In [None]:
# Sort by datetime and reset the index.
# The original line assigned md_group.reset_index() here, which replaced the
# Pacific aggregate with the Mountain division data; every Pacific statistic
# and plot after it therefore showed Mountain numbers. Use p_group itself,
# and assign the sort_values result (it is not an in-place operation).
p_group = p_group.sort_values(by='datetime').reset_index()
p_group.tail()

In [None]:
# Make sure 'datetime' has a datetime dtype, then split it into calendar parts.
# The .dt accessor is vectorized and replaces the per-row lambda calls.
p_group['datetime'] = pd.to_datetime(p_group['datetime'])
p_group['day'] = p_group['datetime'].dt.day
p_group['month'] = p_group['datetime'].dt.month
p_group['year'] = p_group['datetime'].dt.year

# Growth rate = fractional change in cases relative to the previous row.
# A previous value of zero gives +/-inf; those are marked as missing so they
# do not distort the summary statistics or the plot.
p_group['growth_rate'] = (
    p_group['cases']
    .pct_change()
    .replace([float('inf'), float('-inf')], float('nan'))
)
p_group.tail(20)

In [None]:
# Summary statistics for the Pacific division growth rate
p_group.loc[:, 'growth_rate'].describe()

### Visualize Growth Rate and Cases

In [None]:
# Pacific division: growth rate against date (scatter)

fig, ax = plt.subplots(figsize=(15, 6))
ax.scatter(p_group['datetime'], p_group['growth_rate'], c='xkcd:orange')
ax.grid(True)
ax.set_title("Pacific Division Growth Rate", fontsize=20)
ax.set_ylabel('Growth Rate', fontsize=20)
plt.show()

In [None]:
# Pacific division: case count against date (scatter)

fig, ax = plt.subplots(figsize=(15, 6))
ax.scatter(p_group['datetime'], p_group['cases'], c='xkcd:orange')
ax.grid(True)
ax.set_title('Pacific Division Cases Over Time', fontsize=20)
ax.set_ylabel('Cases', fontsize=20)
plt.show()

### NEW YORK AS A TEST CASE

In [None]:
# Keep only the rows whose state_name is New York
is_new_york = usa['state_name'].eq('New York')
ny = usa[is_new_york]

ny.head()

In [None]:
# Sum the counts across all New York rows, one row per date.
# numeric_only=True restricts the sum to numeric columns; pandas >= 2.0
# otherwise also tries to "sum" text columns such as state_name.

ny_group = ny.groupby("datetime").sum(numeric_only=True)
ny_group.tail()

In [None]:
# Number of missing values in each column
ny_group.isna().sum()

In [None]:
# Sort by datetime and reset the index.
# The original line assigned md_group.reset_index() here, which replaced the
# New York aggregate with the Mountain division data; every New York statistic
# and plot after it therefore showed Mountain numbers. Use ny_group itself,
# and assign the sort_values result (it is not an in-place operation).
ny_group = ny_group.sort_values(by='datetime').reset_index()
ny_group.tail(20)

In [None]:
# Make sure 'datetime' has a datetime dtype, then split it into calendar parts.
# The .dt accessor is vectorized and replaces the per-row lambda calls.
ny_group['datetime'] = pd.to_datetime(ny_group['datetime'])
ny_group['day'] = ny_group['datetime'].dt.day
ny_group['month'] = ny_group['datetime'].dt.month
ny_group['year'] = ny_group['datetime'].dt.year

# Growth rate = fractional change in cases relative to the previous row.
# A previous value of zero gives +/-inf; those are marked as missing so they
# do not distort the summary statistics or the plot.
ny_group['growth_rate'] = (
    ny_group['cases']
    .pct_change()
    .replace([float('inf'), float('-inf')], float('nan'))
)
ny_group.tail()

In [None]:
# Summary statistics for the New York growth rate
ny_group.loc[:, 'growth_rate'].describe()

### Visualize Growth Rate and Cases

In [None]:
# New York: growth rate against date (scatter)

fig, ax = plt.subplots(figsize=(15, 6))
ax.scatter(ny_group['datetime'], ny_group['growth_rate'], c='xkcd:black')
ax.grid(True)
ax.set_title("New York Growth Rate", fontsize=20)
ax.set_ylabel('Growth Rate', fontsize=20)
plt.show()

In [None]:
# New York: case count against date (scatter)

fig, ax = plt.subplots(figsize=(15, 6))
ax.scatter(ny_group['datetime'], ny_group['cases'], c='xkcd:black')
ax.grid(True)
ax.set_title('New York Cases Over Time', fontsize=20)
ax.set_ylabel('Cases', fontsize=20)
plt.show()

In [None]:
# Scatter plot for positive test results.
# The plotted column is 'tests_positive', so the y-axis is labelled to match
# (the original comment and label said "Cases", copied from the previous plot).

plt.figure(figsize = (15, 6))
plt.scatter(ny_group['datetime'],ny_group['tests_positive'],c='xkcd:black')
plt.grid(True)
plt.title('New York Positive Test Results Over Time', fontsize = 20)
plt.ylabel('Positive Tests', fontsize = 20)
plt.show()