In [None]:
import requests
import pandas as pd
import io
import datetime as dt
import matplotlib as mpl
from matplotlib.dates import DateFormatter
import matplotlib.pyplot as plt
import scipy.stats as stats
from IPython.display import JSON
#import seaborn as sns
#sns.set()
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
import os

In [None]:
# Download the case line-list CSV from the ArcGIS open-data portal.
# raise_for_status() stops the notebook on an HTTP error instead of letting an
# error page be parsed as if it were CSV; the timeout keeps a stalled
# connection from hanging the kernel indefinitely.
response = requests.get(
    "https://opendata.arcgis.com/datasets/37abda537d17458bae6677b8ab75fcb9_0.csv",
    timeout=60,
)
response.raise_for_status()
csv = response.content  # raw bytes of the downloaded file
main_df = pd.read_csv(io.StringIO(csv.decode('utf-8')))

1. Filtering for hospitalized cases ("Hospitalized" == "YES")

In [None]:
# Keep only the rows flagged as hospitalized.
# .copy() gives `df` its own data: later cells assign columns on `df`, and
# doing that on a bare slice of `main_df` is ambiguous (SettingWithCopyWarning,
# which this notebook silences via warnings.filterwarnings).
filt = main_df["Hospitalized"] == "YES"
df = main_df[filt].copy()

In [None]:
# Trying to save the data to a csv
# df = df[['County','Age',"Age_group","Gender","Jurisdiction","Travel_related","Hospitalized","Died","CaseDate"]]
# filepath = "../Resources/cleaned_csv.csv"
# df.to_csv(filepath)

In [None]:
#df

2. Changing 'Case1' (the case date) to a more manageable structure

In [None]:
# Splitting dates
#df.Case1 = df.loc[:,"Case1"].str.split()
#df.Case1 = df.Case1.map(lambda x: x[0])
#df.rename(columns = {"Case1":"CaseDate"},inplace=True)

In [None]:
# First Run through
#df.loc[:,'EventDate'] = pd.to_datetime(df.loc[:,'EventDate'],format='%Y/%m/%d %H:%M:%S')
#df.loc[:,"FormattedEventDate"] = df.loc[:,"EventDate"].dt.strftime("%m/%d/%Y")
#df.loc[:,"EventDate"] = pd.to_datetime(df.loc[:,"FormattedEventDate"],format='%m/%d/%Y')
#df = df.iloc[:,[0,1,2,3,7,8,9,13,16]]

In [None]:
# Parse the 'Case1' timestamps once, then derive both date columns from the
# parsed values.
# Plain column assignment (via assign) is used instead of `df.loc[:, col] = ...`:
# on pandas >= 2.0 a full-column .loc assignment writes into the existing
# object-dtype column, so 'Case1' would stay object and the `.dt` accessor
# would raise AttributeError.
case_timestamps = pd.to_datetime(df['Case1'], format='%Y/%m/%d %H:%M:%S')
df = df.assign(
    # Same calendar day with the time of day dropped (midnight).
    Case1=case_timestamps.dt.normalize(),
    # MM/DD/YYYY string; as a new column it is appended after the existing ones.
    FormattedCase1=case_timestamps.dt.strftime("%m/%d/%Y"),
)
# Positional column selection, kept exactly as written.
# NOTE(review): this depends on the column order of the downloaded CSV and makes
# the cell non-idempotent (re-running it indexes the already-reduced frame).
# Selecting by column name would be safer once the intended names are confirmed.
df = df.iloc[:,[0,1,2,3,7,8,9,13,16]]

In [None]:
# Show which columns `df` currently has.
df.columns

In [None]:
# Preview the first three rows of `df`.
df.head(3)

In [None]:
# Inspect the column dtypes of `df`.
# The previous `dtype()` call referenced an undefined name and raised NameError.
df.dtypes


In [None]:
# Count the rows of `df` per 'FormattedCase1' day (non-null 'Hospitalized' values).
daily_counts = df.groupby("FormattedCase1")["Hospitalized"].count()

# The group keys are MM/DD/YYYY strings, which sort lexicographically
# (e.g. "01/05/2021" before "03/01/2020"). Reorder chronologically so that
# tail() really shows the most recent days.
chronological = pd.to_datetime(daily_counts.index, format="%m/%d/%Y").argsort()
daily_counts = daily_counts.iloc[chronological]

# Two-column frame ('FormattedCase1', 'Hospitalized') used by the plot below.
hospitalized_df = daily_counts.reset_index()
daily_counts.tail(20)

In [None]:
# Scatter plot of the per-day counts in `hospitalized_df`.
# 'FormattedCase1' holds MM/DD/YYYY strings; parse them so matplotlib builds a
# real date axis. Plotting the raw strings creates a categorical axis, on which
# DateFormatter would format the category positions (0, 1, 2, ...) as dates.
case_dates = pd.to_datetime(hospitalized_df['FormattedCase1'], format="%m/%d/%Y")

fig, ax = plt.subplots()
fig.set_size_inches(8,6)
ax.scatter(case_dates, hospitalized_df["Hospitalized"])
# Datetime x-values already select the date converter, so an explicit
# ax.xaxis_date() call is not needed; only the tick label format is customised.
ax.xaxis.set_major_formatter(DateFormatter("%m/%d"))
ax.set_title("New Hospitalizations in Florida")
ax.set_xlabel("Case date (MM/DD)")
ax.set_ylabel("Hospitalized cases per day");
#plt.xlim(left=plt.xlim()[1]-99,right=plt.xlim()[1]-9)

## Hypothesis Testing
-------------

In [None]:
# Build the two comparison windows: the `day_delta` days before and the
# `day_delta` days after the reference date (the reference day itself is in
# neither window).
day_delta = 14
# NOTE(review): 2020-05-04 plus a 6-day offset gives 2020-05-10. The reason for
# the offset is not stated in this notebook -- confirm and document it.
opening_date = dt.datetime(2020,5,4) + dt.timedelta(days=6)
d_before = opening_date - dt.timedelta(days=day_delta)
d_after = opening_date + dt.timedelta(days=day_delta)

# The active cells of this notebook only convert 'Case1' to datetime, so
# 'EventDate' may still hold raw strings here, and comparing strings with
# datetime objects raises TypeError. Parse it explicitly (assumes the same
# timestamp layout as 'Case1' -- TODO confirm) and truncate to whole days so the
# window edges fall on day boundaries and groupby yields one row per calendar day.
event_day = pd.to_datetime(df["EventDate"], format='%Y/%m/%d %H:%M:%S').dt.normalize()
events = df.assign(EventDate=event_day)

filt_before = ((events["EventDate"]>=d_before) & (events["EventDate"] < opening_date))
before_df = events[filt_before]
filt_after = ((events["EventDate"]>opening_date) & (events["EventDate"] <= d_after))
after_df = events[filt_after]

# Daily counts inside each window, as ('EventDate', 'Hospitalized') frames.
grouped_before = before_df.groupby("EventDate").count().reset_index()[['EventDate','Hospitalized']]
grouped_after = after_df.groupby("EventDate").count().reset_index()[['EventDate','Hospitalized']]

In [None]:
# Plot the before/after daily counts and hand them back for testing.
# (Despite the name, no data is generated: the samples come from
# `grouped_before` and `grouped_after`.)
def gendata(loc=0):
    """Plot and return the 'Hospitalized' columns of the two window frames.

    Reads the notebook-level frames ``grouped_before`` and ``grouped_after``
    and draws two stacked panels with pyplot: a scatter plot of each sample
    against its row position (top) and density histograms with a dashed
    vertical line at each sample mean (bottom).

    Parameters
    ----------
    loc : optional
        Not used by the function body.

    Returns
    -------
    tuple
        ``(sample1, sample2)``: the 'Hospitalized' column of
        ``grouped_before`` and of ``grouped_after``, in that order.
    """
    before_counts = grouped_before['Hospitalized']
    after_counts = grouped_after['Hospitalized']
    labelled = (("before", before_counts), ("after", after_counts))

    # Top panel: counts against their position within each window.
    plt.subplot(2, 1, 1)
    for label, counts in labelled:
        plt.scatter(range(len(counts)), counts, label=label)
    plt.legend()

    # Bottom panel: 20-bin density histograms, then the two mean markers.
    plt.subplot(2, 1, 2)
    for label, counts in labelled:
        plt.hist(counts, 20, density=True, alpha=0.7, label=label)
    for _, counts in labelled:
        plt.axvline(counts.mean(), color='k', linestyle='dashed', linewidth=1)
    plt.legend()

    return before_counts, after_counts

# Draw the diagnostic plots and collect the two samples.
sample1, sample2 = gendata()

# Two-sample t-test computed from each window's summary statistics
# (mean, standard deviation, number of observations).
before_summary = (
    grouped_before["Hospitalized"].mean(),
    grouped_before["Hospitalized"].std(),
    grouped_before["Hospitalized"].size,
)
after_summary = (
    grouped_after["Hospitalized"].mean(),
    grouped_after["Hospitalized"].std(),
    grouped_after["Hospitalized"].size,
)
stats.ttest_ind_from_stats(*before_summary, *after_summary)

In [None]:
# Summary statistics of the 'Hospitalized' counts in the "after" window.
grouped_after["Hospitalized"].describe()

In [None]:
# Summary statistics of the 'Hospitalized' counts in the "before" window.
grouped_before["Hospitalized"].describe()