# Data Visualization with Plotly
https://plotly.com/python/getting-started/ 

## Analyzing Covid 19 Data

This notebook analyzes Covid 19 data collected by the New York Times.

https://github.com/nytimes/covid-19-data


If you cloned the repo, be sure to run `git pull origin master` before analyzing the data so that you are working with the most recent data.


In [None]:
import pandas as pd
import numpy as np
# 
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
# 
%matplotlib inline
!pip install chart_studio
!pip install cufflinks

# this is to make everything work properly offline 
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot 
init_notebook_mode(connected=True)
cf.go_offline()
import chart_studio.plotly as py
import cufflinks as cf

## Process Data

### State level data

In [None]:
# Read the state-level data.
# NOTE: this absolute path is specific to one machine; point it at your own copy of us-states.csv.
states_csv = r"C:\Users\Hanna Willwerth\ACE-592\TA_Material\pyintro_resources\us-states.csv"
d = pd.read_csv(filepath_or_buffer=states_csv)

In [None]:
d.head(3)

In [None]:
# First sort
d = d.sort_values(['state','date'],ascending=[True, True])

In [None]:
d.head(3)

In [None]:
# To find new cases, take the first order difference
changes = d.groupby("state")[['cases','deaths']].diff()

In [None]:
changes.head(3)

In [None]:
# Join the new numbers
d = d.join(changes,rsuffix="_new")

In [None]:
d.head(3)

In [None]:
# Taking rolling averages of the daily cases and deaths
rm7 = d.groupby("state")[['cases_new','deaths_new']].rolling(7).mean()

In [None]:
rm7=rm7.reset_index().set_index("level_1")

In [None]:
d= d.join(rm7.iloc[:,1:],rsuffix="_rm7")

In [None]:
d

This cell reads in the state codes, which Plotly uses to identify each state on the map.

In [None]:
state_codes = pd.read_csv(r"C:\Users\Hanna Willwerth\ACE-592\TA_Material\pyintro_resources\State_Codes.csv",usecols = ['code','state'])
state_codes['state'] = state_codes.state.str.strip()
state_codes['code'] = state_codes.code.str.strip()

In [None]:
state_codes;

In [None]:
d = d.merge(state_codes,how='left')

In [None]:
d;

### County level data

Here we read in a county-level GeoJSON file. We will pass the resulting object (`counties`) to Plotly as an argument when drawing the county maps. Don't worry about its contents for now.

In [None]:
import json
from urllib.request import urlopen

# County GeoJSON file hosted in the plotly datasets repository.
counties_geojson_url = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'

# Download the file and parse it into a Python object.
with urlopen(counties_geojson_url) as response:
    counties = json.load(response)


In [None]:
# Read the county-level data, forcing the fips column to text
# (presumably to keep leading zeros - verify against the file).
# NOTE: this absolute path is specific to one machine; point it at your own copy of us-counties.csv.
counties_csv = r"C:\Users\Hanna Willwerth\ACE-592\TA_Material\pyintro_resources\us-counties.csv"
county_data = pd.read_csv(counties_csv, dtype={"fips": str})

In [None]:
# To take differences, we need to sort the data first.
# FIPS codes are numbers which uniquely identify geographic areas.
county_data = county_data.sort_values(["fips","date"])

# Take the difference of the cumulative case numbers within each county.
county_changes = county_data.groupby("fips")[['cases','deaths']].diff()

# Join the new numbers (joined on the row index; new columns get the "_new" suffix).
county_data = county_data.join(county_changes,rsuffix="_new")

# Taking 7-row rolling averages within each county.
# The grouping key must be "fips", matching the sort order and the differences above:
# grouping by "state" would let a window run across rows of different counties.
# Rows whose fips is missing are left out by groupby and end up with NaN averages after the join.
rm7_county = county_data.groupby("fips")[['cases_new','deaths_new']].rolling(7).mean()

# Join them back into the data.
# reset_index() exposes the group key and the original row label ("level_1") as columns;
# the row label becomes the index again and iloc[:,1:] skips the group-key column.
county_data = county_data.join(rm7_county.reset_index().set_index("level_1").iloc[:,1:],rsuffix="_rm7")

In [None]:
county_data;

In [None]:
county_data.groupby("fips").last().sort_values("cases_new",ascending=False).head(10)

In [None]:
county_data.groupby("fips").last().sort_values("deaths_new",ascending=False).head(10)

In [None]:
county_data['county-state'] = county_data['county'] + ", " + county_data['state']

In [None]:
county_data

In [None]:
# One row per county: last() keeps the last non-null value of every column.
latest_counties = county_data.groupby("fips").last().reset_index()

# County map coloured by new cases; the colour scale is clipped to 0-500.
fig = px.choropleth(
    latest_counties,
    geojson=counties,
    locations='fips',
    color='cases_new',
    color_continuous_scale="Reds",
    scope="usa",
    range_color=(0, 500),
    hover_name="county-state",
    hover_data=["cases_new", "deaths_new"],
)

# Put the latest date found in the data into the title.
latest_date = county_data.date.max()
fig.update_layout(title_text='New Covid 19 Cases on {}, County Level'.format(latest_date))
fig.show()

In [None]:
county_data['cases_new_rate'] = county_data.groupby(["fips"])['cases_new'].diff()

In [None]:
fig = px.choropleth(county_data.groupby("fips").last().reset_index(), \
                           geojson=counties, locations='fips', color='cases_new_rate',
                           color_continuous_scale="RdBu_r",
                           scope="usa",
                           range_color =(-500,500),
                            hover_name ="county-state",
                            hover_data = ["cases_new","deaths_new"]
                          )

fig.update_layout(
    title_text = 'New Covid 19 Cases on {}, County Level'.format(county_data.date.max()),
)
fig.show()

### State level data

In [None]:
d1=pd.DataFrame(d).reset_index()

In [None]:
fig = px.bar(d1,x="state",y="cases_new", 
       title="Covid-19 new cases by States",
       labels={'cases_new':"Number of cases","state":"State"})
fig.show()

In [None]:
# matplotlib way
var = "cases_new"
# Latest values per state, ordered ascending so that tail(20) keeps the 20 largest.
latest_by_state = d.groupby("state").last()
top_states = latest_by_state.sort_values(by=var, ascending=True)[var].tail(20)
top_states.plot.barh();
plt.xlabel("New Cases");
plt.xticks(rotation=-45);
plt.ylabel("State");

In [None]:
# Order rows by date first so the animation frames are built in date order.
frames_by_date = d.sort_values(["date", "state"])

# State map of new cases with one animation frame per date; colour scale clipped to 0-5000.
fig = px.choropleth(
    frames_by_date,
    locations='code',
    color='cases_new',
    locationmode="USA-states",
    hover_name='state',
    animation_frame="date",
    color_continuous_scale="Reds",
    range_color=(0, 5000),
)

# update_layout returns the figure, so as the last expression it also displays it.
fig.update_layout(
    title_text='New Covid 19 Cases',
    geo_scope='usa',  # limit map scope to USA
)

### National Level Data & Selected States

In [None]:
national = pd.DataFrame(d.groupby("date")['cases_new'].sum().rolling(7).mean()).reset_index()

In [None]:
national

In [None]:
px.line(national,x='date',y="cases_new")

In [None]:
ill = d[d['state']=="Illinois"][["date","cases_new_rm7"]].set_index('date')
ca =  d[d['state']=="California"][["date","cases_new_rm7"]].set_index('date')
tx = d[d['state']=="Texas"][["date","cases_new_rm7"]].set_index('date')

In [None]:
data=ill.join(ca,rsuffix="_ca").join(tx,rsuffix="_tx").reset_index()

In [None]:
fig = px.line(data,x='date',y=["cases_new_rm7","cases_new_rm7_ca","cases_new_rm7_tx"],
              title = "COVID-19 Rolling 7-days Average cases" )
fig.show()

In [None]:
fig = go.Figure()

# (column in `data`, line colour, legend label) for each state, in drawing order.
state_traces = [
    ("cases_new_rm7", 'darkcyan', 'Illinois'),
    ("cases_new_rm7_ca", 'slategrey', 'California'),
    ("cases_new_rm7_tx", 'firebrick', 'Texas'),
]
for column, colour, label in state_traces:
    fig.add_trace(
        go.Scatter(x=data.date, y=data[column], mode='lines',
                   line=dict(color=colour), name=label)
    )

# Title and axis captions.
fig.update_layout(
    title="COVID-19 Rolling 7-days Average cases",
    xaxis_title="Date",
    yaxis_title="Cases",
)

# Axis lines in black on a white background, with no vertical grid lines.
x_axis_style = dict(
    showline=True,
    showgrid=False,
    showticklabels=True,
    linewidth=2,
    linecolor='black',
    ticks='outside',
)
y_axis_style = dict(showline=True, linecolor='black')
fig.update_layout(xaxis=x_axis_style, yaxis=y_axis_style, plot_bgcolor='white')
fig.show()