# Data Visualization with Plotly
https://plotly.com/python/getting-started/ 

## Review Covid 19 Notebook by Prof. Jared

Covid 19 data collected by the New York Times. https://github.com/nytimes/covid-19-data


In [None]:
import pandas as pd
import numpy as np
import chart_studio.plotly as py
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import cufflinks as cf
%matplotlib inline

# Set up Plotly and cufflinks so figures render inside the notebook.
# NOTE(review): per the Plotly docs, connected=True loads plotly.js from a CDN,
# which requires an internet connection — confirm this matches the "offline" intent.
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot 
init_notebook_mode(connected=True)
# Presumably switches cufflinks to offline plotting — TODO confirm against cufflinks docs.
cf.go_offline()

## Processing Data

### 1. County level data

#### Choropleth Maps
A Choropleth Map is a map composed of colored polygons. It is used to represent spatial variations of a quantity. 

Choropleth Maps documentation: 
https://plotly.com/python/choropleth-maps/


What we need:


- Geometry information, which can be either:
  - a supplied GeoJSON file where each feature has either an id field or some identifying value in properties; or
  - one of the built-in geometries within plotly: US states and world countries (see below)
- A list of values indexed by feature identifier.


#### GeoJSON file for US Counties - GeoJSON with feature.id

In [None]:
from urllib.request import urlopen
import json

# Fetch the US-counties GeoJSON from the plotly datasets repository and parse
# the downloaded payload into a Python object.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.loads(response.read())

In [None]:
# Look at the first feature to see how a single GeoJSON entry is structured.
counties["features"][0]

In [None]:
# Load the county-level CSV. The "fips" column is read as a string so the codes
# are kept exactly as written in the file (no numeric conversion).
county_data = pd.read_csv("Data/covid19/covid-19-data/us-counties.csv",dtype={"fips":str})

In [None]:
# Preview the first four rows of the raw county data.
county_data.head(4)

In [None]:
# To take differences, we need to sort the data first: by county (FIPS), then
# by date, so that consecutive rows of a county are consecutive dates.
# FIPS codes are numbers which uniquely identify geographic areas.
county_data = county_data.sort_values(["fips","date"])

In [None]:
# Take the difference of the cumulative case and death numbers within each
# county to get per-row changes. The first row of each county has no previous
# row, so its difference is NaN.
county_changes = county_data.groupby("fips")[['cases','deaths']].diff()

In [None]:
# Join the new numbers back onto the original rows (aligned on the index).
# The overlapping column names receive the "_new" suffix: cases_new, deaths_new.
county_data = county_data.join(county_changes,rsuffix="_new")

In [None]:
# Taking rolling averages
# The 7-row window has to stay inside a single county, so group by "fips" (the
# same key used for the daily differences). Grouping by "state" would let a
# window span rows that belong to different counties of that state.
rm7_county = county_data.groupby("fips")[['cases_new','deaths_new']].rolling(7).mean()

# Join them back into the data.
# The grouped rolling result is indexed by (fips, original row index). Dropping
# the group level leaves the original row index, so the join lines up row by row.
# Rows with a missing fips are excluded by groupby and keep NaN in the new columns.
county_data = county_data.join(rm7_county.reset_index(level=0, drop=True),rsuffix="_rm7")

In [None]:
# Preview the data with the daily-change and rolling-average columns added.
county_data.head(3)

In [None]:
# Top 5 counties with the most daily cases, based on the last entry per county.
# NOTE: groupby(...).last() returns the last non-null value of each column
# within the group, so columns are not guaranteed to come from the same row.
county_data.groupby("fips").last().sort_values("cases_new",ascending=False).head(5)

In [None]:
# Top 5 counties with the most daily deaths (ranked by deaths_new, the daily
# change, not the cumulative total), based on the last entry per county.
county_data.groupby("fips").last().sort_values("deaths_new",ascending=False).head(5)

In [None]:
# Build a "County, State" text label for each row; it is used as the hover
# title on the maps.
county_data['county-state'] = county_data['county'].str.cat(county_data['state'], sep=", ")

In [None]:
# Check that the new "county-state" label column looks right.
county_data.head(4)

In [None]:
# Choropleth map of the latest daily case count per county.
#
# - the GeoJSON object is passed to the `geojson` argument
# - the column used for colouring is passed to the `color` argument
# - the column holding the feature IDs is passed to the `locations` argument
fig = px.choropleth(
    county_data.groupby("fips").last().reset_index(),
    geojson=counties,
    locations="fips",
    color="cases_new",
    color_continuous_scale="Reds",
    scope="usa",
    range_color=(0, 500),
    hover_name="county-state",
    hover_data=["cases_new", "deaths_new"],
    labels={"cases_new": "Daily Cases", "deaths_new": "Daily Deaths"},
)

# The title carries the most recent date present in the data.
fig.update_layout(
    title_text=f"New Covid 19 Daily Cases on {county_data.date.max()}, County Level",
)
fig.show()

In [None]:
# Day-over-day change of the daily case count, computed separately per county.
county_data["cases_new_rate"] = county_data.groupby("fips")["cases_new"].diff()

In [None]:
# Choropleth map of the change in daily cases per county. A diverging colour
# scale with a symmetric range is used so increases and decreases are coloured
# differently.
fig = px.choropleth(
    county_data.groupby("fips").last().reset_index(),
    geojson=counties,
    locations="fips",
    color="cases_new_rate",
    color_continuous_scale="RdBu_r",
    scope="usa",
    range_color=(-500, 500),
    hover_name="county-state",
    hover_data=["cases_new_rate", "cases_new", "deaths_new"],
    labels={
        "cases_new_rate": "Change in Daily Cases",
        "cases_new": "Daily Cases",
        "deaths_new": "Daily Deaths",
    },
)

# The title carries the most recent date present in the data.
fig.update_layout(
    title_text=f"New Covid 19 Cases on {county_data.date.max()}, County Level. First difference",
)
fig.show()

### 2. State level data

In [None]:
# Read the state-level CSV into `d`; every later cell in this section works on it.
d = pd.read_csv("Data/covid19/covid-19-data/us-states.csv")

In [None]:
# Preview the first three rows of the raw state data.
d.head(3)

In [None]:
# Order rows by state and date (ascending) so differences run chronologically.
d = d.sort_values(["state", "date"])

# Daily new cases/deaths: first-order difference of the cumulative counts
# within each state, joined back as cases_new / deaths_new.
changes = d.groupby("state")[["cases", "deaths"]].diff()
d = d.join(changes, rsuffix="_new")

# 7-day rolling mean of the daily numbers within each state. The result is
# re-indexed by the original row index ("level_1") so it can be joined back.
rm7 = (
    d.groupby("state")[["cases_new", "deaths_new"]]
    .rolling(7)
    .mean()
    .reset_index()
    .set_index("level_1")
)

# Leave out the "state" column before joining; the averaged columns get the
# "_rm7" suffix.
d = d.join(rm7.drop(columns="state"), rsuffix="_rm7")

In [None]:
# Preview the state data with the daily-change and rolling-average columns.
d.head(10)

The next cell loads the state codes that Plotly needs to place each state on the map.

In [None]:
# Load only the code/state columns, then trim surrounding whitespace from both
# so the state names match the ones in the case data when merging.
state_codes = pd.read_csv("Data/covid19/State_Codes.csv", usecols=['code', 'state'])
state_codes[['state', 'code']] = state_codes[['state', 'code']].apply(
    lambda column: column.str.strip()
)

In [None]:
# The trailing semicolon suppresses the notebook output for this cell.
state_codes;

In [None]:
# Attach the state code to every row. This is a left merge on the columns the
# two frames share, so all rows of `d` are kept.
d = pd.merge(d, state_codes, how='left')

In [None]:
# Check that the "code" column was added by the merge.
d.head(5)

### Plots

In [None]:
# The 20 states with the highest latest daily case count, in ascending order,
# as a two-column frame (state, cases_new) ready for plotting.
d1 = (
    d.groupby("state")
    .last()
    .sort_values("cases_new")["cases_new"]
    .tail(20)
    .to_frame()
    .reset_index()
)

In [None]:
# Bar chart of the top-20 states; the title carries the most recent date in `d`.
fig = px.bar(
    d1,
    x="state",
    y="cases_new",
    title=f"Top 20 States with the most daily Covid-19 Cases on {d.date.max()}",
    labels={"cases_new": "Number of cases", "state": "State"},
)
fig.show()

In [None]:
# The same top-20 ranking drawn through pandas' matplotlib plotting as a
# horizontal bar chart.
var = "cases_new"
d.groupby("state").last().sort_values(var, ascending=True)[var].tail(20).plot.barh()
plt.xlabel("New Cases")
plt.xticks(rotation=-45)
# The trailing semicolon keeps the notebook from echoing the returned object.
plt.ylabel("State");

In [None]:
# Animated state-level map of daily new cases, one frame per date. Rows are
# sorted by date first so the animation frames appear in chronological order.
fig = px.choropleth(
    d.sort_values(["date", "state"]),
    locations="code",
    locationmode="USA-states",
    color="cases_new",
    color_continuous_scale="Reds",
    range_color=(0, 5000),
    hover_name="state",
    animation_frame="date",
)

fig.update_layout(
    title_text="New Covid 19 Cases",
    geo_scope="usa",  # limit map scope to the USA
)

### 3. National Level Data & Selected States 
#### Line plots with Plotly

In [None]:
# National series: sum the daily new cases over all states for each date, then
# smooth with a 7-day rolling mean. The result has columns date / cases_new.
national = d.groupby("date")["cases_new"].sum().rolling(7).mean().reset_index()

In [None]:
# Default plot: the 7-day rolling mean of national daily cases over time,
# with no extra styling.
px.line(national,x='date',y="cases_new")

#### Selected States: Illinois, California, Texas

In [None]:
# One date-indexed frame per selected state, holding only the 7-day rolling
# mean of daily cases.
ill = d.loc[d["state"] == "Illinois", ["date", "cases_new_rm7"]].set_index("date")
ca = d.loc[d["state"] == "California", ["date", "cases_new_rm7"]].set_index("date")
tx = d.loc[d["state"] == "Texas", ["date", "cases_new_rm7"]].set_index("date")

In [None]:
# Put the three series side by side on the date index. Illinois keeps the
# plain column name; California and Texas get the "_ca" / "_tx" suffixes.
# These are left joins, so only dates present for Illinois are kept.
data = (
    ill.join(ca, rsuffix="_ca")
    .join(tx, rsuffix="_tx")
    .reset_index()
)

In [None]:
# The default plot: one line per state column, Plotly Express defaults otherwise.
fig = px.line(
    data,
    x="date",
    y=["cases_new_rm7", "cases_new_rm7_ca", "cases_new_rm7_tx"],
    title="COVID-19 Rolling 7-days Average cases",
)
fig.show()

In [None]:
# Hand-built version of the same chart with graph_objects: one line trace per
# state, each with an explicit colour and legend name.
fig = go.Figure(
    data=[
        go.Scatter(x=data["date"], y=data["cases_new_rm7"], mode="lines",
                   line={"color": "darkcyan"}, name="Illinois"),
        go.Scatter(x=data["date"], y=data["cases_new_rm7_ca"], mode="lines",
                   line={"color": "slategrey"}, name="California"),
        go.Scatter(x=data["date"], y=data["cases_new_rm7_tx"], mode="lines",
                   line={"color": "firebrick"}, name="Texas"),
    ]
)

# Titles for the figure and both axes.
fig.update_layout(
    title="COVID-19 Rolling 7-days Average cases",
    xaxis_title="Date",
    yaxis_title="Cases",
)

# Axis styling: black axis lines, no vertical grid lines, outside ticks on the
# x axis, and a white plot background.
fig.update_layout(
    xaxis={
        "showline": True,
        "showgrid": False,
        "showticklabels": True,
        "linewidth": 2,
        "linecolor": "black",
        "ticks": "outside",
    },
    yaxis={"showline": True, "linecolor": "black"},
    plot_bgcolor="white",
)
fig.show()