In [None]:
# libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## Load and prepare the case data

In [None]:
# get nytimes covid data (long format: the cell uses the 'date', 'state' and 'cases' columns)
url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv'

# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in pandas 2.0
# (it now raises TypeError). `on_bad_lines='skip'` is the documented replacement
# and likewise skips malformed rows instead of failing the whole download.
df = (
    pd.read_csv(url, on_bad_lines='skip')
    # pivot data from long to wide: one row per date, one column per state
    .pivot(index='date', columns='state', values='cases')
    # clean up the data so blanks are zeros and numbers are integers
    .fillna(0)
    .astype(int)
    # reset index (to make graph work): 'date' becomes the first regular column
    # and the frame gets a 0-based integer index that the plot cells rely on
    .reset_index()
)

# Make dataframe for DIFFERENCE between two days (day-over-day change per state).
# The first row is NaN because there is no previous day to subtract from it.
df_delta = df.drop(columns='date').diff()

# put the 'date' column back in front so both frames share the same layout
df_delta.insert(0, 'date', df['date'])

## Total Cases

In [None]:
# Note: `df` has a 0-based integer index (it was rebuilt with `reset_index()`),
# so `df.index[-1]` == len(df) - 1, i.e. with 74 days of data df.index[-1] == 73.
# The 'date' values are plotted as string categories, which matplotlib places at
# x = 0, 1, 2, ..., so these index values can be used directly as x coordinates.

state = "Tennessee"
days_shown = 30  # width of the visible window, in days

# The 'seaborn-darkgrid' style was renamed to 'seaborn-v0_8-darkgrid' in
# matplotlib 3.6 and the old name was later removed; use whichever name this
# installation provides so the cell runs on both old and new versions.
#plt.style.use('fivethirtyeight')
style_name = 'seaborn-v0_8-darkgrid'
if style_name not in plt.style.available:
    style_name = 'seaborn-darkgrid'
plt.style.use(style_name)

my_dpi = 96
fig, ax = plt.subplots(figsize=(1080/my_dpi, 480/my_dpi), dpi=my_dpi)
# ax.set_yscale("log")

# lines for each state
for column in df.columns.drop('date'):
    ax.plot(df['date'], df[column], marker='', color='grey', linewidth=1, alpha=0.9, label=column)

# redo the line for the highlighted state so it stands out
ax.plot(df['date'], df[state], marker='', color='orange', linewidth=4, alpha=0.7)

# rotate the date labels so they do not overlap
ax.tick_params(axis='x', labelrotation=90)

# Change xlim to the most recent `days_shown` days (clamped so a frame with
# fewer rows than that still works)
last_x = df.index[-1]
first_x = df.index[-min(days_shown, len(df))]
ax.set_xlim(first_x, last_x)

# annotate the lines: put each state's name at its most recent value
latest = df.iloc[-1]
for name, value in latest.drop('date').items():
    if name != state:
        ax.text(last_x, value, name, horizontalalignment='left', size='small', color='grey')

# And add a special annotation for the group we are interested in.
# Pass the scalar latest value; a one-element Series is not a valid coordinate.
ax.text(last_x, latest[state], state, horizontalalignment='left', size='small', color='orange')

# Add titles
ax.set_title("SARS-CoV-2 Total Cases by Date", loc='left', fontsize=12, fontweight=0, color='orange')
ax.set_xlabel("Time")
ax.set_ylabel("Cases")
plt.show()

## New Cases

In [None]:
# Note: `df_delta` shares the 0-based integer index of `df`, so
# `df_delta.index[-1]` == len(df_delta) - 1, i.e. with 74 days of data it is 73.
# The 'date' values are plotted as string categories, which matplotlib places at
# x = 0, 1, 2, ..., so these index values can be used directly as x coordinates.

state = "Tennessee"
days_shown = 30  # width of the visible window, in days

# The 'seaborn-darkgrid' style was renamed to 'seaborn-v0_8-darkgrid' in
# matplotlib 3.6 and the old name was later removed; use whichever name this
# installation provides so the cell runs on both old and new versions.
#plt.style.use('fivethirtyeight')
style_name = 'seaborn-v0_8-darkgrid'
if style_name not in plt.style.available:
    style_name = 'seaborn-darkgrid'
plt.style.use(style_name)

my_dpi = 96
fig, ax = plt.subplots(figsize=(1080/my_dpi, 620/my_dpi), dpi=my_dpi)
# ax.set_yscale("log")

# lines for each state
for column in df_delta.columns.drop('date'):
    ax.plot(df_delta['date'], df_delta[column], marker='', color='grey', linewidth=1, alpha=0.9, label=column)

# redo the line for the highlighted state so it stands out
ax.plot(df_delta['date'], df_delta[state], marker='', color='orange', linewidth=4, alpha=0.7)

# rotate the date labels so they do not overlap
ax.tick_params(axis='x', labelrotation=90)

# Change xlim to the most recent `days_shown` days (clamped so a frame with
# fewer rows than that still works)
last_x = df_delta.index[-1]
first_x = df_delta.index[-min(days_shown, len(df_delta))]
ax.set_xlim(first_x, last_x)

# annotate the lines: put each state's name at its most recent daily change
latest = df_delta.iloc[-1]
for name, value in latest.drop('date').items():
    if name != state:
        ax.text(last_x, value, name, horizontalalignment='left', size='small', color='grey')

# And add a special annotation for the group we are interested in.
# Pass the scalar latest value; a one-element Series is not a valid coordinate.
ax.text(last_x, latest[state], state, horizontalalignment='left', size='small', color='orange')

# Add titles
ax.set_title("SARS-CoV-2 New Cases by Date", loc='left', fontsize=12, fontweight=0, color='orange')
ax.set_xlabel("Time")
ax.set_ylabel("Cases")
plt.show()

## Tennessee data for the past 10 days

In [None]:
# total cases for the selected state, 10 most recent rows
df[state].iloc[-10:]

In [None]:
# day-over-day change for the selected state, 10 most recent rows
df_delta[state].iloc[-10:]

## Population Data

In [None]:
# get data from wikipedia
WIKI_URL = "https://simple.wikipedia.org/wiki/List_of_U.S._states_by_population"

# read_html returns a list with one dataframe per table it finds on the page;
# the table used here is the first one
all_tables = pd.read_html(WIKI_URL, header=0)
population_table = all_tables[0]

# keep only the "Population estimate, July 1, 2019[2]" and "State" columns
wanted_columns = ["Population estimate, July 1, 2019[2]", "State"]
is_wanted = population_table.columns.isin(wanted_columns)

df_pop = (
    population_table.loc[:, is_wanted]
    # Rename "Population estimate, July 1, 2019[2]" to "Population"
    .rename(columns={"Population estimate, July 1, 2019[2]": "Population"})
    # Drop the territories and totals, etc. (rows identified by their position in the scraped table)
    .drop([31, 52, 53, 54, 55, 56, 57, 58, 59])
    # sort alphabetically by state name, then use the state name as the index
    .sort_values("State")
    .set_index("State")
)

print(df_pop)

In [None]:
# Disabled experiment: a copy of the daily-change frame with the "New York" and
# "New Jersey" columns removed.
# NOTE(review): presumably meant to keep those two states from dominating the
# plot scale -- confirm, or delete this cell if it is no longer needed.
# df_test = df_delta.copy()
# df_test.drop(["New York", "New Jersey"], axis=1, inplace=True)