In [None]:
# We can follow the recipe at 
# https://towardsdatascience.com/creating-bar-chart-race-animation-with-python-cdb01144074e
# for generating bar chart race animations.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the county-level table from a CSV file; the relative path is resolved
# against the notebook's working directory.
counties = pd.read_csv("us-counties.csv")

In [None]:
# Preview the first rows of the freshly loaded table.
counties.head()

In [None]:
# Parse the "date" column into pandas datetimes.
# Note: this replaces the original column in place rather than adding a new one.
counties["date"] = pd.to_datetime(counties["date"])

In [None]:
# Fill gaps one column at a time.  Assigning through a boolean row mask with
# no column label (counties[mask] = value) overwrites *every* column of the
# matching rows, which destroys the county/state/date of each row that lacks a
# FIPS code -- New York City included -- so the fix-ups below would never match.
missing_fips = counties["fips"].isna()
counties["deaths"] = counties["deaths"].fillna(0)
# 999999 is the placeholder for "no FIPS code"; cast once no NaN remains.
counties["fips"] = counties["fips"].fillna(999999).astype(np.int64)

# Give the two largest county-like areas that lack a FIPS code a synthetic one.
counties.loc[counties.county == "New York City", "fips"] = 36998
# Only touch Puerto Rico rows that had no code, so valid codes are preserved.
counties.loc[missing_fips & (counties.state == "Puerto Rico"), "fips"] = 72999

# Boolean masks select rows directly; no need to go through np.where positions.
# NOTE: despite its name, `febdata` is the snapshot for 2021-01-31.
febdata = counties.loc[counties.date == pd.Timestamp("2021-01-31")]
cookco = counties.loc[counties.fips == 17031]
deathsbycounty = febdata.sort_values("deaths")

In [None]:
# Population lookup that claims to package 2020 census data per county; see
# https://github.com/nytimes/covid-19-data/issues/180
POPULATION_URL = "https://raw.githubusercontent.com/Zoooook/CoronavirusTimelapse/master/static/population.json"
population = pd.read_json(POPULATION_URL)
population["fips"] = population["us_county_fips"].astype(np.int64)
# Default (inner) join: rows whose FIPS code is absent from either table are dropped.
deathswithpop = pd.merge(counties, population, on="fips")
deathswithpop.head()

In [None]:
# First step towards a "county,state" label: the county name plus a comma.
deathswithpop["countystate"] = deathswithpop["county"] + ","
deathswithpop.iloc[:1]

In [None]:
# Full label "county,state" (comma, no space) used as the pivot column name.
deathswithpop["countystate"] = (
    deathswithpop["county"] + "," + deathswithpop["state"]
)
deathswithpop.iloc[:1]

In [None]:
# Wide table: one row per date, one column per "county,state" label, holding deaths.
pivot = pd.pivot_table(
    deathswithpop,
    index="date",
    columns="countystate",
    values="deaths",
)

In [None]:
# Preview the first rows of the date-by-county table.
pivot.head()

In [None]:
# Missing (date, county) cells become 0.
pivot = pivot.fillna(0)
pivot.iloc[:1]

In [None]:
# Total across all columns for each date (one value per row).
pivot.sum(axis=1)

In [None]:
# Per-date total over the county columns.  Any existing "sum" column is left
# out of the total so that re-running this cell does not double the figure.
pivot["sum"] = pivot.drop(columns="sum", errors="ignore").sum(axis=1)

In [None]:
# Seeing that number 700,000 makes me want to sneak in a line plot
# of the per-date total.
fig, ax = plt.subplots()
ax.plot(pivot["sum"])

In [None]:
# I have to admit this is "less than reassuring" in general.  


In [None]:
# Total over all dates for each column (one value per column, which includes
# the "sum" column once it has been added).
pivot.sum(axis=0)

In [None]:
# What's with Ada Co, ID?  These are sums of the cumulative deaths -- not very meaningful.

In [None]:
# A recipe for generating "bar chart races" in python:
# https://towardsdatascience.com/creating-bar-chart-race-animation-with-python-cdb01144074e

In [None]:
# Collect every column label that ranks among the N largest values on at
# least one date.  (Any non-county column in `pivot` is ranked as well.)
N = 10
top_counties = set()
for day, deaths_row in pivot.iterrows():
    ranked = deaths_row.sort_values(ascending=False)
    top_counties.update(ranked.index[:N])

In [None]:
# Display the collected labels.
top_counties

In [None]:
# "sum" is the running total, not a county.  discard() is a no-op when the
# label is already gone, so this cell can be re-run without a KeyError.
top_counties.discard("sum")

In [None]:
# Display the labels again after removing "sum".
top_counties

In [None]:
# Select the columns with a sorted list: pandas 2.x rejects a set as an
# indexer, and a list gives the same column order on every run.
top_county_df = pivot[sorted(top_counties)]

In [None]:
import bar_chart_race as bcr

In [None]:
# Render the race from every 60th row of the table.
bcr.bar_chart_race(
    df=top_county_df.iloc[::60],
    n_bars=10,
    sort="desc",
    title="Cumulative deaths from COVID by county",
    filename="barchartrace.mp4",
)


In [None]:
# Rendering took a long time for me, so plot only every 60th row --
# roughly two-month intervals.
bcr.bar_chart_race(
    df=top_county_df.iloc[::60],
    n_bars=10,
    sort="desc",
    title="Cumulative deaths from COVID by county",
    filename="barchartrace.mp4",
)
# This runs too fast.  NYC is conspicuously missing.

In [None]:
# Same race, sampled every 14th row (biweekly).
bcr.bar_chart_race(
    df=top_county_df.iloc[::14],
    n_bars=10,
    sort="desc",
    title="Cumulative deaths from COVID by county",
    filename="barchartrace-biweekly.mp4",
)

In [None]:
# This is more acceptable.
# Same race, sampled every 7th row (weekly).
bcr.bar_chart_race(
    df=top_county_df.iloc[::7],
    n_bars=10,
    sort="desc",
    title="Cumulative deaths from COVID by county",
    filename="barchartrace-weekly.mp4",
)

In [None]:
# Every column label of the pivot table, as a plain Python list.
pivot.columns.tolist()

In [None]:
# Column labels are built as "county,state" with no space after the comma,
# so the probe has to use the same format or it can never match.
"New York,New York" in pivot.columns


In [None]:
# Column labels are built as "county,state" with no space after the comma,
# so the probe has to use the same format or it can never match.
"New York City,New York" in pivot.columns


In [None]:
# First five rows when ordered by ascending death count.
deathswithpop.sort_values(by="deaths", ascending=True).iloc[:5]

In [None]:
# First five rows when ordered by descending death count.
deathswithpop.sort_values(by="deaths", ascending=False).iloc[:5]

In [None]:
# Rows still carrying the 999999 placeholder FIPS code, largest death counts first.
placeholder_rows = deathswithpop.loc[deathswithpop["fips"] == 999999]
placeholder_rows.sort_values(by="deaths", ascending=False).iloc[:5]

In [None]:
# Ok.  NYC didn't survive the merge. 
