# Virus Tracking

The CDC tracks virus cases by state, where each state reports its current count to the CDC.
I want to see the number of cases in Texas as they grow over time, broken down by city if possible.

I have found only one website that offers anything like this kind of data, and it shows only the "current" cases:
https://dshs.texas.gov/news/updates.shtm

DSHS updates its data source every day at noon, so if I *add* to my data source every day at 1pm (rather than replacing the current data), I should be able to see the change over time.

In [143]:
# Step 0: Import all the things!

import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime as dt
import geopy
from geopy.extra.rate_limiter import RateLimiter
from IPython.display import HTML, Markdown, clear_output
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import urllib3

%run controls.ipynb

def datestamp(date):
    """Format a date as the ``M/D/YYYY`` text used in the ``date`` column.

    Month and day are NOT zero-padded (``3/5/2020``, not ``03/05/2020``), so
    the result can be compared directly against the ``date`` values in the
    case data.

    Args:
        date: Any object exposing ``month``, ``day`` and ``year`` attributes,
            e.g. ``datetime.date`` or ``datetime.datetime``.

    Returns:
        The date as a string such as ``"3/25/2020"``.
    """
    return f"{date.month}/{date.day}/{date.year}"

# Reference dates: the moment this cell runs and the same time one day
# earlier, followed by their text forms for matching the "date" column.
today = dt.datetime.today()
yesterday = today - dt.timedelta(days=1)

today_text = datestamp(today)
yesterday_text = datestamp(yesterday)

# Now that data loading is done in a separate script, this should be all I need to load the data.
# The "Unnamed: 0" column is dropped right away (presumably a saved index
# from the loader script -- it is not used anywhere in this notebook).
df_num_cases = pd.read_csv("data.csv")
df_num_cases = df_num_cases.drop(["Unnamed: 0"], axis=1)

# The "date" column holds un-padded M/D/YYYY text, so a plain string
# min()/max() compares lexicographically ("3/9/2020" > "3/25/2020") and picks
# the wrong ends of the range.  Parse to real datetimes before comparing.
_parsed_dates = pd.to_datetime(df_num_cases["date"], format="%m/%d/%Y")

min_date = _parsed_dates.min().to_pydatetime()
max_date = _parsed_dates.max().to_pydatetime()

# Text forms use the same un-padded M/D/YYYY layout as the "date" column.
min_date_text = f"{min_date.month}/{min_date.day}/{min_date.year}"
max_date_text = f"{max_date.month}/{max_date.day}/{max_date.year}"

# Denominator for the "% of Texas counties" figure shown later.
total_num_counties = 254

The next data set will show you how many cases are recorded in each county as of today.

In [144]:
# It's useful to have today's and yesterday's cases in their own DataFrames.
cases_today = df_num_cases.loc[df_num_cases["date"] == today_text]
cases_yesterday = df_num_cases.loc[df_num_cases["date"] == yesterday_text]

# Set the row limit to the table's length so every county row is displayed.
pd.options.display.max_rows = len(cases_today)
display(cases_today)

Unnamed: 0,county,date,num_cases,latitude,longitude
215,Dallas,3/25/2020,169,32.776272,-96.796856
216,Harris,3/25/2020,134,29.811977,-95.374125
217,Travis,3/25/2020,98,30.287857,-97.756139
218,Tarrant,3/25/2020,71,32.751366,-97.335696
219,Bexar,3/25/2020,69,29.426399,-98.510478
220,Fort Bend,3/25/2020,46,29.511218,-95.780735
221,Collin,3/25/2020,45,33.160963,-96.606098
222,Denton,3/25/2020,30,33.183879,-97.141342
223,McLennan,3/25/2020,23,31.548769,-97.218925
224,Montgomery,3/25/2020,23,30.301949,-95.506594


In [145]:
# Counties that have rows today but had none yesterday.  Columns are read by
# name rather than by position so a change in column order cannot silently
# pick the wrong field.
yesterday_counties = cases_yesterday["county"].tolist()
today_counties = cases_today["county"].tolist()

# A set makes each membership test O(1); today's row order is preserved.
seen_yesterday = set(yesterday_counties)

# Naming the column at construction time also works for an empty result;
# assigning `.columns` afterwards raises when no counties were added.
new_counties = pd.DataFrame(
    {"county": [county for county in today_counties if county not in seen_yesterday]}
)

# Count the counties actually listed below, so the headline and the table
# always agree (a net row difference is off whenever a county drops out).
num_counties_added = len(new_counties)
display(Markdown(f"# *{num_counties_added}* counties were added from {yesterday_text} to {today_text}."))
display(new_counties)

# *34* counties were added from 3/24/2020 to 3/25/2020.

Unnamed: 0,county
0,Guadalupe
1,Jefferson
2,Wharton
3,Atascosa
4,Eastland
5,Grimes
6,Hardin
7,Kaufman
8,Kendall
9,Milam


In [148]:
# Output widget that fct() below clears and re-renders the totals into.
total_output = widgets.Output()

def fct(date=None):
    """Render the statewide case totals for ``date`` into ``total_output``.

    Shows the total number of cases recorded for ``date``, the change from
    the previous calendar day, and the share of Texas counties that have rows
    for ``date``.

    Args:
        date: Object with ``month``, ``day`` and ``year`` attributes (e.g. a
            ``datetime`` or pandas ``Timestamp``) that supports subtracting a
            ``timedelta``.  Defaults to the current date at call time.
    """
    if date is None:
        # Resolve "today" on each call; a dt.datetime.today() default in the
        # signature is evaluated once, when the function is defined.
        date = dt.datetime.today()

    prevdate = date - dt.timedelta(days=1)

    # Same un-padded M/D/YYYY layout as the "date" column.
    date_text = f"{date.month}/{date.day}/{date.year}"
    prevdate_text = f"{prevdate.month}/{prevdate.day}/{prevdate.year}"

    date_rows = df_num_cases.loc[df_num_cases["date"] == date_text]
    prevdate_rows = df_num_cases.loc[df_num_cases["date"] == prevdate_text]

    date_total = int(date_rows["num_cases"].astype('int32').sum())
    prevdate_total = int(prevdate_rows["num_cases"].astype('int32').sum())

    # "+d" writes the sign exactly once in both directions; prepending "-" to
    # an already-negative number produced "--5".
    delta_total_text = f"{date_total - prevdate_total:+d}"

    # Use the rows of the selected date, not today's, so the percentage
    # follows the date picker.
    county_percent = int((len(date_rows) / total_num_counties) * 100)

    with total_output:
        clear_output()
        display(HTML(f"<h3><b>{date_total}</b> cases found in Texas as of {date_text} ({delta_total_text}).</h3>"))
        display(HTML(f"<h3>{county_percent}% of Texas counties have confirmed cases.</h3>"))

# Date picker spanning the first and last dates found in the data.
# NOTE(review): DatePicker is not defined in this notebook -- presumably it
# comes from controls.ipynb (%run in the first cell).  Confirm that its
# `observe` attribute is meant to be assigned a callback like this.
w=DatePicker(start=min_date_text,end=max_date_text,freq='D',fmt='%m/%d/%Y')
w.observe=fct
w.display()

# Show the output area that fct() writes into.
display(total_output)

# Initial render, using the earliest date in the data.
fct(min_date)

SelectionSlider(continuous_update=False, description='date', options=(('03/20/2020', Timestamp('2020-03-20 00:…

Output()

In [149]:
# Everybody loves a good chart.  This one should become more useful over time.

import altair as alt

# Parse the M/D/YYYY text into real datetimes before grouping.  Grouping and
# plotting the raw strings orders the days lexicographically
# ("3/9/2020" after "3/25/2020"), which scrambles the line.
totals = (
    df_num_cases.assign(
        date=pd.to_datetime(df_num_cases["date"], format="%m/%d/%Y"),
        num_cases=df_num_cases["num_cases"].astype("int"),
    )
    .groupby("date", as_index=False)["num_cases"]
    .sum()
)

# A temporal x-axis keeps the days in chronological order and spaces them by
# actual elapsed time.
alt.Chart(totals).mark_line().encode(
    x='date:T',
    y='num_cases:Q'
).properties(
  width=800
).interactive(bind_y=False)

So far the Texas cases are increasing linearly.

In [151]:
# The map below is rendered for max_date, so the caption names that date
# rather than today's; the two differ whenever today's data is not loaded yet.
Markdown(f"This map shows the locations where virus has been found, with the size of the circle corresponding to the number of cases as of *{max_date_text}*.")

This map shows the locations where virus has been found, with the size of the circle corresponding to the number of cases as of *3/25/2020*.

In [158]:
import folium
from folium import plugins

def fct(date):
    """Display a folium map of the case counts recorded for ``date``.

    The map gets a heat-map layer weighted by ``num_cases`` (summed per
    latitude/longitude pair) plus one text label per row showing that row's
    case count.

    NOTE: this rebinds the name ``fct`` that the totals cell defined earlier.

    Args:
        date: Object with ``month``, ``day`` and ``year`` attributes; rows
            whose ``date`` text matches it are plotted.
    """
    county_map = folium.Map(
        location=[31.9686, -99.9018],
        tiles='cartodbpositron',
        zoom_start=6
    )

    # Same un-padded M/D/YYYY layout as the "date" column.
    date_text = f"{date.month}/{date.day}/{date.year}"
    df_cases_date = df_num_cases.loc[df_num_cases["date"] == date_text]

    # One [latitude, longitude, weight] triple per distinct location.
    heat_points = (
        df_cases_date[['latitude', 'longitude', 'num_cases']]
        .groupby(['latitude', 'longitude'])
        .sum()
        .reset_index()
        .values.tolist()
    )
    plugins.HeatMap(data=heat_points, radius=24, max_zoom=16).add_to(county_map)

    # A plain loop, since the markers are added purely for their side effect.
    for row in df_cases_date.itertuples(index=False):
        # The label's <b> tag is now closed; it used to end with a second "<b>".
        label = folium.DivIcon(html=f"<div style='font-size:1.3em'><b>{row.num_cases}</b></div>")
        folium.Marker(location=[row.latitude, row.longitude], icon=label).add_to(county_map)

    display(county_map)

# Disabled interactive variant: a DatePicker wired to fct, with an Output
# widget to host the map.  Left commented out; the map is rendered once below.
#w = DatePicker(start=min_date_text,end=max_date_text,freq='D',fmt='%m/%d/%Y')
#w.observe = fct
#w.display()

#map_output = widgets.Output()
#display(map_output)
#fct(min_date)

# Render the map once, for max_date.
fct(max_date)

* Note: Any data in the Gulf of Mexico represents the cases that are "Pending County Assignment".

The next map shows how much growth each area has had in the last 24 hours.  Circle size corresponds to the number of new cases.

In [155]:
# Per-county change in case count from yesterday to today, drawn as circles
# sized by the number of new cases.

# Index yesterday's counts by county once (first row per county wins) instead
# of rescanning yesterday's rows for every county in today's data.
yesterday_counts = {}
for county, count in zip(cases_yesterday["county"], cases_yesterday["num_cases"]):
    yesterday_counts.setdefault(county, int(count))

# A county with no row yesterday counts all of today's cases as new.
# Columns are read by name, and both sides of the subtraction are ints.
cases_delta = [
    [county, int(count) - yesterday_counts.get(county, 0), latitude, longitude]
    for county, count, latitude, longitude in zip(
        cases_today["county"],
        cases_today["num_cases"],
        cases_today["latitude"],
        cases_today["longitude"],
    )
]

# Naming the columns at construction time also works when there are no rows
# for today; assigning `.columns` afterwards raises on an empty frame.
df_cases_delta = pd.DataFrame(cases_delta, columns=["county", "num_cases", "latitude", "longitude"])
df_cases_delta = df_cases_delta.loc[df_cases_delta["num_cases"] > 0]

county_map = folium.Map(
    location=[31.9686, -99.9018],
    tiles='cartodbpositron',
    zoom_start=6,
)

# First pass: one circle per county, radius equal to the number of new cases.
for row in df_cases_delta.itertuples(index=False):
    folium.CircleMarker(location=[row.latitude, row.longitude], radius=row.num_cases).add_to(county_map)

# Second pass: a text label with the count (the <b> tag is now closed).
for row in df_cases_delta.itertuples(index=False):
    label = folium.DivIcon(html=f"<div style='font-size:1.3em'><b>{row.num_cases}</b></div>")
    folium.Marker(location=[row.latitude, row.longitude], icon=label).add_to(county_map)

county_map

### 