In [1]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
import geopandas as gpd
import pandas as pd
import math
import calendar

In [2]:
#Read in LA neighborhood map
#Retrieved from http://boundaries.latimes.com/set/la-county-neighborhoods-current/
nbd= gpd.read_file("data/la-county-neighborhoods-current_nocatalina.geojson")
#nbd['name'][nbd.name.str.contains('Armenia')] 

In [3]:
nbd.head()

Unnamed: 0,kind,external_id,name,slug,set,metadata,resource_uri,geometry
0,L.A. County Neighborhood (Current),acton,Acton,acton-la-county-neighborhood-current,/1.0/boundary-set/la-county-neighborhoods-curr...,"{'sqmi': 39.3391089485, 'type': 'unincorporate...",/1.0/boundary/acton-la-county-neighborhood-cur...,"MULTIPOLYGON (((-118.20262 34.53899, -118.1894..."
1,L.A. County Neighborhood (Current),adams-normandie,Adams-Normandie,adams-normandie-la-county-neighborhood-current,/1.0/boundary-set/la-county-neighborhoods-curr...,"{'sqmi': 0.805350187789, 'type': 'segment-of-a...",/1.0/boundary/adams-normandie-la-county-neighb...,"MULTIPOLYGON (((-118.30901 34.03741, -118.3004..."
2,L.A. County Neighborhood (Current),agoura-hills,Agoura Hills,agoura-hills-la-county-neighborhood-current,/1.0/boundary-set/la-county-neighborhoods-curr...,"{'sqmi': 8.14676029818, 'type': 'standalone-ci...",/1.0/boundary/agoura-hills-la-county-neighborh...,"MULTIPOLYGON (((-118.76193 34.16820, -118.7263..."
3,L.A. County Neighborhood (Current),agua-dulce,Agua Dulce,agua-dulce-la-county-neighborhood-current,/1.0/boundary-set/la-county-neighborhoods-curr...,"{'sqmi': 31.4626319451, 'type': 'unincorporate...",/1.0/boundary/agua-dulce-la-county-neighborhoo...,"MULTIPOLYGON (((-118.25468 34.55830, -118.2555..."
4,L.A. County Neighborhood (Current),alhambra,Alhambra,alhambra-la-county-neighborhood-current,/1.0/boundary-set/la-county-neighborhoods-curr...,"{'sqmi': 7.62381430605, 'type': 'standalone-ci...",/1.0/boundary/alhambra-la-county-neighborhood-...,"MULTIPOLYGON (((-118.12175 34.10504, -118.1168..."


In [4]:
#Get boundary file to align with County Public Health nomenclature
#Still more need to be understood and fixed
nbd.to_csv("nocatalina.csv", columns=['name'])
nbd.replace({'Bel-Air': 'Bel Air', 
             'West Whittier-Los Nietos':'West Whittier/Los Nietos',
             'Unincorporated Santa Monica Mountains':'Santa Monica Mountains',
            'Mount Washington':'Mt. Washington',
            'Lopez/Kagel Canyons':'Kagel/Lopez Canyons',
            'View Park-Windsor Hills':'View Park/Windsor Hills',
            'Lake View Terrace':'Lakeview Terrace',
            'Playa del Rey':'Playa Del Rey', #should really be fixed in source
            'Silver Lake': 'Silverlake',#should really be fixed in source
            'Mid-City': 'Mid-city',#should really be fixed in source
            'East San Gabriel': 'Northeast San Gabriel' #correct?
#            'Unincorporated Catalina Island': 'Santa Catalina Island'
            }, inplace=True)

In [5]:
##GEO LABELING setup
#Read in LA county regions - for use in labeling
regs = gpd.read_file("data/la-county-regions-current.geojson")
#regs
regsd = regs.drop([0, 5, 6, 7, 10,14]) #Keep only desired regions

#Rename Central LA Hollywood to better match its centroid
d = {'Central L.A.':'Hollywood'}
regsd = regsd.replace(d)
regsd

#Store Long Beach centroid to use for plotting 'Harbor' data label - so that it's not in the ocean, due to Catalina
nbd[nbd['name'].str.contains('Long Beach')].geometry.centroid
longbeach = (-118.16071, 33.80522)

In [None]:
def daily_map(mm, dd, usemax=0, labels=False, savename=False):
    
    month = str(mm).zfill(2)
    day = str(dd).zfill(2)
    strmonth = calendar.month_abbr[mm]

    #Read in daily update
    #Retrieved from http://publichealth.lacounty.gov/media/Coronavirus/locations.htm
    dailyrepi = pd.read_csv('data/covid_'+month+day+'.csv')

    #We will drop suppressed case counts
    dailyrepi['count'] = dailyrepi['count'].apply(pd.to_numeric, errors='coerce')

    dailyrep = dailyrepi[dailyrepi['count'].notna()]

    #Find values of note: number under investigation, total
    underinv = dailyrep[dailyrep['city'].str.contains('Under Investigation')]['count'].item()
    nconfirmed = dailyrep[dailyrep['city'].str.contains('Total')]['count'].item()
    
    #Use a persistent scalebar across plots
    if usemax:
        vmax = usemax
    else:
        vmax = dailyrep[~dailyrep["city"].str.contains("Under Investigation") & ~dailyrep["city"].str.contains("Total") & ~dailyrep["city"].str.contains("AGGREGATE") ]['count'].max()
        print('Normalizing to', vmax)#, '\n For context:', ~dailyrep["count"].str.contains("suppressed").nlargest(5))
        
    #Add geographic information for neighborhoods
    merged = nbd.set_index('name').join(dailyrep.set_index('city'))
    


    #Plot
    fig, ax = plt.subplots(1,1, figsize=(10, 11))
    nbd.plot(ax=ax, color='#eeeeee',edgecolor='white', linewidth=1)
    divider = make_axes_locatable(ax)
    cax1 = divider.append_axes("right", size="5%", pad=0.05) #cax1 = divider.append_axes("bottom", size="5%", pad=-3)
    ax = merged.plot(column='count', vmin =0, vmax = vmax, cmap='Reds', ax=ax, cax=cax1, legend=True,edgecolor='white', linewidth=0.4)#legend_kwds={"aspect":"10"})#orientation": "horizontal"})
    
    #Add labels for case counts
    # merged.apply(lambda x: ax.annotate(x.count(), xy=x.geometry.centroid.coords[0], ha='center'),axis=1);    
    for idx, row in merged.iterrows():
        if not math.isnan(row['count']):
            if (row.external_id=='long-beach'):
                ha = 'left'
                ax.annotate(s='  '+str(int(row['count'])), xy=row.geometry.centroid.coords[0], horizontalalignment=ha,color=color, fontsize=6, weight='bold')
            else:
                ha = 'center'
                color='white'
                ax.annotate(s=int(row['count']), xy=row.geometry.centroid.coords[0], horizontalalignment=ha, color=color, fontsize=6, weight='bold')

    #Add labels for regions
    # regsd.plot(facecolor='b',edgecolor='k', linewidth=1, ax=ax, alpha = 0.2) #Plot region boundaries
    if labels:
        for idx, row in regsd.iterrows():
            if (idx==2):
                fs = 8
                ax.annotate(s=row['name'], xy=row.geometry.centroid.coords[0], horizontalalignment='center', color='#767676', fontsize=fs, alpha = 0.95, weight='bold')
            elif (idx==4):
                fs=11
                ax.annotate(s=row['name'], xy=longbeach, horizontalalignment='right', verticalalignment='top', color='#767676', fontsize=fs, alpha = 0.95, weight='bold')
            else:
                fs = 11
                ax.annotate(s=row['name'], xy=row.geometry.centroid.coords[0], horizontalalignment='center', color='#767676', fontsize=fs, alpha = 0.95, weight='bold')

    #Adjust plot display
    ax.set_ylim([33.7, 34.85])
    ax.axis('off')
    ax.set_title('Confirmed COVID-19 Cases in LA County', fontdict={'fontsize': '20', 'fontweight' : 'demibold'})
    ax.annotate(strmonth+' '+str(dd),xy=(0.1, .2),  xycoords='figure fraction', horizontalalignment='left', verticalalignment='top', fontsize=20, color='k', weight='demibold')
    ax.annotate('Total: '+str(int(nconfirmed)),xy=(0.1, .15),  xycoords='figure fraction', horizontalalignment='left', verticalalignment='top', fontsize=18, color='#555555', weight='demibold')
    ax.annotate('Under investigation: '+str(int(underinv))+' (not shown)',xy=(0.1, .11),  xycoords='figure fraction', horizontalalignment='left', verticalalignment='top', fontsize=16, color='#555555')#, weight='demibold')
    ax.annotate('Source: Los Angeles County Department of Public Health',xy=(0.1, .05),  xycoords='figure fraction', horizontalalignment='left', verticalalignment='top', fontsize=12, color='#666666')
    if savename:
        fout = 'maps/covid_la_'+month+day+savename+'.png' 
    else:
        fout = 'maps/covid_la_'+month+day+'.png'
    fig.savefig(fout)#, bbox_inches = 'tight')
    plt.close(fig)
    return vmax

In [None]:
#Create plots for the specified days
dates = [320, 323, 324, 325, 326, 327,328, 329, 330, 331, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 501, 502, 503, 504, 505, 506] 

plotmaps = True

vmax = 0
if plotmaps:
    for idx,date in enumerate(reversed(dates)):
        month = int(date/100)
        day = date-month*100
        print('MO', month, 'DA', day)

        if (idx==0):
            vmax = daily_map(month,day, usemax=False, savename='_0',labels=False)
            vmax = daily_map(month,day, usemax=False, savename='_1', labels=True)
        else:
            if (idx==len(dates)-1):
                daily_map(month,day, usemax=vmax, savename='_1', labels=False)
                daily_map(month,day, usemax=vmax, savename='_0', labels=True)
            else:
                daily_map(month,day, usemax=vmax)

To create an animated gif, use imagemagick locally.  Currently using:

convert -delay 90 -loop 0 *png covid_la.gif


In [6]:
#Write out files
import glob
import os

#path = 'data'
#all_files = glob.glob(path + "/*.csv")

df = pd.read_csv('data/covid_0320.csv').set_index('city')

df = df.fillna('null').rename(columns={'count': '03/20/2020'})

In [8]:
dates = [323, 324, 325, 326, 327,328, 329, 330, 331, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 501, 502, 503, 504, 505, 506, 513, 515, 527]
for idx, date in enumerate(dates):
    strdate = str(date).zfill(4)
    month = int(date/100)
    day = str(date-month*100)
    filename = 'data/covid_'+strdate+'.csv'
    if (filename == 'data/covid_0322_s.csv'):
        continue
    print('Working on ',filename)

    df0 = pd.read_csv(filename).set_index('city') 

    df0 = df0.rename(columns={'count' : str(month)+'/'+day+'/2020'})
    df0 = df0.rename(columns={'rate' : 'Rate'+str(month)+'/'+day+'/2020'})
#    df0=df0.loc[df0.index.drop_duplicates()]
    df = pd.concat([df, df0], join='outer', axis=1)
    print(df.head())

    if (idx == len(dates)-1):
        print('My date is ', strdate)
        mymerged = nbd.set_index('name').join(df0)
        mymerged.to_file("./data/combined_reports_regs_"+strdate+".json",index=True, driver="GeoJSON")


Working on  data/covid_0323.csv
               03/20/2020  3/23/2020
Alhambra              3.0          4
Altadena              2.0          5
Arcadia               2.0          3
Baldwin Hills         1.0          3
Beverly Hills         5.0         12
Working on  data/covid_0324.csv
               03/20/2020  3/23/2020  3/24/2020
Alhambra              3.0        4.0        5.0
Altadena              2.0        5.0        5.0
Arcadia               2.0        3.0        4.0
Baldwin Hills         1.0        3.0        3.0
Beverly Hills         5.0       12.0       12.0
Working on  data/covid_0325.csv
               03/20/2020  3/23/2020  3/24/2020  3/25/2020
Alhambra              3.0        4.0        5.0        6.0
Altadena              2.0        5.0        5.0        5.0
Arcadia               2.0        3.0        4.0        4.0
Baldwin Hills         1.0        3.0        3.0        3.0
Beverly Hills         5.0       12.0       12.0       13.0
Working on  data/covid_0326.csv
        

               03/20/2020  3/23/2020  3/24/2020  3/25/2020  3/26/2020  \
Alhambra              3.0        4.0        5.0        6.0        8.0   
Altadena              2.0        5.0        5.0        5.0        6.0   
Arcadia               2.0        3.0        4.0        4.0        6.0   
Baldwin Hills         1.0        3.0        3.0        3.0        4.0   
Beverly Hills         5.0       12.0       12.0       13.0       15.0   

               3/27/2020  3/28/2020  3/29/2020  3/30/2020 3/31/2020  \
Alhambra             8.0       10.0       10.0       11.0        13   
Altadena             9.0       10.0       10.0       10.0        12   
Arcadia              6.0        8.0        9.0        9.0        10   
Baldwin Hills        5.0        7.0        9.0        9.0        11   
Beverly Hills       21.0       21.0       24.0       28.0        35   

              Rate3/31/2020 4/1/2020 Rate4/1/2020 4/2/2020 Rate4/2/2020  \
Alhambra              14.99       13        14.99       15 

               03/20/2020  3/23/2020  3/24/2020  3/25/2020  3/26/2020  \
Alhambra              3.0        4.0        5.0        6.0        8.0   
Altadena              2.0        5.0        5.0        5.0        6.0   
Arcadia               2.0        3.0        4.0        4.0        6.0   
Baldwin Hills         1.0        3.0        3.0        3.0        4.0   
Beverly Hills         5.0       12.0       12.0       13.0       15.0   

               3/27/2020  3/28/2020  3/29/2020  3/30/2020 3/31/2020  ...  \
Alhambra             8.0       10.0       10.0       11.0        13  ...   
Altadena             9.0       10.0       10.0       10.0        12  ...   
Arcadia              6.0        8.0        9.0        9.0        10  ...   
Baldwin Hills        5.0        7.0        9.0        9.0        11  ...   
Beverly Hills       21.0       21.0       24.0       28.0        35  ...   

              4/10/2020 Rate4/10/2020 4/11/2020 Rate4/11/2020 4/12/2020  \
Alhambra             32      

               03/20/2020  3/23/2020  3/24/2020  3/25/2020  3/26/2020  \
Alhambra              3.0        4.0        5.0        6.0        8.0   
Altadena              2.0        5.0        5.0        5.0        6.0   
Arcadia               2.0        3.0        4.0        4.0        6.0   
Baldwin Hills         1.0        3.0        3.0        3.0        4.0   
Beverly Hills         5.0       12.0       12.0       13.0       15.0   

               3/27/2020  3/28/2020  3/29/2020  3/30/2020 3/31/2020  ...  \
Alhambra             8.0       10.0       10.0       11.0        13  ...   
Altadena             9.0       10.0       10.0       10.0        12  ...   
Arcadia              6.0        8.0        9.0        9.0        10  ...   
Baldwin Hills        5.0        7.0        9.0        9.0        11  ...   
Beverly Hills       21.0       21.0       24.0       28.0        35  ...   

              4/20/2020 Rate4/20/2020 4/21/2020 Rate4/21/2020 4/22/2020  \
Alhambra             47      

               03/20/2020  3/23/2020  3/24/2020  3/25/2020  3/26/2020  \
Alhambra              3.0        4.0        5.0        6.0        8.0   
Altadena              2.0        5.0        5.0        5.0        6.0   
Arcadia               2.0        3.0        4.0        4.0        6.0   
Baldwin Hills         1.0        3.0        3.0        3.0        4.0   
Beverly Hills         5.0       12.0       12.0       13.0       15.0   

               3/27/2020  3/28/2020  3/29/2020  3/30/2020 3/31/2020  ...  \
Alhambra             8.0       10.0       10.0       11.0        13  ...   
Altadena             9.0       10.0       10.0       10.0        12  ...   
Arcadia              6.0        8.0        9.0        9.0        10  ...   
Baldwin Hills        5.0        7.0        9.0        9.0        11  ...   
Beverly Hills       21.0       21.0       24.0       28.0        35  ...   

              4/27/2020 Rate4/27/2020 4/28/2020 Rate4/28/2020 4/29/2020  \
Alhambra           75.0     8

               03/20/2020  3/23/2020  3/24/2020  3/25/2020  3/26/2020  \
Alhambra              3.0        4.0        5.0        6.0        8.0   
Altadena              2.0        5.0        5.0        5.0        6.0   
Arcadia               2.0        3.0        4.0        4.0        6.0   
Baldwin Hills         1.0        3.0        3.0        3.0        4.0   
Beverly Hills         5.0       12.0       12.0       13.0       15.0   

               3/27/2020  3/28/2020  3/29/2020  3/30/2020 3/31/2020  ...  \
Alhambra             8.0       10.0       10.0       11.0        13  ...   
Altadena             9.0       10.0       10.0       10.0        12  ...   
Arcadia              6.0        8.0        9.0        9.0        10  ...   
Baldwin Hills        5.0        7.0        9.0        9.0        11  ...   
Beverly Hills       21.0       21.0       24.0       28.0        35  ...   

              5/4/2020 Rate5/4/2020 5/5/2020 Rate5/5/2020 5/6/2020  \
Alhambra          87.0   100.32000

ValueError: Shape of passed values is (416, 91), indices imply (397, 91)

In [10]:
strdate = '0527'
df0 = pd.read_csv('data/covid_'+strdate+'.csv').set_index('city') 
mymerged = nbd.set_index('name').join(df0)
mymerged.to_file("./data/combined_reports_regs_"+strdate+".json",index=True, driver="GeoJSON")

In [9]:
lastcol = 'Rate'+str(month)+'/'+day+'/2020'
df[~df[lastcol].str.contains('suppressed', na=True)].sort_values(by=lastcol, ascending=False).to_csv(
        "./data/combined_reports_"+strdate+"_nototal.csv",
    index=True,
    encoding="utf-8"
)


#df.lastcol
#df[~df.C.str.contains("XYZ")].sort_values(by='Rate'+str(month)+'/'+day+'/2020', ascending=False)

KeyError: 'Rate5/27/2020'

In [None]:
day, month

#Write out files for Flourish plots

df.drop(['Total', 'Los Angeles - AGGREGATE']).to_csv(
#df[~df["index"].str.contains("Under Investigation") & ~df["index"].str.contains("Total") & ~df["index"].str.contains("AGGREGATE")].to_csv(
    "./data/combined_reports_"+strdate+"_nototal.csv",
    index=True,
    encoding="utf-8"
)





#df.fillna(0).T.to_csv(
#    "./data/combined_reports_"+strdate+".csv",
#    index=True,
#    encoding="utf-8"
#)


#mymerged = nbd.set_index('name').join(df)
#mymerged.to_file(
#    "./data/combined_reports_regs_"+strdate+".json",
#    index=True, driver="GeoJSON"
#)

#### Check merged output
pd.set_option('display.max_rows', None)
mymerged

In [None]:
#Separate test
df = pd.read_csv('data/covid_0330.csv')
merged = nbd.merge(df, left_on="name", right_on="city", indicator=True)
nbd.set_index('name')
df.set_index('city')
#df['city'].equals(nbd['name'])
pd.concat([nbd, df]).drop_duplicates(keep=False)

#Test out plotting a barchart
fig, ax = plt.subplots(1,1, figsize=(12, 20))
df = pd.read_csv('data/covid_0324.csv')
df.set_index('city', inplace=True, drop=True)
df.drop(['Under Investigation']).sort_values(by=['count'],ascending=1).plot(kind='barh', ax=ax, legend=False)
df['count'].max()