In [1]:
import pandas as pd

# Load the LA Times place-level totals straight from the repository.
city_df = pd.read_csv("https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/latimes-place-totals.csv")
city_df = city_df.assign(
    # Correcting a mistake in longitude: force every value to be negative,
    # then use 0 as the placeholder for a missing coordinate.
    x=(-city_df["x"].abs()).fillna(0),
    y=city_df["y"].fillna(0),
)
city_df

Unnamed: 0,date,county,fips,place,confirmed_cases,note,x,y
0,2020-03-16,Los Angeles,37,Alhambra,2,,-118.135504,34.083961
1,2020-03-16,Los Angeles,37,Arcadia,1,,-118.037297,34.134186
2,2020-03-16,Los Angeles,37,Beverly Hills,1,,-118.402109,34.078543
3,2020-03-16,Los Angeles,37,Boyle Heights,5,,-118.205330,34.038150
4,2020-03-16,Los Angeles,37,Carson,1,,-118.255878,33.837391
...,...,...,...,...,...,...,...,...
32856,2020-05-16,Tulare,107,Woodlake,12,,-119.096931,36.413681
32857,2020-05-16,Yolo,113,Davis,21,,-121.738056,38.553889
32858,2020-05-16,Yolo,113,West Sacramento,62,,-121.530278,38.580556
32859,2020-05-16,Yolo,113,Winters and unincorporated,12,,-121.970833,38.525000


The `uszipcode` Python package can look up ZIP codes.
It can find the ZIP code nearest to a pair of latitude/longitude coordinates.
https://pypi.org/project/uszipcode/

In [2]:
from uszipcode import SearchEngine
# Build the ZIP code lookup engine once; `search` is reused by later cells.
search = SearchEngine(simple_zipcode=True)

# Try a single lookup (latitude first, then longitude) and show the best match.
sample_lat, sample_lng = 38.678611, -121.773333
result = search.by_coordinates(sample_lat, sample_lng, radius=30, returns=1)
result[0]

SimpleZipcode(zipcode='95776', zipcode_type='Standard', major_city='Woodland', post_office_city='Woodland, CA', common_city_list=['Woodland'], county='Yolo County', state='CA', lat=38.7, lng=-121.7, timezone='Pacific', radius_in_miles=11.0, area_code_list=['530'], population=21902, population_density=237.0, land_area_in_sqmi=92.43, water_area_in_sqmi=1.12, housing_units=6824, occupied_housing_units=6464, median_home_value=248000, median_household_income=61599, bounds_west=-121.781382, bounds_east=-121.629202, bounds_north=38.803028, bounds_south=38.553296)

Select the unique cities into a new table and look up the ZIP code for each one

In [3]:
# One row per distinct (county, place, x, y) combination found in city_df.
city_data_df = city_df[['county', 'place', 'x', 'y']].drop_duplicates()

def search_zip(df):
    """Return the ZIP code for one row of the unique-places table.

    Parameters
    ----------
    df : row-like object
        Must expose ``place``, ``x`` and ``y`` attributes (for example a row
        passed in by ``DataFrame.apply(..., axis=1)``). ``y`` is handed to
        ``search.by_coordinates`` as the first (latitude) argument and ``x``
        as the second (longitude) argument.

    Returns
    -------
    int
        The ZIP code. ``0`` is returned when it cannot be determined: a
        coordinate equals the 0 placeholder, or the coordinate search finds
        nothing within the radius.
    """
    # If the place name starts with a full five-digit ZIP code, use it directly.
    # It is converted to int so every branch returns the same type; a string
    # here would leave the 'zip' column mixed-type and unable to match integer
    # ZIP codes in a later merge.
    prefix = str(df.place)[:5]
    if len(prefix) == 5 and prefix.isdecimal():
        return int(prefix)

    # 0 is the placeholder this notebook uses for a missing coordinate, so
    # there is nothing meaningful to search for.
    if df.y == 0 or df.x == 0:
        return 0

    result = search.by_coordinates(df.y, df.x, radius=30, returns=1)
    # No ZIP code within the radius: report "unknown" instead of raising
    # IndexError on result[0].
    if not result:
        return 0
    return int(result[0].zipcode)

# Look up a ZIP code for every unique place (one search_zip call per row).
zip_codes = city_data_df.apply(search_zip, axis=1)

# Attach the ZIP codes, renumber the rows from 0, and reuse the new row
# number as a per-place identifier.
city_data_df = city_data_df.assign(zip=zip_codes).reset_index(drop=True)
city_data_df['city_id'] = city_data_df.index
city_data_df

Unnamed: 0,county,place,x,y,zip,city_id
0,Los Angeles,Alhambra,-118.135504,34.083961,91803,0
1,Los Angeles,Arcadia,-118.037297,34.134186,91006,1
2,Los Angeles,Beverly Hills,-118.402109,34.078543,90212,2
3,Los Angeles,Boyle Heights,-118.205330,34.038150,90033,3
4,Los Angeles,Carson,-118.255878,33.837391,90745,4
...,...,...,...,...,...,...
944,El Dorado,"Diamond Springs, El Dorado",-120.815000,38.694722,95619,944
945,Fresno,Caruthers,-119.834562,36.542564,93609,945
946,Los Angeles,East Pasadena,-118.082551,34.161732,91024,946
947,Los Angeles,San Pasqual,-118.102533,34.139229,91775,947


In [4]:
# Attach each place's zip and city_id to its daily rows by matching on the
# columns that identify a place.
place_keys = ['county', 'place', 'x', 'y']
city_df = city_df.merge(city_data_df, how="inner", on=place_keys)
city_df

Unnamed: 0,date,county,fips,place,confirmed_cases,note,x,y,zip,city_id
0,2020-03-16,Los Angeles,37,Alhambra,2,,-118.135504,34.083961,91803,0
1,2020-03-17,Los Angeles,37,Alhambra,1,,-118.135504,34.083961,91803,0
2,2020-03-18,Los Angeles,37,Alhambra,1,,-118.135504,34.083961,91803,0
3,2020-03-19,Los Angeles,37,Alhambra,2,,-118.135504,34.083961,91803,0
4,2020-03-20,Los Angeles,37,Alhambra,3,,-118.135504,34.083961,91803,0
...,...,...,...,...,...,...,...,...,...,...
32856,2020-05-13,Los Angeles,37,San Pasqual,1,,-118.102533,34.139229,91775,947
32857,2020-05-14,Los Angeles,37,San Pasqual,1,,-118.102533,34.139229,91775,947
32858,2020-05-15,Los Angeles,37,San Pasqual,1,,-118.102533,34.139229,91775,947
32859,2020-05-16,Los Angeles,37,San Pasqual,1,,-118.102533,34.139229,91775,947


Compute, for each city, the daily new cases, the day-over-day growth rate, and the 7-day simple moving average (SMA7) of that rate

In [5]:
import numpy as np

# Suppress warnings
# This is a notebook-wide pandas option, so it is left in place for any other
# cell that relies on it. The code below does not need it: each city's rows
# are copied before new columns are added.
pd.options.mode.chained_assignment = None

city_id = city_df.city_id.unique()


def _add_growth_metrics(city_rows):
    """Return a copy of one city's rows with the growth columns added.

    Columns added, each computed over the rows in the order given:
      daily_cases - difference in confirmed_cases from the previous row
      rate        - fractional change in confirmed_cases from the previous row
      rate7day    - mean of ``rate`` over a 7-row rolling window
      ratio7day   - ``rate`` divided by ``rate7day``
    Missing values are filled with 0, and +/-inf anywhere in the frame is
    replaced by 0 at the end.
    """
    out = city_rows.copy()
    out['daily_cases'] = out['confirmed_cases'].diff().fillna(0)
    out['rate'] = out['confirmed_cases'].pct_change().fillna(0)
    # NOTE(review): an infinite rate (previous value 0) is still present while
    # the rolling mean is taken and is only zeroed afterwards -- confirm this
    # ordering is intended.
    out['rate7day'] = out['rate'].rolling(window=7).mean().fillna(0)
    out['ratio7day'] = (out['rate'] / out['rate7day']).fillna(0)
    return out.replace([np.inf, -np.inf], 0)


# Process each city separately and concatenate once at the end, instead of
# growing new_df inside a loop. sort=False keeps the cities in order of first
# appearance, i.e. the same order as city_id above, and new_df is always
# rebuilt from scratch regardless of which city_id comes first.
new_df = pd.concat(
    [_add_growth_metrics(rows) for _, rows in city_df.groupby('city_id', sort=False)]
)
new_df

Unnamed: 0,date,county,fips,place,confirmed_cases,note,x,y,zip,city_id,daily_cases,rate,rate7day,ratio7day
0,2020-03-16,Los Angeles,37,Alhambra,2,,-118.135504,34.083961,91803,0,0.0,0.0,0.0,0.0
1,2020-03-17,Los Angeles,37,Alhambra,1,,-118.135504,34.083961,91803,0,-1.0,-0.5,0.0,0.0
2,2020-03-18,Los Angeles,37,Alhambra,1,,-118.135504,34.083961,91803,0,0.0,0.0,0.0,0.0
3,2020-03-19,Los Angeles,37,Alhambra,2,,-118.135504,34.083961,91803,0,1.0,1.0,0.0,0.0
4,2020-03-20,Los Angeles,37,Alhambra,3,,-118.135504,34.083961,91803,0,1.0,0.5,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32856,2020-05-13,Los Angeles,37,San Pasqual,1,,-118.102533,34.139229,91775,947,0.0,0.0,0.0,0.0
32857,2020-05-14,Los Angeles,37,San Pasqual,1,,-118.102533,34.139229,91775,947,0.0,0.0,0.0,0.0
32858,2020-05-15,Los Angeles,37,San Pasqual,1,,-118.102533,34.139229,91775,947,0.0,0.0,0.0,0.0
32859,2020-05-16,Los Angeles,37,San Pasqual,1,,-118.102533,34.139229,91775,947,0.0,0.0,0.0,0.0


In [6]:
# Save the per-city metrics without the index column.
new_df.to_csv(path_or_buf="city_data.csv", index=False)

Merge the Medicare data on ZIP code

In [7]:
# Load the Medicare table and join it on the shared 'zip' column. The join is
# an inner join, so rows whose zip is absent from the Medicare file are dropped.
medicare_df = pd.read_csv('output_medicare_group_sumavg.csv')
new_df = new_df.merge(medicare_df, how='inner', on='zip')
new_df


Unnamed: 0,date,county,fips,place,confirmed_cases,note,x,y,zip,city_id,...,PCT_BEN_Chronic_Obstructive_Pulmonary_Disease,PCT_BEN_Depression,PCT_BEN_Diabetes,PCT_BEN_Hyperlipidemia,PCT_BEN_Hypertension,PCT_BEN_Ischemic_Heart_Disease,PCT_BEN_Osteoporosis,PCT_BEN_Rheumatoid_Arthritis_Osteoarthritis,PCT_BEN_Schizophrenia_Other_Psychotic_Disorders,PCT_BEN_Stroke
0,2020-03-16,Los Angeles,37,Alhambra,2,,-118.135504,34.083961,91803,0,...,16.043478,20.130435,40.956522,48.173913,61.478261,37.913043,21.173913,44.086957,3.260870,3.565217
1,2020-03-17,Los Angeles,37,Alhambra,1,,-118.135504,34.083961,91803,0,...,16.043478,20.130435,40.956522,48.173913,61.478261,37.913043,21.173913,44.086957,3.260870,3.565217
2,2020-03-18,Los Angeles,37,Alhambra,1,,-118.135504,34.083961,91803,0,...,16.043478,20.130435,40.956522,48.173913,61.478261,37.913043,21.173913,44.086957,3.260870,3.565217
3,2020-03-19,Los Angeles,37,Alhambra,2,,-118.135504,34.083961,91803,0,...,16.043478,20.130435,40.956522,48.173913,61.478261,37.913043,21.173913,44.086957,3.260870,3.565217
4,2020-03-20,Los Angeles,37,Alhambra,3,,-118.135504,34.083961,91803,0,...,16.043478,20.130435,40.956522,48.173913,61.478261,37.913043,21.173913,44.086957,3.260870,3.565217
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29811,2020-05-13,Fresno,19,Caruthers,5,,-119.834562,36.542564,93609,945,...,0.000000,0.000000,24.000000,45.000000,71.000000,31.000000,0.000000,57.000000,0.000000,0.000000
29812,2020-05-14,Fresno,19,Caruthers,5,,-119.834562,36.542564,93609,945,...,0.000000,0.000000,24.000000,45.000000,71.000000,31.000000,0.000000,57.000000,0.000000,0.000000
29813,2020-05-15,Fresno,19,Caruthers,5,,-119.834562,36.542564,93609,945,...,0.000000,0.000000,24.000000,45.000000,71.000000,31.000000,0.000000,57.000000,0.000000,0.000000
29814,2020-05-16,Fresno,19,Caruthers,5,,-119.834562,36.542564,93609,945,...,0.000000,0.000000,24.000000,45.000000,71.000000,31.000000,0.000000,57.000000,0.000000,0.000000
