## 9.1 Imports the crime data and ZIP code data, as in previous assignments. Spatially joins the two data sets, matching each crime event to the ZIP code region that contains it. Constructs a new dataframe consisting of Zip Code, Geometry, and Incident Count in 2019, then plots it in a folium choropleth map.

In [1]:
import pandas as pd
from shapely.geometry import Point, Polygon, MultiPolygon
import geopandas as gpd
import numpy as np
import pyproj
import folium

# --- Load the ZIP code polygons and the incident records -------------------
zip_gpd = gpd.read_file("ZIP_Codes.geojson")

bcd_complete = pd.read_csv(
    "bcd19-20.csv",
    low_memory=False,
    parse_dates=["OCCURRED_ON_DATE"],
)

# Keep only incidents with usable coordinates: first drop rows where either
# coordinate is missing, then rows where a coordinate is exactly 0 or +/-90
# (presumably placeholder values rather than real positions -- confirm
# against the data dictionary).
bcd_complete = bcd_complete.dropna(subset=['Lat', 'Long'])
has_real_position = (
    bcd_complete['Lat'].ne(0)
    & bcd_complete['Lat'].abs().ne(90)
    & bcd_complete['Long'].abs().ne(90)
    & bcd_complete['Long'].ne(0)
)
bcd_complete = bcd_complete[has_real_position]

# --- Turn incidents into points and bring both layers to EPSG:4326 ---------
incident_points = gpd.points_from_xy(bcd_complete['Long'], bcd_complete['Lat'])
bcd_geo = gpd.GeoDataFrame(bcd_complete, crs=4326, geometry=incident_points)
zip_gpd = zip_gpd.to_crs(4326)

# --- Attach each incident to the ZIP polygon it lies within ----------------
# how='right' makes zip_gpd the preserved side of the join; the YEAR filter
# then keeps only the joined rows whose incident is from 2019 (rows without a
# matching 2019 incident do not survive the comparison).
bcd_zip = gpd.sjoin(bcd_geo, zip_gpd, how='right', predicate="within")
bcd_zip = bcd_zip.loc[bcd_zip['YEAR'].eq(2019)]

# Build one row per ZIP code polygon with three columns:
#   'Zip'      - the polygon's ZIP5 value as a string
#   'Location' - the polygon geometry (active geometry column)
#   'Crimes'   - number of joined incidents (non-null INCIDENT_NUMBER values)
#
# Polygons are identified by OBJECTID, so polygons that share a ZIP5 value
# remain separate rows.
#
# This is done with vectorised operations instead of filling an empty frame
# cell by cell: a chained assignment such as `df.loc[:, col].loc[row] = value`
# is not guaranteed to write through to `df`, and re-filtering the whole
# frame once per OBJECTID is needlessly quadratic.

# First joined row of every polygon supplies its ZIP code and geometry.
zone_rows = bcd_zip.drop_duplicates(subset='OBJECTID').set_index('OBJECTID')
# Incident count per polygon.
incident_counts = bcd_zip.groupby('OBJECTID')['INCIDENT_NUMBER'].count()

crimes_zip = gpd.GeoDataFrame(
    {
        'Zip': zone_rows['ZIP5'].astype(str),
        'Location': zone_rows['geometry'],
        # Align the counts with the polygon order used for the other columns.
        'Crimes': incident_counts.reindex(zone_rows.index),
    },
    geometry='Location',
).reset_index(drop=True)  # positional 0..n-1 row labels

# Centre the map on the mean longitude/latitude of the cleaned incidents.
clon, clat = bcd_complete['Long'].mean(), bcd_complete['Lat'].mean()
m = folium.Map(location=(clat, clon), zoom_start=12, width=800, height=800)

# A choropleth matches data rows to GeoJSON features through a string key.
# 'Zip' is already a string, but certain ZIP codes appear on more than one
# row and would be indistinguishable, so every repeat gets a numeric suffix:
# the first occurrence keeps its ZIP code unchanged, later ones become
# '<zip>1', '<zip>2', ...
zip_repeat = crimes_zip.groupby('Zip').cumcount()
crimes_zip["Zip"] += zip_repeat.astype(str).where(zip_repeat > 0, '')

# Serialise the polygons with the (now unique) 'Zip' values as the index, so
# they become the feature ids that `key_on` refers to below.
geoloc = gpd.GeoSeries(crimes_zip.set_index('Zip')['Location']).to_json()

folium.Choropleth(
    geo_data=geoloc,
    name='Choropleth',
    data=crimes_zip,
    columns=['Zip', 'Crimes'],  # [key column, value column]
    key_on='feature.id',
    fill_color='YlOrRd',
    fill_opacity=0.5,
    line_opacity=1,
    legend_name='Crime Count (2019)',
    smooth_factor=0,
).add_to(m)

# `display` is provided by the notebook environment.
display(m)

## 9.2 Loads three CSV files. Merges life expectancy and birth rate data by country code and year. Reshapes the population data from wide to long format, then merges it with the prior data as well. Renames columns and drops rows with any missing values. Plots an animated bubble chart of birth rate against life expectancy, with bubble size defined by total population, over the course of 1960-2021.

In [2]:
import plotly.express as px

df_le = pd.read_csv('life-expectancy.csv')
df_br = pd.read_csv('crude-birth-rate.csv')
# The first four lines of the population file are skipped before the header.
df_p = pd.read_csv('API_SP_POP_TOTL.csv', skiprows=4)

# Life expectancy joined with birth rate on country code and year, keeping
# every life-expectancy row. Columns present in both inputs get pandas'
# default '_x' / '_y' suffixes (hence 'Entity_x' / 'Entity_y' below).
df_tot = pd.merge(df_le, df_br, on=['Code', 'Year'], how='left')

# Reshape the population table from wide (one column per year) to long
# (one row per country code and year). The year columns are picked by name
# (all-digit labels) rather than by position, so a trailing empty
# 'Unnamed: N' column or a different number of years in the export does not
# break the selection; melt drops every column not listed here.
listcol = [col for col in df_p.columns if str(col).isdigit()]
df_p = pd.melt(df_p, id_vars='Country Code', value_vars=listcol)
df_p = df_p.rename(columns={'Country Code': 'Code', 'variable': 'Year', 'value': 'Total Population'})
# Year labels come from the header as strings; make them integers so they
# can be matched against the integer 'Year' of the other tables.
df_p['Year'] = df_p['Year'].astype('int64')

# Add population to the combined table, tidy the column names, and keep
# only rows where every field is present.
df_tot = pd.merge(df_tot, df_p, on=['Code', 'Year'], how='left')
df_tot = df_tot.rename(columns={'Entity_x': 'Country', 'Birth rate - sex: all - age: all - variant: estimates': 'Birth rate'})
df_tot = df_tot.drop(columns='Entity_y')
df_tot = df_tot.dropna(how='any')

# Animated bubble chart: birth rate (x) against life expectancy (y), one
# frame per year, with each country's bubble sized and coloured by its
# total population.
fig1 = px.scatter(
    df_tot,
    x="Birth rate",
    y="Life expectancy",
    size="Total Population",
    color="Total Population",
    hover_name="Country",
    animation_frame="Year",
    animation_group="Code",
    size_max=45,
    range_x=[0, 60],
    range_y=[0, 110],
    range_color=(0, 1500000000),
    color_continuous_scale=px.colors.sequential.Agsunset,
)

# Set the per-frame duration on the first button of the animation menu
# (presumably the Play button; value presumably in milliseconds -- confirm
# against the plotly docs).
first_button = fig1.layout.updatemenus[0].buttons[0]
first_button.args[1]["frame"]["duration"] = 700

fig1.show()