In [None]:
import sys

# Make the parent directory importable (presumably where the `config` package
# imported by a later cell lives — TODO confirm). The membership guard keeps
# the cell idempotent: re-running it does not append the same entry again.
if '../' not in sys.path:
    sys.path.append('../')

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from config import const



# Apply the 'seaborn-v0_8' matplotlib style sheet to the figures drawn below.
plt.style.use('seaborn-v0_8')

# NOTE(review): `year` is not referenced by any cell visible in this notebook;
# kept in case other code relies on it.
year = 2017

# Lookup table used to translate the state abbreviation ('Abbr') into the
# column that is renamed to 'State_Name' below (presumably 'Full Name' comes
# from this file — confirm against state.csv).
state_abbr = pd.read_csv('../data/raw/state.csv')

# Address table enriched with the full state name.
# - how='left' keeps every address row even when its 'State' has no match.
# - 'Abbr' duplicates 'State' after the join, so it is dropped.
# NOTE: unpickling can execute arbitrary code; only load pickle files that
# were produced by a trusted pipeline.
sample_address = (
    pd.read_pickle('../data/processed/address.pickle')
    .merge(state_abbr, left_on='State', right_on='Abbr', how='left')
    .drop(columns='Abbr')
    .rename(columns={'Full Name': 'State_Name'})
)
sample_address

In [None]:
# Number of records (non-null 'ID') per state, largest first, top 20 only.
# NOTE(review): groupby drops NaN keys by default, so rows whose 'State_Name'
# is missing (no match in the state lookup) are excluded from these counts.
df_count_by_state = (
    sample_address
    .groupby(['State', 'State_Name'])
    .agg(count=('ID', 'count'))
    .sort_values('count', ascending=False)
    .head(20)
    .reset_index()
)

fig, ax1 = plt.subplots(figsize=(15, 6))

# One colour per bar, blended from the first to the last configured colour.
pal = sns.color_palette(f"blend:{const.colors[0]},{const.colors[-1]}", len(df_count_by_state))
# 0-based rank of every bar by count; ties are ranked in row order.
rank = df_count_by_state['count'].rank(ascending=False, method='first').astype(int) - 1
# Colour each bar by its rank: the largest count gets the first blend colour.
# NOTE(review): reverse `pal` here if the opposite gradient was intended.
bar_colors = [pal[i] for i in rank]

sns.barplot(
    x='State_Name',
    y='count',
    data=df_count_by_state,
    ax=ax1,
    palette=bar_colors,
    hue='State_Name',  # hue mirrors x so the per-bar palette is applied
    legend=False,
)

# Fix the tick positions before assigning labels so the two stay aligned.
ax1.set_xticks(range(len(df_count_by_state['State_Name'])))
ax1.set_xticklabels(df_count_by_state['State_Name'], rotation=45, ha='right')
ax1.tick_params(axis='both', labelsize=20)

# Axis titles are intentionally blank; the tick labels carry the information.
ax1.set_xlabel('', fontsize=20)
ax1.set_ylabel('', fontsize=20)
plt.show()

In [None]:
# Coordinates table; joined to the address table on the shared 'ID' column.
# NOTE: unpickling can execute arbitrary code; only load trusted pickle files.
sample_coordinates = pd.read_pickle('../data/processed/coordinates.pickle')

# Inner join: only records present in both tables are kept.
sample_address_coordinates = sample_coordinates.merge(sample_address, on='ID', how='inner')
sample_address_coordinates

In [None]:
# Occurrences of each distinct 'City' value, as a two-column frame.
cities_by_accidents = sample_address_coordinates['City'].value_counts().reset_index()

# Number of records (non-null 'ID') for every state / city combination,
# ordered from the most to the least frequent.
state_city_groups = sample_address_coordinates.groupby(['State', 'State_Name', 'City'])
state_city_counts = state_city_groups.agg(count_state_city=('ID', 'count')).reset_index()
df_gb_state_city = state_city_counts.sort_values('count_state_city', ascending=False)
df_gb_state_city

In [None]:
# Total record count per city name, largest first, top 20 only.
# NOTE(review): grouping on 'City' alone adds together cities that share a
# name but lie in different states — confirm this is intended.
df_top20_city = (
    df_gb_state_city
    .groupby('City')
    .agg(count_city=('count_state_city', 'sum'))
    .reset_index()
    .sort_values('count_city', ascending=False)
    .head(20)
)

fig, ax1 = plt.subplots(figsize=(15, 6))

# One colour per bar, blended from the first to the last configured colour.
pal = sns.color_palette(f"blend:{const.colors[0]},{const.colors[-1]}", len(df_top20_city))
# 0-based rank of every bar by count; ties are ranked in row order.
rank = df_top20_city['count_city'].rank(ascending=False, method='first').astype(int) - 1
# Colour each bar by its rank: the largest count gets the first blend colour.
# NOTE(review): reverse `pal` here if the opposite gradient was intended.
bar_colors = [pal[i] for i in rank]

sns.barplot(
    x='City',
    y='count_city',
    data=df_top20_city,
    ax=ax1,
    palette=bar_colors,
    hue='City',  # hue mirrors x so the per-bar palette is applied
    legend=False,
)

ax1.tick_params(axis='both', labelsize=20)

# Fix the tick positions before assigning labels so the two stay aligned.
ax1.set_xticks(range(len(df_top20_city['City'])))
ax1.set_xticklabels(df_top20_city['City'], rotation=45, ha='right')

# Axis titles are intentionally blank; the tick labels carry the information.
ax1.set_xlabel('', fontsize=20)
ax1.set_ylabel('', fontsize=20)
plt.show()

In [None]:
# Records whose 'State' is 'CA'.
sample_address_coordinates_ca = sample_address_coordinates[sample_address_coordinates['State'] == 'CA']

# Record count (non-null 'ID') per county within that subset, top 10 only.
df_gb_county_ca = (
    sample_address_coordinates_ca
    .groupby(['County'])
    .agg(count=('ID', 'count'))
    .reset_index()
    .sort_values('count', ascending=False)
    .head(10)
)

fig, ax1 = plt.subplots(figsize=(15, 6))

# One colour per bar, blended from the first to the last configured colour.
pal = sns.color_palette(f"blend:{const.colors[0]},{const.colors[-1]}", len(df_gb_county_ca))
# 0-based rank of every bar by count; ties are ranked in row order.
rank = df_gb_county_ca['count'].rank(ascending=False, method='first').astype(int) - 1
# Colour each bar by its rank: the largest count gets the first blend colour.
# NOTE(review): reverse `pal` here if the opposite gradient was intended.
bar_colors = [pal[i] for i in rank]

sns.barplot(
    x='County',
    y='count',
    data=df_gb_county_ca,
    ax=ax1,
    palette=bar_colors,
    hue='County',  # hue mirrors x so the per-bar palette is applied
    legend=False,
)

ax1.tick_params(axis='both', labelsize=20)

# Fix the tick positions before assigning labels so the two stay aligned.
ax1.set_xticks(range(len(df_gb_county_ca['County'])))
ax1.set_xticklabels(df_gb_county_ca['County'], rotation=45, ha='right')

# Axis titles are intentionally blank; the tick labels carry the information.
ax1.set_xlabel('', fontsize=20)
ax1.set_ylabel('', fontsize=20)
plt.show()

In [None]:
# Streets with the most records (non-null 'ID') across the whole dataset.
# NOTE(review): the variable name says "top10" but 20 streets are kept —
# confirm which of the two is intended before relying on it.
street_counts = (
    sample_address_coordinates
    .groupby('Street')
    .agg(count=('ID', 'count'))
    .reset_index()
)
street_ranking = street_counts.sort_values('count', ascending=False)
df_top10_street = street_ranking['Street'].head(20)
df_top10_street

In [None]:
import plotly.express as px

# Keep only the records located on one of the most frequent streets.
on_top_street = sample_address_coordinates['Street'].isin(df_top10_street)
top_street_records = sample_address_coordinates[on_top_street]

# Density heat map of the start coordinates of those records.
fig = px.density_mapbox(
    top_street_records,
    lat='Start_Lat',
    lon='Start_Lng',
    hover_data=['State', 'County', 'City', 'Street'],
    radius=10,
    zoom=3.5,
    height=600,
)
# OpenStreetMap tiles as the base map, and no margin around the figure.
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()

In [None]:
# Records in Los Angeles County, California.
# NOTE(review): despite the `_ca` suffix, this cell narrows the frame to a
# single county; the name is kept unchanged so existing references still work.
is_la_county = (
    (sample_address_coordinates['State'] == 'CA')
    & (sample_address_coordinates['County'] == 'Los Angeles')
)
sample_address_coordinates_ca = sample_address_coordinates[is_la_county]

# Ten most frequent streets in that subset, after excluding the streets
# already listed in `df_top10_street`.
df_top10_street_ca = (
    sample_address_coordinates_ca[~sample_address_coordinates_ca['Street'].isin(df_top10_street)]
    .groupby('Street')
    .agg(count=('ID', 'count'))
    .reset_index()
    .sort_values('count', ascending=False)
    .head(10)['Street']
)

# Density heat map of the start coordinates of records on those streets.
fig = px.density_mapbox(
    sample_address_coordinates_ca[sample_address_coordinates_ca['Street'].isin(df_top10_street_ca)],
    lat='Start_Lat',
    lon='Start_Lng',
    hover_data=['State', 'County', 'City', 'Street'],
    radius=10,
    zoom=8,
    height=600,
)
# OpenStreetMap tiles as the base map, and no margin around the figure.
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()

In [None]:
# Records whose 'State' is 'CA'.
sample_address_coordinates_ca = sample_address_coordinates[sample_address_coordinates['State'] == 'CA']

# Ten most frequent streets in that subset, after excluding the streets
# already listed in `df_top10_street`.
df_top10_street_ca = (
    sample_address_coordinates_ca[~sample_address_coordinates_ca['Street'].isin(df_top10_street)]
    .groupby('Street')
    .agg(count=('ID', 'count'))
    .reset_index()
    .sort_values('count', ascending=False)
    .head(10)['Street']
)

# Density heat map of the start coordinates of records on those streets.
fig = px.density_mapbox(
    sample_address_coordinates_ca[sample_address_coordinates_ca['Street'].isin(df_top10_street_ca)],
    lat='Start_Lat',
    lon='Start_Lng',
    hover_data=['State', 'County', 'City', 'Street'],
    radius=10,
    zoom=8,
    height=600,
)
# OpenStreetMap tiles as the base map, and no margin around the figure.
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()