# Imports & settings

In [None]:
from altair_saver import save
import streamlit as st
import altair as alt
import folium
import pandas as pd
import numpy as np
import pickle
import time
import os

In [None]:
def load_processed_data(file_path):
    """
        Loads a preprocessed dataset stored in pickle format.

        This function never preprocesses raw data itself: when `file_path` does
        not exist it only prints a reminder to run the preprocessing notebook.

        Parameters:
            file_path: Path of the pickle file to load.

        Returns:
            The unpickled object, or None when `file_path` does not exist.
    """

    if not os.path.exists(file_path):
        print("You have to run the preprocessing notebook before conduct any visualization.")
        # Explicit so callers can see that a missing file yields None.
        return None

    # NOTE: unpickling can execute arbitrary code; only load files produced by
    # this project's own preprocessing step, never files from untrusted sources.
    with open(file_path, 'rb') as file:
        data = pickle.load(file)
    print(f"Preprocessed dataset {file_path} already exists. The dataset is loaded.")

    return data

# Load the four preprocessed summer datasets (collisions, then weather, each
# for 2018 and 2020) in that order and bind every result to its own name.
(
    summer_collisions_2018,
    summer_collisions_2020,
    summer_weather_2018,
    summer_weather_2020,
) = map(load_processed_data, (
    'preprocessed_data/summer_collisions_2018.pkl',
    'preprocessed_data/summer_collisions_2020.pkl',
    'preprocessed_data/summer_weather_2018.pkl',
    'preprocessed_data/summer_weather_2020.pkl',
))

# Visualizations

In [None]:
# Turn off Altair's max-rows check so the charts below can be built directly
# from the full collision DataFrames.
alt.data_transformers.disable_max_rows()

In [None]:
# Stack the 2018 and 2020 summer collision tables into a single DataFrame.
# The original row labels are kept (no ignore_index), so index values may
# repeat across the two years.
summer_collisions = pd.concat([summer_collisions_2018, summer_collisions_2020])

In [None]:
# Bar chart of the number of collisions per day type, colored by the
# 'COVID-19 RESTRICTIONS' value of each record.
# NOTE(review): only a color encoding is used (no column/offset channel), so
# the bars will presumably be stacked rather than drawn side by side — confirm
# whether a truly grouped layout was intended.
alt.Chart(summer_collisions).mark_bar().encode(
    x=alt.X('DAY TYPE:N', axis=alt.Axis(title='Day Type', labelAngle=0)),
    y=alt.Y('count():Q', axis=alt.Axis(title='Number of collisions')),
    color=alt.Color('COVID-19 RESTRICTIONS:N', legend=alt.Legend(title='Dictated restrictions'))
).properties(
    title='Vehicle collisions and Covid-19 restrictions',
    width=200
)


In [None]:
# Extract the hour from the 'CRASH TIME' column and store it in a new 'Hour' column.
# NOTE(review): no explicit format is passed to pd.to_datetime — confirm the
# stored time values parse as intended.
summer_collisions['Hour'] = pd.to_datetime(summer_collisions['CRASH TIME']).dt.hour

In [None]:
# Bar chart of the number of collisions for each value of the 'Hour' column,
# with the hour encoded as an ordinal x axis.
alt.Chart(summer_collisions).mark_bar().encode(
    x=alt.X('Hour:O', axis=alt.Axis(title='Hour of the day')),
    y=alt.Y('count():Q', axis=alt.Axis(title='Number of collisions')),
).properties(
    title='Distribution of vehicle collisions by hour of the day',
    width=600
)

In [None]:
# Group by ZIP code and calculate the total number of collisions and average coordinates in each ZIP code
zip_grouped = summer_collisions.groupby('ZIP CODE').agg({
    # Both coordinates use the mean so every marker sits at the ZIP code's
    # average collision position (mixing 'first' latitude with mean longitude
    # placed markers at points that match neither).
    'LATITUDE': 'mean',
    'LONGITUDE': 'mean',
    'CRASH TIME': 'count'  # non-null crash times per ZIP code = number of collisions
}).reset_index()

# Create a Folium map of NYC using OpenStreetMap tiles
nyc_map = folium.Map(location=[40.7128, -74.0060], tiles='OpenStreetMap', zoom_start=11)

# Add one circle per ZIP code, sized by its collision count.
# ZIP codes without usable coordinates are skipped because folium rejects NaN
# locations; zip_grouped itself is left untouched.
plottable = zip_grouped.dropna(subset=['LATITUDE', 'LONGITUDE'])
for _, row in plottable.iterrows():
    folium.CircleMarker(
        location=[row['LATITUDE'], row['LONGITUDE']],
        radius=row['CRASH TIME'] * 0.1,  # Adjust the radius based on the number of collisions
        fill=True,
        color='blue'
    ).add_to(nyc_map)

# Save the Folium map as an HTML file
nyc_map.save('nyc_map_grouped.html')

In [None]:
# Preview the first rows of the per-ZIP-code aggregation.
zip_grouped.head()

In [None]:
# Melt the five vehicle-type columns into a long format: one row per vehicle
# slot of a collision. The vehicle type itself goes into 'VEHICLE TYPE' and the
# name of the column it came from into 'VEHICLE TYPE CODE'. (Previously
# 'VEHICLE TYPE' held the source column names, so counting it only yielded five
# identical totals instead of vehicle-type frequencies.)
melted_data = (
    summer_collisions[[
        'VEHICLE TYPE CODE 1',
        'VEHICLE TYPE CODE 2',
        'VEHICLE TYPE CODE 3',
        'VEHICLE TYPE CODE 4',
        'VEHICLE TYPE CODE 5',
    ]]
    .melt(var_name='VEHICLE TYPE CODE', value_name='VEHICLE TYPE')
    # Slots with no vehicle recorded are presumably stored as missing values;
    # they are not a vehicle type, so leave them out of the frequency data.
    .dropna(subset=['VEHICLE TYPE'])
)
melted_data.describe()

In [None]:
# Bar chart counting the rows of melted_data for each distinct value of its
# 'VEHICLE TYPE' column.
alt.Chart(melted_data).mark_bar().encode(
    x=alt.X('VEHICLE TYPE:N', axis=alt.Axis(title='Vehicle Type Code')),
    y=alt.Y('count():Q', axis=alt.Axis(title='Frequency')),
).properties(
    title='Frequency of Vehicle Type Codes in Collisions',
    width=600
)

In [None]:
# One row per ZIP code: mean latitude/longitude of its collisions plus the
# number of non-null crash times, exposed as the 'COLLISIONS' column.
grouped_data = (
    summer_collisions
    .groupby('ZIP CODE')
    .agg(
        LATITUDE=('LATITUDE', 'mean'),
        LONGITUDE=('LONGITUDE', 'mean'),
        COLLISIONS=('CRASH TIME', 'count'),
    )
    .reset_index()
)

In [None]:
# Preview the first rows of the per-ZIP-code collision summary.
grouped_data.head()

In [None]:
# Scatter plot for aggregated car crashes: one circle per row of grouped_data,
# positioned through the longitude/latitude channels and sized by 'COLLISIONS'.
# The tooltip shows the ZIP code and its collision count.
# The chart is only stored in `points` here; it is not displayed by this cell.
points = alt.Chart(grouped_data).mark_circle().encode(
    longitude='LONGITUDE:Q',
    latitude='LATITUDE:Q',
    size='COLLISIONS:Q',
    tooltip=['ZIP CODE:N', 'COLLISIONS:Q']
).properties(
    title='NYC Car Crashes by ZIP Code'
)

In [None]:
# Create a folium map centered around NYC with OSM tiles.
# This rebinds nyc_map, replacing the map object that was built (and already
# saved to 'nyc_map_grouped.html') for the ZIP-code circle markers.
nyc_map = folium.Map(location=[40.7128, -74.0060], zoom_start=12, tiles='OpenStreetMap')

In [None]:
altair_json = points.to_json()
# Add Altair chart as an HTML iframe to Folium map
folium.IFrame(html=altair_json, width=600, height=400).add_to(nyc_map)

# Save the combined map as an HTML file
nyc_map.save('nyc_combined_map.html')

# Open the HTML file in your web browser to visualize the map
import webbrowser
webbrowser.open('nyc_combined_map.html')