# VI: Second Practical Work

**Authors:** Gerard Comas & Marc Franquesa.


## Data Processing
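This notebook processes all the raw datasets (collisions, weather and the New York map) and saves the cleaned versions used by the rest of the project.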

In [None]:
# Initial imports
import pandas as pd
import numpy as np
import altair as alt
import geopandas as gpd
import warnings
from shapely.geometry import shape, Point
import math

warnings.simplefilter(action="ignore", category=FutureWarning)

### Collisions dataset

In [None]:
# Load the raw collisions export and turn the crash timestamps into real datetimes
collisions = pd.read_csv("./original-data/collisions.csv")
collisions = collisions.assign(
    **{"CRASH DATETIME": lambda frame: pd.to_datetime(frame["CRASH DATETIME"])}
)

collisions.head()

In [None]:
# Keep only the crashes that happened during 2018. Comparing the calendar year
# bounds the selection on both sides, so rows dated before 2018 are excluded too
# (a bare "< 2019-01-01" comparison would let them through).
crashed_in_2018 = collisions["CRASH DATETIME"].dt.year == 2018

# Keep only the columns needed downstream. The explicit copy gives later cells an
# independent frame to add columns to, instead of a selection of the raw data.
collisions = collisions.loc[
    crashed_in_2018,
    ["CRASH DATETIME", "LATITUDE", "LONGITUDE", "ORIGINAL VEHICLE"],
].copy()

In [None]:
# Raw vehicle labels found in the data, grouped under the category they belong to
categories = {
    "Taxi": ["Taxi"],
    "Ambulance": ["Ambulance", "AMBUL", "Ambul", "ambul", "AMB", "AMBU", "AMBULANCE"],
    "Fire truck": ["Fire", "FIRET", "FIRE", "FDNY", "fdny", "FD tr", "fd tr", "firet", "fire"],
}

# Flip the grouping into a flat lookup: raw label -> category
reverse_categories = dict(
    (raw_label, category)
    for category, raw_labels in categories.items()
    for raw_label in raw_labels
)

# Classify every crash by its raw vehicle label, discard the crashes whose label
# is in none of the categories, and keep only the columns used from here on
collisions = (
    collisions
    .assign(VEHICLE=collisions["ORIGINAL VEHICLE"].map(reverse_categories))
    .dropna(subset=["VEHICLE"])
    [["CRASH DATETIME", "LATITUDE", "LONGITUDE", "VEHICLE"]]
)

In [None]:
# Preview the classified collisions
collisions.head(n=5)

In [None]:
# Emoji used to represent each vehicle category
vehicle_emojis = {"Taxi": "🚕", "Ambulance": "🚑", "Fire truck": "🚒"}

# Attach the matching emoji to every crash
collisions = collisions.assign(
    **{"VEHICLE EMOJI": collisions["VEHICLE"].map(vehicle_emojis)}
)

collisions.head()

In [None]:
# Derive the calendar fields used for daily / weekly aggregation
crash_datetime = collisions["CRASH DATETIME"]
iso_calendar = crash_datetime.dt.isocalendar()

# ISO-8601 assigns the last days of December to week 1 of the *following* ISO year
# whenever they fall in that week (2018-12-31 is ISO week 1 of 2019). Left as-is,
# those crashes would be merged with the first week of January of the same calendar
# year, so they are relabelled as week 53 instead.
# (The mirror case, early-January days belonging to the previous ISO year, cannot
# occur for 2018 because 2018-01-01 is a Monday.)
belongs_to_next_iso_year = (
    (iso_calendar["year"] > crash_datetime.dt.year).fillna(False).astype(bool)
)

collisions["CRASH DAY"] = crash_datetime.dt.strftime("%Y-%m-%d")
collisions["CRASH WEEKDAY"] = crash_datetime.dt.day_name()
collisions["CRASH WEEK NUMBER"] = iso_calendar["week"].mask(belongs_to_next_iso_year, 53)
collisions.head()

### Weather dataset

In [None]:
# Load the raw 2018 weather records
weather = pd.read_csv(filepath_or_buffer="./original-data/weather2018.csv")

In [None]:
# Keep the date and the weather icon, and expose the icon under a "WEATHER" column
weather = weather.loc[:, ["datetime", "icon"]].assign(
    WEATHER=lambda frame: frame["icon"]
)

# List the distinct weather values present in the data
weather["WEATHER"].unique()

In [None]:
# Emoji used to represent each weather value
weather_emojis = {
    "rain": "🌧",
    "clear-day": "☀️",
    "cloudy": "☁️",
    "partly-cloudy-day": "⛅️",
}

# Attach the matching emoji to every weather record
weather = weather.assign(
    **{"WEATHER EMOJI": weather["WEATHER"].map(weather_emojis)}
)

In [None]:
# Preview the processed weather records
weather.head(n=5)

### Collisions + Weather

In [None]:
# Pair every crash with the weather record whose "datetime" equals its "CRASH DAY";
# crashes without a matching weather record are dropped (inner join)
collisions_weather = collisions.merge(
    weather,
    how="inner",
    left_on="CRASH DAY",
    right_on="datetime",
)

### NY Map

In [None]:
# Borough polygons; each borough is identified by the position of its row
map_data = gpd.read_file("./original-data/map.geojson")


def _borough_index(row):
    """Return the position in ``map_data`` of the first polygon containing the crash.

    Returns NaN when the crash has no coordinates or lies outside every polygon.
    """
    if pd.isnull(row["LATITUDE"]) or pd.isnull(row["LONGITUDE"]):
        return np.nan
    # Shapely points are (x, y), i.e. (longitude, latitude)
    crash_location = Point(row["LONGITUDE"], row["LATITUDE"])
    hits = np.flatnonzero(map_data.contains(crash_location).to_numpy())
    return hits[0] if len(hits) else np.nan


# One pass over the crashes: the helper already yields NaN for unlocated crashes,
# so no sentinel value has to be inserted and replaced afterwards
collisions_weather["BOROUGH"] = collisions_weather.apply(_borough_index, axis=1)

collisions_weather.head()

In [None]:
# Count the crashes located in each borough. BOROUGH holds row positions of map_data,
# so the counts are aligned on map_data's index (assumed to be the default 0..n-1 index).
# A borough without any located crash gets an explicit 0 instead of NaN, which also
# keeps the column integer-typed.
collisions_per_borough = collisions_weather.groupby("BOROUGH").size()
map_data["COLLISIONS"] = collisions_per_borough.reindex(map_data.index, fill_value=0)

map_data.head()

In [None]:
# Borough names, listed in the order of the positions stored in the BOROUGH column
borough_names = ["Staten Island", "Bronx", "Queens", "Manhattan", "Brooklyn"]
boroughs = {float(position): name for position, name in enumerate(borough_names)}

# Replace the numeric position with the borough name
collisions_weather["BOROUGH"] = collisions_weather["BOROUGH"].map(boroughs)

# Keep only the columns that are exported
exported_columns = [
    "CRASH DATETIME",
    "CRASH DAY",
    "CRASH WEEK NUMBER",
    "CRASH WEEKDAY",
    "BOROUGH",
    "VEHICLE",
    "VEHICLE EMOJI",
    "WEATHER",
    "WEATHER EMOJI",
]
collisions_weather = collisions_weather[exported_columns]

# Save the processed dataset
collisions_weather.to_csv("./processed-data/collisions_weather.csv", index=False)

In [None]:
# Total area in km² that is split across the boroughs (value taken from Wikipedia)
TOTAL_AREA_KM2 = 783.84

# Areas have to be measured in a projected CRS: the map is in longitude/latitude
# degrees, where .area yields squared degrees, distorts the boroughs' relative sizes
# and makes GeoPandas emit a warning. EPSG:5070 (NAD83 / Conus Albers) is an
# equal-area projection in metres, so AREA is expressed in m².
map_data["AREA"] = map_data["geometry"].to_crs(epsg=5070).area

map_data["AREA PROPORTION"] = map_data["AREA"] / map_data["AREA"].sum()

# Distribute the reference total across the boroughs according to their share of the area
map_data["AREA KM2"] = TOTAL_AREA_KM2 * map_data["AREA PROPORTION"]

map_data["COLLISIONS / KM2"] = map_data["COLLISIONS"] / map_data["AREA KM2"]

In [None]:
# Save the enriched borough map as GeoJSON
map_data.to_file(filename="processed-data/map.geojson", driver="GeoJSON")