In [None]:
import pandas as pd
import numpy as np

## Get ids, labels, some road features

In [None]:
# Load the raw road-infraction records; the path is relative to the notebook's working directory.
accident_df = pd.read_csv("accidents-route.csv")

In [None]:
# List the available columns before picking features.
accident_df.columns

In [None]:
# Preview the first rows of the raw data.
accident_df.head()

In [None]:
# Distinct values of the "code-infraction" column, in order of first appearance.
infraction_types = pd.unique(accident_df["code-infraction"])
infraction_types

We will only try to predict collisions.

In [None]:
# "code-infraction" values that denote a collision; every other infraction is ignored.
collisions = [
    'Collision matérielle de 2 000 $ et -',
    'Collision matérielle de + de 2 000 $',
    'Collision avec blessé',
    'Collision mortelle',
    'Collision hors route de + de 2 000 $',
    'Collision hors route avec blessé',
    'Collision hors route de 2 000 $ et -',
    'Collision mortelle hors route',
]

# Boolean mask first, then select the matching rows.
is_collision = accident_df["code-infraction"].isin(collisions)
collisions_df = accident_df.loc[is_collision]

In [None]:
# Check how many rows survived the collision filter, then preview a few of them.
print(collisions_df.shape)
collisions_df.head(3)

In [None]:
# Keep only collisions recorded from 2015 through 2019.
# Despite its "_2018" suffix, the frame covers all five years.
in_study_years = collisions_df["annee-infraction"].isin([2015, 2016, 2017, 2018, 2019])
collisions_2018 = collisions_df.loc[in_study_years]
print(collisions_2018.shape)
collisions_2018.head(3)

In [None]:
# Columns still available after the collision and year filters.
collisions_2018.columns

In [None]:
# Columns kept for the model. The stray `from pyexpat import features` auto-import
# was removed: the name was rebound to this list on the very next statement.
features = ["code-infraction","mois-infraction","type-rue","rue","type-rue-2","annee-infraction"]

# Restrict to those columns and drop every row with a missing value in any of them.
collisions_2018 = collisions_2018[features].dropna()

In [None]:
# Preview the reduced frame (selected feature columns, rows with missing values dropped).
collisions_2018.head()

We now want to find how many collisions happen on a particular road in a given year and month.

In [None]:
# Distinct secondary road types and distinct road names, in order of first appearance.
type_rue = pd.unique(collisions_2018["type-rue-2"])
rues = pd.unique(collisions_2018["rue"])

In [None]:
# Build a "road@year@month" key for each collision.
# Year and month are cast to int before being stringified: if read_csv parsed either
# column as float (which happens when the raw file has blanks in it), astype(str) alone
# would produce "2018.0" and the later int(...) parsing of the key would fail.
collisions_2018["id"] = (
    collisions_2018["rue"]
    + "@" + collisions_2018["annee-infraction"].astype(int).astype(str)
    + "@" + collisions_2018["mois-infraction"].astype(int).astype(str)
)

In [None]:
# Distinct "road@year@month" keys, in order of first appearance.
uniques = collisions_2018["id"].unique()

In [None]:
# Number of collisions per "road@year@month" key; the result is indexed by the key.
collisions_by_id = collisions_2018.groupby("id")
_count = collisions_by_id["id"].count()


In [None]:
# Split each "road@year@month" key from the right, so a road name that itself
# contains "@" stays intact instead of shifting the year/month fields.
id_parts = [i.rsplit("@", 2) for i in uniques]
months = [int(parts[2]) for parts in id_parts]
year = [int(parts[1]) for parts in id_parts]
road = [parts[0] for parts in id_parts]

# "type-rue" of the first row seen for each key. A single drop_duplicates pass replaces
# the original per-key boolean filter, which rescanned the whole frame once per key.
first_type_by_id = collisions_2018.drop_duplicates(subset="id").set_index("id")["type-rue"]
road_type = first_type_by_id.loc[uniques].tolist()

In [None]:
# One row per "road@year@month" key: the collision count (label) plus the fields
# parsed from the key. `_count[uniques]` reorders the counts to follow `uniques`.
count_columns = {
    "id": uniques,
    "count": _count[uniques],
    "month": months,
    "road": road,
    "road_type": road_type,
    "year": year,
}
# The frame inherits the id index from `_count`; swap it for a plain 0..n-1 index.
count = pd.DataFrame(count_columns).reset_index(drop=True)
count.head()

In [None]:
# (rows, columns) of the per-road, per-month table.
count.shape

## Adding speed limits to the collision counts

---


In [None]:
# Load the speed-limit file (path relative to the notebook's working directory),
# then show its size and first rows.
speed_raw = pd.read_csv("limite-vitesse-2014.csv")
print(speed_raw.shape)
speed_raw.head(5)

In [None]:
# Distinct values of the "ODONYME" column, in order of first appearance.
odonymes = speed_raw["ODONYME"].unique()

In [None]:
# Map each value of column 8 to the last-column value of the first row it appears in.
# Columns are read by position through .iloc: a bare integer key such as row[8] or
# row[-1] on a label-indexed row is deprecated positional access in pandas 2.x.
# NOTE(review): column 8 is presumably "ODONYME" (the following cell compares the keys
# against it) and the last column presumably the speed limit; confirm against the CSV.
lim_speed_by_od = {}
for odonyme, limit in zip(speed_raw.iloc[:, 8], speed_raw.iloc[:, -1]):
    # setdefault keeps the first value seen, matching the original `not in` guard.
    lim_speed_by_od.setdefault(odonyme, limit)

In [None]:
# Sanity check: the dictionary keys should be exactly the distinct odonyms.
set(lim_speed_by_od) == set(odonymes)

In [None]:
# Pair every odonym with its lower-cased form once, instead of lower-casing it again
# for each road. Non-string entries (e.g. NaN from a blank CSV cell) cannot be matched
# and are skipped rather than crashing on .lower().
odonymes_lower = [(j, j.lower()) for j in odonymes if isinstance(j, str)]

check = []         # one flag per road: True if a speed limit was found, else False
welp = []          # one True per matched road
matches_dict = {}  # road name -> value stored for the first odonym that contains it

# NOTE(review): matching is a case-insensitive substring test, so a short road name
# can match an unrelated longer odonym; confirm this is acceptable.
for i in rues:
    road_lower = i.lower()
    for j, j_lower in odonymes_lower:
        if road_lower in j_lower:
            check.append(True)
            welp.append(True)
            matches_dict[i] = lim_speed_by_od[j]
            break
    else:
        # for/else: reached only when no odonym matched. The original appended False
        # unconditionally, so matched roads were recorded as both True and False.
        check.append(False)

In [None]:
# Look up each road in matches_dict; roads with no entry get 0 as a placeholder.
matched_limits = count["road"].map(matches_dict)
count["speed_limit"] = matched_limits.fillna(0)

In [None]:
# Preview the table with the new speed_limit column.
count.head()

In [None]:
# Drop rows with any missing value. speed_limit was already filled with 0 in the
# cell above, so only NaNs in the other columns can remove a row here.
count = count.dropna()

In [None]:
# Display the table after dropping incomplete rows.
count

## Add weather data
---
The weather data was compiled in a separate notebook, so here we only need to match it to the collision counts by month and year.

In [None]:
# Load the pre-compiled weather table and make month/year integers so they can be
# turned into the same "month_year" keys as the collision counts.
weather_raw = pd.read_csv("weather_compilation.csv")
weather_raw["month"] = weather_raw["month"].astype(int)
weather_raw["year"] = weather_raw["year"].astype(int)

# The preview must be the last expression to be rendered; it used to sit mid-cell,
# where its result was silently discarded.
weather_raw.head(5)

In [None]:
# Keep the 2015-2019 weather rows. The explicit .copy() makes this an independent
# frame, so adding a column below does not write into a slice of weather_raw
# (which raises SettingWithCopyWarning).
weather_2018 = weather_raw[weather_raw["year"].isin([2015,2016,2017,2018,2019])].copy()

# "month_year" key, e.g. "3_2018", in the same format as the key built on the count table.
weather_2018["month_year"] = weather_2018["month"].astype(str) + "_" +weather_2018["year"].astype(str)
weather_2018

In [None]:
# Build the "month_year" key (e.g. "3_2018") used to look up weather values.
count["month_year"] = count["month"].astype(str) +  "_" + count["year"].astype(str)

In [None]:
# Display the table with the new month_year key.
count

In [None]:
# Copy each weather measure onto the count table via the "month_year" key.
# If weather_2018 holds several rows for the same key, the last one wins.
for weather_col in ["temp", "rain", "snow"]:
    value_by_month_year = dict(zip(weather_2018["month_year"], weather_2018[weather_col]))
    count[weather_col] = count["month_year"].map(value_by_month_year)

In [None]:
# Display the table with the temp, rain and snow columns added.
count

In [None]:
# Drop rows with any missing value, e.g. month_year keys that had no weather entry
# and were therefore mapped to NaN in the loop above.
count = count.dropna()

In [None]:
# Persist the final feature/label table. to_csv writes the row index as the first,
# unnamed column by default; after the dropna calls that index may have gaps.
count.to_csv("features_label2.csv")

In [None]:
# Number of distinct speed_limit values (the 0 placeholder counts as one of them).
count["speed_limit"].nunique(dropna=False)