# Montreal Fire


## Data Acquisition

Once the whole data acquisition pipeline is completed, you need to manually upload the data file to HuggingFace and update the task's code to use the new version.


### Download

In [1]:
import pandas as pd

from datetime import datetime

# Load the two CSV files
# Source: https://www.donneesquebec.ca/recherche/dataset/vmtl-interventions-service-securite-incendie-montreal
# ... past two years
RECENT_INTERVENTIONS_URL = "https://donnees.montreal.ca/dataset/2fc8a2b9-1556-410e-a118-c46e97e9f19e/resource/71e86320-e35c-4b4c-878a-e52124294355/download/donneesouvertes-interventions-sim.csv"
# ... 2015 - 2022
HISTORICAL_INTERVENTIONS_URL = "https://donnees.montreal.ca/dataset/2fc8a2b9-1556-410e-a118-c46e97e9f19e/resource/005e4eb6-0377-45bf-a911-7077fd3b5ed0/download/donneesouvertes-interventions-sim_2015_2022.csv"

# Download data
df1 = pd.read_csv(RECENT_INTERVENTIONS_URL)
df2 = pd.read_csv(HISTORICAL_INTERVENTIONS_URL)

# Concatenate the dataframes (recent file first, then the historical one) and
# renumber the rows so the index is unique across both sources
combined_df = pd.concat([df1, df2], ignore_index=True)

# Uniformize dates and convert to datetime:
#   1. replace "/" with "-" so every value uses the same date separator,
#   2. keep only the part before "T" (presumably the date; the time of day is dropped).
# The vectorized `.str` accessor is used instead of a per-row lambda: it propagates
# missing values (which end up as NaT) instead of raising AttributeError on them.
creation_dates = (
    combined_df["CREATION_DATE_TIME"]
    .str.replace("/", "-", regex=False)
    .str.split("T")
    .str[0]
)
combined_df["CREATION_DATE_TIME"] = pd.to_datetime(creation_dates)

# Remove present month (to avoid incomplete data) -- Optional
# XXX: In the current version of the data (August 2024), we included the current month since the day was 27
# combined_df = combined_df.loc[combined_df["CREATION_DATE_TIME"] < "2024-08-01"]

# Show some stats
print("Min date:", combined_df["CREATION_DATE_TIME"].min())
print("Max date:", combined_df["CREATION_DATE_TIME"].max())
print("Number of entries:", len(combined_df))

combined_df.head()

Min date: 2015-01-01 00:00:00
Max date: 2024-08-25 00:00:00
Number of entries: 1093883


Unnamed: 0,INCIDENT_NBR,CREATION_DATE_TIME,INCIDENT_TYPE_DESC,DESCRIPTION_GROUPE,CASERNE,NOM_VILLE,NOM_ARROND,DIVISION,NOMBRE_UNITES,MTM8_X,MTM8_Y,LONGITUDE,LATITUDE
0,1808,2023-01-07,Premier répondant,1-REPOND,73,Montréal,Saint-Laurent,3,1.0,291515.4,5042217.3,-73.670048,45.519748
1,11457,2023-02-05,Appel de Cie de détection,Alarmes-incendies,25,Montréal,Ville-Marie,6,4.0,298482.0,5040264.1,-73.580848,45.50227
2,16508,2023-02-19,Premier répondant,1-REPOND,64,Montréal,Lachine,2,1.0,290911.5,5032451.0,-73.677501,45.431855
3,18702,2023-02-26,Ascenseur,SANS FEU,30,Montréal,Le Plateau-Mont-Royal,6,7.0,299264.4,5041129.8,-73.570845,45.510067
4,18705,2023-02-26,Premier répondant,1-REPOND,34,Montréal,Côte-des-Neiges-Notre-Dame-de-Grâce,3,1.0,295041.2,5038167.0,-73.624835,45.483359


### Preprocessing

In [2]:
# Clean up the incident-type labels with vectorized string methods. Unlike a
# per-row lambda, these leave missing values untouched instead of raising.
incident_type = combined_df["INCIDENT_TYPE_DESC"]

# Regroup all kinds of nautical rescues: any label containing "sauv. nautique"
# (case-insensitive, literal match) is collapsed into a single category.
# `na=False` makes missing labels count as "not a nautical rescue".
is_nautical_rescue = incident_type.str.lower().str.contains("sauv. nautique", regex=False, na=False)
incident_type = incident_type.mask(is_nautical_rescue, "Sauvetage Nautique")

# Convert to english. These are literal substring replacements; `regex=False`
# is required so that the "*" in "Feu de champ *" is not treated as a regex quantifier.
incident_type = (
    incident_type
    .str.replace("Déchets en feu", "Trash on fire", regex=False)
    .str.replace("Sauvetage Nautique", "Nautical rescue", regex=False)
    .str.replace("Feu de champ *", "Field fire", regex=False)
)

combined_df["INCIDENT_TYPE_DESC"] = incident_type

### Save to disk

In [4]:
# Name the output file after today's date (YYYY-MM-DD) and write the frame as CSV
# in the current working directory. `index` is left at its default, so the
# DataFrame index is written as the first column.
export_date = datetime.now().strftime('%Y-%m-%d')
output_path = f"montreal_fire_{export_date}.csv"
combined_df.to_csv(output_path)