In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Weather

Source des données : https://www.ncei.noaa.gov/cdo-web/customoptions et https://www.infoclimat.fr/previsions-meteo/details/STA/07156/paris-montsouris.html pour les prévisions.

PRCP = précipitations (en pouces)

SNWD = épaisseur de neige (snow depth), en pouces

TMAX = température maximale (en degrés Fahrenheit)

NAME = nom de la station

In [None]:
# Load the raw weather observations and keep only the Paris Montsouris station.
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of the raw data (avoids SettingWithCopyWarning).
weather = pd.read_csv("data/raw_data/weather.csv")
weather = weather.loc[weather["NAME"] == "PARIS MONTSOURIS, FR"].copy()

# A missing snow depth is treated as "no snow".
# fillna replaces the former replace(np.NaN, ...) call: np.NaN no longer exists in NumPy 2.
weather["SNWD"] = weather["SNWD"].fillna(0.)
weather["DATE"] = pd.to_datetime(weather["DATE"])

# Fill the missing TMAX values from TMIN and TAVG.
# First fallback: treat TAVG as the midpoint of TMIN and TMAX, i.e. TMAX = 2*TAVG - TMIN.
weather["TMAX"] = weather["TMAX"].fillna(2 * weather["TAVG"] - weather["TMIN"])
# Second fallback (rows where TMIN is missing too): scale TAVG by the mean
# TMAX/TAVG ratio observed on the rows where TMAX is available.
weather["TMAX"] = weather["TMAX"].fillna(
    weather["TAVG"] * (weather["TMAX"] / weather["TAVG"]).mean()
)

# Drop station metadata, attribute flags and the helper temperature columns.
weather = weather.drop(columns=["STATION", "LATITUDE", "LONGITUDE", "ELEVATION", "PRCP_ATTRIBUTES", "SNWD_ATTRIBUTES", "TAVG_ATTRIBUTES", "TMAX_ATTRIBUTES", "TMIN_ATTRIBUTES", "TAVG", "TMIN"])
weather

In [None]:
# Hand-entered forecast for the Paris Montsouris station, one row per day,
# laid out with the same columns as the cleaned historical weather table.
# Snow depth is 0 for every forecast day.
weather_forcast = pd.DataFrame(
    data=[
        ["PARIS MONTSOURIS, FR", pd.to_datetime(day), prcp, 0., tmax]
        for day, prcp, tmax in (
            ("2024-12-06", 0.08, 54.),
            ("2024-12-07", 0.08, 52.),
            ("2024-12-08", 0.04, 41.),
            ("2024-12-09", 0.03, 43.),
            ("2024-12-10", 0., 43.),
        )
    ],
    columns=["NAME", "DATE", "PRCP", "SNWD", "TMAX"],
)
weather_forcast

In [None]:
# Persist the forecast table as CSV (the row index is written as the first column).
weather_forcast.to_csv(path_or_buf="data/weather_forcast.csv")

# Holidays

In [None]:
# Read the raw holidays table and parse its "date" column into datetimes.
holidays = (
    pd.read_csv("data/raw_data/holidays.csv")
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
)
holidays

In [None]:
# Load the bank-holiday reference file (semicolon-separated), parse the dates,
# sort chronologically, drop the "année" column and index the rows by date.
bank_holidays = (
    pd.read_csv("data/raw_data/om-referentiel-jours-feries.csv", sep=";")
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .sort_values("date")
    .drop(columns="année")
    .set_index("date")
)

# Expand to one row per calendar day over the covered period; days that are
# absent from the reference file come out as NaN.
bank_holidays = bank_holidays.reindex(
    pd.date_range(
        start=pd.to_datetime("2003-01-01"),
        end=pd.to_datetime("2024-12-20"),
        freq=pd.Timedelta(days=1),
    )
)

# Keep the original "férié" values under "nom_jour_ferie", then turn "férié"
# into a 0/1 flag (1 where a value is present, 0 where it is NaN).
bank_holidays["nom_jour_ferie"] = bank_holidays["férié"].copy()
bank_holidays["férié"] = np.where(bank_holidays["férié"].notna(), 1, 0)
bank_holidays.columns = ["ferie", "nom_jour_ferie"]
bank_holidays.index.name = "date"

bank_holidays

In [None]:
# Combine the holidays table and the bank-holiday table into a single frame:
# inner join on the shared "date" column, index by date, and map the boolean
# values True/False to 1/0.
# NOTE(review): recent pandas versions may warn about silent downcasting in
# `replace` — confirm the resulting dtypes when upgrading pandas.
holidays = (
    holidays
    .merge(bank_holidays.reset_index(), on="date", how="inner")
    .set_index("date")
    .replace({True: 1, False: 0})
)
holidays

In [None]:
# Persist the merged holidays / bank-holidays table as CSV (the date index is written too).
holidays.to_csv(path_or_buf="data/holidays_bank_holidays.csv")