# Datathon BCG GAMMA 

### Librairies & Paramètres

In [1]:
import pandas as pd
import numpy as np
from fbprophet import Prophet

In [2]:
START_FORECAST = pd.Timestamp("2020-12-11 01:00:00") # date de début de prédiction
END_FORECAST = pd.Timestamp("2020-12-16 23:00:00") # date de fin de prédiction

PARAMS = { # paramètres à utiliser pour chaque time series
    "champs": {
        "debit": {
            "use_ext": True,
            "seasonality": "additive",
            "training_start": pd.Timestamp("2020-09-01 00:00:00")
        },
        "taux": {
            "use_ext": True,
            "seasonality": "multiplicative",
            "training_start": pd.Timestamp("2020-01-01 00:00:00")
        }
    },
    "conv": {
        "debit": {
            "use_ext": True,
            "seasonality": "additive",
            "training_start": pd.Timestamp("2020-03-01 00:00:00")
        },
        "taux": {
            "use_ext": False,
            "seasonality": "multiplicative",
            "training_start": pd.Timestamp("2020-05-01 00:00:00")
        }
    },
    "sts": {
        "debit": {
            "use_ext": True,
            "seasonality": "additive",
            "training_start": pd.Timestamp("2020-03-01 00:00:00")
        },
        "taux": {
            "use_ext": True,
            "seasonality": "multiplicative",
            "training_start": pd.Timestamp("2020-03-01 00:00:00")
        }
    }
}

### Import des données

#### En local

In [3]:
dataframes = {arc: pd.read_csv(f"data/processed_{arc}.csv") for arc in ["champs", "conv", "sts"]}

#### Sur Google Colab

##### a. Import des fichiers

Importer les fichiers "processed_champs.csv", "processed_conv.csv" et "processed_sts.csv" du dossier data.

In [7]:
from google.colab import files
uploaded = files.upload()

ModuleNotFoundError: No module named 'google.colab'

##### b. Lecture des données

In [3]:
dataframes = {arc: pd.read_csv(f"processed_{arc}.csv") for arc in ["champs", "conv", "sts"]}

### Modèle

In [4]:
class ProphetModel():

    """ Prédicteur basé sur le modèle Prophet de Facebook."""

    def __init__(self, start_train, start_pred, end_pred, use_ext, seasonality):
        """
        Arg(s):
         - start_train: <pandas.Timestamp> date de début d'entraînement;        
         - start_pred: <pandas.Timestamp> date de début de prédiction;
         - end_pred: <pandas.Timestamp> date de fin de prédiction;
         - use_ext: <bool> utiliser les variables exogènes ou non;
         - seasonality: <str> type de saisonnalité à utiliser ("additive" ou "multiplicative").
        """
        self.start_train = start_train
        self.start_pred = start_pred 
        self.end_pred = end_pred
        self.use_ext = use_ext
        self.prophet = Prophet(seasonality_mode=seasonality)
    

    def prepare(self, data, target, date_column="date_heure"):
        """
        Prépare les données pour le modèle.
        -------
        Arg(s):
         - data: <pandas.DataFrame> données de l'arc à prédire;
         - target: <str> quantité à prédire;
         - date_column: <str> colonne contenant les dates.
        """
        self.target = target
        if not self.use_ext:
            self.df = data[[date_column, target]]
            self.df.date_heure = pd.to_datetime(self.df.date_heure)
            self.df.rename({"date_heure": "ds", target: "y"}, axis=1, inplace=True)
        else:
            self.df = data.drop(["Unnamed: 0", "date", "debit"*(target!="debit") + "taux"*(target!="taux")], axis=1)
            self.df.date_heure = pd.to_datetime(self.df.date_heure)
            self.df.rename({"date_heure": "ds", target: "y", "holidays": "holidays_"}, axis=1, inplace=True)
        self.df.fillna(0, inplace=True)
        self.df_train = self.df.loc[(self.df.ds>=self.start_train) & (self.df.ds<self.start_pred)]
        self.df_test = self.df.loc[(self.df.ds>=self.start_pred) & (self.df.ds<=self.end_pred)]
    

    def fit(self):
        if self.use_ext:
            for col in self.df_train.columns:
                if col not in ["ds", "y"]:
                    self.prophet.add_regressor(col)
        self.prophet.fit(self.df_train)
    
    
    def predict(self):
        future = self.df_test.drop("y", axis=1)
        self.forecast = self.prophet.predict(future)
        self.y_pred = self.forecast.loc[(self.forecast.ds>=self.start_pred) & (self.forecast.ds<=self.end_pred)][["ds", "yhat"]].rename({"yhat": self.target}, axis=1)
        self.y_pred.loc[self.y_pred[self.target]<0, self.target] = 0
        self.y_pred.set_index("ds", inplace=True)

        return self.y_pred

    

### Prédictions

In [5]:
def run_forecast(start=START_FORECAST, end=END_FORECAST, params=PARAMS):
    """
    Réalise la prédiction pour chaque arc sur la période souhaitée et avec les paramètres spécifiés.
    """

    output = pd.DataFrame()

    for arc in ["champs", "conv", "sts"]:
        results = pd.DataFrame({"Arc": arc, "Datetime": pd.date_range(start, end, freq="H")})

        for target in ["debit", "taux"]:
            print(f"\nForecasting '{target}' for '{arc}'")
            param = params[arc][target]
            model = ProphetModel(param["training_start"], start, end, param["use_ext"], param["seasonality"])
            model.prepare(dataframes[arc], target)
            model.fit()
            y_pred = model.predict()
            results[target] = y_pred.reset_index()[target]
        
        output = pd.concat([output, results], axis=0)

    output.rename({"debit": "Débit horaire", "taux": "Taux d'occupation"}, axis=1, inplace=True)
    output.replace({"champs": "Champs-Elysées", "conv": "Convention", "sts": "Saints-Pères"}, inplace=True)

    return output

In [6]:
output = run_forecast()
output.to_csv("prediction.csv", index=False, encoding="utf-8")


Forecasting 'debit' for 'champs'


INFO:numexpr.utils:NumExpr defaulting to 8 threads.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.



Forecasting 'taux' for 'champs'


INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.



Forecasting 'debit' for 'conv'


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.



Forecasting 'taux' for 'conv'


INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.



Forecasting 'debit' for 'sts'


INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.



Forecasting 'taux' for 'sts'
