In [None]:
import pandas as pd
from pytorch_lightning import LightningDataModule
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch import sigmoid
from torch.utils.data import TensorDataset, random_split, DataLoader
%load_ext autoreload
%autoreload 2
# Columns kept when prediction rows are appended to the overall results frame.
output_cols = [
    'date_cible',
    'perimetre_prod',
    'type_prod',
    'prev_FC',
]

# Per production type, the patterns of feature columns to discard.
# WindDataModule passes each entry to `DataFrame.filter(regex=...)`, so every
# column whose name matches the pattern is removed.
USELESS_COLS_BY_TYPE = {
    'pv': ['u100', 'ff100', 'v100'],
    'eolien': ['t2m', 'ssrd'],
}

# National models

class WindDataModule(LightningDataModule):
    """Data module serving standardised features and ``FC`` targets for one production type.

    Rows of ``df`` whose ``type_prod`` equals the requested type are kept.
    Rows with a ``date_cible`` year before 2019 are split into train and
    validation sets (33% validation); rows from 2019 form the test set.
    Features are standardised with a ``StandardScaler`` fitted on the training
    split only.

    Args:
        df: Source frame. Must provide ``type_prod``, ``FC`` and a
            datetime-like ``date_cible`` column.
        type_prod: Production type to keep; must be a key of
            ``USELESS_COLS_BY_TYPE``.
        batch_size: Batch size of the training dataloader.
        split_seed: ``random_state`` of the train/validation split. A fixed
            seed makes two instances built from the same frame produce the
            same split and therefore the same fitted scaler, so tensors from
            a freshly built instance match the scaling the model was trained
            with. Pass ``None`` for a different random split on every call.
    """

    # Columns removed up-front when they are present in the frame.
    _DROPPED_COLS = ['perimetre_prod', 'type_prod', 'date_lancement', 'puissance_installee', 'comptage']
    # Columns that are not model inputs: the target and the timestamp.
    _NON_FEATURE_COLS = ['FC', 'date_cible']

    def __init__(self, df: pd.DataFrame, type_prod: str, batch_size: int, split_seed: int = 0):
        super().__init__()
        self.df = df
        self.type_prod = type_prod
        self.batch_size = batch_size
        self.split_seed = split_seed
        self._tensors_ready = False

    def prepare_data(self):
        """Build the train/val/test tensors.

        Kept so that callers invoking ``prepare_data()`` directly still get
        ``x_train`` / ``x_val`` / ``x_test`` and the matching ``y_*`` attributes.
        """
        self._build_tensors()

    def setup(self, stage=None):
        """Make sure the tensors exist on the current process.

        Lightning only runs ``prepare_data`` on one process, so state assigned
        there is missing on the others; building here as well covers that
        case. Nothing is recomputed when ``prepare_data`` already ran.
        """
        if not self._tensors_ready:
            self._build_tensors()

    def _build_tensors(self):
        """Filter, split and scale ``self.df`` and store the resulting float tensors."""
        selected = self.df[self.df.type_prod == self.type_prod]
        df = selected.drop(columns=self._DROPPED_COLS, errors='ignore')

        # Remove every column matching one of the patterns declared useless
        # for this production type.
        for pattern in USELESS_COLS_BY_TYPE[self.type_prod]:
            df = df.drop(columns=list(df.filter(regex=pattern).columns))

        years = df.date_cible.dt.year
        df_test = df[years == 2019]
        df_train, df_val = train_test_split(
            df[years < 2019], test_size=0.33, random_state=self.split_seed
        )

        # Fit the scaler on the training split only, then reuse it everywhere.
        scaler = StandardScaler()
        self.x_train = torch.from_numpy(scaler.fit_transform(self._features(df_train))).float()
        self.x_val = torch.from_numpy(scaler.transform(self._features(df_val))).float()
        self.x_test = torch.from_numpy(scaler.transform(self._features(df_test))).float()
        self.scaler = scaler

        self.y_train = self._target(df_train)
        self.y_val = self._target(df_val)
        self.y_test = self._target(df_test)
        self._tensors_ready = True

    def _features(self, frame):
        """Return ``frame`` without the target and timestamp columns."""
        return frame.drop(columns=self._NON_FEATURE_COLS)

    @staticmethod
    def _target(frame):
        """Return the ``FC`` column of ``frame`` as a float tensor."""
        return torch.from_numpy(frame['FC'].values).float()

    def train_dataloader(self):
        """Shuffled loader over the training split, using ``self.batch_size``."""
        train_split = TensorDataset(self.x_train, self.y_train)
        return DataLoader(train_split, shuffle=True, batch_size=self.batch_size, num_workers=1)

    def val_dataloader(self):
        """Loader over the validation split (no batch size given, so DataLoader's default applies)."""
        return DataLoader(TensorDataset(self.x_val, self.y_val), num_workers=1)

    def test_dataloader(self):
        """Loader over the 2019 test split (no batch size given, so DataLoader's default applies)."""
        return DataLoader(TensorDataset(self.x_test, self.y_test), num_workers=1)

    def predict_dataloader(self):
        """Loader over the 2019 test split; batches are ``(x, y)`` pairs like the test loader."""
        return DataLoader(TensorDataset(self.x_test, self.y_test), num_workers=1)


def predict(model, x):
    """Run ``model`` on ``x`` without gradients and return sigmoid outputs as a NumPy array.

    Args:
        model: A ``torch.nn.Module`` whose forward pass returns logits.
        x: Input tensor accepted by ``model``.

    Returns:
        ``numpy.ndarray`` holding ``sigmoid(model(x))``, with the shape the
        model returns.

    Note:
        The model is switched to eval mode and left in that mode.
    """
    model.eval()

    # Run on whatever device the model lives on; a model without parameters
    # leaves the input where it is.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        x = x.to(first_param.device)

    with torch.no_grad():
        out = model(x)
        # `.numpy()` only works on CPU tensors, so bring the result back first.
        return sigmoid(out).cpu().numpy()

In [None]:
## National wind power ("eolien") model

# Production type used by the training and prediction cells below.
type_prod = 'eolien'

# Number of hyper-parameter samples requested from the search.
n_samples = 30

import torch
from torch.nn import functional as F
import pytorch_lightning as pl
from ray.tune.integration.pytorch_lightning import TuneReportCallback

from ray import tune

class LightningMNISTRegressor(pl.LightningModule):
    """MLP with two hidden layers that emits one logit per sample.

    Training and validation both use binary cross-entropy with logits against
    the target. The validation loss averaged over the epoch is logged as
    ``ptl/val_loss``.

    Args:
        config: Mapping providing ``lr``, ``dropout_rate``, ``layer_1``,
            ``layer_2`` and ``batch_size``.
        input_dim: Number of input features per sample.
    """

    def __init__(self, config, input_dim):
        super().__init__()

        self.lr = config["lr"]
        self.dropout_rate = config["dropout_rate"]
        layer_1_dim, layer_2_dim = config["layer_1"], config["layer_2"]
        self.batch_size = config["batch_size"]

        # Input shape is (batch_size, n_dim)
        self.layer_1 = torch.nn.Linear(input_dim, layer_1_dim)
        self.drop_1 = torch.nn.Dropout(p=self.dropout_rate)
        self.layer_2 = torch.nn.Linear(layer_1_dim, layer_2_dim)
        self.drop_2 = torch.nn.Dropout(p=self.dropout_rate)
        self.layer_3 = torch.nn.Linear(layer_2_dim, 1)

    def forward(self, x):
        """Return a 1-D tensor of logits, one per row of ``x``."""
        x = x.view(x.size(0), -1)
        x = self.drop_1(torch.relu(self.layer_1(x)))
        x = self.drop_2(torch.relu(self.layer_2(x)))
        # Flatten the trailing size-1 dimension so the logits align with the 1-D targets.
        return self.layer_3(x).view(-1)

    def configure_optimizers(self):
        """Adam over all parameters with the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

    def _bce_loss(self, batch):
        """Binary cross-entropy-with-logits loss for one ``(x, y)`` batch."""
        x, y = batch
        return F.binary_cross_entropy_with_logits(self(x), y)

    def training_step(self, train_batch, batch_idx):
        """Compute and log the training loss for one batch."""
        loss = self._bce_loss(train_batch)
        self.log("ptl/train_loss", loss)
        return loss

    def validation_step(self, val_batch, batch_idx):
        """Return the validation loss of one batch for epoch-level aggregation."""
        return {"val_loss": self._bce_loss(val_batch)}

    def validation_epoch_end(self, outputs):
        """Log the mean of the per-batch validation losses as ``ptl/val_loss``."""
        avg_loss = torch.stack([step["val_loss"] for step in outputs]).mean()
        self.log("ptl/val_loss", avg_loss)

    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        """Return sigmoid outputs for a prediction batch.

        Dataloaders built from a ``TensorDataset`` yield a list/tuple whose
        first element is the feature tensor. Only that element is fed to the
        network, so batches carrying targets as well are handled.
        """
        x = batch[0] if isinstance(batch, (list, tuple)) else batch
        return torch.sigmoid(self(x))

In [None]:
def train_with_config(config, input_dim=29, base_df=None, num_gpus=1, disable_logging=False):
    """Train one ``LightningMNISTRegressor`` and return the fitted trainer.

    Args:
        config: Hyper-parameters; ``epochs`` and ``batch_size`` are read here,
            the rest is consumed by the model.
        input_dim: Number of input features of the model.
        base_df: Frame handed to ``WindDataModule``.
        num_gpus: Forwarded to ``pl.Trainer(gpus=...)``.
        disable_logging: When true, no Ray Tune reporting callback is attached.

    Returns:
        The ``pl.Trainer`` after ``fit`` has completed.

    Note:
        The production type comes from the notebook-level ``type_prod``
        variable, not from an argument.
    """
    network = LightningMNISTRegressor(config, input_dim)

    if disable_logging:
        callbacks = None
    else:
        # Report the epoch validation loss to Ray Tune under the name "loss".
        reported_metrics = {"loss": "ptl/val_loss"}
        callbacks = [TuneReportCallback(reported_metrics, on="validation_end")]

    trainer = pl.Trainer(
        max_epochs=config['epochs'],
        gpus=num_gpus,
        progress_bar_refresh_rate=0,
        callbacks=callbacks,
    )

    datamodule = WindDataModule(
        df=base_df,
        type_prod=type_prod,
        batch_size=config['batch_size'],
    )
    trainer.fit(network, datamodule=datamodule)
    return trainer

def hp_search(base_df, input_dim, num_samples=10, cpus_per_trial=1, gpus_per_trial=1,name='foo'):
    """Run a Ray Tune hyper-parameter search over ``train_with_config``.

    Args:
        base_df: Frame forwarded to every trial.
        input_dim: Number of input features of the model.
        num_samples: Number of configurations sampled from the search space.
        cpus_per_trial: CPUs reserved per trial.
        gpus_per_trial: GPUs reserved per trial; also passed to the trainer.
        name: Experiment name given to ``tune.run``.

    Returns:
        The object returned by ``tune.run``, with ``loss`` minimised.
    """
    hidden_sizes = [2, 4, 8, 16]
    config = {
        "layer_1": tune.choice(hidden_sizes),
        "layer_2": tune.choice(hidden_sizes),
        "lr": tune.loguniform(1e-4, 1e-1),
        "dropout_rate": tune.uniform(0.0, 0.4),
        "batch_size": tune.choice([32, 64, 128]),
        # Start at 1: a trial with 0 epochs never trains, so it can never
        # report the validation loss the search is optimising.
        "epochs": tune.choice(list(range(1, 10))),
    }

    trainable = tune.with_parameters(
        train_with_config,
        input_dim=input_dim,
        base_df=base_df,
        num_gpus=gpus_per_trial,
    )
    return tune.run(
        trainable,
        resources_per_trial={
            "cpu": cpus_per_trial,
            "gpu": gpus_per_trial,
        },
        metric="loss",
        mode="min",
        config=config,
        num_samples=num_samples,
        name=name,
    )

# Launch the CPU-only search for the national wind model.
analysis = hp_search(
    national_df,
    input_dim=29,
    num_samples=n_samples,
    cpus_per_trial=1,
    gpus_per_trial=0,
    name="national_eolien",
)

In [None]:
# Configuration of the trial with the lowest last-reported loss.
best_trial_config = analysis.get_best_trial(metric="loss", mode="min", scope="last").config

# Retrain once with that configuration, on CPU and without Tune reporting.
best_trainer = train_with_config(
    best_trial_config,
    input_dim=29,
    base_df=national_df,
    num_gpus=0,
    disable_logging=True,
)

In [None]:
# FIXME: best_trainer.predict() does not work yet; the next cell calls the predict() helper on the test tensors instead.

In [None]:
# Rebuild the data module to obtain the scaled test features.
dm = WindDataModule(national_df, type_prod=type_prod, batch_size=32)
dm.prepare_data()

# Attach the predictions and the identifying columns to the test frame.
# NOTE(review): assumes test_df rows are in the same order as dm.x_test; confirm.
test_df['prev_FC'] = predict(best_trainer.model, dm.x_test)
test_df['type_prod'] = type_prod
test_df['perimetre_prod'] = 'national'

# Append this model's output columns to the running predictions frame.
overall_preds = pd.concat([overall_preds, test_df[output_cols]])
