In [None]:
from typing import Any, Callable, Dict, Tuple

import numpy as np
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.datasets import fetch_covtype, load_iris
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, Dataset, random_split

# Model definition

We define a deep fully connected network (three hidden layers with ReLU activations) that will be trained on the Covertype dataset loaded further below.

In [None]:
class DeepLinear(nn.Module):
    """Fully connected classifier with three hidden ReLU layers, built in float64.

    Layer widths are ``in_features -> 1024 -> 1024 -> 512 -> out_features``.
    The final layer (``fc``) returns raw scores; no activation is applied to it.
    """

    def __init__(self, in_features: int, out_features: int) -> None:
        super().__init__()

        # All weights are created as float64, so inputs are expected to be float64 too.
        dtype = torch.float64
        self.linear1 = nn.Linear(in_features, 1024, dtype=dtype)
        self.linear2 = nn.Linear(1024, 1024, dtype=dtype)
        self.linear3 = nn.Linear(1024, 512, dtype=dtype)
        self.fc = nn.Linear(512, out_features, dtype=dtype)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw class scores for the batch ``x``."""
        hidden = x
        # Each hidden layer is followed by a ReLU; the output layer is not.
        for layer in (self.linear1, self.linear2, self.linear3):
            hidden = F.relu(layer(hidden))
        return self.fc(hidden)

# Surrogate definition

In [None]:
class Surrogate(nn.Module):
    """Surrogate model made of a single float64 linear layer.

    Maps ``in_features`` inputs directly to ``out_features`` raw scores.
    """

    def __init__(self, in_features: int, out_features: int) -> None:
        super().__init__()

        self.linear = nn.Linear(in_features, out_features, dtype=torch.float64)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw scores produced by the linear layer for ``x``."""
        scores = self.linear(x)
        return scores

# Training interface definition

In [None]:
class ModelSurrogateParallel(pl.LightningModule):
    """Train a deep model and a linear surrogate side by side.

    The deep model (``DeepLinear``) is fitted on the true targets, while the
    surrogate (``Surrogate``) is fitted, in the same step, on the hard labels
    currently predicted by the deep model. The step loss is the sum of both.

    Args:
        in_features: Number of input features given to both networks.
        out_features: Number of classes (size of the score vectors).
        loss_function: Callable ``(scores, targets) -> loss`` used for both networks.
        optimizer: Optimizer class (or factory) called as
            ``optimizer(parameters, **optimizer_params)``.
        optimizer_params: Keyword arguments forwarded to ``optimizer``.
        **pl_module: Extra keyword arguments forwarded to ``pl.LightningModule``.
    """

    def __init__(
        self,
        in_features: int,
        out_features: int,
        loss_function: Callable = F.cross_entropy,
        optimizer: Callable[..., torch.optim.Optimizer] = torch.optim.Adam,
        optimizer_params: Dict[str, Any] = {'lr': 0.001},
        **pl_module,
    ) -> None:
        super(ModelSurrogateParallel, self).__init__(**pl_module)

        self.loss_function = loss_function
        self.optimizer = optimizer
        # Copy the mapping so instances never share (or accidentally mutate)
        # the dict object used as the default argument.
        self.optimizer_params = dict(optimizer_params)
        self.save_hyperparameters()

        self.deep_linear = DeepLinear(in_features, out_features)
        self.surrogate = Surrogate(in_features, out_features)

    def configure_optimizers(self) -> torch.optim.Optimizer:
        """Build one optimizer over the parameters of both networks."""
        return self.optimizer(self.parameters(), **self.optimizer_params)

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return ``(deep model scores, surrogate scores)`` for the batch ``x``."""
        return self.deep_linear(x), self.surrogate(x)

    def _shared_step(self, batch: Tuple[torch.Tensor, torch.Tensor], stage: str) -> torch.Tensor:
        """Compute and log both losses for one batch; ``stage`` prefixes the log keys."""
        inputs, targets = batch
        scores_model, scores_surrogate = self(inputs)

        # Hard labels predicted by the deep model are the surrogate's targets.
        # Softmax preserves the ordering of the scores, so argmax is taken on
        # the raw scores directly.
        targets_model = torch.argmax(scores_model, dim=1).detach()

        loss_model = self.loss_function(scores_model, targets)
        loss_surrogate = self.loss_function(scores_surrogate, targets_model)

        self.log(f'{stage}_loss_model', loss_model)
        self.log(f'{stage}_loss_surrogate', loss_surrogate)

        return loss_model + loss_surrogate

    def training_step(self, train_batch: Tuple[torch.Tensor, torch.Tensor], batch_idx: int) -> torch.Tensor:
        """Return the summed loss of both networks on a training batch."""
        return self._shared_step(train_batch, 'train')

    def validation_step(self, valid_batch: Tuple[torch.Tensor, torch.Tensor], batch_idx: int) -> torch.Tensor:
        """Return the summed loss of both networks on a validation batch."""
        return self._shared_step(valid_batch, 'valid')

    def test_step(self, test_batch: Tuple[torch.Tensor, torch.Tensor], batch_idx: int) -> torch.Tensor:
        """Return the summed loss of both networks on a test batch."""
        return self._shared_step(test_batch, 'test')

    def predict_step(self, batch: Any, batch_idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return the predicted class indices ``(deep model, surrogate)``.

        ``batch`` is either a bare input tensor or a tuple/list whose first
        element is the input tensor (e.g. an ``(inputs, targets)`` pair).
        """
        # Decide on the container type, not on ``len(batch) == 2``: a bare
        # input tensor holding exactly two samples also has length 2 and
        # would otherwise be unpacked as if it were an (inputs, targets) pair.
        inputs = batch[0] if isinstance(batch, (tuple, list)) else batch
        scores_model, scores_surrogate = self(inputs)

        y_model = torch.argmax(scores_model, dim=1)
        y_surrogate = torch.argmax(scores_surrogate, dim=1)

        return y_model, y_surrogate

# Dataset

In [None]:
class ClassicDataset(Dataset):
    """Map-style dataset pairing each row of ``X`` with the matching entry of ``y``."""

    def __init__(self, X: torch.Tensor, y: torch.Tensor) -> None:
        super().__init__()
        self.X, self.y = X, y

    def __len__(self) -> int:
        """Number of samples, taken from the length of ``X``."""
        return len(self.X)

    def __getitem__(self, index) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return the ``(features, target)`` pair stored at ``index``."""
        features = self.X[index]
        target = self.y[index]
        return features, target

# Loading data

In [None]:
# Seed of the train/test partition, so the split is identical on every run.
SPLIT_SEED = 42

X_raw, y_raw = fetch_covtype(return_X_y=True)

# Features are rescaled with MinMaxScaler and stored as float64, the dtype the
# networks defined above are built with.
# NOTE(review): the scaler is fitted on the full dataset before the split, so
# test rows influence the scaling — consider fitting it on the training part only.
X = torch.as_tensor(MinMaxScaler().fit_transform(X_raw), dtype=torch.float64)
# Shift the labels down by one so that they can be used as class indices
# (presumably the loader returns labels starting at 1 — confirm in the
# scikit-learn documentation).
y = torch.as_tensor(y_raw, dtype=torch.long) - 1

dataset = ClassicDataset(X, y)

train_ratio = 0.8
dataset_size = len(dataset)
train_size = int(train_ratio * dataset_size)
# The test part takes the remainder so that both sizes sum to the dataset size.
test_size = dataset_size - train_size

train_set, test_set = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_loader = DataLoader(train_set, batch_size=128, shuffle=True)
# Evaluation does not depend on sample order, so the test loader is not shuffled.
test_loader = DataLoader(test_set, batch_size=128, shuffle=False)

# Training

In [None]:
# Seed the random generators before the networks are created, so that weight
# initialisation and batch shuffling are repeatable across runs.
pl.seed_everything(42)

network = ModelSurrogateParallel(
    in_features=X.shape[1],
    # One output score per distinct label present in ``y``.
    out_features=torch.unique(y).numel()
)

# 'auto' lets Lightning pick an available accelerator instead of requiring a
# GPU, so the notebook also runs on CPU-only machines.
trainer = pl.Trainer(max_epochs=10, accelerator='auto')
trainer.fit(network, train_dataloaders=train_loader)

Notre modèle de substitution, entraîné parallèlement sur les prédictions du modèle, parvient à obtenir environ 71.58% de prédictions identiques à celles du modèle sur le jeu d'entraînement et environ 71.86% sur le jeu de test.

## Quel est le modèle le plus performant en exactitude ?

Sur les données de test, bien évidemment.

In [None]:
# Collect, batch by batch, the true labels and the labels predicted by the
# deep model and by the surrogate on the test set.
test_targets = []
test_model_predictions = []
test_surrogate_predictions = []
# Inference only: no gradients are needed, so autograd is switched off.
with torch.no_grad():
    for samples, tgts in test_loader:
        lbls_model, lbls_surrogate = network.predict_step(samples, None)
        test_model_predictions.append(lbls_model)
        test_surrogate_predictions.append(lbls_surrogate)
        test_targets.append(tgts)

test_targets = torch.hstack(test_targets)
test_model_predictions = torch.hstack(test_model_predictions)
test_surrogate_predictions = torch.hstack(test_surrogate_predictions)

# Percentages, in order: deep model vs. targets, surrogate vs. targets,
# and agreement between the deep model and the surrogate.
(
    (test_model_predictions == test_targets).sum() / len(test_set) * 100,
    (test_targets == test_surrogate_predictions).sum() / len(test_set) * 100,
    (test_model_predictions == test_surrogate_predictions).sum() / len(test_set) * 100,
)

In [None]:
# Collect, batch by batch, the true labels and the labels predicted by the
# deep model and by the surrogate on the training set. Targets are gathered in
# the same pass as the predictions so both stay aligned despite the shuffling
# of the training loader.
train_targets = []
train_model_predictions = []
train_surrogate_predictions = []
# Inference only: no gradients are needed, so autograd is switched off.
with torch.no_grad():
    for samples, tgts in train_loader:
        lbls_model, lbls_surrogate = network.predict_step(samples, None)
        train_model_predictions.append(lbls_model)
        train_surrogate_predictions.append(lbls_surrogate)
        train_targets.append(tgts)

train_targets = torch.hstack(train_targets)
train_model_predictions = torch.hstack(train_model_predictions)
train_surrogate_predictions = torch.hstack(train_surrogate_predictions)

# Percentages, in order: deep model vs. targets, surrogate vs. targets,
# and agreement between the deep model and the surrogate.
(
    (train_model_predictions == train_targets).sum() / len(train_set) * 100,
    (train_targets == train_surrogate_predictions).sum() / len(train_set) * 100,
    (train_model_predictions == train_surrogate_predictions).sum() / len(train_set) * 100,
)