## Imports

In [1]:
%load_ext autoreload
%autoreload 2

import sys
import os

import torch
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt

from collections import OrderedDict 
from sklearn import metrics
from torch.optim import Adam

sys.path.append(os.path.abspath(''))

import utils.more_torch_functions as mtf
import datasets

from utils.custom_loss import AsymBCELoss
from utils.custom_activations import StepActivation
from utils.modules import Parallel, MaxLayer
from utils.misc import train_model, cov_score
from compiling_nn.build_odd import compile_nn
from compiling_nn.utils_odd import pickle_bdd, unpickle_bdd

# Optional autograd anomaly detection, left disabled.
# torch.autograd.set_detect_anomaly(True)

# Everything this notebook persists lives under "<working directory>/backup":
# pickled BDDs go to "bdd", saved networks go to "nn".
SAVE_PATH = os.path.abspath("backup")
PKL_PATH, PTH_PATH = (os.path.join(SAVE_PATH, sub) for sub in ("bdd", "nn"))


## Load data

In [2]:
# Fetch the loan dataset with the balancing, discretizing and hot_encoding
# options all turned on.
np_x, np_y = datasets.LoanDataset.get_dataset(
    balancing=True,
    discretizing=True,
    hot_encoding=True,
)

# Wrap features and labels as torch tensors.
x_data = torch.Tensor(np_x)
y_data = torch.Tensor(np_y)

# Width of the feature matrix; the network classes defined later read this
# module-level name when building their first Linear layer.
input_size = x_data.shape[1]
print(x_data.shape)

torch.Size([296, 24])


## Hooks

In [3]:
# Module-level store filled by the hooks below:
#   intermediate_outputs[name]["train" | "valid"] -> last output seen in that mode
intermediate_outputs = {}


def get_intermediate_outputs(name):
    """Build a forward hook that records a module's latest output under `name`.

    The returned callable has the ``(model, input, output)`` signature. On each
    call it stores `output` (as-is, not detached or copied) in
    ``intermediate_outputs[name]`` under the key ``"train"`` when
    ``model.training`` is truthy and ``"valid"`` otherwise, overwriting the
    previous entry for that key.
    """
    def hook(model, input, output):
        phase = "train" if model.training else "valid"
        intermediate_outputs.setdefault(name, {})[phase] = output
    return hook

def true_label_for_backward(train, valid):
    """Build a hook that attaches a set of labels to a module.

    The returned callable has the ``(model, input)`` signature and sets
    ``model.true_labels`` to `train` when ``model.training`` is truthy and to
    `valid` otherwise. It returns nothing.

    NOTE(review): the signature matches a forward pre-hook, but no visible cell
    registers it - confirm the intended use.
    """
    def hook(model, input):
        model.true_labels = train if model.training else valid
    return hook

# TODO: create a loss-function hook for a better backward pass? (compare the networks' outputs individually?)

## Networks

### Network parts

In [4]:
class ApproxNet(nn.Module):
    """Small network: Linear -> StepActivation -> Linear(-> 1) -> StepActivation.

    The layers are stored in ``self.nn`` as a Sequential with the names
    ``l1``, ``a1``, ``l2``, ``a2``.

    Args:
        in_features: number of input features. Defaults to the notebook-level
            ``input_size`` so that ``ApproxNet()`` keeps working unchanged.
        hl1: width of the hidden layer. Defaults to the class attribute
            ``ApproxNet.hl1``.
    """
    # Default hidden width; also read as ``ApproxNet.hl1`` outside the class.
    hl1 = 10

    def __init__(self, in_features=None, hl1=None):
        super().__init__()
        # Only fall back to the notebook global when the caller did not say
        # otherwise, so the class can also be built without hidden state.
        if in_features is None:
            in_features = input_size
        if hl1 is None:
            hl1 = self.hl1
        else:
            # Keep the instance attribute consistent with the layers built.
            self.hl1 = hl1

        self.nn = nn.Sequential(OrderedDict([
            ('l1', nn.Linear(in_features, hl1)),
            ('a1', StepActivation()),
            ('l2', nn.Linear(hl1, 1)),
            ('a2', StepActivation()),
        ]))

    def forward(self, x):
        """Run `x` through the sequential stack and return its output."""
        return self.nn(x)

class BigNet(nn.Module):
    """Larger network: two sigmoid hidden layers and a StepActivation output.

    The layers are stored in ``self.nn`` as a Sequential with the names
    ``l1``, ``a1``, ``l2``, ``a2``, ``l3``, ``a3``; the last Linear has a
    single output unit.

    Args:
        in_features: number of input features. Defaults to the notebook-level
            ``input_size`` so that ``BigNet()`` keeps working unchanged.
        hl1: width of the first hidden layer (default: ``BigNet.hl1``).
        hl2: width of the second hidden layer (default: ``BigNet.hl2``).
    """
    # Default hidden widths.
    hl1 = 50
    hl2 = 25

    def __init__(self, in_features=None, hl1=None, hl2=None):
        super().__init__()
        # Only fall back to the notebook global / class defaults when the
        # caller did not pass a value, so the class no longer needs hidden state.
        if in_features is None:
            in_features = input_size
        if hl1 is None:
            hl1 = self.hl1
        else:
            self.hl1 = hl1  # keep the attribute in sync with the built layers
        if hl2 is None:
            hl2 = self.hl2
        else:
            self.hl2 = hl2

        self.nn = nn.Sequential(OrderedDict([
            ('l1', nn.Linear(in_features, hl1)),
            ('a1', nn.Sigmoid()),
            ('l2', nn.Linear(hl1, hl2)),
            ('a2', nn.Sigmoid()),
            ('l3', nn.Linear(hl2, 1)),
            ('a3', StepActivation()),
        ]))

    def forward(self, x):
        """Run `x` through the sequential stack and return its output."""
        return self.nn(x)

### Network definition

In [5]:
class Net(nn.Module):
    """Three ApproxNet branches in a Parallel container, followed by a MaxLayer.

    Sub-modules are reachable as ``self.net.nets`` (the Parallel container,
    with children ``apx1``, ``apx2``, ``apx3``) and ``self.net.or_`` (the
    MaxLayer).
    """

    def __init__(self):
        super().__init__()

        # Branches are instantiated in the order apx1, apx2, apx3.
        branches = OrderedDict(
            (f"apx{idx}", ApproxNet()) for idx in (1, 2, 3)
        )
        self.net = nn.Sequential(OrderedDict([
            ('nets', Parallel(branches)),
            ('or_', MaxLayer()),
        ]))

    def forward(self, input):
        """Feed `input` through the Parallel container, then the MaxLayer."""
        return self.net(input)
    
class BlankNet(nn.Module):
    """Parallel container with no branches yet, followed by a MaxLayer.

    Sub-modules are reachable as ``self.net.nets`` (the empty Parallel
    container) and ``self.net.or_`` (the MaxLayer).
    """

    def __init__(self) -> None:
        super().__init__()

        empty_branches = Parallel(OrderedDict())
        reducer = MaxLayer()

        # Register the two stages under the same names, in the same order.
        self.net = nn.Sequential()
        self.net.add_module('nets', empty_branches)
        self.net.add_module('or_', reducer)

    def forward(self, input):
        """Feed `input` through the Parallel container, then the MaxLayer."""
        return self.net(input)

## Network evaluation

In [6]:
def eval_model(x_train, y_train, x_valid, y_valid, model, criterion, optimizer, epochs=500):
    """Train `model`, then score it on the training and validation sets.

    Despite its name this function has side effects: it first calls
    ``train_model`` on the training data, then switches the model to eval mode
    and leaves it there on return.

    Args:
        x_train, y_train: training inputs and labels.
        x_valid, y_valid: validation inputs and labels.
        model: the module to train and evaluate.
        criterion: loss passed through to ``train_model``.
        optimizer: optimizer passed through to ``train_model``.
        epochs: forwarded as the last positional argument of ``train_model``
            (previously hard-coded to 500).
            NOTE(review): presumably the number of training epochs - confirm
            against ``utils.misc.train_model``.

    Returns:
        ``(f1_train, cov_train, f1_valid, cov_valid)`` where the F1 values come
        from ``sklearn.metrics.f1_score`` and the coverage values from
        ``cov_score``.
    """
    train_model(x_train, y_train, model, criterion, optimizer, epochs)

    # Predict in eval mode; detach so the metrics get plain values without
    # autograd history.
    model.eval()
    pred_train = model(x_train).detach()
    pred_valid = model(x_valid).detach()

    f1_train = metrics.f1_score(y_train, pred_train)
    cov_train = cov_score(y_train, pred_train)
    f1_valid = metrics.f1_score(y_valid, pred_valid)
    cov_valid = cov_score(y_valid, pred_valid)

    return f1_train, cov_train, f1_valid, cov_valid

def print_eval(x_train, y_train, x_valid, y_valid, model, criterion, optimizer):
    """Run ``eval_model`` and print its scores as a Train/Valid table.

    All arguments are forwarded unchanged to ``eval_model``, so calling this
    also trains `model`. The table has one F1 row and two coverage rows
    (indices 0 and 1 of the coverage results), formatted to three decimals.
    """
    scores = eval_model(x_train, y_train, x_valid, y_valid, model, criterion, optimizer)
    f1_train, cov_train, f1_valid, cov_valid = scores

    header = f"{'':<15}{'Train':^15}{'Valid':^15}"
    rows = [
        ('F1 score', f1_train, f1_valid),
        ('Coverage [0]', cov_train[0], cov_valid[0]),
        ('         [1]', cov_train[1], cov_valid[1]),
    ]
    body = [
        f"{label:<15}{train_val:^15.3f}{valid_val:^15.3f}"
        for label, train_val, valid_val in rows
    ]
    print("\n".join([header, *body]))

In [7]:
# Seed torch so that torch-driven randomness in this cell (parameter
# initialisation, the train/valid split) is repeatable on Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

model = Net()
criterion = nn.BCELoss()
optimizer = Adam(model.parameters(), lr=1e-2, weight_decay=1e-6)

# Record the output of the Parallel container on every forward pass; the handle
# is kept so the hook can be removed later.
intermediate_outputs_handle = model.net.nets.register_forward_hook(get_intermediate_outputs("parallel_out"))

# 70/30 split of the row indices, drawn from a dedicated seeded generator so
# the split does not depend on how much global RNG state was consumed before.
split_rng = torch.Generator().manual_seed(SEED)
train_index, valid_index = torch.utils.data.random_split(
    range(x_data.size(0)), [0.7, 0.3], generator=split_rng
)

x_train, y_train = x_data[train_index], y_data[train_index]
x_valid, y_valid = x_data[valid_index], y_data[valid_index]

# Trains the model, then prints the train/valid scores.
print_eval(x_train, y_train, x_valid, y_valid, model, criterion, optimizer)

                    Train          Valid     
F1 score            0.925          0.733     
Coverage [0]        0.869          0.585     
         [1]        0.980          0.787     


In [8]:
# Freeze every parameter the model currently owns. This must happen before the
# new branch is attached so that the new branch stays trainable.
model.requires_grad_(False)

# Attach a fresh BigNet to the Parallel container under the name "nn" and
# build an optimizer over that branch's parameters only.
model.net.nets.add_module("nn", BigNet())
optimizer = Adam(
    model.net.nets.nn.parameters(),
    lr=1e-2,
    weight_decay=1e-6,
)

# Trains again (with the new optimizer) and prints the train/valid scores.
print_eval(x_train, y_train, x_valid, y_valid, model, criterion, optimizer)

                    Train          Valid     
F1 score            0.925          0.725     
Coverage [0]        0.869          0.561     
         [1]        0.980          0.787     


In [9]:
# Stop recording the Parallel container's outputs.
intermediate_outputs_handle.remove()

# torch.save does not create missing parent directories, so make sure the
# target folder exists before writing.
os.makedirs(PTH_PATH, exist_ok=True)

# NOTE(review): this pickles the whole module object, so loading it later needs
# the same class definitions to be importable; saving model.state_dict() would
# be more portable, but would change what downstream loaders receive.
torch.save(model, os.path.join(PTH_PATH, f"3apx{ApproxNet.hl1}hl.pth"))

In [10]:
# Make sure the pickle folder exists, in case pickle_bdd does not create it.
os.makedirs(PKL_PATH, exist_ok=True)

# Compile every "apx*" branch of the Parallel container, keep the result in
# memory, and pickle it as "<branch name>.pkl" under PKL_PATH.
compiled_nn = []
for name, approx in model.net.nets.named_children():
    if name.startswith('apx'):
        print(f"compiling {name}")

        bdd = compile_nn(approx, verbose=True)
        compiled_nn.append(bdd)
        pickle_bdd(bdd, os.path.join(PKL_PATH, f"{name}.pkl"))

compiling apx1
converting to ODDs : DONE (3.68e+02)
combining ODDs : DONE (1.22e+03)
compiling apx2
converting to ODDs : DONE (5.00e+02)
combining ODDs : DONE (1.95e+03)
compiling apx3
converting to ODDs : DONE (4.01e+02)
combining ODDs : DONE (1.79e+03)


In [13]:
# Superseded: the compile loop above already pickles each BDD as it is built.
# for i, apx in enumerate(compiled_nn):
#     pickle_bdd(apx, os.path.join(PKL_PATH, f"apx{i+1}.pkl"))

In [12]:
# Round-trip check: reload each pickled BDD and print whether the reloaded
# object is the very same object (identity, not equality) as the in-memory one.
# Files are looked up as apx1.pkl, apx2.pkl, ... in list order.
for idx, apx in enumerate(compiled_nn, start=1):
    pkl_file = os.path.join(PKL_PATH, f"apx{idx}.pkl")
    unpickled_apx = unpickle_bdd(pkl_file)
    print(apx is unpickled_apx)

True
True
True


### Note

```md
discretization -> current method (e.g. with bins of 500: 400 -> [1,0,0,0] / 700 -> [0,1,0,0] / 1300 -> [0,0,1,0])
               -> monotone encoding (e.g. with bins of 500: 400 -> [1,0,0,0] / 700 -> [1,1,0,0] / 1300 -> [1,1,1,0])
```