# Red de entrenamiento
Ammi Beltrán y Fernanda Borja

***

# Import de librerías 

In [1]:
# Libreria de preprocesamiento
import dataprep as prep
# 
import numpy as np
# import matplotlib.pyplot as plt
#
import torch
from torch import nn
#
# from tqdm import tqdm, trange
# import time

## Modelo

### Channel Augmenter

***
[Pendiente]
***

In [2]:
class Permute(nn.Module):
    def __init__(self, a, b, c):
        super(Permute, self).__init__()
        self.a = a
        self.b = b
        self.c = c
    def forward(self, x):
        return x.permute(self.a , self.b, self.c)

### Encoder convolucional

<img src="img/renc.png" alt="Drawing" style="width: 800px;"/>

In [3]:
class Convolutional_Enc(nn.Module):
    '''
    Encoder convolucional paper 2
    '''
    def __init__(self):
        super(Convolutional_Enc, self).__init__()
        # Reflection pad
        # nn.ReflectionPad1d()
        # We begin with 3 convolutional blocks (top to bottom)
        self.conv1 = nn.Sequential(
            # To fit kernel size = 128
            nn.ReflectionPad1d((63,64)),
            nn.Conv1d(in_channels = 1, out_channels = 100, kernel_size = 128),
        )
        self.conv2 = nn.Sequential(
            # To fit kernel size = 64
            nn.ReflectionPad1d((31,32)),
            nn.Conv1d(in_channels = 1, out_channels = 100, kernel_size = 64),
        )
        self.conv3 = nn.Sequential(
            # To fit kernel size = 16
            nn.ReflectionPad1d((7,8)),
            nn.Conv1d(in_channels = 1, out_channels = 50, kernel_size = 16),
        )
        # Repeated block
        self.iterable = nn.Sequential(
            Permute(0, 2, 1),
            nn.Linear(in_features = 250, out_features = 250),
            nn.ReLU(),
            Permute(0, 2, 1),
            nn.BatchNorm1d(num_features = 250), # [batch size, canales]
            nn.ReflectionPad1d((31, 32)), 
            nn.Conv1d(in_channels = 250, out_channels = 250, kernel_size = 64),
        )
        # Outer layer
        self.outer = nn.Sequential(
            nn.ReLU(),
            nn.BatchNorm1d(num_features = 250),
            nn.ReflectionPad1d((31, 32)),
            nn.Conv1d(in_channels = 250, out_channels = 4, kernel_size = 64),
        )

    def forward(self, x):
        # Pass trough convolutionals
        # print(x.shape)
        xc1 = self.conv1(x)
        # print(xc1.shape)
        xc2 = self.conv2(x)
        # print(xc2.shape)
        xc3 = self.conv3(x)
        # print(xc3.shape)
        # Concatenate
        cat = torch.cat((xc1, xc2, xc3), dim = 1)
        # print
        # print(cat.shape)
        # Iterable phase
        for i in range(4):
            cat = self.iterable(cat)
            # print(cat.shape)
        # End layer
        end = self.outer(cat)
        #
        return end

### Proyector

<img src="img/proyector.png" alt="Drawing" style="width: 800px;"/>

In [4]:
class Projector(nn.Module):
    def __init__(self):
        super(Projector, self).__init__()

        # LSTM's bidireccionales
        self.lstm1 = nn.LSTM(
            input_size = 4,
            hidden_size = 256,
            batch_first = True,
            bidirectional = True
        )
        self.lstm2 = nn.LSTM(
            input_size = 4,
            hidden_size = 128,
            batch_first = True,
            bidirectional = True
        )
        self.lstm3 = nn.LSTM(
            input_size = 4,
            hidden_size = 64,
            batch_first = True,
            bidirectional = True
        )
        # Outer Layer
        # Takes first and last output
        self.outer = nn.Sequential(
            nn.Linear(in_features = 4*(64 + 128 + 256), out_features = 128),
            nn.ReLU(),
            nn.Linear(in_features = 128, out_features = 32),
        )
    def forward(self, x):
        # Downsampling
        
        half = nn.functional.interpolate(x, scale_factor = 0.5)
        quarter = nn.functional.interpolate(x, scale_factor = 0.25)
        # Pass through LSTM
        x = x.permute(0, 2, 1)
        half = half.permute(0, 2, 1)
        quarter = quarter.permute(0, 2, 1)
        lstm1, (h_n, c_n) = self.lstm1(x)
        lstm2, (h_n, c_n) = self.lstm2(half)
        lstm3, (h_n, c_n) = self.lstm3(quarter)
        # print(lstm1.shape)
        # print(lstm2.shape)
        # print(lstm3.shape)
        # Get First and Last
        flo1 = lstm1[:, [0, -1], :].reshape(lstm1.shape[0], -1)
        flo2 = lstm2[:, [0, -1], :].reshape(lstm2.shape[0], -1)
        flo3 = lstm3[:, [0, -1], :].reshape(lstm3.shape[0], -1)
        # Concatenate
        cat = torch.cat((flo1, flo2, flo3), dim = -1)
        # print(cat.shape)
        # Last Layer
        end = self.outer(cat)
        #
        return end

### Clasificador

In [5]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        
        # LSTM's bidireccionales
        self.lstm1 = nn.LSTM(
            input_size = 4,
            hidden_size = 256,
            bidirectional = True
        )
        self.lstm2 = nn.LSTM(
            input_size = 4,
            hidden_size = 128,
            bidirectional = True
        )
        self.lstm3 = nn.LSTM(
            input_size = 4,
            hidden_size = 64,
            bidirectional = True
        )
        # Outer Layer
        # Takes first and last output
        self.outer = nn.Sequential(
            nn.Linear(in_features = 4*(64 + 128 + 256), out_features = 128),
            nn.ReLU(),
            nn.Linear(in_features = 128, out_features = 2), # num of classes
            nn.LogSoftmax()
        )

    def forward(self, x):
        # Downsampling
        half = nn.functional.interpolate(x, scale_factor = 0.5)
        quarter = nn.functional.interpolate(x, scale_factor = 0.25)
        # Pass through LSTM
        lstm1 = self.lstm1(x)
        lstm2 = self.lstm2(half)
        lstm3 = self.lstm3(quarter)
        # Get First and Last
        flo1 = lstm1[:, [0, -1], :]
        flo2 = lstm2[:, [0, -1], :]
        flo3 = lstm3[:, [0, -1], :]
        # Concatenate
        cat = torch.cat(flo1, flo2, flo3, dim = 1)
        # Last Layer
        end = self.outer(cat)
        
        #
        return end
# We use a negativelog-likelihood loss alongside the LogSoftmax
# layer to compute the cross-entropy loss.

***
### Pretext Task

In [6]:
class Pretext(nn.Module):
    def __init__(self):
        super(Pretext, self).__init__()
        self.encoder = Convolutional_Enc() 
        self.projector = Projector()
    def forward(self, x):
        first = self.encoder(x)
        print(first.shape)
        end = self.projector(first)
        return end

### Classifier task

In [7]:
class DownPretext(nn.Module):
    def __init__(self):
        super(Pretext, self).__init__()
        self.encoder = Convolutional_Enc() 
        self.projector = Classifier()
    def forward(self, x):
        first = self.encoder(x)
        end = self.projector(first)
        return end

***

## Downstream

In [8]:
# Por definicion el primer clasificador está construido ya c:

## Entrenamiento

Loss de nceLoss

### Pretext

In [9]:
from losses import InfoNceLoss
def preloss(model, batch, criterion = nn.CrossEntropyLoss(), device = "cuda"):
    x1, x2 = batch
    x1 = x1.reshape((19,1,4000)).type(torch.FloatTensor)
    x2 = x2.reshape((19,1,4000)).type(torch.FloatTensor)
    # print(device)
    model = model.to(device)
    x1, x2 = x1.to(device), x2.to(device)
    y1, y2 = model(x1), model(x2)
    del x1, x2
    loss = criterion(y1, y2)
    return loss

In [10]:
def train_epoch(model, train_loader, criterion, optimizer):
    # training mode
    model.train()
    lossSum = 0
    iters = 0
    for i, batch in enumerate(train_loader):
        # print(len(batch), len(batch[0]))
        # augmentate
        loss = preloss(model, batch, criterion)
        #
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(f"Iter: {i + 1}/{len(train_loader)}, Loss:{loss}")
        lossSum += loss
        iters += 1
    trainloss = lossSum/iters
    print(f"Epoch train loss = {trainloss}")
    return trainloss

In [11]:
def pretext_train(model, epochs, train_loader, criterion, optimizer):
    cumulative_loss = []
    best_loss = np.inf
    for epoch in range(0, epochs):
        ep_loss = train_epoch(model, train_loader, criterion, optimizer)
        cumulative_loss.append(ep_loss)
        if (best_loss>ep_loss):
            print(f"Better params found in epoch = {epoch}, saved params")
            torch.save(model.state_dict(), f'bestEncoderParams.pt')
        model.load_state_dict(torch.load(f'bestEncoderParams.pt'))
    return cumulative_loss    

### Downstream

In [12]:
def downloss(model, batch, criterion = nn.NLLLoss(), device = "cuda"):
    x, y = batch
    model.train()
    model = model.to(device)
    x, y = x.to(device), y.to(device)
    y_pred = model(x)
    loss = criterion(y_pred, y)
    y_pred = torch.argmax(y_pred) # REVISAR FORMATO DE LABEL, POR AHORA ENTREGA 0 o 1, PODRIA SER [0, 1] y [1, 0]
    return loss, y, y_pred

In [13]:
def downtrain_epoch(model, train_dataset, criterion, optimizer):
    acc = 0
    e_loss = 0
    count = 0
    for i, batch in enumerate(train_dataset):
        loss, y, y_pred = downloss(model, batch, criterion)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        e_loss +=loss
        acc +=(y == y_pred)
        count +=1
        print(f"Iter: {i + 1}/{len(train_dataset)}, Loss:{loss}")
    acc = acc/count
    e_loss = e_loss/count
    print(f"Epoch train loss = {e_loss}")
    return e_loss, acc

In [14]:
def downvalidate(model, val_dataset, criterion):
    acc = 0
    e_loss = 0
    count = 0
    model.eval()
    with torch.no_grad():
        for i, batch in enumerate(val_dataset):
            loss, y, y_pred = downloss(model, batch, criterion)
            e_loss +=loss
            acc+=(y==y_pred)
            count +=1
        acc = acc/count
        e_loss = e_loss/count
    return e_loss, acc

In [15]:
def downtrain(model, epochs, train_dataset, val_dataset, criterion, optimizer):
    model.train()
    best_loss = np.inf
    train_curves = [[],[]] # [loss, accuracy]
    val_curves = [[],[]]
    for epoch in range(0, epochs):
        # Entrenamos
        print(f"Epoch nro {epoch +1}/{epochs}")
        e_loss, e_acc = downtrain(model, train_dataset, criterion, optimizer)
        train_curves[0].append(e_loss)
        train_curves[1].append(e_acc)
        # Validamos
        val_loss, val_acc = downvalidate(model, val_dataset, criterion)
        val_curves[0].append(val_loss)
        val_curves[1].append(val_acc)
        if val_loss < best_loss:
            torch.save(model.state_dict(), f'bestDownParams.pt')
        model.load_state_dict(torch.load(f'bestDownParams.pt'))

    return train_curves, val_curves

***

# Entrenamos!

In [16]:
# dataset.squeeze()

In [17]:
# for i, x, y in enumerate(dataset):
#     print(i)
#     print(x)

In [18]:
model = Pretext()
dataset = torch.load("test_batch.pt")
# x1 = dataset[0].reshape((19,1,4000)).type(torch.FloatTensor)
# x2 = dataset[1].reshape((19,1,4000)).type(torch.FloatTensor)
# dataset = torch.tensor(np.array(dataset))
# test = torch.tensor(np.array([dataset[0], dataset[1]]))
data = (dataset, dataset)
train_epoch(model, data, InfoNceLoss(), torch.optim.Adam(model.parameters()))
# perdida = preloss(model, (x1, x2), InfoNceLoss())
# print(dataset[0].shape)

torch.Size([19, 4, 4000])


torch.Size([19, 4, 4000])
Iter: 1/2, Loss:3.6087357997894287
torch.Size([19, 4, 4000])
torch.Size([19, 4, 4000])
Iter: 2/2, Loss:3.4893059730529785
Epoch train loss = 3.549020767211914


tensor(3.5490, device='cuda:0', grad_fn=<DivBackward0>)

In [19]:
# print(perdida)

In [20]:
# model(dataset[0].reshape((19,1,4000)).type(torch.FloatTensor))



In [21]:
# hparams = {
    
# }
# [[19, 4000], [19, 4000]]