In [1]:
import os
import cv2
import numpy as np
from torchmetrics import ConfusionMatrix
import torch
import torch.nn as nn
import torch.nn.functional as F


In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [3]:
def lett_to_num(y):
    """Return a numeric encoding of the letters"""
    return [ord(lab) - 65 for lab in y]


In [4]:
class WeightedClassificationError():
    n_classes = 8
    Wmax=10
    is_lower_the_better = True
    minimum = 0.0
    maximum = np.inf  # 1 if normalisation by max(W)
    W = torch.tensor (
            [[0, 1, 6, 10, 10, 10, 10, 10],
                [1, 0, 3, 10, 10, 10, 10, 10],
                [6, 3, 0, 2, 9, 10, 10, 10],
                [10, 10, 2, 0, 9, 9, 10, 10],
                [10, 10, 9, 9, 0, 8, 8, 8],
                [10, 10, 10, 9, 8, 0, 9, 8],
                [10, 10, 10, 10, 8, 9, 0, 9],
                [10, 10, 10, 10, 8, 8, 9, 0],]
        )
    def __init__(
        self, name="WeightedClassificationError", precision=2, time_idx=0
    ):
        self.precision = precision

    def compute(self, y_true, y_pred):
        confmat = ConfusionMatrix(num_classes=8)
        loss = torch.sum(torch.multiply(confmat(y_pred,y_true),self.W))/ (self.n_classes * self.Wmax)
        return loss

    def __call__(self, y_true, y_pred):
        y_pred = y_pred
        y_true = y_true
        return self.compute(y_true, y_pred)

In [55]:
nb_frame=50
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        
        self.conv1 = nn.Conv2d(1, 32, 11,stride=2)
        self.conv2 = nn.Conv2d(32, 64, 5,stride=1)
        self.conv3 = nn.Conv2d(64, 128, 3,stride=2)
        self.conv4 = nn.Conv2d(128, 256, 3,stride=2)
        
    def forward(self, i):
        x = i.view(-1, i.shape[2], i.shape[3], i.shape[4])
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = nn.AvgPool2d(3)(x)
        x = x.view(i.shape[0], i.shape[1], -1)
        return x
    
class LSTM(nn.Module):
    def __init__(self):
        super(LSTM, self).__init__()
        self.lstm = nn.LSTM(20736, 100,60,dropout=0.3,batch_first=True)
        self.fc = nn.Linear(100*nb_frame, 8)
        
    def forward(self, x):
        print(x.shape)
        x, _ = self.lstm(x)
        print(x.shape)
        #x = x.view(x.shape[0], -1)
        x = F.softmax(self.fc(x),dim=1)
        return x    
    

net_cnn = CNN()
net_lstm = LSTM()


In [56]:
x = torch.rand((50, 10, 1, 256, 256)).to(device)
print(x.shape)

torch.Size([50, 10, 1, 256, 256])


In [57]:
features = net_cnn(x)
out = net_lstm(features)
print(features.shape)
print(out.shape)

RuntimeError: [enforce fail at C:\cb\pytorch_1000000000000\work\c10\core\impl\alloc_cpu.cpp:81] data. DefaultCPUAllocator: not enough memory: you tried to allocate 968256000 bytes.

In [None]:
def get_n_params(model):
    pp=0
    for p in list(model.parameters()):
        nn=1
        for s in list(p.size()):
            nn = nn*s
        pp += nn
    return pp
print(get_n_params(net_cnn))
print(get_n_params(net_lstm))

In [None]:
def train(dataloader, cnn_model, lstm_model, criterion, optimizer, epochs):
    cnn_model.train()
    lstm_model.train()
    for epoch in range(epochs): 
        train_loss = 0.0
        
        for data in dataloader:
            optimizer.zero_grad()

            # forward + backward + optimize
            features = cnn_model(x)
            outputs = lstm_model(features)
            
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            # print statistics
            print(f"Epoch {epoch+1}\n-------------------------------")
            print(loss)
        train_loss = train_loss/len(Dataloader)
    print('Finished Training')

In [99]:
def validation(dataloader, cnn_model, lstm_model, criterion, optimizer, epochs):
    cnn_model.eval()
    lstm_model.eval()
    with torch.no_grad(): 
        for epoch in range(epochs): 
            running_loss=0.0
            for data in dataloader:
                optimizer.zero_grad()

                features = cnn_model(x)
                outputs = lstm_model(features)

                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss+=loss.item()
                # print statistics
                print(f"Epoch {epoch+1}\n-------------------------------")
                print(loss)
            running_loss=running_loss/len(Dataloader)
    return(running_loss)

In [101]:
def test_eval(dataloader, cnn_model, lstm_model, criterion, optimizer):
    cnn_model.eval()
    lstm_model.eval()
    with torch.no_grad():  
        eval_loss = 0.0
        for data in dataloader:
                
            features = cnn_model(x)
            outputs = lstm_model(features)

            loss = criterion(outputs, labels)
            eval_loss += loss.item() 
            print(loss)
        eval_loss = eval_loss / len(dataloader)
            