In [None]:
# Mount Google Drive so the CSV files stored there are readable from Colab.
from google.colab import drive
import pandas as pd

drive.mount('/content/drive')

# Each CSV provides a `condition_label` column and a `medical_abstract` column.
train_dataset = pd.read_csv('/content/drive/MyDrive/medical_tc_train.csv')
test_dataset = pd.read_csv('/content/drive/MyDrive/medical_tc_test.csv')

# Quick look at the first five training rows.
print(train_dataset.head(5))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
   condition_label                                   medical_abstract
0                5  Tissue changes around loose prostheses. A cani...
1                1  Neuropeptide Y and neuron-specific enolase lev...
2                2  Sexually transmitted diseases of the colon, re...
3                1  Lipolytic factors associated with murine and h...
4                3  Does carotid restenosis predict an increased r...


### 0.2 Importing packages

In [None]:
import nltk
import pickle
import pandas as pd
import requests
import gensim
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS
from nltk.stem import WordNetLemmatizer, SnowballStemmer
from nltk.stem.porter import *
import numpy as np
from gensim.models import Word2Vec
import torch
import torch.nn as nn
import torch.optim as optim
import string
from nltk.tokenize import word_tokenize
import tensorflow
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nltk.corpus import stopwords
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.models import Sequential
from torch.utils.data import DataLoader, TensorDataset
from nltk.corpus import wordnet
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('popular')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading collection 'popular'
[nltk_data]    | 
[nltk_data]    | Downloading package cmudict to /root/nltk_data...
[nltk_data]    |   Package cmudict is already up-to-date!
[nltk_data]    | Downloading package gazetteers to /root/nltk_data...
[nltk_data]    |   Package gazetteers is already up-to-date!
[nltk_data]    | Downloading package genesis to /root/nltk_data...
[nltk_data]    |   Package genesis is already up-to-date!
[nltk_data]    | Downloading package gutenberg to /root/nltk_data...
[nltk_data]    |   Package gutenberg is already up-to-date!
[nltk_data]    | Downloading package inaugural to /root/nltk_data...
[nltk_data]    |   Package inaugural is already up-to-date!
[nltk_data]    | Downloading package movie_reviews to
[nltk_data

True

### 0.3 Device agnostic code to run grid search on faster GPU

On Google Colab, change the runtime type to GPU so that training and the grid search run on the GPU.

In [None]:
!nvidia-smi

Wed Dec 20 01:33:03 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   69C    P0              30W /  70W |    301MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

### 0.4 Preprocessing

In [None]:
# Split each frame into the raw abstract text and its condition label.
# (The stray `text_train[:2]` expression was dropped: it was not the last
# line of the cell, so it was evaluated but never displayed.)
text_train = train_dataset['medical_abstract']
y_train = train_dataset['condition_label']
text_test = test_dataset['medical_abstract']
y_test = test_dataset['condition_label']

In [None]:
# Function from preprocessing file
en_stop = set(stopwords.words('english'))


def preprocess(data):
    """Turn every abstract in ``data`` into a list of cleaned, lemmatized tokens.

    Steps per document: lowercase, replace non-letter runs with a space,
    tokenize with nltk, drop tokens of length <= 1, drop English stopwords,
    lemmatize with WordNet.

    Args:
        data: Object indexable by ``'medical_abstract'`` that yields strings
            (the notebook passes the train/test DataFrames).

    Returns:
        list[list[str]]: One token list per document, in input order.
    """
    # Local import: the notebook never imports `re` itself; the name was only
    # available as a side effect of `from nltk.stem.porter import *`.
    import re

    # Replaces runs of non-letter characters that start and end on a word
    # boundary. Punctuation at the very start/end of the text and digits glued
    # to letters are NOT matched; they are left to the tokenizer and the
    # length filter below.
    non_letter_run = re.compile(r'\b[^a-zA-Z]+\b')
    # One lemmatizer for the whole call instead of one per token.
    lemmatizer = WordNetLemmatizer()

    data_proc = []
    for doc in data['medical_abstract']:
        doc = non_letter_run.sub(' ', doc.lower())
        toks = [
            lemmatizer.lemmatize(tok)
            for tok in nltk.word_tokenize(doc)
            # length threshold can be varied
            if len(tok) > 1 and tok not in en_stop
        ]
        data_proc.append(toks)
    return data_proc

In [None]:

# Clean and tokenize both splits with the shared preprocessing function.
X_trains, X_tests = preprocess(train_dataset), preprocess(test_dataset)

# The vocabulary is fitted on the training split only; the test split is
# encoded with that same vocabulary.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_trains)

# One extra slot beyond the largest word index (presumably for the padding
# index 0 used by pad_sequences -- confirm against the Keras docs).
vocab_size = 1 + len(tokenizer.word_index)

# Encode each split as integer sequences, then pad at the end to a fixed
# length. Truncation behaviour is left at the pad_sequences default.
max_length = 100
X_train, X_test = (
    pad_sequences(tokenizer.texts_to_sequences(token_lists),
                  maxlen=max_length, padding='post')
    for token_lists in (X_trains, X_tests)
)

### 0.5 Word2Vec model

In [None]:

import random

# Seed the torch / numpy / stdlib RNGs used elsewhere in the notebook.
torch.manual_seed(302)
np.random.seed(302)
random.seed(302)

embedding_dim = 100
vocab_size = len(tokenizer.word_index) + 1

# Train the Word2Vec model on the tokenized training abstracts.
# gensim does not use the global seeds above: it takes its own `seed`
# argument, and its documentation states that a reproducible run also needs
# a single worker thread (and a fixed PYTHONHASHSEED across interpreter
# launches). workers=1 trades some training speed for repeatable embeddings.
w2v_model = Word2Vec(
    X_trains,
    vector_size=embedding_dim,
    window=5,
    min_count=1,
    workers=1,
    seed=302,
)
print(w2v_model)

# Weight matrix for the embedding layer: row i holds the vector of the word
# whose tokenizer index is i. Rows with no Word2Vec vector (including row 0,
# which no word index maps to) stay all-zero.
embedding_matrix = np.zeros((vocab_size, embedding_dim))
for word, i in tokenizer.word_index.items():
    if word in w2v_model.wv:
        embedding_matrix[i] = w2v_model.wv[word]
print(embedding_matrix.shape)


Word2Vec<vocab=30615, vector_size=100, alpha=0.025>
(30616, 100)


### 0.6 Evaluation metrics
* TODO: consider removing the accuracy metric.

In [None]:
# Accuracy (adapted from mrdbourke's Colab notebook)
def accuracy_fn(y_true, y_pred):
    """Calculates accuracy between truth labels and predictions.

    Args:
        y_true (torch.Tensor): Truth labels.
        y_pred (torch.Tensor): Predicted labels to be compared to the truth
            labels. Expected to have the same shape as ``y_true``
            (``torch.eq`` would otherwise broadcast).

    Returns:
        float: Accuracy as a percentage, e.g. 78.45. Returns 0.0 when
        ``y_pred`` is empty instead of raising ``ZeroDivisionError``.
    """
    total = len(y_pred)
    if total == 0:
        # Nothing to score: avoid dividing by zero.
        return 0.0
    correct = torch.eq(y_true, y_pred).sum().item()
    return (correct / total) * 100

In [None]:
from timeit import default_timer as timer
def print_train_time(start: float, end: float, device: torch.device = None):
    """Print and return the time elapsed between two timer readings.

    Args:
        start: Timer reading taken before the measured code ran.
        end: Timer reading taken after the measured code finished.
        device: Value interpolated into the printed message; it is only
            used for display.

    Returns:
        float: ``end - start``.
    """
    total_time = end - start
    print(f"Train time on {device}: {total_time:.3f} seconds")
    return total_time

# Smoke test of print_train_time: nothing runs between the two readings, so
# the reported duration is just the overhead of calling the timer.
start_time = timer()
# some code...
end_time = timer()
print_train_time(start=start_time, end= end_time, device = "cpu")

Train time on cpu: 0.000 seconds


4.386399996292312e-05

## 1 CNN base model and grid search...

The initial architecture uses two convolutional layers, each followed by a max-pooling layer, as in article [1].

In [None]:

# NOTE: `train_dataset` / `test_dataset` are rebound here from the pandas
# DataFrames to TensorDatasets. After this cell has run, earlier cells that
# read the DataFrames (e.g. the preprocessing cell) can no longer be re-run
# without reloading the CSV files first.

# Padded index sequences and labels as int64 tensors on the target device.
X_train_tensor = torch.as_tensor(X_train, dtype=torch.long).to(device)
y_train_tensor = torch.as_tensor(np.asarray(y_train), dtype=torch.long).to(device)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32)

# Test labels are kept on the same device as the test inputs. The original
# cell rebuilt `y_test_tensor` on the CPU right after moving it to `device`,
# and ended with the no-op assignment `test_loader = test_loader`.
X_test_tensor = torch.as_tensor(X_test, dtype=torch.long).to(device)
y_test_tensor = torch.as_tensor(np.asarray(y_test), dtype=torch.long).to(device)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=32)



In [None]:
num_classes = 5
random.seed(302)
torch.cuda.manual_seed(302)
torch.manual_seed(302)
# The training loops subtract 1 from the labels so that they fall in
# 0..num_classes-1, as CrossEntropyLoss expects.


class CNNMulticlass(nn.Module):
    """Text CNN: frozen pretrained embedding, two conv/max-pool stages, two linear layers.

    Args:
        vocab_size: Number of rows of the embedding table.
        embedding_dim: Width of each embedding vector.
        embedding_matrix: Array-like of shape (vocab_size, embedding_dim)
            holding the pretrained vectors; copied into the frozen embedding.
        max_length: Length of the (padded) input sequences.
        kernel_size: Kernel size of both convolutions (default 2, the value
            that was previously hard-coded).

    Raises:
        ValueError: If ``max_length`` is too short to survive both
            conv/pool stages.

    ``forward`` returns raw logits of shape (batch, num_classes);
    ``num_classes`` is read from the module-level variable.
    """

    _POOL_SIZE = 5

    def __init__(self, vocab_size, embedding_dim, embedding_matrix, max_length,
                 kernel_size=2):
        super().__init__()
        # Embedding layer initialised from the pretrained matrix and frozen.
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.embedding.weight = nn.Parameter(
            torch.tensor(embedding_matrix, dtype=torch.float32),
            requires_grad=False,  # keeps the original embedding matrix
        )

        # Two convolutional layers and two max-pool layers, like paper [1].
        self.conv1 = nn.Conv1d(embedding_dim, 256, kernel_size=kernel_size)
        self.pool1 = nn.MaxPool1d(self._POOL_SIZE)
        self.conv2 = nn.Conv1d(256, 256, kernel_size=kernel_size)
        self.pool2 = nn.MaxPool1d(self._POOL_SIZE)
        self.flatten = nn.Flatten()

        # Sequence length left after each (conv -> pool) stage. The previous
        # closed form `(max_length - 4) // 25` only matched the real length
        # for some values (e.g. 100) and made forward() fail for others
        # (e.g. 104 or 130).
        seq_len = max_length
        for _ in range(2):
            seq_len = (seq_len - kernel_size + 1) // self._POOL_SIZE
        if seq_len < 1:
            raise ValueError(
                f"max_length={max_length} is too short for two conv/pool stages "
                f"with kernel_size={kernel_size}"
            )

        self.fc1 = nn.Linear(256 * seq_len, 256)
        self.fc2 = nn.Linear(256, num_classes)
        # Not applied in forward(): CrossEntropyLoss works on raw logits.
        # Kept as an attribute for callers that want probabilities.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Map a batch of token-index sequences to class logits."""
        x = self.embedding(x)
        # Conv1d wants channels (the embedding dimension) before the sequence axis.
        x = x.permute(0, 2, 1)
        x = self.pool1(nn.functional.relu(self.conv1(x)))
        x = self.pool2(nn.functional.relu(self.conv2(x)))
        x = self.flatten(x)
        x = nn.functional.relu(self.fc1(x))
        return self.fc2(x)



### 1.1 Testing learning rate

In [None]:
from tqdm.auto import tqdm

# Sweep over a list of learning rates, training a fresh model for each.
random.seed(302)
torch.cuda.manual_seed(302)
torch.manual_seed(302)
vocab_size = len(tokenizer.word_index) + 1

learning_rates = [0.001, 0.01, 0.1]  # You can modify this list of learning rates as needed
epochs = 10
criterion = nn.CrossEntropyLoss()

for lr in learning_rates:
    # Fresh model and optimizer for the current learning rate.
    model_multiclass = CNNMulticlass(vocab_size, 100, embedding_matrix, max_length).to(device)
    optimizer = torch.optim.Adam(model_multiclass.parameters(), lr=lr)

    print(len(test_loader))
    train_time_start_on_cpu = timer()

    for epoch in tqdm(range(epochs)):
        # ---- Training ----
        model_multiclass.train()
        train_loss = 0.0
        for inputs, labels in train_loader:
            labels = labels.long() - 1  # shift labels to 0-based class indices
            y_pred = model_multiclass(inputs)
            loss = criterion(y_pred, labels)
            # .item() keeps a plain float; adding the tensor itself would keep
            # every batch's autograd history alive for the whole epoch.
            train_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        train_loss /= len(train_loader)

        # ---- Testing ----
        test_loss, test_acc = 0.0, 0.0
        model_multiclass.eval()
        with torch.inference_mode():
            # Dedicated loop names so the full X_test_tensor / y_test_tensor
            # are not overwritten by the last batch.
            for test_inputs, test_labels in test_loader:
                test_labels = (test_labels - 1).to(device)
                test_pred = model_multiclass(test_inputs)
                test_loss += criterion(test_pred, test_labels).item()
                test_acc += accuracy_fn(y_true=test_labels,
                                        y_pred=test_pred.argmax(dim=1))
        # Both metrics are averaged per batch. The original only averaged the
        # accuracy, so the printed test loss was a sum over all batches.
        test_loss /= len(test_loader)
        test_acc /= len(test_loader)
        print(f"\nTrain loss: {train_loss:.4f} | Test loss: {test_loss:.4f}, Test acc: {test_acc:.4f}")

    # Print learning rate and accuracy after completing all epochs
    print(f"Learning Rate: {lr}, Test Accuracy: {test_acc:.4f}")
    train_time_end_on_cpu = timer()
    total_train_time_model_multiclass = print_train_time(start=train_time_start_on_cpu,
                                                        end=train_time_end_on_cpu,
                                                        device=str(next(model_multiclass.parameters()).device))


91


  0%|          | 0/10 [00:00<?, ?it/s]


Train loss: 1.1307 | Test loss: 96.1740, Test acc: 55.0481

Train loss: 0.9947 | Test loss: 93.3316, Test acc: 56.5591

Train loss: 0.9425 | Test loss: 94.8153, Test acc: 56.9025

Train loss: 0.8935 | Test loss: 97.1063, Test acc: 56.4904

Train loss: 0.8401 | Test loss: 105.1270, Test acc: 54.8764

Train loss: 0.7750 | Test loss: 115.5620, Test acc: 53.9492

Train loss: 0.7091 | Test loss: 130.1849, Test acc: 52.4725

Train loss: 0.6582 | Test loss: 151.1048, Test acc: 48.2143

Train loss: 0.6123 | Test loss: 173.4201, Test acc: 48.2486

Train loss: 0.5765 | Test loss: 169.9090, Test acc: 48.4890
Learning Rate: 0.001, Test Accuracy: 48.4890
Train time on cuda:0: 10.805 seconds
91


  0%|          | 0/10 [00:00<?, ?it/s]


Train loss: 1.2226 | Test loss: 102.7702, Test acc: 51.8201

Train loss: 1.1005 | Test loss: 101.3611, Test acc: 53.1593

Train loss: 1.0627 | Test loss: 99.2265, Test acc: 53.6058

Train loss: 1.0423 | Test loss: 98.1471, Test acc: 56.1813

Train loss: 1.0268 | Test loss: 96.1343, Test acc: 57.5206

Train loss: 1.0215 | Test loss: 96.3325, Test acc: 57.1429

Train loss: 1.0113 | Test loss: 96.1797, Test acc: 55.8036

Train loss: 1.0124 | Test loss: 93.7814, Test acc: 58.8599

Train loss: 1.0070 | Test loss: 96.3491, Test acc: 53.7775

Train loss: 0.9934 | Test loss: 94.4929, Test acc: 57.8297
Learning Rate: 0.01, Test Accuracy: 57.8297
Train time on cuda:0: 10.968 seconds
91


  0%|          | 0/10 [00:00<?, ?it/s]


Train loss: 28.0848 | Test loss: 139.6654, Test acc: 33.1044

Train loss: 1.5377 | Test loss: 139.7317, Test acc: 33.1044

Train loss: 1.5380 | Test loss: 139.7595, Test acc: 33.1044

Train loss: 1.5381 | Test loss: 139.7738, Test acc: 33.1044

Train loss: 1.5382 | Test loss: 139.7820, Test acc: 33.1044

Train loss: 1.5382 | Test loss: 139.7869, Test acc: 33.1044

Train loss: 1.5383 | Test loss: 139.7900, Test acc: 33.1044

Train loss: 1.5383 | Test loss: 139.7921, Test acc: 33.1044

Train loss: 1.5383 | Test loss: 139.7934, Test acc: 33.1044

Train loss: 1.5383 | Test loss: 139.7943, Test acc: 33.1044
Learning Rate: 0.1, Test Accuracy: 33.1044
Train time on cuda:0: 11.196 seconds


### 1.2 Testing Kernel size

1.2.1 Kernel size 2

In [None]:
num_classes = 5
random.seed(302)
torch.cuda.manual_seed(302)
torch.manual_seed(302)
# The training loop below subtracts 1 from the labels so that they fall in
# 0..num_classes-1, as CrossEntropyLoss expects.


# NOTE: CNNMulticlass is also declared in section 1; this later declaration
# is the one in effect from this cell onwards.
class CNNMulticlass(nn.Module):
    """Text CNN: frozen pretrained embedding, two conv/max-pool stages, two linear layers.

    Args:
        vocab_size: Number of rows of the embedding table.
        embedding_dim: Width of each embedding vector.
        embedding_matrix: Array-like of shape (vocab_size, embedding_dim)
            holding the pretrained vectors; copied into the frozen embedding.
        max_length: Length of the (padded) input sequences.
        kernel_size: Kernel size of both convolutions (default 2, the value
            that was previously hard-coded).

    Raises:
        ValueError: If ``max_length`` is too short to survive both
            conv/pool stages.

    ``forward`` returns raw logits of shape (batch, num_classes);
    ``num_classes`` is read from the module-level variable.
    """

    _POOL_SIZE = 5

    def __init__(self, vocab_size, embedding_dim, embedding_matrix, max_length,
                 kernel_size=2):
        super().__init__()
        # Embedding layer initialised from the pretrained matrix and frozen.
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.embedding.weight = nn.Parameter(
            torch.tensor(embedding_matrix, dtype=torch.float32),
            requires_grad=False,  # keeps the original embedding matrix
        )

        # Two convolutional layers and two max-pool layers, like paper [1].
        self.conv1 = nn.Conv1d(embedding_dim, 256, kernel_size=kernel_size)
        self.pool1 = nn.MaxPool1d(self._POOL_SIZE)
        self.conv2 = nn.Conv1d(256, 256, kernel_size=kernel_size)
        self.pool2 = nn.MaxPool1d(self._POOL_SIZE)
        self.flatten = nn.Flatten()

        # Sequence length left after each (conv -> pool) stage. The previous
        # closed form `(max_length - 4) // 25` only matched the real length
        # for some values (e.g. 100) and made forward() fail for others
        # (e.g. 104 or 130).
        seq_len = max_length
        for _ in range(2):
            seq_len = (seq_len - kernel_size + 1) // self._POOL_SIZE
        if seq_len < 1:
            raise ValueError(
                f"max_length={max_length} is too short for two conv/pool stages "
                f"with kernel_size={kernel_size}"
            )

        self.fc1 = nn.Linear(256 * seq_len, 256)
        self.fc2 = nn.Linear(256, num_classes)
        # Not applied in forward(): CrossEntropyLoss works on raw logits.
        # Kept as an attribute for callers that want probabilities.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Map a batch of token-index sequences to class logits."""
        x = self.embedding(x)
        # Conv1d wants channels (the embedding dimension) before the sequence axis.
        x = x.permute(0, 2, 1)
        x = self.pool1(nn.functional.relu(self.conv1(x)))
        x = self.pool2(nn.functional.relu(self.conv2(x)))
        x = self.flatten(x)
        x = nn.functional.relu(self.fc1(x))
        return self.fc2(x)


from tqdm.auto import tqdm

model_multiclass = CNNMulticlass(vocab_size, 100, embedding_matrix, max_length).to(device)
optimizer = torch.optim.Adam(model_multiclass.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

print(len(test_loader))
train_time_start_on_cpu = timer()

epochs = 10
for epoch in tqdm(range(epochs)):
    # ---- Training ----
    model_multiclass.train()
    train_loss = 0.0
    for inputs, labels in train_loader:
        labels = labels.long() - 1  # shift labels to 0-based class indices
        y_pred = model_multiclass(inputs)
        loss = criterion(y_pred, labels)
        # .item() keeps a plain float; adding the tensor itself would keep
        # every batch's autograd history alive for the whole epoch.
        train_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    train_loss /= len(train_loader)

    # ---- Testing ----
    test_loss, test_acc = 0.0, 0.0
    model_multiclass.eval()
    with torch.inference_mode():
        # Dedicated loop names so the full X_test_tensor / y_test_tensor are
        # not overwritten by the last batch.
        for test_inputs, test_labels in test_loader:
            test_labels = (test_labels - 1).to(device)
            test_pred = model_multiclass(test_inputs)
            test_loss += criterion(test_pred, test_labels).item()
            test_acc += accuracy_fn(y_true=test_labels,
                                    y_pred=test_pred.argmax(dim=1))
    # Average both metrics per batch.
    test_loss /= len(test_loader)
    test_acc /= len(test_loader)

# Only the metrics of the final epoch are reported.
print(f"\nTrain loss: {train_loss:.4f} | Test loss: {test_loss:.4f}, Test acc: {test_acc:.4f}")
train_time_end_on_cpu = timer()
total_train_time_model_multiclass = print_train_time(start = train_time_start_on_cpu,
                                              end = train_time_end_on_cpu,
                                              device=str(next(model_multiclass.parameters()).device))

91


  0%|          | 0/10 [00:00<?, ?it/s]


Train loss: 1.0032 | Test loss: 1.0345, Test acc: 58.1731
Train time on cuda:0: 11.969 seconds


1.2.2 Kernel size 4

In [None]:
random.seed(302)
torch.cuda.manual_seed(302)
torch.manual_seed(302)


class CNNMulticlass2(nn.Module):
    """Variant of the text CNN whose two convolutions use kernel size 4.

    Args:
        vocab_size: Number of rows of the embedding table.
        embedding_dim: Width of each embedding vector.
        embedding_matrix: Array-like of shape (vocab_size, embedding_dim)
            holding the pretrained vectors; copied into the frozen embedding.
        max_length: Length of the (padded) input sequences.

    Raises:
        ValueError: If ``max_length`` is too short to survive both
            conv/pool stages.

    ``forward`` returns raw logits of shape (batch, num_classes);
    ``num_classes`` is read from the module-level variable.
    """

    _KERNEL_SIZE = 4
    _POOL_SIZE = 5

    def __init__(self, vocab_size, embedding_dim, embedding_matrix, max_length):
        super().__init__()
        # Embedding layer initialised from the pretrained matrix and frozen.
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.embedding.weight = nn.Parameter(
            torch.tensor(embedding_matrix, dtype=torch.float32),
            requires_grad=False,  # keeps the original embedding matrix
        )

        # Two convolutional layers (kernel size 4) and two max-pool layers.
        self.conv1 = nn.Conv1d(embedding_dim, 256, kernel_size=self._KERNEL_SIZE)
        self.pool1 = nn.MaxPool1d(self._POOL_SIZE)
        self.conv2 = nn.Conv1d(256, 256, kernel_size=self._KERNEL_SIZE)
        self.pool2 = nn.MaxPool1d(self._POOL_SIZE)

        # Flattening for the fully connected layers.
        self.flatten = nn.Flatten()

        # Sequence length left after each (conv -> pool) stage. The previous
        # formula `(max_length - 4) // 25` did not depend on the kernel size
        # and only matched the real length for some values of max_length
        # (100 works, 104 does not).
        seq_len = max_length
        for _ in range(2):
            seq_len = (seq_len - self._KERNEL_SIZE + 1) // self._POOL_SIZE
        if seq_len < 1:
            raise ValueError(
                f"max_length={max_length} is too short for two conv/pool stages "
                f"with kernel_size={self._KERNEL_SIZE}"
            )
        conv_output_size = 256 * seq_len

        self.fc1 = nn.Linear(conv_output_size, 256)
        self.fc2 = nn.Linear(256, num_classes)
        # Not applied in forward(): CrossEntropyLoss works on raw logits.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Map a batch of token-index sequences to class logits."""
        x = self.embedding(x)
        # Conv1d wants channels (the embedding dimension) before the sequence axis.
        x = x.permute(0, 2, 1)
        # ReLU activations with conv and max pool layers
        x = self.pool1(nn.functional.relu(self.conv1(x)))
        x = self.pool2(nn.functional.relu(self.conv2(x)))
        x = self.flatten(x)
        x = nn.functional.relu(self.fc1(x))
        return self.fc2(x)

from tqdm.auto import tqdm

model_multiclass2 = CNNMulticlass2(vocab_size, 100, embedding_matrix, max_length).to(device)
optimizer = torch.optim.Adam(model_multiclass2.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()
train_time_start_on_cpu = timer()

epochs = 10
for epoch in tqdm(range(epochs)):
    # ---- Training ----
    model_multiclass2.train()
    train_loss = 0.0
    for inputs, labels in train_loader:
        labels = labels.long() - 1  # shift labels to 0-based class indices
        y_pred = model_multiclass2(inputs)
        loss = criterion(y_pred, labels)
        # .item() keeps a plain float; adding the tensor itself would keep
        # every batch's autograd history alive for the whole epoch.
        train_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    train_loss /= len(train_loader)

    # ---- Testing ----
    test_loss, test_acc = 0.0, 0.0
    model_multiclass2.eval()
    with torch.inference_mode():
        # Dedicated loop names so the full X_test_tensor / y_test_tensor are
        # not overwritten by the last batch.
        for test_inputs, test_labels in test_loader:
            test_labels = (test_labels - 1).to(device)
            test_pred = model_multiclass2(test_inputs)
            test_loss += criterion(test_pred, test_labels).item()
            test_acc += accuracy_fn(y_true=test_labels,
                                    y_pred=test_pred.argmax(dim=1))
    # Average both metrics per batch.
    test_loss /= len(test_loader)
    test_acc /= len(test_loader)

# Only the accuracy of the final epoch is reported.
print(f"Test acc: {test_acc:.4f}")
train_time_end_on_cpu = timer()
total_train_time_model_multiclass2 = print_train_time(start = train_time_start_on_cpu,
                                              end = train_time_end_on_cpu,
                                              device=str(next(model_multiclass2.parameters()).device))


  0%|          | 0/10 [00:00<?, ?it/s]

Test acc: 54.4299
Train time on cuda:0: 11.022 seconds


1.2.3 Kernel size 5

In [None]:
torch.manual_seed(302)
np.random.seed(302)
random.seed(302)


class CNNMulticlass3(nn.Module):
    """Variant of the text CNN whose two convolutions use kernel size 5.

    The input width of the first linear layer is measured by pushing a dummy
    tensor through the conv/pool stack rather than computed by hand.
    ``forward`` returns raw logits; ``num_classes`` is read from the
    module-level variable.
    """

    def __init__(self, vocab_size, embedding_dim, embedding_matrix, max_length):
        super().__init__()
        # Frozen embedding initialised from the pretrained matrix.
        pretrained = torch.tensor(embedding_matrix, dtype=torch.float32)
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.embedding.weight = nn.Parameter(pretrained)
        self.embedding.weight.requires_grad = False

        # Two conv layers with kernel size 5, each followed by max pooling.
        self.conv1 = nn.Conv1d(embedding_dim, 256, kernel_size=5)
        self.pool1 = nn.MaxPool1d(5)
        self.conv2 = nn.Conv1d(256, 256, kernel_size=5)
        self.pool2 = nn.MaxPool1d(5)

        self.flatten = nn.Flatten()

        # Number of features reaching fc1, measured on a dummy input.
        flat_features = self._get_conv_output_size(embedding_dim, max_length)

        self.fc1 = nn.Linear(flat_features, 256)
        self.fc2 = nn.Linear(256, num_classes)
        # Defined but not applied in forward().
        self.softmax = nn.Softmax(dim=1)

    def _get_conv_output_size(self, embedding_dim, max_length):
        """Return the flattened feature count produced by the conv/pool stack."""
        # Single zero-filled sample laid out as (batch, channels, sequence).
        probe = torch.zeros(1, embedding_dim, max_length)
        probe = self.pool1(self.conv1(probe))
        probe = self.pool2(self.conv2(probe))
        return probe.view(1, -1).size(1)

    def forward(self, x):
        """Map a batch of token-index sequences to class logits."""
        # Embed, then move the embedding axis into the channel position for Conv1d.
        embedded = self.embedding(x).permute(0, 2, 1)
        hidden = self.pool1(nn.functional.relu(self.conv1(embedded)))
        hidden = self.pool2(nn.functional.relu(self.conv2(hidden)))
        hidden = nn.functional.relu(self.fc1(self.flatten(hidden)))
        return self.fc2(hidden)

from tqdm.auto import tqdm

model_multiclass3 = CNNMulticlass3(vocab_size, 100, embedding_matrix, max_length).to(device)
optimizer = torch.optim.Adam(model_multiclass3.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()
train_time_start_on_cpu = timer()

epochs = 10
for epoch in tqdm(range(epochs)):
    # ---- Training ----
    model_multiclass3.train()
    train_loss = 0.0
    for inputs, labels in train_loader:
        labels = labels.long() - 1  # shift labels to 0-based class indices
        y_pred = model_multiclass3(inputs)
        loss = criterion(y_pred, labels)
        # .item() keeps a plain float; adding the tensor itself would keep
        # every batch's autograd history alive for the whole epoch.
        train_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    train_loss /= len(train_loader)

    # ---- Testing ----
    test_loss, test_acc = 0.0, 0.0
    model_multiclass3.eval()
    with torch.inference_mode():
        # Dedicated loop names so the full X_test_tensor / y_test_tensor are
        # not overwritten by the last batch.
        for test_inputs, test_labels in test_loader:
            test_labels = (test_labels - 1).to(device)
            test_pred = model_multiclass3(test_inputs)
            test_loss += criterion(test_pred, test_labels).item()
            test_acc += accuracy_fn(y_true=test_labels,
                                    y_pred=test_pred.argmax(dim=1))
    # Average both metrics per batch.
    test_loss /= len(test_loader)
    test_acc /= len(test_loader)

# Only the accuracy of the final epoch is reported.
print(f"Test acc: {test_acc:.4f}")
train_time_end_on_cpu = timer()
total_train_time_model_multiclass3 = print_train_time(start = train_time_start_on_cpu,
                                              end = train_time_end_on_cpu,
                                              device=str(next(model_multiclass3.parameters()).device))

  0%|          | 0/10 [00:00<?, ?it/s]

Test acc: 53.1593
Train time on cuda:0: 11.670 seconds
