In [1]:
# Print available filepaths for this competition
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [2]:
# Toggle off warnigns
import warnings
# Data frames and manipulation 
import pandas as pd
# Basic plots
from matplotlib import pyplot as plt 
# Advanced plots
import seaborn as sns
# Math operations 
import numpy as np
# Split train dataset 
from sklearn.model_selection import train_test_split 
# Test model performance 
from sklearn.metrics import accuracy_score
# Light Gradient Boosting Model Classifier
from lightgbm import LGBMClassifier
# Decicion Tree Classifier Model
from sklearn.tree import DecisionTreeClassifier
# Random Forest Model 
from sklearn.ensemble import RandomForestClassifier
# Extreme gradient boosting model
from xgboost import XGBClassifier
# Categorical Boosting Model 
from catboost import CatBoostClassifier, Pool
# KFold (Multiple model runs)
from sklearn.model_selection import StratifiedKFold
import torch
import torch.nn as nn

# Filter out warnings
warnings.filterwarnings('ignore')

## Data Read and Processing

In [3]:
train = pd.read_csv('./playground-series-s4e6/train.csv')
test = pd.read_csv('./playground-series-s4e6/test.csv')

In [4]:
train

Unnamed: 0,id,Marital status,Application mode,Application order,Course,Daytime/evening attendance,Previous qualification,Previous qualification (grade),Nacionality,Mother's qualification,...,Curricular units 2nd sem (credited),Curricular units 2nd sem (enrolled),Curricular units 2nd sem (evaluations),Curricular units 2nd sem (approved),Curricular units 2nd sem (grade),Curricular units 2nd sem (without evaluations),Unemployment rate,Inflation rate,GDP,Target
0,0,1,1,1,9238,1,1,126.0,1,1,...,0,6,7,6,12.428571,0,11.1,0.6,2.02,Graduate
1,1,1,17,1,9238,1,1,125.0,1,19,...,0,6,9,0,0.000000,0,11.1,0.6,2.02,Dropout
2,2,1,17,2,9254,1,1,137.0,1,3,...,0,6,0,0,0.000000,0,16.2,0.3,-0.92,Dropout
3,3,1,1,3,9500,1,1,131.0,1,19,...,0,8,11,7,12.820000,0,11.1,0.6,2.02,Enrolled
4,4,1,1,2,9500,1,1,132.0,1,19,...,0,7,12,6,12.933333,0,7.6,2.6,0.32,Graduate
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76513,76513,1,17,1,9254,1,1,121.0,1,19,...,0,6,8,5,10.600000,0,13.9,-0.3,0.79,Graduate
76514,76514,1,1,6,9254,1,1,125.0,1,1,...,0,6,9,6,13.875000,0,9.4,-0.8,-3.12,Graduate
76515,76515,5,17,1,9085,1,1,138.0,1,37,...,0,5,8,5,11.400000,1,9.4,-0.8,-3.12,Enrolled
76516,76516,1,1,3,9070,1,1,136.0,1,38,...,0,6,0,0,0.000000,0,7.6,2.6,0.32,Dropout


In [5]:
# Encode the target variable to numerical for further analysis
train['Target'] = train['Target'].map({'Graduate': 0, 'Dropout': 1, 'Enrolled': 2})

In [6]:
# Replace spaces in column names in he train and test sets with an underscore
train.columns = train.columns.str.replace(' ', '_', regex=True)
test.columns = test.columns.str.replace(' ', '_', regex=True)

In [7]:
# Seperate target variable
X = train.drop(columns=['Target'])
y = train['Target']

In [27]:
X_tensor = torch.tensor(X.values[:,1:], dtype=torch.float32)
print(X_tensor.shape)
y_tensor = torch.tensor(y.values)
y_one_hot = nn.functional.one_hot(y_tensor, num_classes=3)
X_train = X_tensor
y_train = y_one_hot.to(torch.float32)
print(y_train.dtype,y_train.shape)

# min_vals, _ = torch.min(X_train, dim=0, keepdim=True)
# max_vals, _ = torch.max(X_train, dim=0, keepdim=True)
# X_train = (X_train - min_vals) / (max_vals - min_vals)

torch.Size([76518, 36])
torch.float32 torch.Size([76518, 3])


In [28]:
print(row_max)
print(row_min)

tensor([[6.0000e+00, 5.3000e+01, 9.0000e+00, 9.9910e+03, 1.0000e+00, 4.3000e+01,
         1.9000e+02, 1.0900e+02, 4.4000e+01, 4.4000e+01, 1.9400e+02, 1.9500e+02,
         1.9000e+02, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00,
         1.0000e+00, 7.0000e+01, 1.0000e+00, 2.0000e+01, 2.6000e+01, 4.5000e+01,
         2.6000e+01, 1.8875e+01, 1.2000e+01, 1.9000e+01, 2.3000e+01, 3.3000e+01,
         2.0000e+01, 1.8000e+01, 1.2000e+01, 1.6200e+01, 3.7000e+00, 3.5100e+00]])
tensor([[ 1.0000,  1.0000,  0.0000, 33.0000,  0.0000,  1.0000, 95.0000,  1.0000,
          1.0000,  1.0000,  0.0000,  0.0000, 95.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000, 17.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  7.6000, -0.8000, -4.0600]])


In [29]:
# normalize all data to range (0,1)
row_max = X_train.max(dim=0,keepdim=True).values
row_min = X_train.min(dim=0,keepdim=True).values
X_train = (X_train-row_min)/(row_max-row_min)

## Build Model and data loader
Previously the model was built upon the nn.transformerencoderlayer. The result was not good. Wnat to try self-implemented transformer structure

In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init

In [30]:
# encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
# transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)
# src = torch.rand(1,512)
# out = transformer_encoder(src)
# # output = encoder_layer(src)
# print(out.shape)

In [41]:
class ClassifierModel(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(ClassifierModel, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=16, kernel_size=5),
            nn.ReLU()
        )
        self.layer2 = nn.Sequential(
            nn.Conv1d(in_channels=16, out_channels=32, kernel_size=5),
            nn.ReLU()
        )
        self.mlp = nn.Linear(in_features=28*32,out_features=3)
        self.softmax = nn.Softmax()
        
        # self.embedding = nn.Linear(input_dim, dim_feedforward)
        # self.norm1 = nn.LayerNorm(dim_feedforward)
        # encoder_layer = nn.TransformerEncoderLayer(d_model=dim_feedforward, nhead=num_heads, dim_feedforward=dim_feedforward, dropout=dropout)
        # self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        # self.norm2 = nn.LayerNorm(dim_feedforward)
        # self.mlp = nn.Linear(dim_feedforward, dim_feedforward)
        # self.fc = nn.Linear(dim_feedforward, num_classes)
        # self.norm3 = nn.LayerNorm(dim_feedforward)
        # self.softmax = nn.Softmax()
        
    def _initialize_weights(self):
        # Initialize embedding layer
        init.xavier_uniform_(self.embedding.weight)
        if self.embedding.bias is not None:
            init.constant_(self.embedding.bias, 0)
        
        # Initialize transformer encoder layers
        for layer in self.transformer_encoder.layers:
            init.xavier_uniform_(layer.self_attention.in_proj_weight)
            init.xavier_uniform_(layer.self_attention.out_proj.weight)
            init.constant_(layer.self_attention.in_proj_bias, 0)
            init.constant_(layer.self_attention.out_proj.bias, 0)
            init.xavier_uniform_(layer.feed_forward[0].weight)
            init.xavier_uniform_(layer.feed_forward[2].weight)
            init.constant_(layer.feed_forward[0].bias, 0)
            init.constant_(layer.feed_forward[2].bias, 0)
        
        # Initialize fully connected layer with smaller values to start close to zero
        init.xavier_uniform_(self.fc.weight, gain=init.calculate_gain('sigmoid'))
        init.constant_(self.fc.bias, 0)
        
    def forward(self, x):
        # x = self.embedding(x)
        
        # x1 = self.norm1(x)
        # x1 = self.transformer_encoder(x1)
        # x1 = x1 + x
        # x2 = self.norm2(x1)
        # x2 = self.mlp(x2)      
        # x2 = x2 + x1
        # x2 = self.norm3(x2)
        # x2 = self.fc(x2)
        # output = self.softmax(x2)
        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        print(x2.shape)
        x3 = self.mlp(x2.view(-1,32*28))
        output = self.softmax(x3)
        return output



In [43]:
device = 'cuda'
input_dim = 36
num_classes = 3
model = ClassifierModel(input_dim, num_classes)
model.to(device)
# summary(model, input_size=(1,37))
src = torch.rand((32,1,36), device = device)
src.to(device)
src.get_device()
output = model(src)
print(output)

torch.Size([32, 32, 28])
tensor([[0.3037, 0.3626, 0.3336],
        [0.3179, 0.3569, 0.3251],
        [0.3330, 0.3378, 0.3292],
        [0.3108, 0.3611, 0.3281],
        [0.3231, 0.3498, 0.3271],
        [0.3203, 0.3546, 0.3252],
        [0.3213, 0.3526, 0.3261],
        [0.3109, 0.3681, 0.3211],
        [0.3157, 0.3648, 0.3195],
        [0.3199, 0.3601, 0.3200],
        [0.3348, 0.3457, 0.3195],
        [0.3140, 0.3696, 0.3164],
        [0.3067, 0.3590, 0.3342],
        [0.3181, 0.3540, 0.3279],
        [0.3129, 0.3589, 0.3282],
        [0.3144, 0.3593, 0.3262],
        [0.3063, 0.3630, 0.3307],
        [0.3069, 0.3677, 0.3254],
        [0.3309, 0.3515, 0.3176],
        [0.3333, 0.3499, 0.3168],
        [0.3386, 0.3541, 0.3073],
        [0.3173, 0.3575, 0.3253],
        [0.3213, 0.3553, 0.3234],
        [0.3187, 0.3495, 0.3318],
        [0.3146, 0.3576, 0.3278],
        [0.3151, 0.3670, 0.3179],
        [0.3187, 0.3555, 0.3258],
        [0.3157, 0.3620, 0.3223],
        [0.3279, 0.3574

In [119]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

batch_size = 4096
print(X_train.dtype, y_train.dtype)
dataset = CustomDataset(X_train, y_train)
dataloader = DataLoader(dataset, batch_size=256, shuffle=True)

train_size = int(0.9*len(dataset))
val_size =  len(dataset)-train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

torch.float32 torch.float32


## Training

In [114]:
from torchinfo import summary
from tqdm.notebook import tqdm
torch.cuda.empty_cache()
device = 'cuda'
input_dim = 37
num_classes = 3

model = TransformerClassifier(input_dim, num_classes, num_heads=4, num_layers=4, dim_feedforward=8, dropout=0.01)
model.to(device)
summary(model, input_size=(2, input_dim))

Layer (type:depth-idx)                        Output Shape              Param #
TransformerClassifier                         [2, 3]                    --
├─Linear: 1-1                                 [2, 8]                    304
├─LayerNorm: 1-2                              [2, 8]                    16
├─TransformerEncoder: 1-3                     [2, 8]                    --
│    └─ModuleList: 2-1                        --                        --
│    │    └─TransformerEncoderLayer: 3-1      [2, 8]                    464
│    │    └─TransformerEncoderLayer: 3-2      [2, 8]                    464
│    │    └─TransformerEncoderLayer: 3-3      [2, 8]                    464
│    │    └─TransformerEncoderLayer: 3-4      [2, 8]                    464
├─LayerNorm: 1-4                              [2, 8]                    16
├─Linear: 1-5                                 [2, 8]                    72
├─LayerNorm: 1-6                              [2, 8]                    16
├─Linear: 1-7  

In [121]:
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

num_epochs = 200
torch.autograd.set_detect_anomaly(True)

model.train()
for epoch in range(num_epochs):    
    running_loss = 0.0
    
    for inputs, targets in tqdm(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
            
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
    # print(running_loss)
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')

    # model.eval()
    # correct_predictions = 0
    # eval_loss = 0
    # for inputs, targets in tqdm(val_loader):
    #     if len(inputs)<batch_size:
    #         continue
    #     inputs, targets = inputs.to(device), targets.to(device)
    #     outputs = model(inputs)
    #     loss = criterion(outputs, targets)
    #     eval_loss += loss.item() * inputs.size(0)
    #     predicted_classes = torch.argmax(outputs, dim=1)
    #     ground_truth_classes = torch.argmax(targets, dim=1)
    #     correct_predictions += torch.sum(predicted_classes == ground_truth_classes).item()
    # accuracy = correct_predictions / len(val_loader)/batch_size  
    # eval_loss = eval_loss / len(val_loader.dataset)
    # print(f'Accuracy: {accuracy}; Loss: {eval_loss}')

  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 1/200, Loss: 0.7217


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 2/200, Loss: 0.7215


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 3/200, Loss: 0.7214


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 4/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 5/200, Loss: 0.7215


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 6/200, Loss: 0.7213


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 7/200, Loss: 0.7214


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 8/200, Loss: 0.7213


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 9/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 10/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 11/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 12/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 13/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 14/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 15/200, Loss: 0.7211


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 16/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 17/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 18/200, Loss: 0.7215


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 19/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 20/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 21/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 22/200, Loss: 0.7211


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 23/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 24/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 25/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 26/200, Loss: 0.7211


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 27/200, Loss: 0.7211


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 28/200, Loss: 0.7213


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 29/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 30/200, Loss: 0.7211


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 31/200, Loss: 0.7213


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 32/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 33/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 34/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 35/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 36/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 37/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 38/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 39/200, Loss: 0.7214


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 40/200, Loss: 0.7211


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 41/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 42/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 43/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 44/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 45/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 46/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 47/200, Loss: 0.7211


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 48/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 49/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 50/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 51/200, Loss: 0.7211


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 52/200, Loss: 0.7213


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 53/200, Loss: 0.7213


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 54/200, Loss: 0.7208


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 55/200, Loss: 0.7208


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 56/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 57/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 58/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 59/200, Loss: 0.7211


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 60/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 61/200, Loss: 0.7213


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 62/200, Loss: 0.7211


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 63/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 64/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 65/200, Loss: 0.7213


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 66/200, Loss: 0.7208


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 67/200, Loss: 0.7208


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 68/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 69/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 70/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 71/200, Loss: 0.7211


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 72/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 73/200, Loss: 0.7211


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 74/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 75/200, Loss: 0.7208


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 76/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 77/200, Loss: 0.7208


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 78/200, Loss: 0.7211


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 79/200, Loss: 0.7208


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 80/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 81/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 82/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 83/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 84/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 85/200, Loss: 0.7208


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 86/200, Loss: 0.7211


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 87/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 88/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 89/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 90/200, Loss: 0.7208


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 91/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 92/200, Loss: 0.7211


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 93/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 94/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 95/200, Loss: 0.7207


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 96/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 97/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 98/200, Loss: 0.7207


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 99/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 100/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 101/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 102/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 103/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 104/200, Loss: 0.7207


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 105/200, Loss: 0.7207


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 106/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 107/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 108/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 109/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 110/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 111/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 112/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 113/200, Loss: 0.7208


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 114/200, Loss: 0.7206


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 115/200, Loss: 0.7208


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 116/200, Loss: 0.7212


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 117/200, Loss: 0.7210


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 118/200, Loss: 0.7211


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 119/200, Loss: 0.7207


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 120/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

Epoch 121/200, Loss: 0.7209


  0%|          | 0/17 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [103]:
val_loss = 0
for inputs, label in tqdm(val_loader):
    output = model(inputs.to(device))
    predicted_classes = torch.argmax(output, dim=1)
    ground_truth_classes = torch.argmax(label, dim=1)
    equal = predicted_classes.cpu() == ground_truth_classes.cpu()
    correct_predictions = torch.sum(equal).item()
    print(f'Accuracy in training set: {correct_predictions}')
    print(inputs.shape)
    loss = criterion(output, label.to('cuda'))
    val_loss+= loss.item() * inputs.size(0)
val_loss = val_loss/len(train_loader.dataset)
print(val_loss)

  0%|          | 0/3 [00:00<?, ?it/s]

Accuracy in training set: 24
torch.Size([32, 37])
Accuracy in training set: 26
torch.Size([32, 37])
Accuracy in training set: 0
torch.Size([1, 37])
0.08812510760294066
