# Dataset

In [1]:
import pandas as pd
import torch
from torch import nn
import numpy as np
from sklearn.model_selection import train_test_split

# Load the raw table and drop the columns that are used neither as features
# nor as the label.
df = pd.read_csv('data/pokemons.csv')
df = df.drop(['Unnamed: 0', '#', 'Name', 'Type 2', 'Total'], axis=1)
df_numeric = df.copy()

# Encode every distinct 'Type 1' value as an integer id, numbered in order of
# first appearance. `c[i]` is the type name that belongs to id `i`.
# (pd.factorize avoids a Python loop whose variable shadowed the builtin `type`.)
type_codes, type_names = pd.factorize(df['Type 1'])
c = list(type_names)
classes = torch.arange(len(c))

# Numeric copy of the dataset: the label column is replaced in a single
# assignment so it ends up with an integer dtype instead of integers being
# written into the original string column mask by mask.
df_numeric['Type 1'] = type_codes

# Separate the label from the features.
df = df.drop('Type 1', axis=1)

# Min-max scale every feature column to [0, 1].
# NOTE(review): min/max are computed on the full dataset, i.e. before the
# train/test split below, so test-set statistics influence the scaling.
df = (df - df.min()) / (df.max() - df.min())
matrix = df.to_numpy()
X = torch.from_numpy(matrix).type(torch.float)
# Class ids go straight to an int64 tensor (what nn.CrossEntropyLoss expects
# for class-index targets); no float round-trip is needed.
y = torch.from_numpy(np.asarray(type_codes, dtype=np.int64))

# Hold out 20% of the rows for testing; fixed random_state for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

display(df.head(5))
display(df_numeric.head(5))


Unnamed: 0,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
0,0.173228,0.237838,0.179592,0.298913,0.195652,0.205128
1,0.232283,0.308108,0.236735,0.380435,0.26087,0.282051
2,0.311024,0.416216,0.318367,0.48913,0.347826,0.384615
3,0.311024,0.513514,0.481633,0.608696,0.434783,0.384615
4,0.149606,0.254054,0.155102,0.271739,0.130435,0.307692


Unnamed: 0,Type 1,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
0,0,45,49,49,65,65,45
1,0,60,62,63,80,80,60
2,0,80,82,83,100,100,80
3,0,80,100,123,122,120,80
4,1,39,52,43,60,50,65


In [2]:
# Pick the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('device availible: ', device)

device availible:  cpu


# Instantiating the model

In [3]:
# Model
class PokemonClassifier(nn.Module):
    """Fully connected classifier: two hidden ReLU layers followed by a linear output.

    The output of ``forward`` is the raw result of the last linear layer
    (no softmax is applied inside the model).

    Args:
        in_features: Number of input features per sample. Defaults to 6.
        hidden_units: Width of both hidden layers. Defaults to 128.
        num_classes: Number of output scores per sample. Defaults to 18.
    """

    def __init__(self, in_features: int = 6, hidden_units: int = 128, num_classes: int = 18) -> None:
        super().__init__()
        self.sequence = nn.Sequential(
            nn.Linear(in_features, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, num_classes),
        )

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Run ``X`` through the layer stack and return the per-class scores."""
        return self.sequence(X)

# Instantiating the model
# Seed the RNG first: the linear layers draw their initial weights at
# construction time, so seeding afterwards cannot make the initialisation repeatable.
torch.manual_seed(42)
model = PokemonClassifier()
display(model)

PokemonClassifier(
  (sequence): Sequential(
    (0): Linear(in_features=6, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=18, bias=True)
  )
)

In [4]:
# Accuracy metric
def accuracy_fn(y_true, y_pred):
    """Return the percentage (0-100) of positions where ``y_pred`` equals ``y_true``."""
    hits = torch.eq(y_true, y_pred)
    n_correct = hits.sum().item()
    return (n_correct / len(y_pred)) * 100

# Cross-entropy loss for the multi-class problem
loss_fn = nn.CrossEntropyLoss()

# Plain SGD over all model parameters with a learning rate of 0.75
optimizer = torch.optim.SGD(model.parameters(), lr=0.75)

# Fix the CPU and CUDA random seeds
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Training the model

In [5]:
epochs = 15000
log_every = 1000  # report progress every `log_every` epochs

# Full-batch training: every epoch is one forward/backward pass over X_train.
for epoch in range(epochs):
    ### Training
    model.train()

    # 1. Forward (raw class scores; kept 2-D so a single-row batch still works)
    pred = model(X_train)

    # 2. Loss
    loss = loss_fn(pred, y_train)

    # 3. Backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    ### Testing
    # Only evaluate when the result is going to be printed. `range(epochs)`
    # stops at `epochs - 1`, so that is the value identifying the final epoch.
    if epoch % log_every == 0 or epoch == epochs - 1:
        model.eval()
        with torch.inference_mode():
            # argmax over the raw scores; softmax is monotonic, so applying it
            # first would not change which class is largest.
            test_preds = model(X_test).argmax(dim=1)
            test_acc = accuracy_fn(y_test, test_preds)

        print('Epoch: {:<5d}   | Loss: {:<.5f}   | Acc: {:.2f}%'.format(epoch, loss.item(), test_acc))

Epoch: 0       | Loss: 2.88397   | Acc: 11.72%
Epoch: 1000    | Loss: 2.07831   | Acc: 18.83%
Epoch: 2000    | Loss: 1.70542   | Acc: 17.57%
Epoch: 3000    | Loss: 1.24862   | Acc: 15.06%
Epoch: 4000    | Loss: 0.74653   | Acc: 22.59%
Epoch: 5000    | Loss: 0.46869   | Acc: 19.25%
Epoch: 6000    | Loss: 0.12576   | Acc: 20.08%
Epoch: 7000    | Loss: 0.08391   | Acc: 20.50%
Epoch: 8000    | Loss: 0.06997   | Acc: 20.50%
Epoch: 9000    | Loss: 0.06312   | Acc: 19.25%
Epoch: 10000   | Loss: 0.06020   | Acc: 20.08%
Epoch: 11000   | Loss: 0.05741   | Acc: 19.67%
Epoch: 12000   | Loss: 0.05743   | Acc: 19.67%
Epoch: 13000   | Loss: 0.05508   | Acc: 19.25%
Epoch: 14000   | Loss: 0.05460   | Acc: 19.25%


# Testing after training

In [7]:
# Predict a class for every test sample with the trained model
model.eval()
with torch.inference_mode():
    test_scores = model(X_test)
    test_probs = torch.softmax(test_scores, dim=1)
    pred = test_probs.argmax(dim=1)

# Show the first 25 predictions next to the true labels
print(f'Predictions:   {pred[:25].tolist()}')
print(f'Real:          {y_test[:25].tolist()}')

Predictions:   [4, 0, 9, 2, 7, 12, 0, 1, 1, 14, 2, 2, 1, 0, 2, 3, 1, 13, 7, 6, 4, 1, 7, 13, 2]
Real:          [6, 4, 1, 3, 0, 10, 1, 15, 2, 9, 0, 6, 9, 11, 5, 7, 1, 13, 3, 6, 4, 1, 3, 0, 14]
