In [87]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, TensorDataset

In [88]:
df = pd.read_csv('../../code/data/cleaned_pokemon.csv')

label_encoder = LabelEncoder()
'''
type1 and type2 are categories and they overlap
and thus the encodings must line up.
'''
df['type1'] = label_encoder.fit_transform(df['type1'])
df['type2'] = label_encoder.fit_transform(df['type2'])

In [89]:
df.head()

Unnamed: 0,attack,defense,hp,sp_attack,sp_defense,speed,base_total,type1,type2
0,49,49,45,65,65,45,318,9,13
1,62,63,60,80,80,60,405,9,13
2,100,123,80,122,120,80,625,9,13
3,52,43,39,60,50,65,309,6,18
4,64,58,58,80,65,80,405,6,18


In [90]:
df.dtypes

attack        int64
defense       int64
hp            int64
sp_attack     int64
sp_defense    int64
speed         int64
base_total    int64
type1         int64
type2         int64
dtype: object

In [91]:
'''
Datatypes are becoming an issue so lets convert to float 32 for ease of training purposes
'''

X = torch.tensor(df[['attack', 'defense', 'hp', 'sp_attack', 'sp_defense', 'speed', 'base_total']].values, dtype=torch.float32)
y = torch.tensor(df[['type1', 'type2']].values, dtype=torch.float32)

In [92]:
# Create a TensorDataset
dataset = TensorDataset(X, y)

# Define the batch size for your DataLoader
batch_size = 32  # Adjust this value based on your specific needs

# Create a DataLoader
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

The neural net

In [93]:
import torch.nn as nn
import torch.optim as optim

In [94]:
class MultiLabelClassifier(nn.Module):
    def __init__(self, input_dim):
        super(MultiLabelClassifier, self).__init__()
        self.fc = nn.Linear(input_dim, 2)  # 2 output units for two categories

    def forward(self, x):
        x = self.fc(x)
        return x

In [98]:

# # Sample DataFrame
# data = {
#     'attack': [80, 70, 90, 60, 75],
#     'defense': [70, 80, 65, 75, 85],
#     'hp': [60, 75, 80, 70, 65],
#     'sp_attack': [100, 90, 110, 95, 105],
#     'sp_defense': [80, 85, 75, 90, 70],
#     'speed': [70, 65, 80, 85, 95],
#     'base_total': [510, 500, 600, 490, 530],
#     'type1': ['Water', 'Fire', 'Grass', 'Electric', 'Rock'],
#     'type2': ['Ground', 'None', 'Poison', 'Flying', 'None']
# }

# df = pd.DataFrame(data)

# # Define your input features and target variables
# X = torch.tensor(df[['attack', 'defense', 'hp', 'sp_attack', 'sp_defense', 'speed', 'base_total']].values, dtype=torch.float32)

# # For multi-label classification, you can keep the target variables as integers
# y = torch.tensor(df[['type1', 'type2']].values, dtype=torch.int64)

# Split your data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# validation sets
# Split the temporary set into validation and test sets
X_val, X_val_test, y_val, y_val_test = train_test_split(X, y, test_size=0.5, random_state=73)

# Define a custom neural network model
class MultiLabelClassifier(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(MultiLabelClassifier, self).__init__()
        self.fc = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        x = self.fc(x)
        return x

# Create an instance of the model
input_dim = X.shape[1]  # Number of input features
output_dim = y.shape[1]  # Number of target variables
model = MultiLabelClassifier(input_dim, output_dim)

# Define a loss function and an optimizer
criterion = nn.CrossEntropyLoss()  # Cross-entropy loss for multi-label classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Create DataLoaders for training and testing
train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)
val_data = TensorDataset(X_val, y_val)
batch_size = 32

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_data, batch_size=batch_size)

# Initialize variables for early stopping
best_val_loss = float('inf')  # Set to positive infinity initially
patience = 5  # Number of epochs to wait for improvement
early_stopping_counter = 0  # Counter for patience

# Training loop
num_epochs = 1000
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0

    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        
        # Validation phase
    model.eval()
    with torch.no_grad():
        val_loss = 0.0
        for batch in val_loader:
            inputs, labels = batch
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
        val_loss /= len(val_loader)
        
    print(f"Epoch [{epoch + 1}/{num_epochs}] - Validation Loss: {val_loss:.4f}")
    
    # Check for early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        early_stopping_counter = 0
    else:
        early_stopping_counter += 1
    
    if early_stopping_counter >= patience:
        print(f"Early stopping after {patience} epochs without improvement.")
        break  # Exit the training loop

    print(f"Epoch {epoch + 1}, Loss: {total_loss / len(train_loader)}")

# After training, you can evaluate the model on the test data and make predictions.
model.eval()
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = model(inputs)
        # Perform evaluation or make predictions


Epoch [1/1000] - Validation Loss: 617.0262
Epoch 1, Loss: 836.8729766845703
Epoch [2/1000] - Validation Loss: 215.5016
Epoch 2, Loss: 412.2348518371582
Epoch [3/1000] - Validation Loss: 164.7538
Epoch 3, Loss: 162.99139518737792
Epoch [4/1000] - Validation Loss: 154.2097
Epoch 4, Loss: 152.1987049102783
Epoch [5/1000] - Validation Loss: 150.2054
Epoch 5, Loss: 144.42448196411132
Epoch [6/1000] - Validation Loss: 146.0616
Epoch 6, Loss: 141.0264965057373
Epoch [7/1000] - Validation Loss: 141.8393
Epoch 7, Loss: 137.1287063598633
Epoch [8/1000] - Validation Loss: 137.3720
Epoch 8, Loss: 133.36943473815919
Epoch [9/1000] - Validation Loss: 132.6318
Epoch 9, Loss: 129.54101371765137
Epoch [10/1000] - Validation Loss: 127.8958
Epoch 10, Loss: 124.46048278808594
Epoch [11/1000] - Validation Loss: 122.9370
Epoch 11, Loss: 119.68153266906738
Epoch [12/1000] - Validation Loss: 118.1330
Epoch 12, Loss: 114.90026206970215
Epoch [13/1000] - Validation Loss: 112.9648
Epoch 13, Loss: 110.63207664489

In [100]:
# Validation

# Assuming you have your trained model and a validation dataset loaded
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

# Set the model to evaluation mode (important to turn off dropout, batch normalization, etc.)
model.eval()

correct = 0
total = 0

with torch.no_grad():
    for data in val_loader:
        inputs, labels = data
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
# have to figure out the datatypes thing
print(f'Validation Accuracy: {accuracy:.2f}%')

RuntimeError: The size of tensor a (32) must match the size of tensor b (2) at non-singleton dimension 1