In [227]:
import os
import sys
import time

# Third-party libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Add the project's 'src' directory to sys.path so `neural_network` can be imported
sys.path.append(os.path.abspath('../src'))

# Project-local modules (must come after the sys.path tweak above)
from neural_network.utils import one_hot_encode, normalize_data, plot_confusion_matrix, plot_learning_curve_with_accuracy
from neural_network.losses import CategoricalCrossEntropyLoss
from neural_network.layers.dense_layer import DenseLayer
from neural_network.layers.dropout_layer import DropoutLayer
from neural_network.optimizer import GradientDescent

In [228]:
# Read the raw dataset from its CSV file
csv_path = '../data/multiclass/online_gaming_behavior.csv'
data = pd.read_csv(csv_path)

# Preview the first rows of the dataset
data.head()

Unnamed: 0,PlayerID,Age,Gender,Location,GameGenre,PlayTimeHours,InGamePurchases,GameDifficulty,SessionsPerWeek,AvgSessionDurationMinutes,PlayerLevel,AchievementsUnlocked,EngagementLevel
0,9000,43,Male,Other,Strategy,16.271119,0,Medium,6,108,79,25,Medium
1,9001,29,Female,USA,Strategy,5.525961,0,Medium,5,144,11,10,Medium
2,9002,22,Female,USA,Sports,8.223755,0,Easy,16,142,35,41,High
3,9003,35,Male,USA,Action,5.265351,1,Easy,9,85,57,47,Medium
4,9004,33,Male,Europe,Action,15.531945,0,Medium,2,131,95,37,Medium


In [229]:
# Categorical columns to one-hot encode (the target, EngagementLevel, is included)
categorical_columns = ['Gender', 'Location', 'GameGenre', 'GameDifficulty', 'EngagementLevel']

# Convert every column to pandas' categorical dtype up front so all of them can be
# validated before `data` is modified.
categorical_series = {column: data[column].astype('category') for column in categorical_columns}

# pandas encodes a missing value as code -1. Counting unique codes would then treat
# -1 as an extra class and hand a negative index to one_hot_encode, so fail early
# with a clear message instead of silently producing a wrong encoding.
columns_with_missing = [column for column, series in categorical_series.items() if series.isna().any()]
if columns_with_missing:
    raise ValueError(
        f"Categorical columns contain missing values: {columns_with_missing}. "
        "Impute or drop them before encoding."
    )

# Label behind each integer code, per column (position i is the label for code i).
# Codes follow the category order, which pandas sorts for inferred categories.
category_labels = {column: list(series.cat.categories) for column, series in categorical_series.items()}

# Number of classes for each categorical column
num_classes = {column: len(labels) for column, labels in category_labels.items()}

# Apply one-hot encoding
for column in categorical_columns:
    codes = categorical_series[column].cat.codes.values
    one_hot_encoded = one_hot_encode(codes, num_classes[column])

    # Add one indicator column per class: "<column>_<code>"
    for i in range(num_classes[column]):
        data[f"{column}_{i}"] = one_hot_encoded[:, i]

    # Drop the original categorical column
    data = data.drop(column, axis=1)

# Show the transformed dataset
data.head()

Unnamed: 0,PlayerID,Age,PlayTimeHours,InGamePurchases,SessionsPerWeek,AvgSessionDurationMinutes,PlayerLevel,AchievementsUnlocked,Gender_0,Gender_1,...,GameGenre_1,GameGenre_2,GameGenre_3,GameGenre_4,GameDifficulty_0,GameDifficulty_1,GameDifficulty_2,EngagementLevel_0,EngagementLevel_1,EngagementLevel_2
0,9000,43,16.271119,0,6,108,79,25,0.0,1.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
1,9001,29,5.525961,0,5,144,11,10,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
2,9002,22,8.223755,0,16,142,35,41,1.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
3,9003,35,5.265351,1,9,85,57,47,0.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
4,9004,33,15.531945,0,2,131,95,37,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0


In [230]:
# One-hot target columns produced by the encoding step
target_columns = ['EngagementLevel_0', 'EngagementLevel_1', 'EngagementLevel_2']

# Inputs: every column except the player identifier and the target indicators
features = data.drop(columns=['PlayerID'] + target_columns)

# Outputs: recover the class index from the indicator columns, then one-hot encode it
target_codes = data[target_columns].values.argmax(axis=1)
y = one_hot_encode(target_codes, 3)

# Normalize the input features
# NOTE(review): normalization runs on the full dataset before the split; if
# normalize_data derives statistics from its input, test rows influence them — confirm.
X = normalize_data(features.values)

# Hold out 20% of the rows for testing (fixed seed for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [231]:
class NeuralNetwork:
    """Sequential container that chains layers with a loss and an optimizer.

    Layers are used through ``forward``, ``backward``, ``get_parameters``,
    ``get_gradients`` and ``set_parameters``; the loss through ``backward``;
    the optimizer through ``update``.
    """

    def __init__(self, layers, loss, optimizer):
        """Store the ordered layers, the loss object and the optimizer."""
        self.layers = layers
        self.loss = loss
        self.optimizer = optimizer

    def forward(self, X):
        """Feed ``X`` through every layer in order and return the last output."""
        activations = X
        for stage in self.layers:
            activations = stage.forward(activations)
        return activations

    def backward(self, y_true, y_pred):
        """Propagate the loss gradient through the layers, last layer first.

        Each layer receives the gradient returned by the layer after it.
        Nothing is returned.
        """
        upstream = self.loss.backward(y_true, y_pred)
        for position in range(len(self.layers) - 1, -1, -1):
            upstream = self.layers[position].backward(upstream)

    def update(self):
        """Ask the optimizer for new weights/biases for each parameterised layer.

        Layers whose ``get_parameters()`` result is falsy are skipped.
        """
        for layer in self.layers:
            current = layer.get_parameters()
            if not current:
                continue

            gradients = layer.get_gradients()
            new_weights, new_biases = self.optimizer.update(
                current['weights'], current['biases'],
                gradients['weights'], gradients['biases'],
            )
            layer.set_parameters({'weights': new_weights, 'biases': new_biases})

In [232]:
# Network dimensions: input width from the features, output width from the one-hot targets
input_size = X_train.shape[1]
output_size = y_train.shape[1]

# Tunable hyperparameters
hidden_units = 36
dropout_rate = 0.1
learning_rate = 0.01

# Architecture: one hidden dense layer, dropout, then a softmax output layer
hidden_layer = DenseLayer(input_size=input_size, output_size=hidden_units, activation='leaky_relu')
dropout_layer = DropoutLayer(rate=dropout_rate)
output_layer = DenseLayer(input_size=hidden_units, output_size=output_size, activation='softmax')
layers = [hidden_layer, dropout_layer, output_layer]

# Loss function and optimizer
loss = CategoricalCrossEntropyLoss()
optimizer = GradientDescent(learning_rate=learning_rate)

# Assemble the model
model = NeuralNetwork(layers=layers, loss=loss, optimizer=optimizer)

In [233]:
# perf_counter is a monotonic clock meant for measuring intervals; unlike time.time()
# it is not affected by system clock adjustments made while training runs.
start_time = time.perf_counter()

# Train the neural network: every epoch runs one forward/backward pass over all of
# X_train followed by a single parameter update.
epochs = 1600
log_every = 50  # report metrics once every `log_every` epochs

for epoch in range(epochs):
    y_pred = model.forward(X_train)
    loss_value = loss.calculate(y_train, y_pred)
    model.backward(y_train, y_pred)
    model.update()

    # Metrics come from the same forward pass that was used for this update
    if (epoch + 1) % log_every == 0:
        predictions = y_pred.argmax(axis=1)
        y_true = y_train.argmax(axis=1)
        acc = accuracy_score(y_true, predictions)
        print(f"Epoch {epoch + 1}, Loss: {loss_value:.4f}, Accuracy: {acc:.4f}")

# NOTE(review): in the recorded run the loss falls until roughly epoch 600 and then
# rises steadily while accuracy keeps improving — worth checking the loss computation
# and the learning rate before trusting these numbers.
elapsed_time = time.perf_counter() - start_time
print(f"\nLearning Rate: {optimizer.learning_rate} \nCamadas: {len(model.layers)} \nElapsed Time: {elapsed_time:.2f}s")


Epoch 50, Loss: 1.3517, Accuracy: 0.3954
Epoch 100, Loss: 1.1992, Accuracy: 0.4415
Epoch 150, Loss: 1.1173, Accuracy: 0.4746
Epoch 200, Loss: 1.0593, Accuracy: 0.4998
Epoch 250, Loss: 1.0120, Accuracy: 0.5206
Epoch 300, Loss: 0.9724, Accuracy: 0.5412
Epoch 350, Loss: 0.9400, Accuracy: 0.5629
Epoch 400, Loss: 0.9144, Accuracy: 0.5821
Epoch 450, Loss: 0.8953, Accuracy: 0.6014
Epoch 500, Loss: 0.8823, Accuracy: 0.6176
Epoch 550, Loss: 0.8752, Accuracy: 0.6332
Epoch 600, Loss: 0.8740, Accuracy: 0.6480
Epoch 650, Loss: 0.8786, Accuracy: 0.6589
Epoch 700, Loss: 0.8892, Accuracy: 0.6696
Epoch 750, Loss: 0.9058, Accuracy: 0.6767
Epoch 800, Loss: 0.9283, Accuracy: 0.6845
Epoch 850, Loss: 0.9569, Accuracy: 0.6910
Epoch 900, Loss: 0.9914, Accuracy: 0.6970
Epoch 950, Loss: 1.0317, Accuracy: 0.7010
Epoch 1000, Loss: 1.0781, Accuracy: 0.7056
Epoch 1050, Loss: 1.1302, Accuracy: 0.7086
Epoch 1100, Loss: 1.1885, Accuracy: 0.7105
Epoch 1150, Loss: 1.2531, Accuracy: 0.7118
Epoch 1200, Loss: 1.3242, Accur

In [None]:
# Predict on the held-out test split and reduce one-hot rows to class indices
# NOTE(review): the model contains a DropoutLayer — confirm it is inactive during
# this inference pass.
y_pred_test = model.forward(X_test)
predictions_test = np.argmax(y_pred_test, axis=1)
y_true_test = np.argmax(y_test, axis=1)

# Per-class precision/recall/F1 and overall accuracy
report = classification_report(y_true_test, predictions_test)
accuracy = accuracy_score(y_true_test, predictions_test)

print("\nClassification Report:")
print(report)
print(f"Test Accuracy: {accuracy:.4f}")

In [None]:
# Plot the confusion matrix for the test split.
# Class indices come from the alphabetical category codes assigned when
# EngagementLevel was encoded (0 = High, 1 = Low, 2 = Medium), so the labels are
# listed in that order — assuming the helper applies them positionally to class indices.
# NOTE(review): y_test is one-hot while predictions_test holds class indices —
# confirm plot_confusion_matrix accepts that combination.
print("Confusion Matrix:")
plot_confusion_matrix(y_test, predictions_test, labels=["High", "Low", "Medium"])

In [None]:
# Plot the learning curve with the plot_learning_curve_with_accuracy helper.
# NOTE(review): `accuracy` is the scalar test accuracy computed in the evaluation
# cell — confirm that is what the helper expects as its fourth argument.
plot_learning_curve_with_accuracy(model, X_train, y_train, accuracy)