In [11]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch import nn, optim
import torch.nn.functional as F
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, f1_score

In [12]:
# Cargar los datasets
train_df = pd.read_csv('ccmusic/train/features.csv')
test_df = pd.read_csv('ccmusic/test/features.csv')
validation_df = pd.read_csv('ccmusic/validation/features.csv')

# Eliminar columnas no necesarias
train_df.drop(["audio_file"], axis=1, inplace=True)
test_df.drop(["audio_file"], axis=1, inplace=True)
validation_df.drop(["audio_file"], axis=1, inplace=True)
# Codificar las etiquetas
label_encoder = LabelEncoder()
train_df['label'] = label_encoder.fit_transform(train_df['label'])
test_df['label'] = label_encoder.transform(test_df['label'])
validation_df['label'] = label_encoder.transform(validation_df['label'])

# Separar las características y las etiquetas
X_train = train_df.drop('label', axis=1).values
y_train = train_df['label'].values
X_test = test_df.drop('label', axis=1).values
y_test = test_df['label'].values
X_val = validation_df.drop('label', axis=1).values
y_val = validation_df['label'].values

# Normalizar los datos
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_val = scaler.transform(X_val)

# Convertir a tensores de PyTorch
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

# Crear DataLoaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

In [14]:
# Definición del modelo
class Classifier(nn.Module):
    def __init__(self, num_features, num_classes):
        super(Classifier, self).__init__()
        self.fc1 = nn.Linear(num_features, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

num_features = X_train.shape[1]
num_classes = len(np.unique(y_train))
model = Classifier(num_features, num_classes)
print(model)

# Configuración de la función de pérdida y el optimizador
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

Classifier(
  (fc1): Linear(in_features=21, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=2, bias=True)
)


In [15]:
# Función de entrenamiento con early stopping
def train_model(model, train_loader, val_loader, optimizer, criterion, num_epochs, patience=5):
    best_loss = float('inf')
    patience_counter = 0

    for epoch in range(num_epochs):
        model.train()
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
        val_loss /= len(val_loader)

        print(f'Epoch {epoch + 1}, Training Loss: {loss.item()}, Validation Loss: {val_loss}')

        if val_loss < best_loss:
            best_loss = val_loss
            patience_counter = 0
            torch.save(model.state_dict(), 'best_model.pth')
        else:
            patience_counter += 1
        if patience_counter >= patience:
            print('Stopping early due to no improvement')
            break

train_model(model, train_loader, val_loader, optimizer, criterion, num_epochs=50)

# Cargar el mejor modelo para evaluación
model.load_state_dict(torch.load('best_model.pth'))

Epoch 1, Training Loss: 0.19214338064193726, Validation Loss: 0.24798855185508728
Epoch 2, Training Loss: 0.09617410600185394, Validation Loss: 0.1489162395397822
Epoch 3, Training Loss: 0.06668626517057419, Validation Loss: 0.125129667421182
Epoch 4, Training Loss: 0.06241731345653534, Validation Loss: 0.11546906580527623
Epoch 5, Training Loss: 0.06514333933591843, Validation Loss: 0.10933728764454524
Epoch 6, Training Loss: 0.061320483684539795, Validation Loss: 0.10551605621973674
Epoch 7, Training Loss: 0.07443602383136749, Validation Loss: 0.10577847560246785
Epoch 8, Training Loss: 0.05509546771645546, Validation Loss: 0.09459401667118073
Epoch 9, Training Loss: 0.048574186861515045, Validation Loss: 0.09193692977229755
Epoch 10, Training Loss: 0.052683793008327484, Validation Loss: 0.09190963084499042
Epoch 11, Training Loss: 0.040546663105487823, Validation Loss: 0.08386822541554768
Epoch 12, Training Loss: 0.0399075448513031, Validation Loss: 0.08391079927484195
Epoch 13, Tra

<All keys matched successfully>

In [16]:
def evaluate_model(loader):
    model.eval()  # Poner el modelo en modo evaluación
    predictions, labels_list = [], []

    with torch.no_grad():
        for inputs, labels in loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            predictions.extend(predicted.cpu().numpy())  # Guarda las predicciones para calcular el F1
            labels_list.extend(labels.cpu().numpy())  # Guarda las etiquetas verdaderas

    accuracy = accuracy_score(labels_list, predictions) * 100
    f1 = f1_score(labels_list, predictions, average='weighted')  # Puedes cambiar 'weighted' por 'macro' o 'micro' según tus necesidades

    print(f'Accuracy on test set: {accuracy:.2f}%')
    print(f'F1 Score on test set: {f1:.2f}')

# Llamamos a la función de evaluación con el conjunto de prueba
evaluate_model(test_loader)

Accuracy on test set: 98.26%
F1 Score on test set: 0.98
