In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

datos = pd.read_feather("datos.feather")
columnas_a_eliminar = ["released_year", "released_month", "released_day", "track_name", "artist(s)_name"]
data = datos.drop(columnas_a_eliminar, axis=1)

data = pd.get_dummies(data)

X = data.drop("streams", axis=1).values
y = data["streams"].values.reshape(-1, 1)

scaler_X = StandardScaler()
scaler_y = StandardScaler()
X = scaler_X.fit_transform(X)
y = scaler_y.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

class CustomDataset(Dataset):
    def __init__(self, predictores, respuesta):
        self.predictores = torch.tensor(predictores, dtype=torch.float32)
        self.respuesta = torch.tensor(respuesta, dtype=torch.float32)
    
    def __len__(self):
        return len(self.respuesta)
    
    def __getitem__(self, idx):
        return self.predictores[idx], self.respuesta[idx]

train_dataset = CustomDataset(X_train, y_train)
test_dataset = CustomDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Definición del Modelo
class RegressionModel(nn.Module):
    def __init__(self, input_dim):
        super(RegressionModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 1)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = F.relu(x)
        x = self.fc3(x)
        return x

input_dim = X_train.shape[1]
model = RegressionModel(input_dim)

# Entrenamiento del Modelo
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 300

for epoch in range(num_epochs):
    model.train()
    for predictores, respuesta in train_loader:
        optimizer.zero_grad()
        outputs = model(predictores)
        loss = criterion(outputs, respuesta)
        loss.backward()
        optimizer.step()
    if (epoch+1) % 10 == 0:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")

model.eval()
y_true = []
y_pred = []
with torch.no_grad():
    for predictores, respuesta in test_loader:
        outputs = model(predictores)
        y_true.extend(respuesta.numpy())
        y_pred.extend(outputs.numpy())

y_true = scaler_y.inverse_transform(y_true)
y_pred = scaler_y.inverse_transform(y_pred)

r2 = r2_score(y_true, y_pred)
print(f"R^2 en el conjunto de prueba: {r2}")


Epoch 10/300, Loss: 0.13202418386936188
Epoch 20/300, Loss: 0.2934790849685669
Epoch 30/300, Loss: 0.16379229724407196
Epoch 40/300, Loss: 0.09083058685064316
Epoch 50/300, Loss: 0.11154761910438538
Epoch 60/300, Loss: 0.10939120501279831
Epoch 70/300, Loss: 0.09772422164678574
Epoch 80/300, Loss: 0.09612202644348145
Epoch 90/300, Loss: 0.11120454221963882
Epoch 100/300, Loss: 0.2430298924446106
Epoch 110/300, Loss: 0.07989563792943954
Epoch 120/300, Loss: 0.12170503288507462
Epoch 130/300, Loss: 0.1595032662153244
Epoch 140/300, Loss: 0.08169477432966232
Epoch 150/300, Loss: 0.04864491894841194
Epoch 160/300, Loss: 0.10377544164657593
Epoch 170/300, Loss: 0.05672154948115349
Epoch 180/300, Loss: 0.07346315681934357
Epoch 190/300, Loss: 0.17206841707229614
Epoch 200/300, Loss: 0.06297651678323746
Epoch 210/300, Loss: 0.027060816064476967
Epoch 220/300, Loss: 0.09915703535079956
Epoch 230/300, Loss: 0.05215732380747795
Epoch 240/300, Loss: 0.05722670629620552
Epoch 250/300, Loss: 0.0575