# DEEP LEARNING

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np

# Carga de datos
train_data = pd.read_csv(('data/train.csv'))
test_data = pd.read_csv(('data/test.csv'))

In [2]:
# Codificación de usuarios e ítems
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

train_users = torch.tensor(user_encoder.fit_transform(train_data['user']), dtype=torch.long)
train_items = torch.tensor(item_encoder.fit_transform(train_data['item']), dtype=torch.long)
train_ratings = torch.tensor(train_data['rating'].values, dtype=torch.float32)

# Mapeo de nuevos usuarios y items
user_mapping = {label: index for index, label in enumerate(user_encoder.classes_)}
item_mapping = {label: index for index, label in enumerate(item_encoder.classes_)}

In [3]:
def encode_with_mapping(values, mapping):
    return [mapping.get(val, len(mapping)) for val in values]  # Si no existe, asigna un nuevo índice


test_ids = torch.tensor(test_data['ID'].values, dtype=torch.long)
test_users = torch.tensor(encode_with_mapping(test_data['user'], user_mapping), dtype=torch.long)
test_items = torch.tensor(encode_with_mapping(test_data['item'], item_mapping), dtype=torch.long)

In [4]:
# Parámetros del modelo
n_users = len(user_encoder.classes_) + 1  # Añadimos 1 para usuarios desconocidos
n_items = len(item_encoder.classes_) + 1  # Añadimos 1 para ítems desconocidos
embedding_dim = 50

user_embedding = nn.Embedding(n_users, embedding_dim)
item_embedding = nn.Embedding(n_items, embedding_dim)

fc1 = nn.Linear(embedding_dim * 2, 128)
fc2 = nn.Linear(128, 64)
fc3 = nn.Linear(64, 1)

leaky_relu = nn.LeakyReLU(0.01)

criterion = nn.MSELoss()
optimizer = optim.Adam(list(user_embedding.parameters()) + list(item_embedding.parameters()) + list(fc1.parameters()) + list(fc2.parameters()) + list(fc3.parameters()), lr=0.001)

In [5]:
def forward(user, item):
    user_emb = user_embedding(user)
    item_emb = item_embedding(item)
    x = torch.cat([user_emb, item_emb], dim=1)
    x = leaky_relu(fc1(x))
    x = leaky_relu(fc2(x))
    x = fc3(x)
    return x.squeeze()

In [6]:
# Entrenamiento
batch_size = 64
n_epochs = 10

train_dataset = list(zip(train_users, train_items, train_ratings))
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

for epoch in range(n_epochs):
    epoch_loss = 0
    for users, items, ratings in train_loader:
        optimizer.zero_grad()
        predictions = forward(users, items)
        loss = criterion(predictions, ratings)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f'Epoch {epoch+1}/{n_epochs}, Loss: {epoch_loss/len(train_loader):.4f}')

KeyboardInterrupt: 

In [15]:
# Entrenamiento
model.train()
for epoch in range(n_epochs):
    total_loss = 0
    for batch in train_loader:
        users, items, ratings = batch
        users, items, ratings = users.to(device), items.to(device), ratings.to(device)

        inputs = torch.cat([users.unsqueeze(1).float(), items.unsqueeze(1).float()], dim=1)
        inputs = inputs.unsqueeze(1).repeat(1, 3, 1)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.view(-1), ratings)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f'Epoch [{epoch+1}/{epochs}], Loss: {total_loss/len(train_loader):.4f}')

RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[1, 64, 3, 2] to have 3 channels, but got 64 channels instead

In [None]:
# Predicción
model.eval()
predictions = []
with torch.no_grad():
    for i in range(0, len(test_users), batch_size):
        batch_users = test_users[i:i+batch_size]
        batch_items = test_items[i:i+batch_size]
        batch_ids = test_ids[i:i+batch_size]

        outputs = forward(batch_users, batch_items)
        predictions.extend(list(zip(batch_ids.numpy(), outputs.numpy())))

predictions_df = pd.DataFrame(predictions, columns=['ID', 'rating'])
predictions_df['rating'] = predictions_df['rating'].clip(1, 10)
predictions_df.to_csv('predictions.csv', index=False)

print('Predicciones guardadas en predictions.csv')

In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np

# Carga de datos
train_data = pd.read_csv(('data/train.csv'))
test_data = pd.read_csv(('data/test.csv'))

# Codificación de usuarios e ítems
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

train_users = torch.tensor(user_encoder.fit_transform(train_data['user']), dtype=torch.long)
train_items = torch.tensor(item_encoder.fit_transform(train_data['item']), dtype=torch.long)
train_ratings = torch.tensor(train_data['rating'].values, dtype=torch.float32)

# Mapeo de nuevos usuarios y items
user_mapping = {label: index for index, label in enumerate(user_encoder.classes_)}
item_mapping = {label: index for index, label in enumerate(item_encoder.classes_)}


def encode_with_mapping(values, mapping):
    return [mapping.get(val, len(mapping)) for val in values]  # Si no existe, asigna un nuevo índice


test_ids = torch.tensor(test_data['ID'].values, dtype=torch.long)
test_users = torch.tensor(encode_with_mapping(test_data['user'], user_mapping), dtype=torch.long)
test_items = torch.tensor(encode_with_mapping(test_data['item'], item_mapping), dtype=torch.long)

# Parámetros del modelo
n_users = len(user_encoder.classes_) + 1  # Añadimos 1 para usuarios desconocidos
n_items = len(item_encoder.classes_) + 1  # Añadimos 1 para ítems desconocidos
embedding_dim = 50

user_embedding = nn.Embedding(n_users, embedding_dim)
item_embedding = nn.Embedding(n_items, embedding_dim)

fc1 = nn.Linear(embedding_dim * 2, 128)
fc2 = nn.Linear(128, 64)
fc3 = nn.Linear(64, 1)

leaky_relu = nn.LeakyReLU(0.01)

criterion = nn.MSELoss()
optimizer = optim.Adam(list(user_embedding.parameters()) + list(item_embedding.parameters()) + list(fc1.parameters()) + list(fc2.parameters()) + list(fc3.parameters()), lr=0.001)


def forward(user, item):
    user_emb = user_embedding(user)
    item_emb = item_embedding(item)
    x = torch.cat([user_emb, item_emb], dim=1)
    x = leaky_relu(fc1(x))
    x = leaky_relu(fc2(x))
    x = fc3(x)
    return x.squeeze()


# Entrenamiento
batch_size = 64
n_epochs = 10

train_dataset = list(zip(train_users, train_items, train_ratings))
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

for epoch in range(n_epochs):
    epoch_loss = 0
    for users, items, ratings in train_loader:
        optimizer.zero_grad()
        predictions = forward(users, items)
        loss = criterion(predictions, ratings)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f'Epoch {epoch+1}/{n_epochs}, Loss: {epoch_loss/len(train_loader):.4f}')


# Predicción
model.eval()
predictions = []
with torch.no_grad():
    for i in range(0, len(test_users), batch_size):
        batch_users = test_users[i:i+batch_size]
        batch_items = test_items[i:i+batch_size]
        batch_ids = test_ids[i:i+batch_size]

        outputs = forward(batch_users, batch_items)
        predictions.extend(list(zip(batch_ids.numpy(), outputs.numpy())))

predictions_df = pd.DataFrame(predictions, columns=['ID', 'rating'])
predictions_df['rating'] = predictions_df['rating'].clip(1, 10)
predictions_df.to_csv('predictions.csv', index=False)

print('Predicciones guardadas en predictions.csv')


KeyboardInterrupt: 

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np

# Carga de datos
train_data = pd.read_csv(('data/train.csv'))
test_data = pd.read_csv(('data/test.csv'))

# Codificación de usuarios e ítems
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

train_users = torch.tensor(user_encoder.fit_transform(train_data['user']), dtype=torch.long)
train_items = torch.tensor(item_encoder.fit_transform(train_data['item']), dtype=torch.long)
train_ratings = torch.tensor(train_data['rating'].values, dtype=torch.long)  # Ahora es de tipo long para clasificación

# Mapeo de nuevos usuarios e ítems
user_mapping = {label: index for index, label in enumerate(user_encoder.classes_)}
item_mapping = {label: index for index, label in enumerate(item_encoder.classes_)}

def encode_with_mapping(values, mapping):
    return [mapping.get(val, len(mapping)) for val in values]  # Si no existe, asigna un nuevo índice

test_ids = torch.tensor(test_data['ID'].values, dtype=torch.long)
test_users = torch.tensor(encode_with_mapping(test_data['user'], user_mapping), dtype=torch.long)
test_items = torch.tensor(encode_with_mapping(test_data['item'], item_mapping), dtype=torch.long)

# Definir el modelo como una clase de red neuronal
class RecommenderNet(nn.Module):
    def __init__(self, n_users, n_items, embedding_dim):
        super(RecommenderNet, self).__init__()
        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.item_embedding = nn.Embedding(n_items, embedding_dim)
        self.fc1 = nn.Linear(embedding_dim * 2, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)  # 10 neuronas para las 10 calificaciones
        self.softmax = nn.Softmax(dim=1)

    def forward(self, user, item):
        user_emb = self.user_embedding(user)
        item_emb = self.item_embedding(item)
        x = torch.cat([user_emb, item_emb], dim=1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.softmax(x)  # Aplicamos softmax para obtener probabilidades
        return x

# Parámetros del modelo
n_users = len(user_encoder.classes_) + 1  # Añadimos 1 para usuarios desconocidos
n_items = len(item_encoder.classes_) + 1  # Añadimos 1 para ítems desconocidos
embedding_dim = 50

# Instanciar el modelo
model = RecommenderNet(n_users, n_items, embedding_dim)

# Configurar el optimizador y la función de pérdida
criterion = nn.CrossEntropyLoss()  # Usamos CrossEntropyLoss para clasificación
optimizer = optim.AdamW(model.parameters(), lr=0.005)  # Usamos AdamW y una tasa de aprendizaje más alta

# Entrenamiento
batch_size = 1024  # Usamos un tamaño de lote mayor para mejorar la eficiencia
n_epochs = 30  # Aumentamos el número de épocas

train_dataset = list(zip(train_users, train_items, train_ratings))
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)  # Ajustamos num_workers para optimizar la carga de datos

for epoch in range(n_epochs):
    epoch_loss = 0
    for users, items, ratings in train_loader:
        optimizer.zero_grad()
        predictions = model(users, items)
        loss = criterion(predictions, ratings - 1)  # Restamos 1 de las etiquetas para que estén en el rango 0-9
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f'Epoch {epoch+1}/{n_epochs}, Loss: {epoch_loss/len(train_loader):.4f}')


# Predicción
model.eval()  # Poner el modelo en modo evaluación
predictions = []
with torch.no_grad():
    for i in range(0, len(test_users), batch_size):
        batch_users = test_users[i:i+batch_size]
        batch_items = test_items[i:i+batch_size]
        batch_ids = test_ids[i:i+batch_size]

        outputs = model(batch_users, batch_items)
        predicted_ratings = torch.argmax(outputs, dim=1) + 1  # Obtenemos la calificación predicha (1-10)
        predictions.extend(list(zip(batch_ids.numpy(), predicted_ratings.numpy())))

predictions_df = pd.DataFrame(predictions, columns=['ID', 'rating'])
predictions_df.to_csv('submission.csv', index=False)

print('Predicciones guardadas en predictions.csv')


Epoch 1/30, Loss: 2.1777
Epoch 2/30, Loss: 2.1190
Epoch 3/30, Loss: 2.0582
Epoch 4/30, Loss: 2.0092
Epoch 5/30, Loss: 1.9761
Epoch 6/30, Loss: 1.9554
Epoch 7/30, Loss: 1.9405
Epoch 8/30, Loss: 1.9292
Epoch 9/30, Loss: 1.9200
Epoch 10/30, Loss: 1.9118
Epoch 11/30, Loss: 1.9042
Epoch 12/30, Loss: 1.8972
Epoch 13/30, Loss: 1.8900
Epoch 14/30, Loss: 1.8849
Epoch 15/30, Loss: 1.8796
Epoch 16/30, Loss: 1.8748
Epoch 17/30, Loss: 1.8700
Epoch 18/30, Loss: 1.8659
Epoch 19/30, Loss: 1.8626
Epoch 20/30, Loss: 1.8590
Epoch 21/30, Loss: 1.8555
Epoch 22/30, Loss: 1.8526
Epoch 23/30, Loss: 1.8501
Epoch 24/30, Loss: 1.8475
Epoch 25/30, Loss: 1.8451
Epoch 26/30, Loss: 1.8424
Epoch 27/30, Loss: 1.8395
Epoch 28/30, Loss: 1.8370
Epoch 29/30, Loss: 1.8337
Epoch 30/30, Loss: 1.8309
Predicciones guardadas en predictions.csv


In [9]:
predictions_df

Unnamed: 0,ID,rating
0,0,10
1,1,8
2,2,7
3,3,9
4,4,9
...,...,...
43315,43315,10
43316,43316,10
43317,43317,8
43318,43318,7
