In [2]:
# Mount Google Drive at /content/drive; a later cell writes its output
# under /content/drive/MyDrive/.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the MovieLens 100K ratings file (tab-separated; column names are
# supplied here) and discard the timestamp column, which is not used below.
column_names = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv(
    'https://files.grouplens.org/datasets/movielens/ml-100k/u.data',
    sep='\t',
    names=column_names,
).drop(columns='timestamp')

# Hold out 20% of the rating rows as the test set; the fixed random_state
# makes the split repeatable across runs.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)


In [7]:
import os  # Ensure this is at the very top
import torch
from torch import nn, optim
import pandas as pd

# Define the Autoencoder model
class Autoencoder(nn.Module):
    """Fully connected autoencoder over a fixed-width input vector.

    The encoder narrows ``num_items`` inputs through 128 and 64 units down to
    a 32-unit code; the decoder mirrors those widths back up to ``num_items``
    outputs. A ReLU follows every linear layer except the last decoder layer,
    whose output is returned without an activation.
    """

    def __init__(self, num_items):
        """Build the encoder and decoder stacks.

        Args:
            num_items: Width of the input vector and of the reconstruction.
        """
        super().__init__()
        widths = (num_items, 128, 64, 32)

        # Encoder: one Linear + ReLU pair per consecutive pair of widths.
        down = []
        for n_in, n_out in zip(widths, widths[1:]):
            down += [nn.Linear(n_in, n_out), nn.ReLU()]
        self.encoder = nn.Sequential(*down)

        # Decoder: the same widths walked in reverse. The trailing ReLU is
        # dropped so the final layer stays linear.
        reversed_widths = widths[::-1]
        up = []
        for n_in, n_out in zip(reversed_widths, reversed_widths[1:]):
            up += [nn.Linear(n_in, n_out), nn.ReLU()]
        self.decoder = nn.Sequential(*up[:-1])

    def forward(self, x):
        """Encode ``x`` to the 32-unit code and decode it back to input width."""
        return self.decoder(self.encoder(x))

def autoencoder_model(train_data, test_data, output_dir, num_epochs=50, learning_rate=0.001):
    """Train an autoencoder on the training ratings and write test-set predictions to CSV.

    The training ratings are pivoted into a user x item matrix (unrated cells
    filled with 0). An ``Autoencoder`` is fitted to reconstruct that matrix
    with full-batch Adam steps on an MSE loss, and the reconstruction is then
    read off at every (user_id, item_id) pair in ``test_data``.

    Args:
        train_data: DataFrame with 'user_id', 'item_id' and 'rating' columns,
            holding at most one row per (user_id, item_id) pair
            (``DataFrame.pivot`` raises on duplicates).
        test_data: DataFrame with 'user_id' and 'item_id' columns to predict for.
        output_dir: Directory that receives 'autoencoder_predictions.csv'.
        num_epochs: Number of full-batch training steps.
        learning_rate: Learning rate passed to Adam.

    Returns:
        None. The predictions are written to
        ``<output_dir>/autoencoder_predictions.csv`` with columns 'user_id',
        'item_id' and 'predicted_rating'. Pairs whose user or item does not
        occur in ``train_data`` get an empty prediction.
    """
    # One row per distinct training user_id, one column per distinct training
    # item_id. The row/column labels are kept so ids can be mapped back to
    # matrix positions later.
    ratings = train_data.pivot(index='user_id', columns='item_id', values='rating').fillna(0)
    user_item_matrix = ratings.values
    num_items = user_item_matrix.shape[1]

    model = Autoencoder(num_items)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()

    # The input never changes between epochs, so build the tensor once.
    inputs = torch.FloatTensor(user_item_matrix)

    # Training
    # NOTE(review): the loss covers every cell, so unrated cells (filled with
    # 0) are treated as targets of 0. Confirm this is intended; a loss
    # restricted to observed ratings is the usual alternative.
    for _ in range(num_epochs):
        outputs = model(inputs)
        loss = criterion(outputs, inputs)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Prediction: reconstruct the whole matrix in a single forward pass
    # instead of re-running the model for every test row.
    model.eval()
    with torch.no_grad():
        reconstructed = model(inputs).numpy()

    # Translate ids to matrix positions through the pivot's own labels.
    # A position is NOT `id - 1` whenever some id is absent from the training
    # split, so indexing by `id - 1` would read another item's column.
    # get_indexer returns -1 for ids that never appeared in training.
    user_pos = ratings.index.get_indexer(test_data['user_id'].to_numpy())
    item_pos = ratings.columns.get_indexer(test_data['item_id'].to_numpy())

    predictions = [
        (user, item, reconstructed[u, i] if u >= 0 and i >= 0 else None)
        for user, item, u, i in zip(
            test_data['user_id'], test_data['item_id'], user_pos, item_pos
        )
    ]

    # Save predictions; os.path.join works with or without a trailing
    # separator on output_dir.
    pd.DataFrame(predictions, columns=['user_id', 'item_id', 'predicted_rating']).to_csv(
        os.path.join(output_dir, 'autoencoder_predictions.csv'), index=False
    )

# Folder on the mounted Drive that receives autoencoder_predictions.csv.
output_dir = '/content/drive/MyDrive/submission/'
# Create the folder if it is missing; exist_ok=True makes re-running this cell safe.
os.makedirs(output_dir, exist_ok=True)
# Train on the training split and write predictions for the test split.
autoencoder_model(train_df, test_df, output_dir)
