In [2]:
# Mount Google Drive at /content/drive so later cells can write files under that path.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Column layout of the tab-separated MovieLens 100K ratings file (u.data)
column_names = ['user_id', 'item_id', 'rating', 'timestamp']

# Read the ratings and discard the timestamp column in the same expression
df = pd.read_csv(
    'https://files.grouplens.org/datasets/movielens/ml-100k/u.data',
    sep='\t',
    names=column_names,
).drop(columns='timestamp')

# Hold out 20% of the ratings for testing; the fixed random_state keeps the split repeatable
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)


In [5]:
import os  # Add this line to import the os library
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd

def collaborative_filtering(train_data, test_data, output_dir):
    """Predict test ratings with user-based collaborative filtering and save them as CSV.

    A prediction for (user, item) is the similarity-weighted average of the
    ratings that other users gave to ``item`` in the training data, where
    similarity is the cosine similarity between users' rating vectors.

    Args:
        train_data: DataFrame with 'user_id', 'item_id' and 'rating' columns.
            A rating of 0 is treated as "not rated".
        test_data: DataFrame with 'user_id' and 'item_id' columns naming the
            pairs to predict.
        output_dir: Directory in which
            'collaborative_filtering_predictions.csv' is written. It must
            already exist.

    Returns:
        DataFrame with columns 'user_id', 'item_id', 'predicted_rating', one
        row per test pair whose user AND item both occur in the training
        data (other pairs are skipped). The same frame is written to the CSV.

    Raises:
        ValueError: if train_data contains more than one rating for the same
            (user_id, item_id) pair (raised by ``DataFrame.pivot``).
    """
    # Create user-item matrix (missing ratings become 0)
    user_item_matrix = train_data.pivot(index='user_id', columns='item_id', values='rating').fillna(0)
    user_similarity = cosine_similarity(user_item_matrix)
    # A user must not contribute to their own prediction
    np.fill_diagonal(user_similarity, 0)

    ratings = user_item_matrix.to_numpy(dtype=float)
    rated = (ratings > 0).astype(float)
    # Entry [u, i] of each product is the sum over all other users v of
    # sim(u, v) * rating(v, i), and of sim(u, v) restricted to users who rated i.
    weighted_sums = user_similarity @ ratings
    similarity_totals = user_similarity @ rated

    # Translate test ids into matrix positions; -1 marks ids unseen in training
    user_pos = user_item_matrix.index.get_indexer(test_data['user_id'])
    item_pos = user_item_matrix.columns.get_indexer(test_data['item_id'])
    known = (user_pos >= 0) & (item_pos >= 0)
    user_pos, item_pos = user_pos[known], item_pos[known]

    numerators = weighted_sums[user_pos, item_pos]
    denominators = similarity_totals[user_pos, item_pos]

    # Fallback when no similar user rated the item (denominator == 0): use the
    # user's own mean training rating instead of producing NaN/inf.
    user_means = (
        train_data.groupby('user_id')['rating'].mean()
        .reindex(user_item_matrix.index)
        .to_numpy(dtype=float)
    )
    predicted = user_means[user_pos]  # fancy indexing yields a fresh array
    np.divide(numerators, denominators, out=predicted, where=denominators != 0)

    predictions = pd.DataFrame({
        'user_id': test_data['user_id'].to_numpy()[known],
        'item_id': test_data['item_id'].to_numpy()[known],
        'predicted_rating': predicted,
    })

    # Save to CSV; os.path.join works with or without a trailing separator
    predictions.to_csv(
        os.path.join(output_dir, 'collaborative_filtering_predictions.csv'), index=False
    )
    return predictions

# Destination folder for the predictions CSV; create it (if missing) before running the model
output_dir = '/content/drive/MyDrive/submission/'
os.makedirs(output_dir, exist_ok=True)
collaborative_filtering(train_data=train_df, test_data=test_df, output_dir=output_dir)
