# CollabCraft

In [12]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import cosine
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

### USER-USER COLLABORATIVE FILTERING

In [2]:
# Read just the three columns used below, limited to the first 500 rows of the file.
ratings_df = pd.read_csv(
    "/kaggle/input/movielens-20m-dataset/rating.csv",
    usecols=["userId", "movieId", "rating"],
    nrows=500,
)

# Store both identifier columns as pandas categoricals.
ratings_df = ratings_df.astype({'userId': 'category', 'movieId': 'category'})

# Wide layout: one row per userId, one column per movieId.
# (user, movie) pairs without a rating are filled with 0.
user_item_matrix = (
    ratings_df
    .pivot(index='userId', columns='movieId', values='rating')
    .fillna(0)
)

def user_user_collaborative_filtering(user_item_matrix, user_id, k=2):
    """Predict ratings for ``user_id`` from the ``k`` most similar users.

    Every other row of ``user_item_matrix`` is scored against the target
    user's row with cosine similarity.  The prediction is the
    similarity-weighted average of the rows of the ``k`` best-scoring users.

    Args:
        user_item_matrix: DataFrame with one row per user and one column per
            item, holding numeric ratings.
        user_id: Row label of the user to predict for.
        k: Number of nearest neighbours to average over.  Values below 1
            select no neighbours.

    Returns:
        A ``pandas.Series`` of predicted ratings, indexed by the columns of
        ``user_item_matrix``.  All zeros when no neighbour has a positive
        total similarity (e.g. the target row is entirely zero).

    Raises:
        KeyError: If ``user_id`` is not a row label of ``user_item_matrix``.
    """
    target_vector = user_item_matrix.loc[user_id].to_numpy(dtype=float)
    target_is_zero = not target_vector.any()

    similarities = []
    for other_id, row in user_item_matrix.iterrows():
        if other_id == user_id:
            continue
        other_vector = row.to_numpy(dtype=float)
        if target_is_zero or not other_vector.any():
            # Cosine similarity is undefined for a zero vector; score it 0
            # instead of letting NaN leak into the ranking and the average.
            similarity = 0.0
        else:
            similarity = 1 - cosine(target_vector, other_vector)
            if not np.isfinite(similarity):
                similarity = 0.0
        similarities.append((other_id, similarity))

    # Stable sort: equally similar users keep their matrix order.
    similarities.sort(key=lambda pair: pair[1], reverse=True)
    top_k_similarities = similarities[:max(k, 0)]

    weighted_sum = np.zeros(user_item_matrix.shape[1], dtype=float)
    similarity_sum = 0.0
    for similar_user_id, similarity in top_k_similarities:
        neighbour_vector = user_item_matrix.loc[similar_user_id].to_numpy(dtype=float)
        weighted_sum += similarity * neighbour_vector
        similarity_sum += similarity

    if similarity_sum == 0:
        # No usable neighbour: avoid 0/0 and report "no prediction" as zeros.
        predicted = np.zeros_like(weighted_sum)
    else:
        predicted = weighted_sum / similarity_sum

    # Build the Series explicitly so the return type does not depend on how
    # the installed pandas handles ``ndarray += Series``.
    return pd.Series(predicted, index=user_item_matrix.columns)

# Demo: run the user-user predictor for user 1 with its default k and show the result.
user_id = 1
predicted_ratings_user_user = user_user_collaborative_filtering(
    user_item_matrix=user_item_matrix,
    user_id=user_id,
)
print("Predicted ratings for user", user_id, ":", predicted_ratings_user_user)

Predicted ratings for user 1 : movieId
1        2.869406
2        0.000000
3        1.130594
6        0.000000
10       0.000000
           ...   
8507     0.000000
8636     0.000000
8690     0.000000
8961     0.000000
31696    0.000000
Name: 3, Length: 415, dtype: float64


### ITEM-ITEM COLLABORATIVE FILTERING

In [3]:
def item_item_collaborative_filtering(user_item_matrix, movie_id, k=2):
    """Predict ratings for ``movie_id`` from the ``k`` most similar movies.

    Every other column of ``user_item_matrix`` is scored against the target
    movie's column with cosine similarity.  The prediction is the
    similarity-weighted average of the columns of the ``k`` best-scoring
    movies.

    Args:
        user_item_matrix: DataFrame with one row per user and one column per
            movie, holding numeric ratings.
        movie_id: Column label of the movie to predict for.
        k: Number of nearest neighbours to average over.  Values below 1
            select no neighbours.

    Returns:
        A ``pandas.Series`` of predicted ratings, indexed by the rows of
        ``user_item_matrix``.  All zeros when no neighbour has a positive
        total similarity (e.g. the target column is entirely zero).

    Raises:
        KeyError: If ``movie_id`` is not a column label of
            ``user_item_matrix``.
    """
    target_vector = user_item_matrix[movie_id].to_numpy(dtype=float)
    target_is_zero = not target_vector.any()

    similarities = []
    for column in user_item_matrix.columns:
        if column == movie_id:
            continue
        other_vector = user_item_matrix[column].to_numpy(dtype=float)
        if target_is_zero or not other_vector.any():
            # Cosine similarity is undefined for a zero vector; score it 0
            # instead of letting NaN leak into the ranking and the average.
            similarity = 0.0
        else:
            similarity = 1 - cosine(target_vector, other_vector)
            if not np.isfinite(similarity):
                similarity = 0.0
        similarities.append((column, similarity))

    # Stable sort: equally similar movies keep their column order.
    similarities.sort(key=lambda pair: pair[1], reverse=True)
    top_k_similarities = similarities[:max(k, 0)]

    weighted_sum = np.zeros(user_item_matrix.shape[0], dtype=float)
    similarity_sum = 0.0
    for similar_movie_id, similarity in top_k_similarities:
        neighbour_vector = user_item_matrix[similar_movie_id].to_numpy(dtype=float)
        weighted_sum += similarity * neighbour_vector
        similarity_sum += similarity

    if similarity_sum == 0:
        # No usable neighbour: avoid 0/0 and report "no prediction" as zeros.
        predicted = np.zeros_like(weighted_sum)
    else:
        predicted = weighted_sum / similarity_sum

    # Build the Series explicitly so the return type does not depend on how
    # the installed pandas handles ``ndarray += Series``.
    return pd.Series(predicted, index=user_item_matrix.index)

# Demo: run the item-item predictor for movie 1 with its default k and show the result.
movie_id = 1
predicted_ratings_item_item = item_item_collaborative_filtering(
    user_item_matrix=user_item_matrix,
    movie_id=movie_id,
)
print("Predicted ratings for movie", movie_id, ":", predicted_ratings_item_item)

Predicted ratings for movie 1 : userId
1    0.0
2    0.0
3    3.5
4    0.0
5    0.0
Name: 173, dtype: float64


### Unit test for USER-USER COLLABORATIVE FILTERING

In [7]:
from numpy import testing as np_testing


def test_user_user_collaborative_filtering():
    """Check user-user predictions for user 1 on a 3-user, 4-movie fixture.

    Only the first three predicted values are compared, with an absolute
    tolerance of 0.1.
    """
    ratings_df = pd.DataFrame({
        'userId': [1, 1, 2, 2, 3, 3],
        'movieId': [1, 2, 1, 3, 2, 4],
        'rating': [4, 3, 5, 2, 4, 1],
    })
    for id_column in ('userId', 'movieId'):
        ratings_df[id_column] = ratings_df[id_column].astype('category')

    # Users as rows, movies as columns; missing ratings become 0.
    pivoted = ratings_df.pivot(index='userId', columns='movieId', values='rating')
    user_item_matrix = pivoted.fillna(0)

    expected_ratings = pd.Series([2.8, 1.76, 1.12])
    predicted_ratings = user_user_collaborative_filtering(user_item_matrix, 1)

    np_testing.assert_allclose(
        predicted_ratings.iloc[:3],
        expected_ratings,
        atol=1e-1,
    )


if __name__ == '__main__':
    test_user_user_collaborative_filtering()

### Unit test for ITEM-ITEM COLLABORATIVE FILTERING

In [11]:
def test_item_item_collaborative_filtering():
    """Check item-item predictions for movie 1 on a 3-user, 4-movie fixture.

    All predicted values are compared, with an absolute tolerance of 0.1.
    """
    ratings_df = pd.DataFrame({
        'userId': [1, 1, 2, 2, 3, 3],
        'movieId': [1, 2, 1, 3, 2, 4],
        'rating': [4, 3, 5, 2, 4, 1],
    })
    for id_column in ('userId', 'movieId'):
        ratings_df[id_column] = ratings_df[id_column].astype('category')

    # Users as rows, movies as columns; missing ratings become 0.
    pivoted = ratings_df.pivot(index='userId', columns='movieId', values='rating')
    user_item_matrix = pivoted.fillna(0)

    expected_ratings = pd.Series([0.97, 1.35, 1.29])
    predicted_ratings = item_item_collaborative_filtering(user_item_matrix, 1)

    np_testing.assert_allclose(
        predicted_ratings,
        expected_ratings,
        atol=1e-1,
    )


if __name__ == '__main__':
    test_item_item_collaborative_filtering()