In [8]:

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

def load_and_filter_data(ratings_path, min_ratings=3):
    """Read a ratings CSV and keep only rows for sufficiently active users and movies.

    Args:
        ratings_path: Path (or anything ``pd.read_csv`` accepts) to a CSV
            containing at least ``userId`` and ``movieId`` columns.
        min_ratings: Minimum number of rows a user *and* a movie must each
            have in the unfiltered file for their rows to be kept.

    Returns:
        The subset of rows whose user and movie both meet the threshold.
        The original row index is preserved.
    """
    df = pd.read_csv(ratings_path)

    def _frequent_ids(column):
        # Ids in `column` that occur at least `min_ratings` times.
        tally = df[column].value_counts()
        return tally[tally >= min_ratings].index

    # Both tallies are taken on the unfiltered data, so a kept user may end
    # up with fewer than `min_ratings` rows once sparse movies are removed
    # (and vice versa).
    keep_user = df['userId'].isin(_frequent_ids('userId'))
    keep_movie = df['movieId'].isin(_frequent_ids('movieId'))
    return df[keep_user & keep_movie]

def create_user_item_matrix(ratings_df):
    """Pivot long-format ratings into a dense user x movie table.

    Args:
        ratings_df: DataFrame with ``userId``, ``movieId`` and ``rating``
            columns.

    Returns:
        DataFrame indexed by ``userId`` with one column per ``movieId``.
        Repeated (user, movie) pairs are combined by ``pivot_table``'s
        default aggregation; pairs with no rating are filled with 0.
    """
    pivoted = pd.pivot_table(
        ratings_df,
        values='rating',
        index='userId',
        columns='movieId',
    )
    # Missing ratings become 0 so the table can be used as a numeric matrix.
    return pivoted.fillna(0)

def compute_similarity(matrix, kind='user'):
    """Return a labelled cosine-similarity table for the rows or columns of ``matrix``.

    Args:
        matrix: User-item DataFrame (users on the index, items on the columns).
        kind: ``'user'`` to compare rows, ``'item'`` to compare columns.

    Returns:
        Square DataFrame whose index and columns are both the user labels
        (``kind='user'``) or the item labels (``kind='item'``).

    Raises:
        ValueError: If ``kind`` is neither ``'user'`` nor ``'item'``.
    """
    if kind not in ('user', 'item'):
        raise ValueError("Invalid kind. Choose 'user' or 'item'.")

    # Orient the data so the entities being compared are always the rows.
    oriented = matrix if kind == 'user' else matrix.T
    labels = oriented.index
    return pd.DataFrame(cosine_similarity(oriented), index=labels, columns=labels)

def main(ratings_path='ratings.csv', min_ratings=3):
    """Load ratings, build the user-item matrix, and print both similarity tables.

    Args:
        ratings_path: Location of the ratings CSV. Defaults to
            ``'ratings.csv'`` in the current working directory; pass an
            explicit path when the file lives elsewhere.
        min_ratings: Threshold forwarded to ``load_and_filter_data``.

    Raises:
        FileNotFoundError: Propagated from ``pd.read_csv`` when
            ``ratings_path`` does not exist.
    """
    filtered_ratings = load_and_filter_data(ratings_path, min_ratings=min_ratings)
    user_item_matrix = create_user_item_matrix(filtered_ratings)

    user_similarity = compute_similarity(user_item_matrix, kind='user')
    item_similarity = compute_similarity(user_item_matrix, kind='item')

    # Only a rounded preview is printed; the full matrices can be large.
    print("\nUser-User Cosine Similarity Matrix (first 5 rows):\n", user_similarity.round(2).head())
    print("\nItem-Item Cosine Similarity Matrix (first 5 rows):\n", item_similarity.round(2).head())

# Run the demo only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()


FileNotFoundError: [Errno 2] No such file or directory: 'ratings.csv'