In [None]:
#INSTALLING FROM CONDA
!conda install -c conda-forge scikit-surprise

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from surprise import Dataset, Reader, SVD, KNNBasic
from surprise.model_selection import train_test_split
from surprise import accuracy

In [None]:
#LOADING DATASET

def load_data(
    url='https://raw.githubusercontent.com/ashishpatel26/MovieLens-Small-Dataset/master/ratings.csv',
):
    """Load the ratings table into a DataFrame.

    Parameters
    ----------
    url : str, optional
        Location of the ratings CSV. Anything ``pd.read_csv`` accepts works
        (URL or local path). Defaults to the ratings file this notebook was
        written against.

    Returns
    -------
    pandas.DataFrame or None
        The loaded table, or ``None`` when reading fails. The failure is
        reported on stdout rather than raised, so callers must check for
        ``None``.
    """
    print("DOWNLOAD DATA..")
    try:
        df = pd.read_csv(url)
    except Exception as e:
        # Best-effort load: report the problem and signal failure with None.
        print(f"Error loading dataset: {e}")
        return None

    print("DATA LOADED FROM URL.")
    print(df.head())
    return df

In [None]:
# PREPROCESSING


def preprocess_data(df):
    """Remove rows with missing values and print summary statistics.

    Parameters
    ----------
    df : pandas.DataFrame
        Ratings table; must contain ``userId`` and ``movieId`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the rows without any null value. The input
        frame itself is not modified.
    """
    print(f"Initial Data Shape: {df.shape}")

    cleaned = df.dropna()
    print(f"Data After Dropping Nulls: {cleaned.shape}")

    # Counts are taken on the cleaned frame, after the null rows are gone.
    user_count = cleaned['userId'].nunique()
    print(f"Total Unique Users: {user_count}")
    movie_count = cleaned['movieId'].nunique()
    print(f"Total Unique Movies: {movie_count}")

    return cleaned

In [None]:
# BUILDING COLLABORATIVE FILTERING MODEL


def build_model(df, model_type='SVD', random_state=42, rating_scale=None):
    """Train a collaborative-filtering model and evaluate it on a hold-out split.

    Parameters
    ----------
    df : pandas.DataFrame
        Ratings table with ``userId``, ``movieId`` and ``rating`` columns.
    model_type : str, optional
        ``'SVD'`` trains an ``SVD`` model; any other value trains a
        user-based ``KNNBasic`` model with cosine similarity.
    random_state : int or None, optional
        Seed for the train/test split and for SVD initialisation, so repeated
        runs give the same split and RMSE. Pass ``None`` for unseeded runs.
    rating_scale : tuple of (float, float), optional
        ``(lowest, highest)`` possible rating. When omitted it is taken from
        the observed min/max of ``df['rating']``.

    Returns
    -------
    tuple
        ``(model, predictions)``: the fitted model and its predictions on the
        20% hold-out set.

    Raises
    ------
    ValueError
        If ``df`` contains no rows.
    """
    if df.empty:
        raise ValueError("Cannot build a model from an empty ratings DataFrame.")

    if rating_scale is None:
        # Surprise clips predictions to the reader's scale, so a fixed (1, 5)
        # distorts estimates for data with ratings outside that range
        # (e.g. half-star ratings starting at 0.5).
        rating_scale = (float(df['rating'].min()), float(df['rating'].max()))

    reader = Reader(rating_scale=rating_scale)
    data = Dataset.load_from_df(df[['userId', 'movieId', 'rating']], reader)
    trainset, testset = train_test_split(
        data, test_size=0.2, random_state=random_state
    )

    print(f"Training {model_type} Model...")
    if model_type == 'SVD':
        model = SVD(random_state=random_state)
    else:
        sim_options = {'name': 'cosine', 'user_based': True}
        model = KNNBasic(sim_options=sim_options)

    model.fit(trainset)
    predictions = model.test(testset)

    # verbose=False: accuracy.rmse prints the score itself by default, which
    # would duplicate the labelled line below.
    rmse = accuracy.rmse(predictions, verbose=False)
    print(f"{model_type} RMSE: {rmse}")
    return model, predictions

In [None]:
# VISUALIZATION

def visualize_results(predictions, model_type):
    """Show a scatter plot of actual versus predicted ratings.

    Parameters
    ----------
    predictions : iterable
        Prediction objects exposing ``r_ui`` (actual rating) and ``est``
        (estimated rating) attributes.
    model_type : str
        Model label used in the plot title.
    """
    actual_ratings = list(map(lambda p: p.r_ui, predictions))
    predicted_ratings = list(map(lambda p: p.est, predictions))

    # Explicit figure/axes handles instead of the implicit pyplot state.
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.scatterplot(x=actual_ratings, y=predicted_ratings, alpha=0.6, ax=ax)
    ax.set_xlabel('Actual Ratings')
    ax.set_ylabel('Predicted Ratings')
    ax.set_title(f'{model_type} Model: Actual vs Predicted Ratings')
    ax.grid(True)
    plt.show()
    

In [None]:
#MAIN FUNCTION

def main():
    """Run the full pipeline: load, preprocess, then train and plot each model.

    Returns early without training when the dataset could not be loaded
    (``load_data`` returns ``None`` in that case).
    """
    df = load_data()
    if df is None:
        return

    df = preprocess_data(df)

    # Same train/evaluate/plot sequence for every model type.
    for model_type in ('SVD', 'KNN'):
        print(f"\nBuilding and Evaluating {model_type} Model...")
        # The fitted model itself is not needed here, only its predictions.
        _, predictions = build_model(df, model_type)
        visualize_results(predictions, model_type)

# Entry point: run the pipeline only when this code executes as the main
# module, not when it is imported from elsewhere.
if __name__ == '__main__':
    main()
