In [None]:
import numpy as np
import pandas as pd
from surprise import Dataset, SVD, SVDpp, NMF
from surprise.model_selection import cross_validate, GridSearchCV

# Obtain the built-in MovieLens 100k ratings through Surprise
data = Dataset.load_builtin('ml-100k')

# Sanity check: view the first raw rating records as a labelled table
rating_columns = ['user', 'item', 'rating', 'timestamp']
df = pd.DataFrame(data.raw_ratings, columns=rating_columns)
print(df.head())

# Baseline model: SVD constructed with no arguments (library defaults)
svd = SVD()

# Score the baseline with 5-fold cross-validation, reporting RMSE and MAE
# per fold (verbose=True prints the per-fold table)
svd_cv_results = cross_validate(
    svd,
    data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

# Search space for SVD hyperparameters: 3 * 2 * 2 * 2 = 24 combinations
param_grid = dict(
    n_factors=[50, 100, 150],
    n_epochs=[20, 30],
    lr_all=[0.002, 0.005],
    reg_all=[0.02, 0.1],
)

# Grid search over the space above, scoring with 3-fold cross-validation
# on both RMSE and MAE
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)
gs.fit(data)

# Report the best RMSE found by the search ...
print("Best RMSE score:", gs.best_score['rmse'])

# ... and the parameter combination selected for RMSE
print("Best parameters:", gs.best_params['rmse'])

# SVD++ with default arguments, scored with the same 5-fold RMSE/MAE protocol
svdpp = SVDpp()
svdpp_results = cross_validate(
    svdpp,
    data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

# NMF with default arguments, scored with the same 5-fold RMSE/MAE protocol
nmf = NMF()
nmf_results = cross_validate(
    nmf,
    data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

# Example: custom batch gradient descent for a linear least-squares model.
# NOTE: despite its name, this function fits y ~= X @ theta; it performs no
# user/item factorization.
def collaborative_filtering(X, y, learning_rate=0.01, iterations=1000, *,
                            verbose=True, random_state=None):
    """Fit linear weights ``theta`` so that ``X @ theta`` approximates ``y``.

    Minimises the mean squared error cost ``1/(2m) * sum((X @ theta - y)**2)``
    with plain batch gradient descent, starting from standard-normal weights.

    Args:
        X: Array-like of shape (m, n) holding m samples with n features.
        y: Array-like with m target values; a column vector of shape (m, 1)
            is flattened to shape (m,).
        learning_rate: Step size applied to the gradient on every update.
        iterations: Number of gradient descent updates to perform.
        verbose: When true (the default), print the cost every 100 iterations.
        random_state: Optional seed for the initial weights. When ``None``
            (the default), NumPy's global random state is used.

    Returns:
        A NumPy array of shape (n,) with the fitted weights.

    Raises:
        ValueError: If ``X`` is not 2-dimensional, contains no samples, or
            the number of targets in ``y`` does not match the rows of ``X``.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
    m, n = X.shape
    if m == 0:
        raise ValueError("X must contain at least one sample")

    # Flatten so an (m, 1) column does not broadcast the residual to (m, m).
    y = np.asarray(y, dtype=float).reshape(-1)
    if y.shape[0] != m:
        raise ValueError(
            f"y has {y.shape[0]} value(s) but X has {m} sample(s)"
        )

    # Initialization of parameters
    if random_state is None:
        theta = np.random.randn(n)
    else:
        theta = np.random.default_rng(random_state).standard_normal(n)

    for i in range(iterations):
        # Residual of the current hypothesis; shared by the cost and gradient
        residual = np.dot(X, theta) - y

        # The cost is only needed for reporting, so compute it on demand
        if verbose and i % 100 == 0:
            cost = (1 / (2 * m)) * np.sum(residual ** 2)
            print(f"Iteration {i}: Cost = {cost}")

        # Gradient descent step
        gradient = (1 / m) * np.dot(X.T, residual)
        theta = theta - learning_rate * gradient

    return theta

  user item  rating  timestamp
0  196  242     3.0  881250949
1  186  302     3.0  891717742
2   22  377     1.0  878887116
3  244   51     2.0  880606923
4  166  346     1.0  886397596
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9351  0.9339  0.9359  0.9344  0.9378  0.9354  0.0014  
MAE (testset)     0.7392  0.7363  0.7365  0.7356  0.7390  0.7373  0.0015  
Fit time          0.79    0.80    0.80    0.80    0.79    0.80    0.01    
Test time         0.11    0.16    0.11    0.16    0.11    0.13    0.02    
Best RMSE score: 0.9341966771187171
Best parameters: {'n_factors': 150, 'n_epochs': 30, 'lr_all': 0.005, 'reg_all': 0.1}
Evaluating RMSE, MAE of algorithm SVDpp on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9189  0.9313  0.9152  0.9158  0.9166  0.9196  0.0060  
MAE (testset)     0.7218  0.7287  0.7178  0.7180  0.7176  