In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the merged Terra Store records from the CSV at ../data/merged_data.csv.
terra_store_data = pd.read_csv(filepath_or_buffer='../data/merged_data.csv')




In [2]:
import pandas as pd
from surprise import Dataset, Reader
from surprise.model_selection import GridSearchCV
from surprise.prediction_algorithms import SVD, SVDpp, NMF, SlopeOne, CoClustering, KNNBasic, KNNWithZScore


# Alias the frame loaded in the previous cell under the name used below.
merged_data = terra_store_data

# Build the Surprise dataset from the customer / product / rating columns.
# The reader declares that ratings lie on a 1-to-5 scale.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(
    df=merged_data.loc[:, ['customer_id', 'product_id', 'ratings']],
    reader=reader,
)

# Hyper-parameter grid for the latent-factor models (SVD, SVDpp, NMF), whose
# constructors accept both `n_factors` and `n_epochs`.
param_grid = {
    'n_factors': [50, 100],
    'n_epochs': [10, 20],
}

# Candidate algorithms as (display name, class) pairs.
models = [
    ('SVD', SVD),
    ('SVDpp', SVDpp),
    ('NMF', NMF),
    ('SlopeOne', SlopeOne),
    ('CoClustering', CoClustering),
    ('KNNBasic', KNNBasic),
    ('KNNWithZScore', KNNWithZScore)
]

# Grid to search for each algorithm. An empty grid means "cross-validate the
# default configuration once".
# The KNN models have no `n_factors` / `n_epochs` hyper-parameters; handing them
# the latent-factor grid would cross-validate the same default model four times
# and report "best" parameters that have no effect, so they get an empty grid.
# (Their tunable arguments are `k`, `min_k` and `sim_options` - add those here
# if the neighbourhood models should actually be tuned.)
model_param_grids = {
    'SVD': param_grid,
    'SVDpp': param_grid,
    'NMF': param_grid,
    'SlopeOne': {},
    'CoClustering': {},
    'KNNBasic': {},
    'KNNWithZScore': {},
}

# Running winners across all algorithms. The model is selected by RMSE; the
# lowest MAE is tracked together with the name of the algorithm that achieved
# it, because it is not necessarily the same algorithm.
best_rmse = float('inf')
best_mae = float('inf')
best_model = None
best_model_name = None
best_mae_model_name = None

# NOTE(review): no random seed is set, so the CV splits and the stochastic
# algorithms may give slightly different scores on every run - confirm whether
# reproducible numbers are needed.
for model_name, model_class in models:
    print(f"Grid searching for model: {model_name}")
    grid_search = GridSearchCV(
        model_class,
        # Models without an explicit entry fall back to their defaults.
        model_param_grids.get(model_name, {}),
        measures=['rmse', 'mae'],
        cv=3,
    )
    grid_search.fit(data)

    model_rmse = grid_search.best_score['rmse']
    model_mae = grid_search.best_score['mae']

    # Keep the algorithm with the lowest cross-validated RMSE.
    if model_rmse < best_rmse:
        best_rmse = model_rmse
        # NOTE(review): GridSearchCV is created without refit=True, so this
        # estimator is presumably configured but NOT fitted - fit it on a
        # trainset before calling predict()/test().
        best_model = grid_search.best_estimator['rmse']
        best_model_name = model_name

    # Track the lowest MAE and remember which algorithm produced it.
    if model_mae < best_mae:
        best_mae = model_mae
        best_mae_model_name = model_name

    # Print the best parameters and corresponding scores for each algorithm
    print(f"Best Parameters for {model_name}: RMSE - {grid_search.best_params['rmse']}, MAE - {grid_search.best_params['mae']}")
    print(f"Best RMSE Score for {model_name}: {model_rmse}")
    print(f"Best MAE Score for {model_name}: {model_mae}")
    print("\n")

# Summary: the selected model (by RMSE) and the best scores, with the MAE
# explicitly attributed to the algorithm that achieved it.
print(f"Best Model (by RMSE): {best_model_name}")
print(f"Best RMSE Score: {best_rmse}")
print(f"Best MAE Score: {best_mae} (achieved by {best_mae_model_name})")



Grid searching for model: SVD
Best Parameters for SVD: RMSE - {'n_factors': 100, 'n_epochs': 20}, MAE - {'n_factors': 100, 'n_epochs': 20}
Best RMSE Score for SVD: 0.7516125304473898
Best MAE Score for SVD: 0.6119826410277189


Grid searching for model: SVDpp
Best Parameters for SVDpp: RMSE - {'n_factors': 50, 'n_epochs': 20}, MAE - {'n_factors': 50, 'n_epochs': 20}
Best RMSE Score for SVDpp: 0.7333822660286132
Best MAE Score for SVDpp: 0.6034908646726288


Grid searching for model: NMF
Best Parameters for NMF: RMSE - {'n_factors': 50, 'n_epochs': 20}, MAE - {'n_factors': 50, 'n_epochs': 20}
Best RMSE Score for NMF: 1.0020907511853638
Best MAE Score for NMF: 0.8744124894422067


Grid searching for model: SlopeOne
Best Parameters for SlopeOne: RMSE - {}, MAE - {}
Best RMSE Score for SlopeOne: 0.9703301805369682
Best MAE Score for SlopeOne: 0.7894846486325359


Grid searching for model: CoClustering
Best Parameters for CoClustering: RMSE - {}, MAE - {}
Best RMSE Score for CoClustering: 0