In [12]:
import pandas as pd
from surprise import Dataset, SVD, Reader
from surprise.model_selection import train_test_split
from surprise import accuracy

In [13]:
# Path to the tab-separated interaction file. NOTE(review): this is an absolute,
# machine-specific path; consider moving it to a configurable data directory.
file_path = r'C:\Users\rynoc\Python_Analysis\ml-1m.inter'

# Read the raw interactions and shorten the "name:type" column headers in one
# chained step (no in-place mutation of an already-bound frame).
df = pd.read_csv(file_path, sep='\t').rename(
    columns={
        'user_id:token': 'userID',
        'item_id:token': 'itemID',
        'rating:float': 'rating',
    }
)

# Keep only (userID, itemID, rating) and store both identifiers as strings.
df_clean = (
    df
    .drop(columns='timestamp:float')
    .astype({'userID': str, 'itemID': str})
)

print(df_clean.head())
# DataFrame.info() prints its own report and returns None, so wrapping it in
# print() would emit a stray "None" line after the summary.
df_clean.info()

  userID itemID  rating
0      1   1193       5
1      1    661       3
2      1    914       3
3      1   3408       4
4      1   2355       5
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000209 entries, 0 to 1000208
Data columns (total 3 columns):
 #   Column  Non-Null Count    Dtype 
---  ------  --------------    ----- 
 0   userID  1000209 non-null  object
 1   itemID  1000209 non-null  object
 2   rating  1000209 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 22.9+ MB
None


In [14]:

# Fixed seed so the train/test split and the SVD fit give the same numbers
# after every kernel restart.
SEED = 42

# Initialize the Surprise Reader with the appropriate rating scale (1 to 5 for MovieLens)
reader = Reader(rating_scale=(1, 5))

# Load the DataFrame into a Surprise Dataset. Columns are selected explicitly so
# the (user, item, rating) order does not depend on how df_clean was built.
data = Dataset.load_from_df(df_clean[['userID', 'itemID', 'rating']], reader)

# Split the data into training (80%) and test (20%) sets, reproducibly
trainset, testset = train_test_split(data, test_size=0.2, random_state=SEED)

# Initialize the SVD model with the same seed
model = SVD(random_state=SEED)

# Train the model on the training data
model.fit(trainset)

# Test the model on the test data
predictions = model.test(testset)

# Evaluate the model using RMSE (the call prints the value itself)
accuracy.rmse(predictions)

# Display sample predictions
for pred in predictions[:5]:
    print(f"User: {pred.uid}, Item: {pred.iid}, Actual: {pred.r_ui}, Predicted: {pred.est:.2f}")

RMSE: 0.8729
User: 5038, Item: 2704, Actual: 4.0, Predicted: 2.77
User: 1847, Item: 1094, Actual: 5.0, Predicted: 4.05
User: 135, Item: 3285, Actual: 1.0, Predicted: 2.77
User: 1635, Item: 3082, Actual: 4.0, Predicted: 3.08
User: 840, Item: 2748, Actual: 1.0, Predicted: 2.17


In [15]:
# Evaluate the held-out predictions with four accuracy metrics, in the order
# RMSE, MSE, MAE, FCP. Each call reports its own value in the cell output.
RMSE, MSE, MAE, fcp = [
    metric(predictions)
    for metric in (accuracy.rmse, accuracy.mse, accuracy.mae, accuracy.fcp)
]

RMSE: 0.8729
MSE: 0.7619
MAE:  0.6851
FCP:  0.7433


In [17]:
def precision_recall_at_k(predictions, k=10, threshold=4.0):
    """
    Compute precision@k and recall@k, averaged over users.

    For each user, the predictions are ranked by estimated rating (highest
    first) and the first ``k`` are treated as the recommendations. An item is
    "relevant" when its true rating is at least ``threshold``.

    Parameters:
    - predictions: Iterable of 5-field records in the order
      (user id, item id, true rating, estimated rating, details).
    - k: Number of top recommendations to consider for precision/recall.
      Values <= 0 yield an empty recommendation list (precision and recall 0).
    - threshold: Minimum true rating to consider an item as relevant.

    Returns:
    - precision: Mean over users of (relevant items in top k) / k. Note the
      denominator is always k, even for users with fewer than k predictions.
    - recall: Mean over users of (relevant items in top k) / (relevant items
      among all of that user's predictions); 0 for users with no relevant item.

    Both values are 0.0 when ``predictions`` is empty.
    """
    # Group (estimated, true) rating pairs by user.
    user_est_true = {}
    for uid, _iid, true_r, est, _details in predictions:
        user_est_true.setdefault(uid, []).append((est, true_r))

    # Without any user there is nothing to average; avoid dividing by zero.
    if not user_est_true:
        return 0.0, 0.0

    # A negative k would make the slice below drop items from the END of the
    # ranking instead of producing an empty top-k, so clamp it at zero.
    top_n = max(k, 0)

    precisions = []
    recalls = []

    for user_ratings in user_est_true.values():
        # Rank by estimated rating (stable for ties) and take the top k.
        ranked = sorted(user_ratings, key=lambda pair: pair[0], reverse=True)
        top_k = ranked[:top_n]

        # Relevant items inside the top k, and relevant items overall.
        relevant_and_recommended = sum(true_r >= threshold for _, true_r in top_k)
        relevant_items = sum(true_r >= threshold for _, true_r in ranked)

        precisions.append(relevant_and_recommended / k if k > 0 else 0.0)
        recalls.append(
            relevant_and_recommended / relevant_items if relevant_items > 0 else 0.0
        )

    # Average precision and recall across all users.
    precision = sum(precisions) / len(precisions)
    recall = sum(recalls) / len(recalls)

    return precision, recall

# Score the held-out predictions at k=5, counting a true rating of 4.0 or more
# as relevant, and report both metrics to four decimal places.
precision, recall = precision_recall_at_k(predictions, threshold=4.0, k=5)
print(f"Precision: {precision:.4f}", f"Recall: {recall:.4f}", sep="\n")


Precision: 0.7835
Recall: 0.4386
