In [3]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
import joblib


In [4]:
df = pd.read_csv("electronics.csv")
df.head()


Unnamed: 0,item_id,user_id,rating,timestamp,model_attr,category,brand,year,user_attr,split
0,0,0,5.0,1999-06-13,Female,Portable Audio & Video,,1999,,0
1,0,1,5.0,1999-06-14,Female,Portable Audio & Video,,1999,,0
2,0,2,3.0,1999-06-17,Female,Portable Audio & Video,,1999,,0
3,0,3,1.0,1999-07-01,Female,Portable Audio & Video,,1999,,0
4,0,4,2.0,1999-07-06,Female,Portable Audio & Video,,1999,,0


In [5]:
reader = Reader(rating_scale=(1, 5))


In [6]:
data = Dataset.load_from_df(
    df[['user_id', 'item_id', 'rating']],
    reader
)


In [7]:
trainset, testset = train_test_split(
    data,
    test_size=0.2,
    random_state=42
)


In [8]:
model = SVD(
    n_factors=50,
    n_epochs=20,
    lr_all=0.005,
    reg_all=0.02
)


In [9]:
model.fit(trainset)


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1a51db97550>

In [10]:
predictions = model.test(testset)
rmse = accuracy.rmse(predictions)

print("RMSE:", rmse)


RMSE: 1.3042
RMSE: 1.304238819298353


In [11]:
joblib.dump(model, "svd_model.pkl")
print("âœ… Model saved successfully")


âœ… Model saved successfully


In [12]:
def recommend_products(model, user_id, df, top_n=5):

    all_items = df['item_id'].unique()
    rated_items = df[df['user_id'] == user_id]['item_id'].unique()

    items_to_predict = [
        item for item in all_items
        if item not in rated_items
    ]

    predictions = []

    for item in items_to_predict:
        pred = model.predict(user_id, item)
        predictions.append((item, pred.est))

    predictions.sort(key=lambda x: x[1], reverse=True)

    return predictions[:top_n]


In [13]:
user_id = 10

recommended = recommend_products(
    model,
    user_id,
    df,
    top_n=5
)

print("ðŸŽ¯ Recommended Products:")
for item_id, rating in recommended:
    print(f"Item: {item_id} | Predicted Rating: {round(rating,2)}")


ðŸŽ¯ Recommended Products:
Item: 3498 | Predicted Rating: 4.81
Item: 5171 | Predicted Rating: 4.8
Item: 4294 | Predicted Rating: 4.8
Item: 7780 | Predicted Rating: 4.79
Item: 5204 | Predicted Rating: 4.78


In [14]:
def recommend_products_with_details(model, user_id, df, top_n=5):

    all_items = df['item_id'].unique()
    rated_items = df[df['user_id'] == user_id]['item_id'].unique()

    items_to_predict = [
        item for item in all_items
        if item not in rated_items
    ]

    predictions = []

    for item in items_to_predict:
        pred_rating = model.predict(user_id, item).est

        item_info = df[df['item_id'] == item].iloc[0]

        predictions.append({
            "item_id": item,
            "category": item_info["category"],
            "brand": item_info["brand"],
            "predicted_rating": round(pred_rating, 2)
        })

    predictions = sorted(
        predictions,
        key=lambda x: x["predicted_rating"],
        reverse=True
    )

    return predictions[:top_n]


In [18]:
user_id = 10

recommendations = recommend_products_with_details(
    model,
    user_id,
    df,
    top_n=5
)

for rec in recommendations:
    print(
        f"Item: {rec['item_id']} | "
        f"Category: {rec['category']} | "
        f"Brand: {rec['brand']} | "
        f"Rating: {rec['predicted_rating']}"
    )


Item: 3498 | Category: Car Electronics & GPS | Brand: nan | Rating: 4.81
Item: 4294 | Category: Headphones | Brand: nan | Rating: 4.8
Item: 5171 | Category: Portable Audio & Video | Brand: Bose | Rating: 4.8
Item: 7780 | Category: Portable Audio & Video | Brand: nan | Rating: 4.79
Item: 5204 | Category: Accessories & Supplies | Brand: nan | Rating: 4.78
