<a href="https://colab.research.google.com/github/neeti098/SharpCareer/blob/main/Untitled4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install surprise
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split, cross_validate
from sklearn.metrics import precision_score, recall_score, f1_score, mean_squared_error, mean_absolute_error
import pickle

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise (from surprise)
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m623.0 kB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357263 sha256=cb67bdace4408152b089ec14f3ce89d550460e9a4229ce3bcaa77f6fe7f82bec
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a40397d9bf3ff97f582cc22fb9ce66adde75bc71fd54
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully in

In [None]:
# Load the data
df = pd.read_csv('Purchase Data.csv')
df.head()
df['Review Rating'] = (df['Review Rating'] - 2.5) / 2.5
df['Purchase Amount (USD)'] = (df['Purchase Amount (USD)'] - 20) / 80
df.head()

Unnamed: 0,Customer ID,Age,Gender,Item Purchased,Category,Purchase Amount (USD),Location,Size,Color,Season,Review Rating,Subscription Status,Payment Method,Shipping Type,Discount Applied,Promo Code Used,Previous Purchases,Preferred Payment Method,Frequency of Purchases
0,1,55,Male,Blouse,Clothing,0.4125,Kentucky,L,Gray,Winter,0.24,Yes,Credit Card,Express,Yes,Yes,14,Venmo,Fortnightly
1,2,19,Male,Sweater,Clothing,0.55,Maine,L,Maroon,Winter,0.24,Yes,Bank Transfer,Express,Yes,Yes,2,Cash,Fortnightly
2,3,50,Male,Jeans,Clothing,0.6625,Massachusetts,S,Maroon,Spring,0.24,Yes,Cash,Free Shipping,Yes,Yes,23,Credit Card,Weekly
3,4,21,Male,Sandals,Footwear,0.875,Rhode Island,M,Maroon,Spring,0.4,Yes,PayPal,Next Day Air,Yes,Yes,49,PayPal,Weekly
4,5,45,Male,Blouse,Clothing,0.3625,Oregon,M,Turquoise,Spring,0.08,Yes,Cash,Free Shipping,Yes,Yes,31,PayPal,Annually


In [None]:
# Model Preparation
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['Customer ID', 'Item Purchased', 'Purchase Amount (USD)']], reader)

trainset, testset = train_test_split(data, test_size=0.25, random_state=42)

algo = SVD()

algo.fit(trainset)

with open('recommendation_model.pkl', 'wb') as f:
    pickle.dump(algo, f)


In [None]:
product_features = df[['Category', 'Size', 'Color', 'Season', 'Review Rating']].copy()
product_features = pd.get_dummies(product_features)

cosine_sim = cosine_similarity(product_features)

def get_content_based_recommendations(item_purchased, cosine_sim=cosine_sim):
    idx = df[df['Item Purchased'] == item_purchased].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]
    item_indices = [i[0] for i in sim_scores]
    return df['Item Purchased'].iloc[item_indices].tolist()

# Trial values
print(get_content_based_recommendations('Blouse'))

['Sweater', 'T-shirt', 'Jeans', 'Blouse', 'Dress', 'Blouse', 'Blouse', 'Pants', 'Hoodie', 'Shirt']


In [None]:
# Hybrid recommendation function
def hybrid_recommendations(customer_id, item_purchased, algo, cosine_sim, weight_cf=0.25, weight_cb=0.75):
    # Collaborative filtering prediction
    cf_preds = algo.predict(customer_id, item_purchased).est

    # Content-based recommendation
    cb_preds = get_content_based_recommendations(item_purchased, cosine_sim)
    cb_preds_mean = df[df['Item Purchased'].isin(cb_preds)]['Purchase Amount (USD)'].mean()

    # Combine predictions with weighting
    hybrid_preds = weight_cf * cf_preds + weight_cb * cb_preds_mean
    return cf_preds, cb_preds_mean, hybrid_preds

# Trial values
print(hybrid_recommendations(1, 'Blouse', algo, cosine_sim))

(1, 0.5039291369754554, 0.6279468527315916)


In [None]:
def get_top_n_recommendations(user_id, algo, cosine_sim, top_n=10):
    hybrid_ratings = {}

    for item in df['Item Purchased'].unique():
        cf_pred, cb_pred, hybrid_pred = hybrid_recommendations(user_id, item, algo, cosine_sim)
        hybrid_ratings[item] = hybrid_pred

    sorted_items = sorted(hybrid_ratings.items(), key=lambda x: x[1], reverse=True)

    top_n_items = [item[0] for item in sorted_items[:top_n]]

    return top_n_items

# Trial Values
print(get_top_n_recommendations(2954, algo, cosine_sim))

['Shorts', 'Dress', 'T-shirt', 'Sneakers', 'Blouse', 'Socks', 'Hoodie', 'Shirt', 'Shoes', 'Sweater']


In [None]:
# Evaluation
y_true = []
y_cf_pred = []
y_cb_pred = []
y_hybrid_pred = []

for customer_id, item_purchased, true_rating in testset:
    y_true.append(true_rating)

    cf_pred, cb_pred, hybrid_pred = hybrid_recommendations(customer_id, item_purchased, algo, cosine_sim)

    y_cf_pred.append(cf_pred)
    y_cb_pred.append(cb_pred)
    y_hybrid_pred.append(hybrid_pred)

rmse_cf = np.sqrt(mean_squared_error(y_true, y_cf_pred))
rmse_cb = np.sqrt(mean_squared_error(y_true, y_cb_pred))
rmse_hybrid = np.sqrt(mean_squared_error(y_true, y_hybrid_pred))

mae_cf = mean_absolute_error(y_true, y_cf_pred)
mae_cb = mean_absolute_error(y_true, y_cb_pred)
mae_hybrid = mean_absolute_error(y_true, y_hybrid_pred)

print(f'CF RMSE: {rmse_cf}')
print(f'CB RMSE: {rmse_cb}')
print(f'Hybrid RMSE: {rmse_hybrid}\n')

print(f'CF MAE: {mae_cf}')
print(f'CB MAE: {mae_cb}')
print(f'Hybrid MAE: {mae_hybrid}')


CF RMSE: 0.5853390426682598
CB RMSE: 0.2922991717789064
Hybrid RMSE: 0.3203028494115089

CF MAE: 0.507051282051282
CB MAE: 0.254501358114788
Hybrid MAE: 0.2711250522532403
