<a href="https://colab.research.google.com/github/mehdihemmatyar/RecommendationSystem/blob/main/Snap_recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install lightfm

In [2]:
import numpy as np
from lightfm import LightFM
from lightfm.data import Dataset

In [6]:
import pandas as pd

In [7]:
data = pd.read_csv("/content/drive/MyDrive/SnapUserProducts.csv")

In [5]:
# Create a Dataset object
dataset = Dataset()
dataset.fit((x[0] for x in data), (x[1] for x in data))

# Build the interaction matrix
(interactions, _) = dataset.build_interactions(((x[0], x[1]) for x in data))

# Create and train the model
model = LightFM(loss='warp')
model.fit(interactions, epochs=10)

<lightfm.lightfm.LightFM at 0x7fdcf2d4e8c0>

In [6]:
# Function to get recommendations for a customer ID
def get_recommendations(customer_id, num_recommendations=5):
    user_items = interactions.tocsr()
    scores = model.predict(customer_id, np.arange(interactions.shape[1]))
    top_items = np.argsort(-scores)[:num_recommendations]
    items = dataset.mapping()[2]

    # Return the recommended Product IDs
    return [items[item_id] for item_id in top_items]


In [None]:
# Test the recommender system
customer_id = 4130841
recommendations = get_recommendations(customer_id)

print(f"Recommended Product IDs for Customer ID {customer_id}:")
print(recommendations)

In [14]:
data.head()

Unnamed: 0,Customer ID,Product ID,rating
0,4130841,3626898,8
1,10010652,5875884,9
2,1963255,4250172,8
3,3483648,6853430,6
4,1274389,8908514,6


In [4]:
!pip install surprise

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise (from surprise)
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3095450 sha256=220208a795b06d8617d81e48b3ad65a6fbdf7cce157c2bdadf7c7b114bdec301
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.

In [12]:
from surprise import Dataset
from surprise import SVD
from surprise import accuracy
from surprise.model_selection import train_test_split
import pandas as pd
from surprise import Reader


In [23]:
df = pd.DataFrame(data)

# Define the rating scale for Surprise
reader = Reader(rating_scale=(1, 10))

# Load the data into Surprise's Dataset format
dataset = Dataset.load_from_df(df[['Customer ID', 'Product ID', 'rating']], reader)

# Train the SVD model on the entire dataset
model = SVD()
trainset = dataset.build_full_trainset()
model.fit(trainset)

# Function to generate recommendations for a given customer ID
def generate_recommendations(customer_id):
    testset = trainset.build_anti_testset()
    testset = [data_tuple for data_tuple in testset if data_tuple[0] == customer_id]
    predictions = model.test(testset)
    recommendations = []
    for prediction in predictions:
        recommendations.append((prediction.iid, prediction.est))
    recommendations.sort(key=lambda x: x[1], reverse=True)
    return [product_id for product_id, _ in recommendations]


In [None]:
# Generate recommendations for each customer
all_recommendations = {}
for customer_id in df['Customer ID'].unique():
    recommendations = generate_recommendations(customer_id)
    all_recommendations[customer_id] = recommendations

# Print the recommendations for each customer
for customer_id, recommendations in all_recommendations.items():
    print(f"Recommended products for customer {customer_id}: {recommendations}")

In [12]:
df.head()

Unnamed: 0,customer_id,Products,rating
0,4130841,3626898,8
1,10010652,5875884,9
2,1963255,4250172,8
3,3483648,6853430,6
4,1274389,8908514,6


In [11]:
df.rename(columns={'Customer ID': 'customer_id'}, inplace=True)
df.rename(columns={'Product ID': 'Products'}, inplace=True)



In [20]:
import time
from collections import Counter
from multiprocessing import Pool

import numpy as np
import pandas as pd


def query(similar_customer) -> Counter:
    tmp_table = df.query(f'customer_id == {similar_customer.customer_id}')
    similar_customer_counter = Counter(tmp_table.loc[:, 'Products'].values)
    return similar_customer_counter


CSV_PATH = '/content/drive/MyDrive/SnapUserProducts.csv'
PREV_CARTS = 30
SIM_CARTS = 10
DF_SIM_SAMPLE = 4

target_customer_id = 4130841



#df = pd.read_csv(CSV_PATH, header=0)
t0 = time.time()

# prev carts
target_customer_table = df.query(f'customer_id == {target_customer_id}')
num_rows = len(target_customer_table)
weights = target_customer_table.loc[:, 'rating'].values
weights = weights / weights.sum()
size = min(PREV_CARTS, num_rows)
if size:
    prev_carts_indices = np.random.choice(range(weights.shape[0]), size=size, p=weights, replace=False)
    prev_carts = target_customer_table.iloc[prev_carts_indices].loc[:, 'Products'].values.tolist()
else:
    prev_carts = []

# similar carts
p = Pool(6)
similar_customer_counter = Counter()
for _, product_row in target_customer_table.iterrows():
    product_buyer_df = df.query(f'Products == {product_row.Products}')
    product_buyer_df = product_buyer_df.sample(n=min(DF_SIM_SAMPLE, len(product_buyer_df)))
    args_list = [
        similar_customer_row
        for _, similar_customer_row
        in product_buyer_df.iterrows()
    ]
    for result in p.imap_unordered(query, args_list):
        similar_customer_counter += result
sim_carts = similar_customer_counter.most_common(PREV_CARTS)
sim_carts = [k for k, _ in sim_carts]
print(time.time() - t0)

38.54516816139221


In [15]:
df.head()

Unnamed: 0,customer_id,Products,rating
0,4130841,3626898,8
1,10010652,5875884,9
2,1963255,4250172,8
3,3483648,6853430,6
4,1274389,8908514,6


In [21]:
sim_carts

[991375,
 1286589,
 3630089,
 3520616,
 4033068,
 3079168,
 991365,
 3520588,
 3630088,
 4033024,
 991332,
 6820334,
 991364,
 6320777,
 3999089,
 5965207,
 3078970,
 1286577,
 5966508,
 6320770,
 4268845,
 6320769,
 9216505,
 3079159,
 7083487,
 3520589,
 991353,
 5965203,
 3905971,
 6537204]