In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from gensim.models import Word2Vec
import numpy as np
from pprint import pprint

In [3]:
# Load the 2009-2010 retail transactions.
# The Windows path is written as a raw string so its backslashes can never be
# parsed as escape sequences (the non-raw form triggers invalid-escape warnings).
# NOTE(review): this is an absolute, machine-specific path - consider a
# configurable data directory.
# StockCode is read as text because codes mix pure digits and alphanumerics
# (e.g. 85048 and 79323P), and they are later used as Word2Vec tokens.
df = pd.read_csv(
    r'E:\Vocational\Lighthouse Labs\Flex Course\C08_Machine Learning Application\exercise_recommender_engines\data\year_2009_2010.csv',
    dtype={'StockCode': str},
)


In [12]:
# preview the first five rows to sanity-check the columns that were loaded
df.head(n=5)

Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country
0,489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,12/1/09 7:45,6.95,13085.0,United Kingdom
1,489434,79323P,PINK CHERRY LIGHTS,12,12/1/09 7:45,6.75,13085.0,United Kingdom
2,489434,79323W,WHITE CHERRY LIGHTS,12,12/1/09 7:45,6.75,13085.0,United Kingdom
3,489434,22041,"RECORD FRAME 7"" SINGLE SIZE",48,12/1/09 7:45,2.1,13085.0,United Kingdom
4,489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,12/1/09 7:45,1.25,13085.0,United Kingdom


In [13]:
# Build one "sentence" per customer: the list of every StockCode found on that
# customer's rows, kept in row order.
# Rows without a Customer ID or StockCode are dropped first so that NaN never
# becomes a token, and codes are cast to str so every token has the same type
# (Word2Vec vocabulary keys are expected to be strings; presumably integer keys
# would be confused with positional indexes by gensim - verify against its docs).
customer_sentences = (
    df.dropna(subset=['Customer ID', 'StockCode'])
    .astype({'StockCode': str})
    .groupby('Customer ID')['StockCode']
    .apply(list)
    .tolist()
)

In [5]:
# Hold out 10% of the customer sentences for testing; the fixed random_state
# makes the split repeatable across runs.
train_sentences, test_sentences = train_test_split(
    customer_sentences,
    test_size=0.1,
    random_state=42,
)

In [7]:
# Fit the Word2Vec embedding on the training sentences only, so the held-out
# customers are not seen during training.
model = Word2Vec(
    sentences=train_sentences,
    vector_size=100,  # dimensionality of each product vector
    window=5,         # context window size passed to gensim
    min_count=1,      # minimum frequency for a stock code to be kept
    workers=4,        # number of training threads
)

In [8]:
# get the vector representation of a product
def get_product_vector(stock_code):
    """Return the embedding stored for ``stock_code`` in the trained model.

    The lookup is ``model.wv[stock_code]`` on the module-level ``model``;
    whatever that indexing raises for an unknown key propagates to the caller.
    """
    keyed_vectors = model.wv
    return keyed_vectors[stock_code]

In [9]:
# recommend similar products based on the customer's first purchase
def recommend_similar_products(customer_first_purchase, top_n=5):
    """Return the most similar products for each item known to the model.

    Args:
        customer_first_purchase: An iterable of stock codes. A single stock
            code passed as a bare string is treated as a one-item list.
        top_n: Number of neighbours requested per item (forwarded as ``topn``
            to ``model.wv.most_similar``).

    Returns:
        A dict mapping each item found in ``model.wv`` to the result of
        ``model.wv.most_similar(item, topn=top_n)``. Items that are not in
        the model's vocabulary are skipped, so the dict may be empty.
    """
    # Without this guard a bare string such as '85048' would be iterated
    # character by character and looked up as '8', '5', '0', ...
    if isinstance(customer_first_purchase, str):
        customer_first_purchase = [customer_first_purchase]

    keyed_vectors = model.wv
    return {
        item: keyed_vectors.most_similar(item, topn=top_n)
        for item in customer_first_purchase
        if item in keyed_vectors
    }

In [10]:
# Collect recommendations for every held-out customer, using only the first
# item of their sentence as the query. No metric is computed here.
test_results = {}
for customer_sentence in test_sentences:
    # a one-element slice keeps the list shape the recommender iterates over
    customer_first_purchase = customer_sentence[:1]
    recommended = recommend_similar_products(customer_first_purchase, top_n=5)
    # results are keyed by the printed form of the query list, e.g. "['85048']",
    # so customers sharing a first item share one entry
    test_results[str(customer_first_purchase)] = recommended

In [18]:
# print recommendations for a specific customer (here: customer 13085)
customer_id = 13085
# "First purchase" is the first distinct StockCode on this customer's rows;
# assumes the CSV rows are in chronological order - TODO confirm.
customer_first_purchase = df[df['Customer ID'] == customer_id]['StockCode'].unique().tolist()[:1]
recommendations = recommend_similar_products(customer_first_purchase, top_n=5)
# The heading uses print(): pprint() would emit the string's repr, wrapped in quotes.
print(f"Recommendations for customer {customer_id} based on their first purchase {customer_first_purchase}:")
pprint(recommendations)

"Recommendations for customer 13085 based on their first purchase ['85048']:"
{'85048': [('21349', 0.9805365204811096),
           ('22592', 0.9668772220611572),
           ('22187', 0.9633179306983948),
           ('84352', 0.9586672186851501),
           ('22589', 0.9510698318481445)]}
