-
Notifications
You must be signed in to change notification settings - Fork 73
/
ecommerce_product_recommendation.py
89 lines (56 loc) · 3.96 KB
/
ecommerce_product_recommendation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from collections import Counter


def _quartile_scores(values):
    """Return a 1-4 quartile score for each entry of *values* (1 = lowest quartile).

    ``pd.qcut`` raises ``ValueError`` when tied values make two quartile edges
    coincide, and ``duplicates='drop'`` does not help while four fixed labels
    are requested (the label count no longer matches the bin count).  In that
    case the values are ranked first (ties broken by order of appearance) so
    that four equally sized bins always exist.  When the edges are already
    unique the result is the plain quartile cut.
    """
    labels = range(1, 5)
    try:
        return pd.qcut(values, q=4, labels=labels)
    except ValueError:
        return pd.qcut(values.rank(method="first"), q=4, labels=labels)


# Load the transaction log and show a preview.
df = pd.read_csv("ecommerce_data_electronics.csv")
print(df.head())

# Recency is measured against the most recent timestamp present in the data.
df['time_stamp'] = pd.to_datetime(df['time_stamp'])
NOW = df['time_stamp'].max()

# One row per customer (card_name):
#   recency        - whole days between NOW and the customer's latest purchase
#   frequency      - number of transaction rows
#   monetary_value - sum of the price column
rfmTable = df.groupby('card_name').agg({
    'time_stamp': lambda stamps: (NOW - stamps.max()).days,
    'product_id': len,
    'price': lambda prices: prices.sum(),
})
rfmTable['time_stamp'] = rfmTable['time_stamp'].astype(int)
rfmTable = rfmTable.rename(columns={
    'time_stamp': 'recency',
    'product_id': 'frequency',
    'price': 'monetary_value',
})
print(rfmTable.head())

# Score every metric 1-4 by quartile, handling tied values the same way for
# all three columns.
# NOTE(review): recency is scored in the same direction as the other metrics,
# so r_quartile 4 marks the customers whose last purchase is the *oldest*.
# The segment definitions below read as if a higher score were always better;
# confirm whether the recency labels should be reversed.
rfmTable['r_quartile'] = _quartile_scores(rfmTable['recency'])
rfmTable['f_quartile'] = _quartile_scores(rfmTable['frequency'])
rfmTable['m_quartile'] = _quartile_scores(rfmTable['monetary_value'])

rfm_data = rfmTable.reset_index()
print(rfm_data.head())

# Concatenate the three scores into a three-character code such as '344'.
for quartile_column in ('r_quartile', 'f_quartile', 'm_quartile'):
    rfm_data[quartile_column] = rfm_data[quartile_column].astype(str)
rfm_data['RFM_score'] = rfm_data['r_quartile'] + rfm_data['f_quartile'] + rfm_data['m_quartile']
print(rfm_data.head())

# Hand-picked RFM codes per segment.  The lists do not overlap, and any code
# that is not listed falls into 'Other'.
SEGMENT_SCORES = {
    # every score is 3 or 4
    'Premium Customer': ['334', '443', '444', '344', '434', '433', '343', '333'],
    # frequency score is 3 or 4
    'Repeat Customer': ['244', '234', '232', '332', '143', '233', '243'],
    # monetary score is 3 or 4
    'Top Spender': ['424', '414', '144', '314', '324', '124', '224', '423', '413', '133', '323', '313', '134'],
    # at least two scores are 2 or lower
    'At Risk Customer': ['422', '223', '212', '122', '222', '132', '322', '312', '412', '123', '214'],
    # at least two scores are 1
    'Inactive Customer': ['411', '111', '113', '114', '112', '211', '311'],
}
score_to_segment = {}
for segment_name, segment_scores in SEGMENT_SCORES.items():
    for rfm_code in segment_scores:
        score_to_segment[rfm_code] = segment_name
rfm_data['customer_segment'] = rfm_data['RFM_score'].map(score_to_segment).fillna('Other')

# Show how many customers landed in each segment.
print(Counter(rfm_data['customer_segment']))
def generate_recommendations(target_customer, cohort, num_recommendations=5):
    """Recommend products bought by the customers most similar to *target_customer*.

    Customers are compared by the TF-IDF cosine similarity of the combined
    descriptions of everything they bought.  The ``num_recommendations`` most
    similar other customers are visited from most to least similar, and the
    products they bought that the target has not are collected in that order.

    Args:
        target_customer: Value of ``card_name`` identifying the customer.
        cohort: DataFrame with ``card_name``, ``product`` and
            ``product_description`` columns, one row per purchase.
        num_recommendations: Both the number of similar customers consulted
            and the maximum number of products returned.

    Returns:
        A list of at most ``num_recommendations`` distinct products, ordered
        by the similarity of the customer they came from.

    Raises:
        IndexError: If ``target_customer`` has no rows in ``cohort``.
    """
    # Collect, per customer, the distinct products in purchase order and one
    # text blob of all descriptions.  Products are kept as separate values
    # (not joined and re-split) so names containing ", " stay intact, and
    # missing values are skipped instead of breaking the string join.
    purchases = {}
    descriptions = {}
    for card_name, rows in cohort.groupby('card_name'):
        purchases[card_name] = list(dict.fromkeys(rows['product'].dropna()))
        descriptions[card_name] = ', '.join(rows['product_description'].dropna().astype(str))

    if target_customer not in purchases:
        raise IndexError(f"customer {target_customer!r} not found in cohort")
    if num_recommendations <= 0:
        return []

    customers = list(purchases)
    target_index = customers.index(target_customer)

    tfidf_matrix = TfidfVectorizer().fit_transform([descriptions[name] for name in customers])
    # Only the target's row of the similarity matrix is needed; the slice
    # keeps the operand two-dimensional.
    similarities = cosine_similarity(
        tfidf_matrix[target_index:target_index + 1], tfidf_matrix
    ).ravel()

    # Most similar first, ties broken by customer order.  The target is
    # removed explicitly rather than assumed to be the first entry: another
    # customer can tie with it at similarity 1.0.
    ranked = [
        index
        for index in (-similarities).argsort(kind="stable")
        if index != target_index
    ]

    already_owned = set(purchases[target_customer])
    recommendations = []
    for neighbour_index in ranked[:num_recommendations]:
        for product in purchases[customers[neighbour_index]]:
            if product in already_owned:
                continue
            already_owned.add(product)  # also prevents duplicate suggestions
            recommendations.append(product)
            if len(recommendations) == num_recommendations:
                return recommendations
    return recommendations
# Keep only the customers segmented as 'Premium Customer'.
rfm_data = rfm_data[rfm_data['customer_segment'] == 'Premium Customer']

# Customers to demo, paired with the heading printed above their results.
demo_requests = (
    ("Ashley Perry", "Recommendations for Ashley Perry:"),
    ("Clifford Stanley", "Recommendations for Clifford Stanley"),
)

# Build a cohort of at most ten premium customers.  The selection is
# deterministic (table order, not set iteration order) and the demo customers
# are placed first so that they are part of the cohort whenever they are
# premium customers at all.
premium_names = list(dict.fromkeys(rfm_data['card_name']))
requested_names = [name for name, _ in demo_requests if name in premium_names]
premium = (
    requested_names
    + [name for name in premium_names if name not in requested_names]
)[:10]
df_premium = df[df['card_name'].isin(premium)]
print(df_premium.head())

for customer, heading in demo_requests:
    if customer not in premium:
        # generate_recommendations cannot look up a customer that is absent
        # from the cohort, so report it and move on instead of crashing.
        print(f"{customer} is not in the premium cohort; skipping recommendations.")
        continue
    recommendations = generate_recommendations(customer, df_premium, num_recommendations=5)
    print(heading)
    print(recommendations)