* InvoiceID : İşlemin benzersiz kimliği. Bir işlemde, müşteri aynı tarihte farklı SKU'lara sahip birkaç farklı ürün satın alabilir. Her bir ürün satışı ayrı bir kayıt olarak tutulur, ancak bu kayıtların hepsi aynı InvoiceID'yi paylaşır. Birlikte satılan ürünleri tespit etmek için de kullanılabilir.
* Date : Tarih ve saat formatında işlemin veya satışın tarihi belirtilir.
* ProductID : Satılan her bir öğe/ürün için benzersiz kimlik.
* TotalSales : Satış işleminden elde edilen gelir. Bir ürünün birim fiyatını hesaplamak için kolonlarda $\frac{TotalSales}{Quantity}$ işlemi yapılabilir.
* Discount : Yapılan işlemde uygulanan indirim miktarı. İndirim sonrası fiyat için $TotalSales - \frac{TotalSales\times Discount}{100}$ işlemi yapılabilir.
* CustomerID : Her müşteri için benzersiz müşteri kimliği. Verisetinde müşteri, bir şirket şubesi veya bir bayi olabilir.
* Quantity : İşlemde satılan ürünlerin adedini belirtir.

<a id="headers"></a>
1. [Popularity-based Recommender System](#section-one)
2. [Content-based Recommender System](#section-two)
3. [Collaborative Filtering Recommender System - 1](#section-three)
4. [Collaborative Filtering Recommender System - 2](#section-four)
5. [Hybrid Recommender System](#section-five)

<a id="section-one"></a>
# [Popularity-based Recommender System](#headers)

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

df = pd.read_csv('/kaggle/input/retail-data-set/file_out2.csv')

In [None]:
df.drop("Unnamed: 0", axis = 1, inplace = True)

In [None]:
pd.DataFrame([{'products': len(df['ProductID'].value_counts()),
               'transactions': len(df['InvoiceID'].value_counts()),
               'customers': len(df['CustomerID'].value_counts()),
              }], columns = ['products', 'transactions', 'customers'], index = ['quantity'])

In [None]:
product_counts = df.groupby(['CustomerID', 'ProductID'])['Quantity'].sum().reset_index()
product_counts.rename(columns={'Quantity': 'Ratings'}, inplace=True)
ratings_df = product_counts[['CustomerID', 'ProductID', 'Ratings']]

In [None]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range =(1,5)) 
ratings_df['NormalizedRatings'] = scaler.fit_transform(ratings_df[['Ratings']])

In [None]:
user_ratings = ratings_df.pivot(index="ProductID", columns="CustomerID", values="NormalizedRatings")
correlation_matrix = user_ratings.corr()

In [None]:
means_similars = {} 
for i in range(len(ratings_df['CustomerID'].unique())): 
    correlation_matrix[i] = correlation_matrix[i].drop(i)
    means_similars[i] = correlation_matrix[i].mean() 

In [None]:
new_matrix = []
i = 0
for row in correlation_matrix:
    result = [mns for mns in correlation_matrix[row] if mns > means_similars[i]]
    new_matrix.append(result)
    i += 1
    if i == 507:
        break

In [None]:
correlation_matrix =pd.DataFrame(new_matrix)                                  

In [None]:
correlation_matrix = correlation_matrix.applymap(lambda x: 0 if pd.isna(x) or x < 0 else x)

In [None]:
sorted_ratings = ratings_df.sort_values(by='NormalizedRatings', ascending=False)

In [None]:
cleaned_values = [value for value in means_similars.values() if not np.isnan(value)]
means_similars_corrs = np.mean(cleaned_values)

In [None]:
targets_dict = {}
similar_customer_ids = list(set(list(sorted_ratings['CustomerID'])))

for i in set(list(df['ProductID'])):
    target_product_id = i
    target_product_ratings = user_ratings.loc[target_product_id, similar_customer_ids]    
    targets_dict[i] = np.sum(target_product_ratings)

In [None]:
df['ProductRating'] = None
df = df.set_index('ProductID')
df['ProductRating'] = df.index.map(targets_dict)
df['ProductRating'] = scaler.fit_transform(df[['ProductRating']])

In [None]:
def custom_round(x):
    """Round ``x`` to the nearest multiple of 0.5.

    The value is doubled, rounded to a whole number with the built-in
    ``round`` and then halved, so exact ties follow ``round``'s tie-breaking.
    """
    doubled = x * 2
    nearest_whole = round(doubled)
    return nearest_whole / 2
df['ProductRating'] = df['ProductRating'].apply(custom_round)

In [None]:
Data = df.drop(columns = ['InvoiceID' , 'Date', 'TotalSales','Discount','Quantity'], axis = 1)

In [None]:
Data.head()

In [None]:
Data = Data.reset_index()

In [None]:
Data['ProductRating'].min(), Data['ProductRating'].max()

In [None]:
Data.groupby('CustomerID')['ProductRating'].mean().sort_values(ascending=False).head(10)  

In [None]:
with sns.axes_style('white'):
    g = sns.catplot(x="ProductRating", data=Data, aspect=2.0, kind='count')
    g.set_ylabels("Total number of ratings")

plt.show()

In [None]:
print("Toplam müşteri derecelendirmeleri :",Data.shape[0])
print("Toplam müşteri sayısı:", len(np.unique(Data.CustomerID)))
print("Toplam ürün sayısı  :", len(np.unique(Data.ProductID)))

In [None]:
Data.groupby('ProductID')['ProductRating'].mean().sort_values(ascending=False) 

In [None]:
grouped_data = Data.groupby(['CustomerID', 'ProductID'])['ProductRating'].mean().reset_index()
final_ratings_matrix = grouped_data.pivot(index='CustomerID', columns='ProductID', values='ProductRating').fillna(0)
final_ratings_matrix_T = final_ratings_matrix.transpose()

In [None]:
Data_new_grouped = Data.groupby('ProductID').agg({'CustomerID': 'count'}).reset_index()
Data_new_grouped.rename(columns = {'CustomerID': 'score'},inplace=True)
Data_new_grouped.head()

In [None]:
train_data_sort = Data.sort_values(['ProductRating', 'ProductID'], ascending = [0,1]) 
train_data_sort['Rank'] = train_data_sort['ProductRating'].rank(ascending=0, method='first') 
popularity_recommendations = train_data_sort.head(5) # En popüler ürünler

In [None]:
def recommend(user_id):
    """Return the popularity-based recommendations tagged with ``user_id``.

    The recommendations are the module-level ``popularity_recommendations``
    table, which is the same for every user; this function adds a ``userId``
    column holding ``user_id`` and moves that column to the front.

    The work is done on a copy so the shared ``popularity_recommendations``
    frame is left untouched (assigning into it directly would add a
    ``userId`` column to the global table and can trigger pandas'
    SettingWithCopyWarning, because that table is a ``head()`` slice).

    Returns:
        A new DataFrame with ``userId`` as its first column.
    """
    user_recommendations = popularity_recommendations.copy()
    user_recommendations['userId'] = user_id
    # The new column was appended last; rotate it to the first position.
    cols = user_recommendations.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    return user_recommendations[cols]

In [None]:
popular_products = pd.DataFrame(Data.groupby('ProductID')['ProductRating'].count())
most_popular = popular_products.sort_values('ProductRating', ascending=False)
most_popular.head(25).plot(kind = "bar")

<a id="section-two"></a>
# [Content-based Recommender System](#headers)

In [None]:
import surprise
from surprise import KNNWithMeans
from surprise import Dataset
from surprise import accuracy
from surprise import Reader
from surprise.model_selection import train_test_split
from collections import defaultdict
from surprise.model_selection import GridSearchCV
from scipy.sparse.linalg import svds

In [None]:
df = pd.read_csv('/kaggle/input/retail-data-set/file_out2.csv')

In [None]:
df.drop("Unnamed: 0", axis = 1, inplace = True)

In [None]:
product_counts = df.groupby(['CustomerID', 'ProductID'])['Quantity'].sum().reset_index()
product_counts.rename(columns={'Quantity': 'Ratings'}, inplace=True)

ratings_df = product_counts[['CustomerID', 'ProductID', 'Ratings']]


In [None]:
scaler = MinMaxScaler(feature_range =(1,5))
ratings_df['NormalizedRatings'] = scaler.fit_transform(ratings_df[['Ratings']])

In [None]:
user_ratings = ratings_df.pivot(index="ProductID", columns="CustomerID", values="NormalizedRatings")

correlation_matrix = user_ratings.corr()

In [None]:
means_similars = {} 
for i in range(len(ratings_df['CustomerID'].unique())): 
    correlation_matrix[i] = correlation_matrix[i].drop(i)
    means_similars[i] = correlation_matrix[i].mean() 

In [None]:
new_matrix = []
i = 0
for row in correlation_matrix:
    result = [mns for mns in correlation_matrix[row] if mns > means_similars[i]]
    new_matrix.append(result)
    i += 1
    if i == 507:
        break

In [None]:
correlation_matrix =pd.DataFrame(new_matrix)

In [None]:
correlation_matrix = correlation_matrix.applymap(lambda x: 0 if pd.isna(x) or x < 0 else x)

In [None]:
sorted_ratings = ratings_df.sort_values(by='NormalizedRatings', ascending=False)

In [None]:
cleaned_values = [value for value in means_similars.values() if not np.isnan(value)]
means_similars_corrs = np.mean(cleaned_values)                                       

In [None]:
targets_dict = {}

similar_customer_ids = list(set(list(sorted_ratings['CustomerID'])))

for i in set(list(df['ProductID'])):
    target_product_id = i
    target_product_ratings = user_ratings.loc[target_product_id, similar_customer_ids]    
    targets_dict[i] = np.sum(target_product_ratings)

In [None]:
df['ProductRating'] = None

In [None]:
df = df.set_index('ProductID')

In [None]:
df['ProductRating'] = df.index.map(targets_dict)

In [None]:
df['ProductRating'] = scaler.fit_transform(df[['ProductRating']])

In [None]:
def custom_round(x):
    return round(x * 2) / 2
df['ProductRating'] = df['ProductRating'].apply(custom_round)

In [None]:
Data = df.drop(columns = ['InvoiceID' , 'Date', 'TotalSales','Discount','Quantity'], axis = 1)

In [None]:
Data = Data.reset_index()

In [None]:
# Surprise reads the frame positionally as (user id, item id, rating).
# After set_index('ProductID') / reset_index() the ProductID column comes
# first in Data, so select the columns explicitly in the required order;
# otherwise products would be treated as users and customers as items.
reader = Reader(rating_scale=(1, 5))
data1 = Dataset.load_from_df(Data[['CustomerID', 'ProductID', 'ProductRating']], reader)

In [None]:
trainset, testset = train_test_split(data1, test_size=0.3,random_state=10)

In [None]:
algo = KNNWithMeans(k=50, sim_options={'name': 'cosine', 'user_based': True})
algo.fit(trainset)

In [None]:
test_pred = algo.test(testset)

In [None]:
def get_top_n(predictions, n=5):
    """Group predictions per user and keep each user's ``n`` best estimates.

    ``predictions`` is an iterable of 5-tuples
    ``(uid, iid, true_r, est, details)``.

    Returns:
        A ``defaultdict`` mapping every uid to a list of ``(iid, est)`` pairs,
        ordered by descending ``est`` and truncated to at most ``n`` entries.
    """
    top_n = defaultdict(list)

    # Collect every (item, estimate) pair under the user it belongs to.
    for uid, iid, _true_r, est, _details in predictions:
        top_n[uid].append((iid, est))

    # Replace each user's list with its n highest-estimate entries.
    for uid in top_n:
        ranked = sorted(top_n[uid], key=lambda pair: pair[1], reverse=True)
        top_n[uid] = ranked[:n]

    return top_n

In [None]:
top_n = get_top_n(test_pred, n=5)

In [None]:
break_i = 0
for uid, user_ratings in top_n.items():
    print(uid, [iid for (iid, _) in user_ratings])
    break_i +=1
    if break_i == 5:
        break

In [None]:
pred = pd.DataFrame(test_pred)
pred[pred['uid'] == 576][['iid', 'r_ui','est']].sort_values(by = 'est',ascending = False).head(10)

In [None]:
data = data1
sim_options = {
    "name": ["msd", "cosine"],
    "min_support": [3, 4, 5],
    "user_based": [False, True],
}

param_grid = {"sim_options": sim_options}

gs = GridSearchCV(KNNWithMeans, param_grid, measures=["rmse", "mae"], cv=3)
gs.fit(data)

print(gs.best_score["rmse"])
print(gs.best_score["mae"])
print(gs.best_params["rmse"])

In [None]:
user_arr = []
for i in range(len(test_pred)):
    if test_pred[i][0] not in user_arr:
        user_arr.append(test_pred[i][0])

In [None]:
# Counters for a confusion-matrix-style summary of the test predictions.
FP = 0; TP = 0; FN =0; TN = 0

# Each prediction is read positionally: index 2 is the true rating and
# index 3 the estimate (the same tuple layout get_top_n unpacks).
# First pass: an estimate equal to the true rating counts as TP, any other
# estimate counts as FP.
for i in test_pred:
    if i[3] != i[2]:
        FP += 1
    if i[3] == i[2]:
        TP += 1
# NOTE(review): this second pass applies exactly the same equality test as the
# first one, so FN always ends up equal to FP and TN equal to TP. The metrics
# derived from these counters are therefore not a real confusion matrix;
# a relevance threshold on both values is presumably what was intended —
# TODO confirm the intended definition before relying on these numbers.
for i in test_pred:
    if i[2] != i[3]:
        FN += 1
    if i[2] == i[3]:
        TN += 1

def calculate_metrics(TP, FP, FN, TN):
    """Derive classification metrics from confusion-matrix counts.

    Args:
        TP: number of true positives.
        FP: number of false positives.
        FN: number of false negatives.
        TN: number of true negatives.

    Returns:
        The tuple ``(accuracy, precision, recall, fallout, miss_rate,
        f1_score)``. A ratio whose denominator is zero is reported as ``0.0``
        instead of raising ``ZeroDivisionError`` (e.g. when there are no
        predictions at all, or no positive ones).
    """
    def _ratio(numerator, denominator):
        # Guarded division: an undefined ratio is reported as 0.0.
        return numerator / denominator if denominator else 0.0

    accuracy = _ratio(TP + TN, TP + FP + FN + TN)
    precision = _ratio(TP, TP + FP)
    recall = _ratio(TP, TP + FN)
    fallout = _ratio(FP, FP + TN)
    miss_rate = _ratio(FN, TP + FN)
    f1_score = _ratio(2 * (precision * recall), precision + recall)

    return accuracy, precision, recall, fallout, miss_rate, f1_score


accuracy, precision, recall, fallout, miss_rate, f1_score = calculate_metrics(TP, FP, FN, TN)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("Fallout:", fallout)
print("Miss Rate:", miss_rate)
print("F1 Score:", f1_score)

<a id="section-three"></a>
# [Collaborative Filtering Recommender System - 1](#headers)

In [None]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)
from sklearn.model_selection import train_test_split
from scipy.sparse.linalg import svds
from scipy.sparse import csr_matrix

In [None]:
df = pd.read_csv('/kaggle/input/retail-data-set/file_out2.csv')

pd.DataFrame([{'products': len(df['ProductID'].value_counts()),
               'transactions': len(df['InvoiceID'].value_counts()),
               'customers': len(df['CustomerID'].value_counts()),
              }], columns = ['products', 'transactions', 'customers'], index = ['quantity'])


product_counts = df.groupby(['CustomerID', 'ProductID'])['Quantity'].sum().reset_index()
product_counts.rename(columns={'Quantity': 'Ratings'}, inplace=True)


ratings_df = product_counts[['CustomerID', 'ProductID', 'Ratings']]

In [None]:
scaler = MinMaxScaler(feature_range =(1,5))
ratings_df['NormalizedRatings'] = scaler.fit_transform(ratings_df[['Ratings']])

user_ratings = ratings_df.pivot(index="ProductID", columns="CustomerID", values="NormalizedRatings")

correlation_matrix = user_ratings.corr()

means_similars = {} 

for i in range(len(ratings_df['CustomerID'].unique())): 
    correlation_matrix[i] = correlation_matrix[i].drop(i)
    means_similars[i] = correlation_matrix[i].mean() 
    
new_matrix = []
i = 0
for row in correlation_matrix:
    result = [mns for mns in correlation_matrix[row] if mns > means_similars[i]] 
    new_matrix.append(result)
    i += 1 
    if i == 507:
        break

correlation_matrix =pd.DataFrame(new_matrix)

correlation_matrix = correlation_matrix.applymap(lambda x: 0 if pd.isna(x) or x < 0 else x)
sorted_ratings = ratings_df.sort_values(by='NormalizedRatings', ascending=False)
cleaned_values = [value for value in means_similars.values() if not np.isnan(value)]
means_similars_corrs = np.mean(cleaned_values)                                       

In [None]:
targets_dict = {}
# sorted_ratings has one row per (customer, product) pair, so a customer id
# occurs once for every product they bought. De-duplicate the ids (as the
# earlier sections of this notebook do) so each customer contributes exactly
# once to the per-product sums below.
similar_customer_ids = list(set(sorted_ratings['CustomerID']))

# For every product, add up the normalized ratings given by all customers.
for i in set(df['ProductID']):
    target_product_id = i
    target_product_ratings = user_ratings.loc[target_product_id, similar_customer_ids]
    targets_dict[i] = np.sum(target_product_ratings)

df['ProductRating'] = None
df = df.set_index('ProductID')
df['ProductRating'] = df.index.map(targets_dict)
df['ProductRating'] = scaler.fit_transform(df[['ProductRating']])

def custom_round(x):
    return round(x * 2) / 2

df['ProductRating'] = df['ProductRating'].apply(custom_round)
df.head()


In [None]:
Data = df.drop(columns = ['Unnamed: 0','InvoiceID' , 'Date', 'TotalSales','Discount','Quantity'], axis = 1)

In [None]:
Data = Data.reset_index()

In [None]:
trainset, testset = train_test_split(Data, test_size=0.3, random_state=10)

In [None]:
trainset = trainset.drop_duplicates(subset=["ProductID", "CustomerID"])

In [None]:
pivot_df = trainset.pivot(index = 'CustomerID', columns ='ProductID', values = 'ProductRating').fillna(0)
pivot_df.shape

In [None]:
pivot_df['user_index'] = np.arange(0, pivot_df.shape[0], 1)
pivot_df.set_index(['user_index'], inplace=True)

In [None]:
pivot_df_sparse = csr_matrix(pivot_df)

In [None]:
U, sigma, Vt = svds(pivot_df_sparse, k=10)
sigma = np.diag(sigma)

In [None]:
pivot_df.shape

In [None]:
all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) 

preds_df = pd.DataFrame(all_user_predicted_ratings, columns = pivot_df.columns)
preds_df.head()

In [None]:
def recommend_items(userID, pivot_df, preds_df, num_recommendations):
    """Print the best predicted items that the user has not rated yet.

    ``userID`` is treated as 1-based: row ``userID - 1`` of ``pivot_df``
    (actual ratings) and of ``preds_df`` (predicted ratings) is used. Items
    whose actual rating is 0 are ranked by predicted rating and the first
    ``num_recommendations`` of them are printed. Nothing is returned.
    """
    row_position = userID - 1

    # Actual and predicted ratings of this user, each ordered best-first.
    actual = pivot_df.iloc[row_position].sort_values(ascending=False)
    predicted = preds_df.iloc[row_position].sort_values(ascending=False)

    # Side-by-side table: one row per item, actual vs. predicted rating.
    ranking = pd.concat([actual, predicted], axis=1)
    ranking.index.name = 'Recommended Items'
    ranking.columns = ['user_ratings', 'user_predictions']

    # Only items without an actual rating are candidates.
    unrated = ranking[ranking['user_ratings'] == 0]
    unrated = unrated.sort_values('user_predictions', ascending=False)

    print(f'\nBelow are the recommended items for user(user_id = {userID}):\n')
    print(unrated.head(num_recommendations))

In [None]:
userID = 20
num_recommendations = 5
recommend_items(userID, pivot_df, preds_df, num_recommendations)

In [None]:
final_ratings_matrix = trainset.pivot(index = 'CustomerID', columns ='ProductID', values = 'ProductRating').fillna(0)

In [None]:
rmse_df = pd.concat([final_ratings_matrix.mean(), preds_df.mean()], axis=1)
rmse_df.columns = ['Avg_actual_ratings', 'Avg_predicted_ratings']
print(rmse_df.shape)
rmse_df['item_index'] = np.arange(0, rmse_df.shape[0], 1)
rmse_df.head()

In [None]:
RMSE = round((((rmse_df.Avg_actual_ratings - rmse_df.Avg_predicted_ratings) ** 2).mean() ** 0.5), 5)
print('\nRMSE SVD Model = {} \n'.format(RMSE))

In [None]:
avg_actual_ratings = final_ratings_matrix.mean()
avg_predicted_ratings = preds_df.mean()

mae_df = pd.concat([avg_actual_ratings, avg_predicted_ratings], axis=1)
mae_df.columns = ['Avg_actual_ratings', 'Avg_predicted_ratings']

mae_df['item_index'] = np.arange(0, mae_df.shape[0], 1)


MAE = round((mae_df['Avg_actual_ratings'] - mae_df['Avg_predicted_ratings']).abs().mean(), 5)

print('\nMAE SVD Model = {}\n'.format(MAE))

In [None]:
userID = 129
num_recommendations = 5
recommend_items(userID, pivot_df, preds_df, num_recommendations)

In [None]:
from scipy.sparse.linalg import svds
final_ratings_matrix = trainset.pivot(index = 'CustomerID', columns ='ProductID', values = 'ProductRating').fillna(0)
rmse_mae_simple = {}
rmse_mae_normal = {}
rmse_mae_item = {}
rmse_mae_user = {}
rmse_mae_total = {}
def rmse_mae_calculate(U,sigma,Vt,k,svd):
    """Score one SVD reconstruction and record ``[RMSE, MAE]`` under ``k``.

    The reconstruction ``U . sigma . Vt`` is labelled with the columns of the
    module-level ``pivot_df``; its per-column means are compared with the
    per-column means of the module-level ``final_ratings_matrix``. RMSE and
    MAE of those differences (rounded to 5 decimals) are written to the
    result dictionary selected by ``svd``: "simple", "normal", "miditem",
    "miduser" or "midtotal". Any other ``svd`` value stores nothing.
    """
    reconstructed = np.dot(np.dot(U, sigma), Vt)
    predicted_means = pd.DataFrame(reconstructed, columns = pivot_df.columns).mean()
    actual_means = final_ratings_matrix.mean()

    # Align actual and predicted per-item averages in one table.
    comparison = pd.concat([actual_means, predicted_means], axis=1)
    comparison.columns = ['Avg_actual_ratings', 'Avg_predicted_ratings']
    difference = comparison['Avg_actual_ratings'] - comparison['Avg_predicted_ratings']

    RMSE = round(((difference ** 2).mean() ** 0.5), 5)
    MAE = round(difference.abs().mean(), 5)

    # Route the scores to the dictionary that belongs to this SVD variant.
    stores = (
        ("simple", rmse_mae_simple),
        ("normal", rmse_mae_normal),
        ("miditem", rmse_mae_item),
        ("miduser", rmse_mae_user),
        ("midtotal", rmse_mae_total),
    )
    for label, store in stores:
        if svd == label:
            store[k] = [RMSE,MAE]
    


In [None]:
# The sparse matrix does not depend on k, so build it once.
pivot_df_sparse = csr_matrix(pivot_df)

# svds only accepts ranks strictly smaller than the smaller matrix dimension,
# so sweep k over 1 .. min(rows, columns) - 1. Bounding by the row count alone
# fails as soon as there are more rows than columns.
for k in range(1, min(pivot_df.shape)):
    # One truncated SVD per k; every variant below reuses these factors.
    U_k, singular_values, Vt_k = svds(pivot_df_sparse, k=k)
    sigma_k = np.diag(singular_values)
    U_scaled = U_k.dot(sigma_k)      # user factors pre-multiplied by sigma
    Vt_scaled = sigma_k.dot(Vt_k)    # item factors pre-multiplied by sigma

    # SimpleSVD: plain factors.
    rmse_mae_calculate(U_k, sigma_k, Vt_k, k, "simple")
    # NormalSVD: scaled user factors.
    rmse_mae_calculate(U_scaled, sigma_k, Vt_k, k, "normal")
    # MidItemSVD: scaled item factors.
    rmse_mae_calculate(U_k, sigma_k, Vt_scaled, k, "miditem")
    # MidUserSVD: scaled user factors (same inputs as NormalSVD).
    rmse_mae_calculate(U_scaled, sigma_k, Vt_k, k, "miduser")
    # MidTotalSVD: both factor matrices scaled.
    rmse_mae_calculate(U_scaled, sigma_k, Vt_scaled, k, "midtotal")


In [None]:
def plot_rmse_mae(dictionary, model_name):
    """Plot RMSE and MAE against k for one SVD variant.

    ``dictionary`` maps each k to a pair whose first element is the RMSE and
    whose second element is the MAE. ``model_name`` is used in the plot title.
    The figure is shown immediately; nothing is returned.
    """
    k_values = list(dictionary.keys())
    rmse_values = []
    mae_values = []
    for scores in dictionary.values():
        rmse_values.append(scores[0])
        mae_values.append(scores[1])

    plt.figure(figsize=(10, 5))
    # One curve per error measure, drawn in the same order as the legend.
    for curve, curve_label in ((rmse_values, 'RMSE'), (mae_values, 'MAE')):
        plt.plot(k_values, curve, label=curve_label)
    plt.xlabel('K Değeri')
    plt.ylabel('Hata Değeri')
    plt.title(f'{model_name} Modeli İçin RMSE ve MAE Değerleri')
    plt.legend()
    plt.grid()
    plt.show()

plot_rmse_mae(rmse_mae_simple, 'SimpleSVD')
plot_rmse_mae(rmse_mae_normal, 'NormalSVD')
plot_rmse_mae(rmse_mae_item, 'MidItemSVD')
plot_rmse_mae(rmse_mae_user, 'MidUserSVD')
plot_rmse_mae(rmse_mae_total, 'MidTotalSVD')

<a id="section-four"></a>
# [Collaborative Filtering Recommender System - 2](#headers)

In [None]:
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
df = pd.read_csv("/kaggle/input/retail-data-set/file_out2.csv")

In [None]:
df.describe()

In [None]:
df['Date'] = pd.to_datetime(df['Date'], format = '%Y-%m-%d')

In [None]:
sns.heatmap(df.corr(),annot=True, cmap="plasma")

In [None]:
df.drop("Unnamed: 0", axis = 1, inplace = True)

In [None]:
pd.DataFrame([{'products': len(df['ProductID'].value_counts()),
               'transactions': len(df['InvoiceID'].value_counts()),
               'customers': len(df['CustomerID'].value_counts()),
              }], columns = ['products', 'transactions', 'customers'], index = ['quantity'])

In [None]:
two_thirds_len = int(len(df) * 2 / 3)
filtered_df = df.iloc[two_thirds_len:]
df = df.iloc[:two_thirds_len]

In [None]:
recency_df = df.groupby(['CustomerID'],as_index=False)['Date'].max().copy()
recency_df.columns = ['CustomerID','LastPurchaseDate']
recency_df.head()

In [None]:
now =df['Date'].max()

In [None]:
recency_df['Recency'] = recency_df.LastPurchaseDate.apply(lambda x : (now - x).days)
recency_df.drop(columns=['LastPurchaseDate'],inplace=True)

In [None]:
frequency_df = df.copy()
frequency_df.drop_duplicates(subset=['CustomerID','InvoiceID'], keep="first", inplace=True)
frequency_df = frequency_df.groupby('CustomerID',as_index=False)['InvoiceID'].count()
frequency_df.columns = ['CustomerID','Frequency']
frequency_df.head()

In [None]:
monetary_df=df.groupby('CustomerID',as_index=False)['TotalSales'].sum().copy()
monetary_df.columns = ['CustomerID','Monetary']

In [None]:
rf = recency_df.merge(frequency_df,left_on='CustomerID',right_on='CustomerID').copy()

rfm = rf.merge(monetary_df,left_on='CustomerID',right_on='CustomerID').copy()

rfm.set_index('CustomerID',inplace=True)
rfm.head()

In [None]:
rfm_analysis = rfm.copy()

In [None]:
rfm["recency_score"] = pd.qcut(rfm["Recency"], 5, labels=[5, 4, 3, 2, 1])
rfm["frequency_score"] = pd.qcut(rfm["Frequency"].rank(method= "first"), 5, labels=[1, 2, 3, 4, 5])
rfm["monetary_score"] = pd.qcut(rfm["Monetary"], 5, labels=[1, 2, 3, 4, 5])

rfm.head()

In [None]:
rfm["RFM_SCORE"] = (rfm['recency_score'].astype(str) +
                        rfm['frequency_score'].astype(str))

In [None]:
seg_map = {
    r'[1-2][1-2]': 'hibernating',
    r'[1-2][3-4]': 'at_Risk',
    r'[1-2]5': 'cant_loose',
    r'3[1-2]': 'about_to_sleep',
    r'33': 'need_attention',
    r'[3-4][4-5]': 'loyal_customers',
    r'41': 'promising',
    r'51': 'new_customers',
    r'[4-5][2-3]': 'potential_loyalists',
    r'5[4-5]': 'champions'
}

rfm["segment"] = rfm["RFM_SCORE"].replace(seg_map, regex=True)

new_df = pd.DataFrame()

new_df["new_customer_id"] = rfm[rfm["segment"] == "new_customers"].index

new_df["new_customer_id"] = new_df["new_customer_id"].astype(int)


In [None]:
rfm.head()

In [None]:
rfm_segmentation = rfm_analysis.copy()

Nc = range(1, 20)
kmeans = [KMeans(n_clusters=i) for i in Nc]
score = [kmeans[i].fit(rfm_segmentation).score(rfm_segmentation) for i in range(len(kmeans))]

plt.figure(figsize=(10, 6))
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and
# later releases removed the old names, so use whichever one is installed
# instead of failing on an unknown style.
for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

plt.plot(Nc, score, marker='o', linestyle='-', color='b')

plt.xlabel('Küme Sayısı')
plt.ylabel('Hata Skoru')
plt.title('Dirsek Metodu Uygulaması Sonucu Oluşan Grafik')

plt.grid(True, linestyle='--', alpha=0.5)

plt.annotate('Dirsek Noktası', xy=(7, score[3]), xytext=(8, score[1]),
             arrowprops=dict(facecolor='red', arrowstyle='->'))

plt.show()

In [None]:
kmeans = KMeans(n_clusters=3, random_state=0).fit(rfm_segmentation)

rfm_segmentation['cluster'] = kmeans.labels_
rfm_segmentation[rfm_segmentation.cluster == 0].head(5)

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score

range_n_clusters = [2, 3, 4, 5, 6, 7, 8]
silhouette_scores = []

for num_clusters in range_n_clusters:
    kmeans = KMeans(n_clusters=num_clusters, max_iter=50)
    kmeans.fit(rfm_segmentation)
    cluster_labels = kmeans.labels_
    silhouette_avg = silhouette_score(rfm_segmentation, cluster_labels)
    silhouette_scores.append(silhouette_avg)
    print("For n_clusters={0}, the silhouette score is {1}".format(num_clusters, silhouette_avg))


plt.figure(figsize=(8, 6))

plt.plot(range_n_clusters, silhouette_scores, marker='o', linestyle='-', color='b')

plt.xlabel('Küme Sayısı')
plt.ylabel('Silhouette Skoru')
plt.title('Silhouette Analizi')


plt.show()

In [None]:
kmeans = KMeans(n_clusters=7, random_state=0).fit(rfm_segmentation)

rfm_segmentation['cluster'] = kmeans.labels_
rfm_segmentation[rfm_segmentation.cluster == 0].head()

In [None]:
rfm_segmentation.head()

In [None]:
sample_rfm = pd.DataFrame(columns=['Recency', 'Frequency', 'Monetary'])
sample_dict = {}
for i in range(0, 7,2):
    if i != 2:
        sample_dict[i] = rfm_segmentation[rfm_segmentation.cluster == i].mean()
sample_rfm = pd.DataFrame(sample_dict).T.rename_axis('cluster')

In [None]:
sample_rfm.drop(columns = ['cluster'])

In [None]:
import matplotlib.pyplot as plt

cluster_labels = rfm_segmentation['cluster']

cluster_counts = cluster_labels.value_counts().sort_index()


plt.bar(cluster_counts.index, cluster_counts.values)

for i, count in enumerate(cluster_counts.values):
    plt.text(i, count, str(count), ha='center', va='bottom')

plt.xlabel('Küme Etiketi')
plt.ylabel('Kişi Sayısı')
plt.title('Müşteri Segmentasyonu sonucu küme dağılımı')

plt.xticks(cluster_counts.index)
plt.tight_layout()

plt.show()


In [None]:
merged_df = pd.merge(df, rfm_segmentation, on='CustomerID', how='left', suffixes=('', '_rfm'))

In [None]:
merged_df.drop(["Frequency","Recency","Monetary"], axis = 1, inplace = True)

In [None]:
merged_df.head()

In [None]:
invoiceIDs = merged_df['InvoiceID'].unique()
customer_vectors = {}

# Walk the invoices in order of first appearance and accumulate, per customer,
# the products, cluster labels and quantities found on each invoice.
for i in invoiceIDs:
    grouped_df = merged_df[merged_df['InvoiceID'] == i][['CustomerID', 'ProductID', 'cluster','Quantity']]
    grouped_df = grouped_df.groupby('CustomerID')[['ProductID', 'cluster','Quantity']].agg(list).reset_index()

    for _, row in grouped_df.iterrows():
        # Create the customer's entry on first sight, then append to it.
        vector = customer_vectors.setdefault(
            row['CustomerID'], {'ProductID': [], 'cluster': [], 'Quantity': []})

        vector['ProductID'].extend(row['ProductID'])
        vector['cluster'].extend(row['cluster'])
        # The 'Quantity' list must receive the quantities, not the cluster
        # labels, so that the three lists stay parallel and meaningful.
        vector['Quantity'].extend(row['Quantity'])


In [None]:
dict_deneme = {}
index = 0
customer_mapping = {}
reverse_customer_mapping = {}
customer_vectors = {}
for customer_id in merged_df['CustomerID'].unique():
    customer_vectors[customer_id] = [merged_df[merged_df['CustomerID'] == customer_id]['ProductID'],
        merged_df[merged_df['CustomerID'] == customer_id]['cluster'],merged_df[merged_df['CustomerID'] == customer_id]['Quantity']]
    customer_mapping[customer_id] = index
    reverse_customer_mapping[index] = customer_id
    index += 1

def user_item_collaborative_filtering(user_id, top_n=5):
    """Recommend products bought by the customers most similar to ``user_id``.

    Every customer is represented by the sequence of ProductIDs stored in
    ``customer_vectors[customer][0]``, zero-padded to a common length, and
    customers are compared with cosine similarity. The products of the
    ``top_n`` most similar other customers are collected, de-duplicated in
    order of similarity, and the first ``top_n`` of them are returned.

    The similarity row computed for ``user_id`` is also stored in the
    module-level ``dict_deneme`` under ``user_id``.

    Args:
        user_id: a customer id present in ``customer_mapping``.
        top_n: number of neighbours to consult and maximum number of
            products to return.

    Returns:
        A list of at most ``top_n`` distinct product ids.

    Raises:
        KeyError: if ``user_id`` is not a known customer.
    """
    # Build the matrix in the order defined by reverse_customer_mapping so
    # that matrix row r always belongs to customer reverse_customer_mapping[r].
    # Iterating range(len(customer_vectors)) as if it were customer ids only
    # works when the ids happen to be 0..n-1 in order of appearance.
    ordered_ids = [reverse_customer_mapping[position]
                   for position in range(len(reverse_customer_mapping))]
    histories = [list(customer_vectors[customer][0]) for customer in ordered_ids]
    max_length = max(len(history) for history in histories)
    stacked_vectors = np.array(
        [history + [0] * (max_length - len(history)) for history in histories])

    user_row = customer_mapping[user_id]
    user_vector = stacked_vectors[user_row].reshape(1, -1)
    similarities = cosine_similarity(user_vector, stacked_vectors)
    dict_deneme[user_id] = similarities

    # Rank neighbours best-first and drop the user explicitly rather than
    # assuming the user always sorts into the first position.
    ranked_rows = similarities.argsort()[0][::-1]
    similar_indices = [row for row in ranked_rows if row != user_row][:top_n]

    recommended_products = []
    for row in similar_indices:
        similar_user_id = reverse_customer_mapping[row]
        recommended_products.extend(customer_vectors[similar_user_id][0])

    # De-duplicate while keeping similarity order, so the truncation below
    # keeps products from the closest neighbours instead of an arbitrary
    # subset (set iteration order is not meaningful).
    unique_recommended_products = list(dict.fromkeys(recommended_products))

    return unique_recommended_products[:top_n]


In [None]:
user_item_collaborative_filtering(10,5)

In [None]:
# CustomerID = 10 olan müşterinin cos similarity matrisi;

\begin{bmatrix}
0.37989451 & 0.34049415 & 0.27580451 & 0.32970506 & 0.32970506 & 1. \\
0.64203932 & 0.16648815 & 0.13584646 & 0.60246378 & 0.15830603 & 0.29051442 \\
0.17058609 & 0.59438211 & 0.45106497 & 0.59938732 & 0.21845412 & 0.35465831 \\
0.52640233 & 0.2159947 & 0.74144242 & 0.62224665 & 0.65254985 & 0.32970506 \\
0.30274867 & 0.59536583 & 0.6832603 & 0.32970506 & 0.32970506 & 0.32970506 \\
0.32970506 & 0.32970506 & 0.32970506 & 0.06460176 & 0.16495207 & 0.46397446 \\
0.32970506 & 0.40771326 & 0.35774217 & 0.56552711 & 0.93767304 & 0.03145106 \\
0.32970506 & 0.15244143 & 0.68400311 & 0.61278208 & 0.22240525 & 0.12966975 \\
\vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
0.19486555 & 0.53028717 & 0.32970506 & 0.00854643 & 0.25302326 & 0.32970506 \\
0.16775086 & 0.68323489 & 0.32970506 & 0.32970506 & 0.44932967 & 0.53175956 \\
0.32970506 & 0.83259069 & 0.32970506 & 0.67791332 & 0.32970506 & 0.08794601 \\
0.32970506 & 0.53408345 & 0.14841566 & 0.21183881 & 0.32970506 & 0.35468924 \\
0.90626507 & 0.53028717 & 0.21393862 & 0.25942842 & 0.32970506 & 0.17153925 \\
0.53032476 & 0.24661407 & 0.3680497 & 0.33339727 & 0.08077612 & 0.10862351 \\
0.32970506 & 0.32970506 & 0.53028717 & 0.32445369 & 0.32970506 & 0.53028717
\end{bmatrix}


In [None]:
recommender_products_to_users = {}
for i in merged_df['CustomerID'].unique():
    recommender_products_to_users[i] = user_item_collaborative_filtering(i,5)

In [None]:
data = {'CustomerID': [], 'RecommendedProducts': []}


for customer_id, recommended_products in recommender_products_to_users.items():
    data['CustomerID'].append(customer_id)
    data['RecommendedProducts'].append(recommended_products)

recommender_products_to_users_df = pd.DataFrame(data)

In [None]:
customer_products_df = {'CustomerID': [], 'ProductID': []}
customer_ids = merged_df['CustomerID'].unique()

for customer_id in customer_ids:
    products = merged_df.loc[merged_df['CustomerID'] == customer_id, 'ProductID'].values.tolist()
    customer_products_df['CustomerID'].append(customer_id)
    customer_products_df['ProductID'].append(products)

customer_products_df = pd.DataFrame(customer_products_df)

In [None]:
recommender_products_to_users_df = recommender_products_to_users_df.sort_values(by='CustomerID')
filtered_df_grouped = filtered_df.groupby('CustomerID')['ProductID'].apply(list).reset_index()

In [None]:
customer_products_df.head()

In [None]:
recommender_products_to_users_df.head()

In [None]:
filtered_df_grouped.head()

In [None]:
merged_products = pd.merge(filtered_df_grouped, recommender_products_to_users_df, on='CustomerID')
result_df = merged_products[['CustomerID','ProductID', 'RecommendedProducts']]

In [None]:
result_analysis_df = pd.merge(result_df, rfm_segmentation, on='CustomerID', how='left', suffixes=('', '_rfm'))

In [None]:
result_analysis_df.drop(["Frequency","Recency","Monetary"], axis = 1, inplace = True)

In [None]:
result_analysis_df.head()

In [None]:
clusters_products_popularity_dict = {}
# The candidate products are identical for every cluster, so collect them
# once instead of rebuilding the set for every single product index.
candidate_products = set(filtered_df['ProductID'])

for m in range(len(result_analysis_df.cluster.unique())):
    # Start every cluster with a zero count for each candidate product.
    products_popularity_dict = dict.fromkeys(candidate_products, 0)
    cluster_analysis = result_analysis_df[result_analysis_df['cluster']==m]
    for j in cluster_analysis['CustomerID'].unique():
        # 'ProductID' holds a list of purchased products per row; the first
        # row found for the customer is used.
        temp = list(cluster_analysis[cluster_analysis['CustomerID']==j]['ProductID'])[0]
        for k in temp:
            if k in products_popularity_dict:
                products_popularity_dict[k] += 1
    clusters_products_popularity_dict[m] = products_popularity_dict

In [None]:
sorted_clusters_products_popularity_dict = {}
for i in range(len(list(result_analysis_df.cluster.unique()))):
    sorted_clusters_products_popularity_dict[i] = sorted(clusters_products_popularity_dict[i], key=clusters_products_popularity_dict[i].get, reverse=True)

In [None]:
# segmentlerin en fazla aldığı ürünler
for i in range(len(list(result_analysis_df.cluster.unique()))):
    print(sorted_clusters_products_popularity_dict[i][:10])

In [None]:
# segmentlerin en fazla aldığı ürünlerin sayısı
for i in range(len(list(result_analysis_df.cluster.unique()))):
    most_products = sorted(list(clusters_products_popularity_dict[i].values()), reverse=True)
    print(most_products[:15])

In [None]:
products_popularity_dict = {}
for i in range(len(list(set(filtered_df['ProductID'])))):
    products_popularity_dict[list(set(filtered_df['ProductID']))[i]] = 0

In [None]:
def check_recommended_products(df):
    """Print which recommended products each customer bought, plus totals.

    Every row of ``df`` is read through its 'CustomerID', 'RecommendedProducts'
    and 'ProductID' entries. Each recommended product is looked up in the row's
    'ProductID' value with ``in``; a hit prints one line for that customer and
    product. After all rows, a summary line with the number of misses and hits
    is printed. Nothing is returned.
    """
    bought_count = 0
    not_bought_count = 0
    for _, record in df.iterrows():
        buyer = record['CustomerID']
        suggestions = record['RecommendedProducts']
        purchased = record['ProductID']
        for candidate in suggestions:
            if candidate in purchased:
                print(f"CustomerID {buyer} bought Recommended ProductID {candidate}")
                bought_count += 1
            else:
                # Misses are only counted, not printed, to keep the output short.
                not_bought_count += 1
    print(f"Tasarlanan sistemde tavsiye edilen ürünlerden, müşteriler {not_bought_count} tanesini almamış, {bought_count} tanesini almıştır.")
    

In [None]:
# Print the bought / not-bought report for the recommendation results.
check_recommended_products(result_df)

In [None]:
# Per-customer evaluation of the recommendations against actual purchases.
#
# Fixes over the previous version:
#   * `value in series` tests the Series *index*, not its values, so the
#     customer filter did not check CustomerID at all;
#   * TP/FP/FN were len() of pandas Series (row counts) instead of set sizes,
#     and `+` is not defined for sets;
#   * FP and FN were swapped (FP = recommended but not bought,
#     FN = bought but not recommended);
#   * fallout is now FP / (FP + TN) with TN taken from the product catalogue;
#   * every ratio is guarded against a zero denominator.
purchases_by_customer = {}
for customer, products in zip(filtered_df_grouped['CustomerID'],
                              filtered_df_grouped['ProductID']):
    purchases_by_customer.setdefault(customer, set()).update(products)

recommendations_by_customer = {}
for customer, products in zip(recommender_products_to_users_df['CustomerID'],
                              recommender_products_to_users_df['RecommendedProducts']):
    recommendations_by_customer.setdefault(customer, set()).update(products)

# Catalogue used for true negatives: every product that was bought or
# recommended by anyone in the two frames.
# NOTE(review): swap in the full product list if a wider catalogue is intended.
catalog = set().union(*purchases_by_customer.values(),
                      *recommendations_by_customer.values())

result_data = []
for i, customer_purchases in purchases_by_customer.items():
    if i not in recommendations_by_customer:
        continue
    customer_recommendations = recommendations_by_customer[i]

    TP = len(customer_recommendations & customer_purchases)
    FP = len(customer_recommendations - customer_purchases)
    FN = len(customer_purchases - customer_recommendations)
    TN = len(catalog) - TP - FP - FN

    precision = TP / (TP + FP) if (TP + FP) else 0.0
    recall = TP / (TP + FN) if (TP + FN) else 0.0
    fallout = FP / (FP + TN) if (FP + TN) else 0.0
    miss_rate = FN / (FN + TP) if (FN + TP) else 0.0
    f1_score = (2 * precision * recall / (precision + recall)
                if (precision + recall) else 0.0)

    result_data.append([i, precision, recall, fallout, miss_rate, f1_score])

metrics_df = pd.DataFrame(result_data, columns=['CustomerID', 'Precision', 'Recall', 'Fallout', 'Miss Rate', 'F1 Score'])

In [None]:
# Average every metric over all evaluated customers (the id column is excluded).
metrics_df.drop('CustomerID', axis=1).mean()

<a id="section-five"></a>
# [Hybrid Recommender System](#headers)

In [None]:
!pip install lifetimes
from lifetimes import BetaGeoFitter
from scipy.optimize import minimize
from lifetimes import GammaGammaFitter
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [None]:
# Join every transaction row with its customer's RFM segmentation; rows whose
# customer has no segmentation entry are dropped (inner join).
merged_df = df.merge(rfm_segmentation, how='inner', on='CustomerID')

In [None]:
# Work on an independent copy so later changes to test_df leave filtered_df untouched.
test_df = filtered_df.copy()

In [None]:
# Preview the first rows of the merged frame.
merged_df.head()

In [None]:
# Independent copy of the RFM table, used below when fitting the Gamma-Gamma model.
rfm_df = rfm.copy()

In [None]:
# Parse 'Date' into datetimes so date arithmetic works below.
merged_df['Date'] = pd.to_datetime(merged_df['Date'])

In [None]:
# 'T' = whole days between the latest date in merged_df and the row's own date.
# NOTE(review): this is computed per row; BG/NBD usually takes T as the age of
# the customer (days since first purchase) - confirm this is intended.
merged_df['T'] = (merged_df['Date'].max() - merged_df['Date']).dt.days

In [None]:
# Remove, in place, the rows whose Recency is larger than their T.
recency_exceeds_age = merged_df['Recency'] > merged_df['T']
merged_df.drop(merged_df.index[recency_exceeds_age], inplace=True)

In [None]:
# Parse 'Date' into datetimes so it can be compared with merged_df's dates.
test_df['Date'] = pd.to_datetime(test_df['Date'])

In [None]:
# Whole days between the latest date in test_df and the latest date in merged_df;
# used below as the prediction horizon.
date_difference  = (test_df['Date'].max()-merged_df['Date'].max()).days

$$ f(t | r, α, β) = \frac{\Gamma(r+t)}{\Gamma(r)\Gamma(t+1)}\frac{α^rβ^t}{(α+β)^{r+t}}
  $$

$$ P(X = x | λ, r) = \binom{x + r - 1}{x} \left(\frac{λ}{λ+r}\right)^x \left(\frac{r}{λ+r}\right)^r
 $$

In [None]:
# Fit the BG/NBD purchase-frequency model on frequency, recency and T.
# The previous call also passed Monetary as a fourth positional argument, which
# BetaGeoFitter.fit interprets as per-row observation *weights*; monetary value
# is not an input of BG/NBD, so it is no longer passed.
bgf = BetaGeoFitter(penalizer_coef=0.8)
bgf.fit(merged_df["Frequency"], merged_df["Recency"], merged_df["T"])

In [None]:
# Expected number of purchases per row over the next `gün` days.
# predict(t, frequency, recency, T): the fourth argument must be T; the
# previous call passed Monetary there.
gün = date_difference
predicted_purchases = bgf.predict(
    gün,
    merged_df["Frequency"],
    merged_df["Recency"],
    merged_df["T"],
)
# Keep the predictions aligned with merged_df's row index and carry the customer id.
predicted_df = pd.DataFrame({"PredictedPurchases": predicted_purchases}, index=merged_df.index)
predicted_df["CustomerID"] = merged_df["CustomerID"]

In [None]:
# One row per transaction row: predicted purchases, customer and product,
# ordered from the lowest to the highest prediction.
merge_predicted_df = pd.DataFrame(
    {
        'PredictedPurchases': predicted_df['PredictedPurchases'],
        'CustomerID': merged_df['CustomerID'],
        'ProductID': merged_df['ProductID'],
    }
).sort_values(by='PredictedPurchases', ascending=True)


In [None]:
# One row per customer holding the list of ProductID values from test_df.
test_df_grouped = test_df.groupby('CustomerID')['ProductID'].apply(list).reset_index()

In [None]:
# One row per customer holding the list of ProductID values from merge_predicted_df.
merge_predicted_df_grouped = merge_predicted_df.groupby('CustomerID')['ProductID'].apply(list).reset_index()

In [None]:
# Collapse repeated product ids inside each customer's list.
deduplicated_products = merge_predicted_df_grouped['ProductID'].map(
    lambda product_list: list(set(product_list))
)
merge_predicted_df_grouped['ProductID'] = deduplicated_products

In [None]:
# Spot check: products listed for customer 321 on the prediction side.
merge_predicted_df_grouped.loc[merge_predicted_df_grouped['CustomerID'] == 321]

In [None]:
# Spot check: products listed for customer 321 in the test data.
test_df_grouped.loc[test_df_grouped['CustomerID'] == 321]

In [None]:
# The six rows with the highest predicted purchases.
merge_predicted_df.sort_values(by=['PredictedPurchases']).tail(6)

In [None]:
# Print up to `break_a` purchases from the test data that the model rated with
# a predicted value below 0.01.
#
# The per-customer product lists are indexed once up front; the previous
# version recomputed `.unique()` and re-filtered both grouped frames for every
# customer and product, and evaluated the probability lookup twice per hit.
break_b = 0
break_a = 10
test_products_by_customer = dict(
    zip(test_df_grouped['CustomerID'], test_df_grouped['ProductID'])
)
predicted_products_by_customer = dict(
    zip(merge_predicted_df_grouped['CustomerID'], merge_predicted_df_grouped['ProductID'])
)
for i in merge_predicted_df_grouped['CustomerID'].unique():
    if i not in test_products_by_customer:
        continue
    predicted_products = set(predicted_products_by_customer[i])
    for j in set(test_products_by_customer[i]):
        if j not in predicted_products:
            continue
        pair_rows = merge_predicted_df[(merge_predicted_df['CustomerID']==i) & (merge_predicted_df['ProductID']==j)]
        # Several rows can exist for one (customer, product) pair; as before,
        # one of their distinct values is taken.
        probability = list(set(pair_rows['PredictedPurchases']))[0]
        if probability < 0.01:
            print(i," CustomerID'li müşteri ", j, ' ürününü ', probability,' olasılığıyla almıştır.')
            break_b += 1
            if break_b == break_a:
                break
    # The inner break only leaves the product loop; stop the customer loop too.
    if break_b == break_a:
        break

In [None]:
# Collect every (customer, product) pair that appears both in the test data
# and on the prediction side, together with its predicted value.
#
# The per-customer product lists are indexed once up front instead of
# recomputing `.unique()` and re-filtering the grouped frames in every iteration.
customer_ids = []
actual_purchases = []
probs = []

test_products_by_customer = dict(
    zip(test_df_grouped['CustomerID'], test_df_grouped['ProductID'])
)
predicted_products_by_customer = dict(
    zip(merge_predicted_df_grouped['CustomerID'], merge_predicted_df_grouped['ProductID'])
)

for i in merge_predicted_df_grouped['CustomerID'].unique():
    if i not in test_products_by_customer:
        continue
    predicted_products = set(predicted_products_by_customer[i])
    for j in set(test_products_by_customer[i]):
        if j not in predicted_products:
            continue
        pair_rows = merge_predicted_df[(merge_predicted_df['CustomerID']==i) & (merge_predicted_df['ProductID']==j)]
        # Several rows can exist for one pair; as before, one of their
        # distinct values is taken.
        predicted_purchase = list(set(pair_rows['PredictedPurchases']))[0]
        customer_ids.append(i)
        actual_purchases.append(j)
        probs.append(predicted_purchase)

In [None]:
# Build an implicit "rating" (total quantity bought) for every matched
# (customer, product) pair and scale the ratings to [0, 1].
scaler = MinMaxScaler()
product_counts = merged_df.groupby(['CustomerID', 'ProductID'])['Quantity'].sum().reset_index()
product_counts.rename(columns={'Quantity': 'Ratings'}, inplace=True)

ratings_df = product_counts[['CustomerID', 'ProductID', 'Ratings']]

# One dictionary lookup per pair. The previous code filtered the whole frame
# for every pair and called int() on a one-element Series, which recent pandas
# versions deprecate.
rating_lookup = {
    (customer, product): rating
    for customer, product, rating in zip(
        ratings_df['CustomerID'], ratings_df['ProductID'], ratings_df['Ratings']
    )
}
rats = [
    int(rating_lookup[(customer, product)])
    for customer, product in zip(customer_ids, actual_purchases)
]
rats = scaler.fit_transform(np.array(rats).reshape(-1,1)).flatten()


In [None]:
# Sanity check: the three parallel lists are expected to have the same length.
len(actual_purchases), len(probs) , len(customer_ids)

In [None]:
# Plot the matched (customer, purchased product) pairs next to product ids
# taken from the prediction frame.
data = pd.DataFrame({'CustomerID': customer_ids, 'Satın Alınan Ürünler': actual_purchases})

sns.set(style="whitegrid")
plt.figure(figsize=(12, 10)) 
# First line: the first 100 matched purchases (CustomerID on x, ProductID on y).
sns.lineplot(x='CustomerID', y='Satın Alınan Ürünler', data=data[:100], marker='o')
# Second line: the first 47 ProductID values among rows with a positive
# prediction, plotted against their list position.
# NOTE(review): the x axis of this line is a position, not a CustomerID - confirm intent.
sns.lineplot(data=list(merge_predicted_df[merge_predicted_df['PredictedPurchases']>0]['ProductID'])[:47], marker='o')
# Hand-drawn legend: a coloured dot plus a text label at fixed data coordinates.
plt.scatter(x=40, y= 1750, color='orange')
plt.text(41, 1730, 'Model tahmini', color='black', fontsize=12)
plt.scatter(x=40, y= 1700, color='blue')
plt.text(41, 1688, 'Test kümesi', color='black', fontsize=12)
plt.title('Tahmin Edilen / Alınan Ürünler')
plt.xlabel('CustomerID')
plt.ylabel('ProductID')

plt.show()

In [None]:
# Product-level confusion counts between the prediction side and the test data.
#
# Fixes over the previous version:
#   * TN was counted as "test products that are also predicted", i.e. the
#     intersection - the same quantity as TP. It is now the number of
#     catalogue products that are neither predicted nor in the test data.
#   * the `.unique()` arrays were recomputed for every element; sets are
#     built once instead.
predicted_products = set(merge_predicted_df['ProductID'].unique())
test_products = set(test_df['ProductID'].unique())
# Catalogue = every product in the full transaction frame, plus anything that
# appears on either side of the comparison.
all_products = set(df['ProductID'].unique()) | predicted_products | test_products

TP = len(predicted_products & test_products)
FP = len(predicted_products - test_products)
FN = len(test_products - predicted_products)
TN = len(all_products - predicted_products - test_products)

In [None]:
# Derive the evaluation metrics from the confusion counts and print each one.
accuracy, precision, recall, fallout, miss_rate, f1_score = calculate_metrics(TP, FP, FN, TN)

labelled_metrics = (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("Fallout:", fallout),
    ("Miss Rate:", miss_rate),
    ("F1 Score:", f1_score),
)
for metric_label, metric_value in labelled_metrics:
    print(metric_label, metric_value)


In [None]:
# Keep the rows whose CustomerID is in customer_ids and whose ProductID is in
# actual_purchases. The two conditions are checked independently, so a kept row
# need not be one of the matched (customer, product) pairs.
# NOTE(review): this rebinds filtered_df, which earlier cells also use - confirm intended.
filtered_df = merged_df[merged_df['CustomerID'].isin(customer_ids) & merged_df['ProductID'].isin(actual_purchases)]

In [None]:
# Fit the Gamma-Gamma model on the RFM table's frequency and monetary columns.
# NOTE(review): the lifetimes documentation fits this model on returning
# customers (frequency > 0) only - confirm rfm_df has been filtered accordingly.
ggf = GammaGammaFitter(penalizer_coef = 0.8)
ggf.fit(rfm_df['Frequency'], rfm_df['Monetary'])


In [None]:
# Expected average transaction value per rfm_df row, from the fitted Gamma-Gamma model.
expected_transaction_value = ggf.conditional_expected_average_profit(rfm_df["Frequency"], rfm_df["Monetary"])

# Fixed multiplier applied to every customer.
# NOTE(review): presumably a lifespan in months - confirm the unit.
customer_lifespan = 12  

# CLV here = expected transaction value times the fixed lifespan.
clv = expected_transaction_value * customer_lifespan


In [None]:
# Rescale CLV with the shared scaler (this refits it); the result is a column array.
clv_normalized = scaler.fit_transform(np.array(clv).reshape(-1,1))
# Print NumPy floats with 4 decimals and without scientific notation from here on.
np.set_printoptions(precision=4, suppress=True)

In [None]:
# The ten largest normalized CLV values, in descending order.
ascending_clv = np.sort(clv_normalized.ravel())
ascending_clv[::-1][:10]

In [None]:
# Correlation between Frequency and Monetary over the rows of merged_df.
# NOTE(review): merged_df appears to hold one row per transaction, so customers
# with many rows weigh more in this correlation - confirm intended.
correlation_matrix = merged_df[['Frequency', 'Monetary']].corr()

# Annotated heatmap with the colour scale centred on zero.
sns.heatmap(correlation_matrix, annot=True,  cmap = 'coolwarm',center=0)
plt.title('Korelasyon Isı Haritası')
plt.show()

In [None]:
# Combine customer ids, normalized CLV and scaled ratings into one frame.
# The row count follows the CLV array instead of the hard-coded 371, so the
# cell keeps working when the number of customers in the RFM table changes.
# NOTE(review): customer_ids / rats are ordered by matched purchase pair while
# clv_normalized follows rfm_df's row order, so the three columns may not refer
# to the same customer row by row - verify and join on the customer id if needed.
n_clv_rows = len(clv_normalized)
results_df = pd.DataFrame({
    "CustomerID": customer_ids[:n_clv_rows],
    "CLV": clv_normalized.flatten(),
    "S": rats[:n_clv_rows],
})

In [None]:
# Keep only the first row of each CustomerID (in place).
results_df.drop_duplicates("CustomerID",inplace=True)

In [None]:
# Show the five rows with the highest CLV.
results_df.sort_values(by="CLV",ascending = False).head()

In [None]:
# Market-basket analysis: one basket (list of product ids) per invoice.
basket = merged_df.groupby('InvoiceID')['ProductID'].apply(list).tolist()

# One-hot encode the baskets into a boolean item matrix.
te = TransactionEncoder()
te_ary = te.fit(basket).transform(basket)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

# Frequent itemsets, then association rules with confidence >= 0.5.
# NOTE(review): min_support=0.0001 is very permissive and can make apriori
# slow and memory-hungry on large catalogues.
frequent_itemsets = apriori(df_encoded, min_support=0.0001, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5)
rules_df = pd.DataFrame(rules)
# The former `np.sort(rules_df['lift'])` statement was removed: its result was
# discarded, so it had no effect.
rules_df.head()

In [None]:
# (number of rules, number of columns)
rules_df.shape

In [None]:
# Rules ordered by ascending lift, skipping the first 1000 rows of that ordering.
rules_df.sort_values(by="lift",ascending = True)[1000:]

In [None]:
# Scatter plot of the normalized CLV values: x is the row position, while
# y and the colour both encode the CLV value.
plt.figure(figsize=(10, 6))
plt.scatter(range(len(clv_normalized)), clv_normalized, c=clv_normalized ,cmap='YlGnBu', s=10)
plt.colorbar(label='CLV Değeri')
plt.title('CLV Değerleri Scatter Plot')
plt.ylabel('CLV Değeri')
# The x positions carry no meaning, so the tick labels are hidden.
plt.xticks([]) 
plt.show()

In [None]:
# Mean of the normalized CLV values. This refits the shared scaler on clv.
np.mean(scaler.fit_transform(np.array(clv).reshape(-1,1)).flatten())