# Öneri Sistemi Deneme Çalışmaları

## Verileri Yükleme Aşamaları

- Veriyi yükleyip yalnızca satış (SATIS) statüsüne sahip kayıtları filtreliyor ve sayıyoruz


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [None]:
# Load the stock-movement export into a DataFrame and preview its first rows.
STOCK_MOVEMENTS_XML = '/content/sample_data/stok_hareketleriii.xml'
df = pd.read_xml(STOCK_MOVEMENTS_XML)
df.head(5)

Unnamed: 0,STATUS,FYEAR,FMON,EVRAK_TARIHI,REC_ID,CARI_KOD,URUN_KODU,URUN_GRUBU,MIKTAR
0,SATIS,2016,12,2016-12-09,554068,19279,2300610,SEKERLEME,24.0
1,SATIS,2016,12,2016-12-09,554068,19279,5800012,LOKUM,12.0
2,SATIS,2016,12,2016-12-09,554068,19279,5800013,LOKUM,12.0
3,SATIS,2016,12,2016-12-09,554068,19279,6500045,KURUYEMIS,12.0
4,SATIS,2016,12,2016-12-09,554068,19279,7203003,UNLU MAMULLER,18.0


In [None]:
# Keep only the rows whose STATUS is 'SATIS' (sales).
# The explicit .copy() makes `df` an independent frame, so column assignments
# in later cells do not write into a slice of the unfiltered data (which
# triggers pandas' SettingWithCopyWarning).
df = df.loc[df['STATUS'] == 'SATIS'].copy()

# (rows, columns) of the filtered frame.
df.shape

(117177, 9)

- CARI_KOD Özeti

In [None]:
# Convert the product codes to a numeric dtype. assign() returns a new frame
# instead of writing a column into the filtered frame, which avoids the
# chained-assignment (SettingWithCopyWarning) problem and keeps the column
# order unchanged.
df = df.assign(URUN_KODU=pd.to_numeric(df['URUN_KODU']))

# Summary statistics of the customer-code column.
df['CARI_KOD'].describe()

count     117177
unique      1683
top        10079
freq        8349
Name: CARI_KOD, dtype: object

## Customer-Product Matrisi Oluşturma

- Customer-Item Matrisi oluşturuyoruz

In [None]:
# Customer x product table: one row per CARI_KOD, one column per URUN_KODU,
# each cell holding the summed MIKTAR for that customer/product pair.
customer_item_matrix = pd.pivot_table(
    df,
    index='CARI_KOD',
    columns='URUN_KODU',
    values='MIKTAR',
    aggfunc='sum',
)

- İkili kodlama (One Hot Encoding): satın alma miktarı sıfırın üzerinde olan her hücreyi 1'e, diğer tüm hücreleri 0'a dönüştürüyoruz

In [None]:
# Binarize the matrix: 1 where the summed quantity is above zero, 0 otherwise.
# This is the vectorized equivalent of the element-wise lambda
# `1 if x > 0 else 0`: missing cells compare as False and therefore become 0.
# It replaces DataFrame.applymap, which is deprecated since pandas 2.1 and
# calls a Python function once per cell.
customer_item_matrix = customer_item_matrix.gt(0).astype(int)


In [None]:
# Preview the first five customers of the binarized matrix.
customer_item_matrix.head(n=5)

URUN_KODU,100101,100102,100103,100201,100202,100203,100204,100205,100206,100207,...,9990223,9990224,9990248,9990253,9990259,9990261,9999015,9999986,9999987,9999996
CARI_KOD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10004,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10013,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10019,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10021,1,1,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
10030,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Sklearn modülü kullanılarak Customer Bazlı Collaborative Filtering'in uygulanması

In [None]:
from sklearn.metrics.pairwise import cosine_similarity


In [None]:
# Pairwise cosine similarity between the rows (customers) of the matrix.
# The resulting frame is still labelled positionally (0..n-1) at this point.
customer_similarities = cosine_similarity(customer_item_matrix)
user_to_user_sim_matrix = pd.DataFrame(customer_similarities)
user_to_user_sim_matrix.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
0,1.0,0.0,0.0,0.058543,0.074338,0.09513,0.070829,0.140112,0.0,0.042796,...,0.051801,0.120687,0.186816,0.037229,0.034669,0.0,0.0,0.027196,0.110324,0.0
1,0.0,1.0,0.52656,0.322429,0.269047,0.199595,0.204335,0.220479,0.072739,0.404061,...,0.339641,0.154303,0.146986,0.307562,0.340965,0.114614,0.145479,0.246077,0.052081,0.0
2,0.0,0.52656,1.0,0.285942,0.258701,0.217803,0.172976,0.256632,0.127,0.352738,...,0.316267,0.193421,0.195529,0.255711,0.28575,0.266815,0.084667,0.236614,0.121243,0.0
3,0.058543,0.322429,0.285942,1.0,0.407341,0.345909,0.263534,0.314822,0.351799,0.217135,...,0.315389,0.298511,0.243733,0.264445,0.378184,0.221729,0.187626,0.351866,0.190316,0.044499
4,0.074338,0.269047,0.258701,0.407341,1.0,0.42902,0.273793,0.355916,0.357371,0.181974,...,0.280337,0.303239,0.278543,0.3238,0.321634,0.197085,0.12508,0.446799,0.281466,0.033903


- User to User matrisini ayarlamak için tablo düzenlemeleri

In [None]:
# Label both axes of the similarity matrix with the customer codes, taken in
# the same order as the rows of customer_item_matrix.
customer_codes = customer_item_matrix.index
user_to_user_sim_matrix = (
    user_to_user_sim_matrix
    .set_axis(customer_codes, axis=1)
    .set_axis(customer_codes, axis=0)
)
user_to_user_sim_matrix.head(5)

CARI_KOD,10004,10013,10019,10021,10030,10031,10038,10043,10045,10052,...,EFE15013,EFE15014,EFE15015,EFE15024,EFE15033,EFE15046,EFE15048,EFE15051,EFE15053,Z1033
CARI_KOD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10004,1.0,0.0,0.0,0.058543,0.074338,0.09513,0.070829,0.140112,0.0,0.042796,...,0.051801,0.120687,0.186816,0.037229,0.034669,0.0,0.0,0.027196,0.110324,0.0
10013,0.0,1.0,0.52656,0.322429,0.269047,0.199595,0.204335,0.220479,0.072739,0.404061,...,0.339641,0.154303,0.146986,0.307562,0.340965,0.114614,0.145479,0.246077,0.052081,0.0
10019,0.0,0.52656,1.0,0.285942,0.258701,0.217803,0.172976,0.256632,0.127,0.352738,...,0.316267,0.193421,0.195529,0.255711,0.28575,0.266815,0.084667,0.236614,0.121243,0.0
10021,0.058543,0.322429,0.285942,1.0,0.407341,0.345909,0.263534,0.314822,0.351799,0.217135,...,0.315389,0.298511,0.243733,0.264445,0.378184,0.221729,0.187626,0.351866,0.190316,0.044499
10030,0.074338,0.269047,0.258701,0.407341,1.0,0.42902,0.273793,0.355916,0.357371,0.181974,...,0.280337,0.303239,0.278543,0.3238,0.321634,0.197085,0.12508,0.446799,0.281466,0.033903


- Örnek User to User matris hesaplamalarının sonucu

In [None]:
# Similarity of every customer to customer "19279", most similar first.
# The customer code is passed as a string because the index holds strings.
user_to_user_sim_matrix.loc["19279", :].sort_values(ascending=False)

CARI_KOD
19279    1.000000
19284    0.455420
19268    0.415285
16702    0.414665
19282    0.400031
           ...   
19390    0.000000
19409    0.000000
20358    0.000000
20372    0.000000
Z1033    0.000000
Name: 19279, Length: 1683, dtype: float64

# Customer Bazlı Collaborative Filtering
## A kullanıcısı tarafından satın alınmış ürünler 
> Veri setindeki cari kodların hepsi integer değil, string olarak tutuluyor (örneğin **CEL14009**); bu yüzden aşağıda cari kod string olarak veriliyor

In [None]:
# Product codes with a non-zero entry in customer A's ('19279') row.
items_bought_by_A = set(
    customer_item_matrix
    .loc['19279']
    .loc[lambda purchases: purchases != 0]
    .index
)
items_bought_by_A

{100303,
 100304,
 102015,
 104014,
 104037,
 104274,
 104276,
 104277,
 104290,
 104299,
 104302,
 104312,
 118251,
 118252,
 118261,
 121081,
 121082,
 144046,
 144966,
 148025,
 148080,
 400311,
 400425,
 400426,
 400427,
 400514,
 400613,
 400703,
 400705,
 412010,
 412080,
 600625,
 600626,
 800190,
 1100106,
 1100107,
 1100108,
 2300139,
 2300469,
 2300470,
 2300580,
 2300608,
 2300610,
 2300685,
 2300702,
 2300714,
 2300718,
 2300720,
 2300750,
 2300755,
 2300782,
 2300784,
 2300795,
 2300819,
 2600203,
 2600230,
 2600303,
 4200040,
 4200041,
 4200043,
 4200044,
 4200047,
 4200110,
 4200111,
 4500307,
 4500601,
 5230050,
 5800012,
 5800013,
 6500045,
 7203003,
 7203005,
 7204020,
 7204021,
 7204038,
 7500101,
 8300338,
 8300339,
 8300341,
 8300456,
 9990221}

## B kullanıcısı tarafından satın alınmış ürünler


In [None]:
# Product codes with a non-zero entry in customer B's ('19284') row.
items_bought_by_B = set(
    customer_item_matrix
    .loc['19284']
    .loc[lambda purchases: purchases != 0]
    .index
)
items_bought_by_B

{100102,
 100303,
 100304,
 102015,
 104276,
 104290,
 104296,
 104299,
 104304,
 104306,
 104335,
 104340,
 104345,
 104405,
 104410,
 114057,
 114060,
 114070,
 114090,
 114255,
 116480,
 118250,
 118251,
 118759,
 144090,
 144958,
 144966,
 146109,
 148025,
 148030,
 400102,
 400103,
 400215,
 400225,
 400311,
 400425,
 400426,
 400427,
 400514,
 400601,
 400613,
 400703,
 400705,
 500206,
 500605,
 600101,
 600104,
 600404,
 600409,
 600620,
 600625,
 600626,
 800406,
 1100106,
 1100107,
 1100108,
 1100116,
 1400010,
 1700101,
 2300136,
 2300138,
 2300139,
 2300465,
 2300466,
 2300467,
 2300468,
 2300469,
 2300470,
 2300617,
 2300673,
 2300675,
 2300680,
 2300685,
 2300700,
 2300706,
 2300718,
 2300720,
 2300750,
 2300752,
 2300755,
 2300757,
 2300770,
 2300782,
 2300822,
 4200040,
 4200041,
 4200042,
 4200043,
 4200044,
 4200047,
 4200110,
 4200111,
 5200101,
 5200106,
 5200230,
 5230050,
 5800010,
 5800011,
 5800012,
 5800014,
 7102109,
 7203001,
 7203003,
 7203007,
 9990221}

## B kullanıcısına önerilebilecek ürünler

In [None]:
# Candidates for B: everything A has bought that B has not.
items_to_recommend_User_B = items_bought_by_A.difference(items_bought_by_B)
items_to_recommend_User_B

{104014,
 104037,
 104274,
 104277,
 104302,
 104312,
 118252,
 118261,
 121081,
 121082,
 144046,
 148080,
 412010,
 412080,
 800190,
 2300580,
 2300608,
 2300610,
 2300702,
 2300714,
 2300784,
 2300795,
 2300819,
 2600203,
 2600230,
 2600303,
 4500307,
 4500601,
 5800013,
 6500045,
 7203005,
 7204020,
 7204021,
 7204038,
 7500101,
 8300338,
 8300339,
 8300341,
 8300456}

In [None]:
# Look up the product group of each recommended product code.
(
    df[['URUN_KODU', 'URUN_GRUBU']]
    .loc[df['URUN_KODU'].isin(items_to_recommend_User_B)]
    .drop_duplicates()
    .set_index('URUN_KODU')
)

Unnamed: 0_level_0,URUN_GRUBU
URUN_KODU,Unnamed: 1_level_1
2300610,SEKERLEME
5800013,LOKUM
6500045,KURUYEMIS
144046,LOKUM
148080,KURUYEMIS
104014,KURUYEMIS
7203005,UNLU MAMULLER
4500601,TATLI
104037,KURUYEMIS
118252,HELVA


# Ürün Bazlı Collaborative Filtering

In [None]:
# Pairwise cosine similarity between products: the customer x product matrix
# is transposed so that each row passed to cosine_similarity is one product.
# Both axes of the result are labelled with the product codes.
item_item_sim_matrix = pd.DataFrame(
    cosine_similarity(customer_item_matrix.T),
    index=customer_item_matrix.columns,
    columns=customer_item_matrix.columns,
)
item_item_sim_matrix.head(5)

URUN_KODU,100101,100102,100103,100201,100202,100203,100204,100205,100206,100207,...,9990223,9990224,9990248,9990253,9990259,9990261,9999015,9999986,9999987,9999996
URUN_KODU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
100101,1.0,0.540119,0.645363,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.240585,0.131024,0.0,0.0,0.0,0.0,0.046324,0.0,0.0,0.0
100102,0.540119,1.0,0.619662,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.17196,0.163752,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100103,0.645363,0.619662,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.208966,0.138324,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100201,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.707107,0.57735,0.57735,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100202,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.707107,0.57735,0.57735,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


> **100101 id'li ürüne en çok benzeyen 10 ürün (ürünün kendisi dahil)**



In [None]:
# The ten product codes with the highest similarity to product 100101.
# The product's own entry is part of the ranked row, so it can appear in the
# result.
top_10_similar_items = (
    item_item_sim_matrix
    .loc[100101]
    .sort_values(ascending=False)
    .head(10)
    .index
    .tolist()
)
top_10_similar_items

[100101,
 100103,
 9990221,
 100102,
 1400112,
 1100108,
 600203,
 1100106,
 800101,
 600202]

In [None]:
# Look up the product group of each of the most similar product codes.
(
    df[['URUN_KODU', 'URUN_GRUBU']]
    .loc[df['URUN_KODU'].isin(top_10_similar_items)]
    .drop_duplicates()
    .set_index('URUN_KODU')
)

Unnamed: 0_level_0,URUN_GRUBU
URUN_KODU,Unnamed: 1_level_1
9990221,
800101,HARCLAR
1100106,PISMANIYE
1100108,PISMANIYE
1400112,HARCLAR
100101,ICECEK
600203,UNLU MAMULLER
600202,UNLU MAMULLER
100102,ICECEK
100103,ICECEK
