<a href="https://colab.research.google.com/github/saregul/my-github-projects/blob/main/RFM_Musteri_Segmentasyonu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
##########################################
#RFM ile Müşteri Segmentasyonu (Customer-Segmentation with RFM)
##########################################


#1. İş Problemi(Business Problem)
#2. Veriyi Anlama(Data Understanding)
#3. Veri Hazırlama(Data Preparation)
#4. RFM Metriklerinin Hesaplanması(Calculating RFM Metrics)
#5. RFM Skorlarının Hesaplanması (Calculating RFM Scores)
#6. RFM Segmentlerinin Oluşturulması ve Analiz Edilmesi(Creating & Analysing RFM Segments)
#7. Tüm Sürecin Fonksiyonlaştırılması


#Bir e-ticaret şirketi müşterilerini segmentlere ayırıp bu segmentlere göre
#pazarlama stratejileri belirlemek istiyor.

#Veri İngiltere merkezli online bir satış mağazasının
# 01/12/2009 - 09/12/2011 tarihleri arasındaki satışlarını içeriyor.

#Değişkenler

#Invoice: Fatura numarası. Her işleme, yani faturaya ait eşsiz numara. C ile başlıyorsa iptal edilen işlem.
#StockCode: Ürün kodu. Her bir ürün için eşsiz numara.
#Description: Ürün ismi
#Quantity: Ürün adedi. Faturalardaki ürünlerden kaçar tane satıldığını ifade etmektedir.
#InvoiceDate: Fatura tarihi ve zamanı
#Price: Ürün fiyatı (Sterlin cinsinden)
#Customer ID: Eşsiz müşteri numarası
#Country: Ülke ismi. Müşterinin yaşadığı ülke



#################################################################
#2.VERİYİ ANLAMA
#################################################################

import datetime as dt
import pandas as pd
# Display settings: never truncate rows or columns, print floats with 5 decimals.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda value: '%.5f' % value)

# Load the workbook once and keep the raw frame (df_) untouched; all further
# work happens on the copy (df).
df_ = pd.read_excel("/online_retail_II.xlsx")
df = df_.copy()

# Quick look at the data: first rows, dimensions, missing values per column.
df.head()
df.shape
df.isna().sum()

# How many distinct product descriptions are there?
df["Description"].nunique()

# Most frequent product descriptions.
df["Description"].value_counts().head()

# Total quantity per product, then the products with the highest totals.
quantity_per_product = df.groupby("Description").agg({"Quantity": "sum"})
quantity_per_product.head()
quantity_per_product.sort_values("Quantity", ascending=False).head()

# Number of distinct invoices.
df["Invoice"].nunique()

# Value of each invoice line: quantity multiplied by price.
df["TotalPrice"] = df["Quantity"].mul(df["Price"])

# Total value per invoice.
df.groupby("Invoice").agg({"TotalPrice": "sum"}).head()

#######################################################
# 3.Veri Hazırlama
#######################################################
# Inspect size and missing values before cleaning.
df.shape
df.isnull().sum()

# Drop every row that has at least one missing value.
df.dropna(inplace=True)

df.describe().T

# Remove cancelled transactions (invoice codes containing "C"). The filtered
# frame must be assigned back to df; otherwise the cancellations remain in the
# data that the RFM metrics are computed from. astype(str) keeps the .str
# accessor usable even if the column holds plain integers.
df = df[~df["Invoice"].astype(str).str.contains("C", na=False)]



#############################################################
# 4. RFM Metriklerinin Hesaplanması (Calculating RFM Metrics)
#############################################################

# Recency, Frequency, Monetary
df.head()

df["InvoiceDate"].max()

# Reference date against which recency is measured.
today_date = dt.datetime(2010, 12, 11)
type(today_date)

# One row per customer:
#   InvoiceDate -> whole days between today_date and the latest invoice
#   Invoice     -> number of distinct invoices
#   TotalPrice  -> sum of TotalPrice over all of the customer's rows
rfm_aggregations = {
    'InvoiceDate': lambda dates: (today_date - dates.max()).days,
    'Invoice': lambda invoices: invoices.nunique(),
    'TotalPrice': lambda prices: prices.sum(),
}
rfm = df.groupby('Customer ID').agg(rfm_aggregations)

rfm.head()

# Rename the aggregated columns to the metric names.
rfm.columns = ['recency', 'frequency', 'monetary']

rfm.describe().T

# Keep only customers whose monetary total is positive.
rfm = rfm[rfm["monetary"].gt(0)]

rfm.shape



#################################################################
#5. RFM Skorlarının Hesaplanması
#################################################################

# Quintile scores. The smallest recency values receive the highest score (5),
# so the recency labels run in descending order.
rfm["recency_score"] = pd.qcut(rfm['recency'], q=5, labels=[5, 4, 3, 2, 1])

# Quintile boundaries in percent: 0-20, 20-40, 40-60, 60-80, 80-100

rfm["monetary_score"] = pd.qcut(rfm['monetary'], q=5, labels=[1, 2, 3, 4, 5])

# Frequency is ranked with method="first" before binning, so tied values get
# distinct ranks (in order of appearance) and the quantile edges are unique.
frequency_rank = rfm['frequency'].rank(method="first")
rfm["frequency_score"] = pd.qcut(frequency_rank, q=5, labels=[1, 2, 3, 4, 5])

# Two-character score: recency score followed by frequency score.
recency_digit = rfm['recency_score'].astype(str)
frequency_digit = rfm['frequency_score'].astype(str)
rfm["RFM_SCORE"] = recency_digit + frequency_digit


rfm.describe().T

# Customers with the top score on both dimensions.
rfm.loc[rfm["RFM_SCORE"].eq("55")]

# Customers with the bottom score on both dimensions.
rfm.loc[rfm["RFM_SCORE"].eq("11")]




###############################################################
# 6. RFM Segmentlerinin Oluşturulması ve Analiz Edilmesi
###############################################################

# Regex patterns over the two-character RFM_SCORE (first character: recency
# score, second character: frequency score), each mapped to a segment name.
seg_map = {
    r'[1-2][1-2]': 'hibernating',
    r'[1-2][3-4]': 'at_Risk',
    r'[1-2]5': 'cant_loose',
    r'3[1-2]': 'about_to_sleep',
    r'33': 'need_attention',
    r'[3-4][4-5]': 'loyal_customers',
    r'41': 'promising',
    r'51': 'new_customers',
    r'[4-5][2-3]': 'potential_loyalists',
    r'5[4-5]': 'champions'
}

# Translate every score into its segment name via the regex mapping.
rfm['segment'] = rfm['RFM_SCORE'].replace(seg_map, regex=True)

# Mean and customer count of each metric per segment.
rfm.groupby("segment")[["recency", "frequency", "monetary"]].agg(["mean", "count"])

# Rows and customer ids of the "cant_loose" segment.
cant_loose_rows = rfm[rfm["segment"] == "cant_loose"]
cant_loose_rows.head()
cant_loose_rows.index

# Collect the ids of the "new_customers" segment as integers.
is_new_customer = rfm["segment"] == "new_customers"
new_df = pd.DataFrame({"new_customer_id": rfm.loc[is_new_customer].index})
new_df["new_customer_id"] = new_df["new_customer_id"].astype(int)

# Export the new-customer ids and the full RFM table.
new_df.to_csv("new_customers.csv")

rfm.to_csv("rfm.csv")


###################################################################
#7.Tüm Sürecin Fonksiyonlaştırılması
###################################################################










