<a href="https://colab.research.google.com/github/makhmudovamunira/DataScience_Mohirdev/blob/main/CustomerChurn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

![Imgur](https://i.imgur.com/5pXzCIu.png)

# Data Science va Sun'iy Intellekt Praktikum

## 5-MODUL. Machine Learning.

# Classification

### Mijozning noroziligini (customer churn) aniqlash

Biznesda eng ko'p uchraydigan muammolardan biri. Mijozning kayfiyatini aniqlash.

Agar biznes egasi mijoz qaytmasligini, mahsulot yoki xizmatdan qayta foydalanmasligini oldindan bashorat qila olsa, mijozni ushlab qolish uchun, uning fikrini o'zgartirish uchun harakat qilishi mumkin.

Ushbu amaliyotda biz online do'kon xaridorlari ma'lumotlarini tahlil qilish orqali mijozning qolish-qolmasligini bashorat qilamiz.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

from sklearn.pipeline import Pipeline
from sklearn import metrics

In [None]:
# If Excel files fail to open, install the older (1.2.0) release of the xlrd package.
# %pip install xlrd==1.2.0

In [None]:
# Remote location of the Excel workbook that holds the e-commerce dataset.
url = "https://github.com/anvarnarz/praktikum_datasets/blob/main/E-Commerce-Dataset.xlsx?raw=true"

# Read the "E Comm" sheet of the workbook into the main DataFrame.
df = pd.read_excel(io=url, sheet_name="E Comm")

# Preview the first rows.
df.head()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Read the "Data Dict" sheet of the same workbook: the header is taken from
# row index 1 and only the columns at positions 1-3 are kept.
description = pd.read_excel(
    url,
    sheet_name='Data Dict',
    header=1,
    usecols=[1, 2, 3],
)
description

## Ma'lumotlarni tahlil qilamiz

In [None]:
# Column dtypes and non-null counts; reveals which columns have missing values.
df.info()

- Qatorlar soni 5630 ta
- Ba'zi ustunlarda qiymatlar tushib qolgan

In [None]:
# Preview the first rows of the frame.
df.head()

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) of the frame's columns.
df.describe()

In [None]:
# Number of customers for each value of the Churn target.
df['Churn'].value_counts()

- 4682 mijoz qayta xarid qilgan (qolgan)
- 948 ta mijoz ketib qolgan

In [None]:
# Share of each Churn class, expressed as a percentage of all customers.
churn_rate=df['Churn'].value_counts()/len(df)*100

# Map each class value to its label instead of relying on the order in which
# value_counts() happens to return the classes (0 = stayed, 1 = left).
churn_labels={0: 'Qolgan', 1: 'Ketgan'}
slice_labels=[churn_labels.get(cls, str(cls)) for cls in churn_rate.index]

plt.figure(figsize=(5,5))
# autopct prints the percentage on each slice so the computed rates are visible.
plt.pie(churn_rate, labels=slice_labels, autopct='%1.1f%%')
plt.show()

## Ba'zi sonli ustunlarni tahlil qilib ko'ramiz

In [None]:
# One histogram per numeric column, drawn side by side.
# Each entry pairs a column name with the title shown above its panel.
hist_panels = [
    ('Tenure', 'Mijoz davomiyligi'),
    ('OrderCount', 'Mijozning buyurtmalari soni'),
    ('CashbackAmount', 'Mijozga qaytarilgan ceshback miqdori'),
]

fig, axes = plt.subplots(1, 3, figsize=(15, 5))
for panel_ax, (column, panel_title) in zip(axes, hist_panels):
    sns.histplot(x=column, data=df, ax=panel_ax)
    panel_ax.set_title(panel_title)

plt.show()

#### Kategoriyali ustunlarni tahlil qilamiz

In [None]:
# Count plots of three categorical columns, each split by the Churn value.
# Each entry pairs a column name with the title shown above its panel.
count_panels = [
    ('Gender', 'Qolgan va qaytgan mijozrlarning jinsi'),
    ('MaritalStatus', 'Qolgan va qaytgan mijozrlarning oilaviy holati'),
    ('Complain', 'Qolgan va qaytgan mijozrlarning shikoyatlari'),
]

fig, axes = plt.subplots(1, 3, figsize=(15, 5))
for panel_ax, (column, panel_title) in zip(axes, count_panels):
    sns.countplot(x=column, data=df, hue='Churn', ax=panel_ax, palette='viridis')
    panel_ax.set_title(panel_title)

plt.show()

In [None]:
# Restrict the frame to its float64 / int64 columns.
numeric_df = df.select_dtypes(include=['float64', 'int64'])

# Correlation of every numeric column with Churn, ranked by absolute strength.
churn_corr = numeric_df.corrwith(df['Churn'])
churn_corr.abs().sort_values(ascending=False)

## Ma'lumotlarga ishlov berish

In [None]:
# Report how much of the full frame is missing: total missing cells, number
# of rows with at least one missing cell, and that number as a rounded percentage.
df_na_mask = df.isna()
print(f"Mavjud bo'lmagan qiymatlar soni: {df_na_mask.sum().sum()}ta")
missing_rows = int(df_na_mask.any(axis=1).sum())
print(f"Mavjud bo'lmagan qatorlar soni: {missing_rows}ta")
print(f"Mavjud bo'lmagan qatorlar % da: {np.round(missing_rows/len(df)*100)}%")

33% juda ham ko'p. Lekin biz barcha ustunlardan foydalanamizmi?

Keling, korrelyatsiyasi yuqori bo'lgan va ba'zi kategoriyali (matnli) ustunlarni saralab olamiz (_qaysi ustunlarni saralashni siz hal qilishingiz kerak. Biz misol beryapmiz xolos_)

In [None]:
# Columns kept for modelling: the Churn target plus a handful of numeric and
# categorical features chosen as an example.
selected_columns = [
    'Churn',
    'Tenure',
    'Complain',
    'DaySinceLastOrder',
    'CashbackAmount',
    'MaritalStatus',
    'Gender',
]
data = df[selected_columns]

In [None]:
# Same missing-value report as for the full frame, now for the selected columns only.
data_na_mask = data.isna()
print(f"Mavjud bo'lmagan qiymatlar soni: {data_na_mask.sum().sum()}ta")
missing_rows = int(data_na_mask.any(axis=1).sum())
print(f"Mavjud bo'lmagan qatorlar soni: {missing_rows}ta")
print(f"Mavjud bo'lmagan qatorlar % da: {np.round(missing_rows/len(data)*100)}%")

10% qatorlar qiymati mavjud emas. Bu qiymatlar bilan qanday yo'l tutish sizga bog'liq.

Biz esa hozircha bu qatorlarni tashlab ketamiz.

In [None]:
# Discard every row that contains at least one missing value, then show the
# shape of what remains.
data = data.dropna(axis=0, how='any')
data.shape

In [None]:
# Fraction of the remaining rows that falls into each Churn class.
data['Churn'].value_counts().div(len(data))

  ## ML ga tayyorgarlik

In [None]:
# Turn the text columns into numeric indicator (dummy) columns.
encoded = pd.get_dummies(data=data)
encoded.head(n=5)

In [None]:
# Target vector: the Churn column.
y = encoded['Churn']
# Feature matrix: every other column of the encoded frame.
X = encoded.drop(columns='Churn')

In [None]:
# Standardise the features WITHOUT leaking test-set statistics into training.
# Fitting the scaler on every row would let the mean/std of the future test
# rows influence the training features. Instead, the row positions of the
# training part are recovered by running the same deterministic stratified
# split that is used for the train/test split (test_size=0.2, stratify=y,
# random_state=0); the scaler is fitted on those rows only and then applied
# to the whole matrix.
# NOTE: keep these split parameters in sync with the train/test split cell.
feature_values=np.asarray(X, dtype=float)
train_rows, _=train_test_split(np.arange(len(feature_values)), test_size=0.2, stratify=y, random_state=0)

scaler=StandardScaler()
scaler.fit(feature_values[train_rows])
X=scaler.transform(feature_values)

In [None]:
# Hold out 20% of the rows for testing; the split is stratified on y and uses
# a fixed seed so it is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=0,
)

## ML

### Logistic Regression

In [None]:
# Build and train the logistic-regression model.
LR_model=LogisticRegression()
LR_model.fit(X_train, y_train)

# Evaluate the model on the held-out test set.
y_predict=LR_model.predict(X_test)
# classification_report returns a string; it is not the last expression of the
# cell, so it has to be printed explicitly to be seen.
print(metrics.classification_report(y_test, y_predict))
print("Model aniqligi: ",metrics.accuracy_score(y_test, y_predict))

# Confusion matrix
conf_matrix=metrics.confusion_matrix(y_test, y_predict)
sns.heatmap(conf_matrix, annot=True, fmt='g')
plt.show()

# ROC curve: it needs a continuous score per sample, so use the predicted
# probability of the positive class. Hard 0/1 predictions would give a single
# operating point and an understated AUC.
y_score=LR_model.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds=metrics.roc_curve(y_test, y_score)
roc_auc=metrics.auc(fpr, tpr)
# Named roc_display so that IPython's built-in display() is not shadowed.
roc_display=metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name='ROC curve')
roc_display.plot()
plt.show()

### Support Vector Machines

In [None]:
# Build and train the support-vector classifier.
svm_model=SVC()
svm_model.fit(X_train, y_train)

# Evaluate the model on the held-out test set.
y_predict=svm_model.predict(X_test)
# classification_report returns a string; it is not the last expression of the
# cell, so it has to be printed explicitly to be seen.
print(metrics.classification_report(y_test, y_predict))
print('Model aniqligi: ', metrics.accuracy_score(y_test, y_predict))

# Confusion matrix
conf_mat=metrics.confusion_matrix(y_test, y_predict)
sns.heatmap(conf_mat, annot=True, fmt='g')
plt.show()

# ROC curve: SVC() is created without probability estimates, so the signed
# distance to the decision boundary is used as the continuous score. Hard 0/1
# predictions would give a single operating point and an understated AUC.
y_score=svm_model.decision_function(X_test)
fpr, tpr, thresholds=metrics.roc_curve(y_test, y_score)
roc_auc=metrics.auc(fpr, tpr)
# Named roc_display so that IPython's built-in display() is not shadowed.
roc_display=metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name='Roc curve')
roc_display.plot()
plt.show()

### Decision Tree

In [None]:
# Build and train the decision tree. A fixed random_state makes the fitted
# tree (and therefore every metric below) reproducible across runs.
tree_model=DecisionTreeClassifier(random_state=0)
tree_model.fit(X_train, y_train)

# Evaluate the model on the held-out test set.
y_predict=tree_model.predict(X_test)
# classification_report returns a string; it is not the last expression of the
# cell, so it has to be printed explicitly to be seen.
print(metrics.classification_report(y_test, y_predict))
print('Model aniqligi: ', metrics.accuracy_score(y_test, y_predict))

# Confusion matrix
conf_mat=metrics.confusion_matrix(y_test, y_predict)
sns.heatmap(conf_mat, annot=True, fmt='g')
plt.show()

# ROC curve: built from the predicted probability of the positive class
# rather than from hard 0/1 predictions.
y_score=tree_model.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds=metrics.roc_curve(y_test, y_score)
roc_auc=metrics.auc(fpr, tpr)
# Named roc_display so that IPython's built-in display() is not shadowed.
roc_display=metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name='ROC curve')
roc_display.plot()
plt.show()

In [None]:
# Feature column names, in the same order as the columns of the matrix the
# tree was trained on.
cols=encoded.drop('Churn', axis=1).columns

plt.figure(figsize=(30,20))
# Pass a plain list: some scikit-learn releases reject a pandas Index for
# feature_names with a parameter-validation error.
plot_tree(tree_model, feature_names=list(cols), filled=True)
plt.show()

### Random Forest

In [None]:
# Build and train a random forest of 9 trees. A fixed random_state makes the
# bootstrap samples and feature choices (and every metric below) reproducible.
rf_model=RandomForestClassifier(n_estimators=9, random_state=0)
rf_model.fit(X_train, y_train)

# Evaluate the model on the held-out test set.
y_predict=rf_model.predict(X_test)
# classification_report returns a string; it is not the last expression of the
# cell, so it has to be printed explicitly to be seen.
print(metrics.classification_report(y_test, y_predict))
print('Model aniqligi', metrics.accuracy_score(y_test, y_predict))

# Confusion matrix
conf_mat=metrics.confusion_matrix(y_test, y_predict)
sns.heatmap(conf_mat, annot=True, fmt='g')
plt.show()

# ROC curve: built from the predicted probability of the positive class
# rather than from hard 0/1 predictions.
y_score=rf_model.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds=metrics.roc_curve(y_test, y_score)
roc_auc=metrics.auc(fpr, tpr)
# Named roc_display so that IPython's built-in display() is not shadowed.
roc_display=metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name='ROC curve')
roc_display.plot()
plt.show()

### XGBoost

In [None]:
# Build and train the gradient-boosted tree model.
xgb_model=XGBClassifier()
xgb_model.fit(X_train, y_train)

# Evaluate the model on the held-out test set.
y_predict=xgb_model.predict(X_test)
# classification_report returns a string; it is not the last expression of the
# cell, so it has to be printed explicitly to be seen.
print(metrics.classification_report(y_test, y_predict))
print("Model aniqligi: ", metrics.accuracy_score(y_test, y_predict))

# Confusion matrix
conf_mat=metrics.confusion_matrix(y_test, y_predict)
sns.heatmap(conf_mat, annot=True, fmt='g')
plt.show()

# ROC curve: built from the predicted probability of the positive class
# rather than from hard 0/1 predictions.
y_score=xgb_model.predict_proba(X_test)[:, 1]
fpr,tpr, thresholds=metrics.roc_curve(y_test, y_score)
roc_auc=metrics.auc(fpr, tpr)
# Named roc_display so that IPython's built-in display() is not shadowed.
roc_display=metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name='ROC curve')
roc_display.plot()
plt.show()