## **Development Propensity Engine**

### **Import Libraries**

In [2]:
#Data Manipulation
import os
import pandas as pd
import numpy as np
from zipfile import ZipFile

#Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt

#Data Modelling
import pickle
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier

#Modelling Evaluation
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

### **Data Path**

In [3]:
# Directory holding the input archive, resolved relative to the current working
# directory. os.path.join picks the separator for the running platform, and the
# trailing empty component keeps a trailing separator so that later cells can
# still append a file name with plain string concatenation (path + "data.zip").
path = os.path.join(os.getcwd(), "data", "")
path

'C:\\Users\\10698\\Documents\\Jupyter\\Digital Skola\\Model Deployment I\\data\\'

### **Load Data**

In [4]:
# Read both CSV extracts straight out of the zip archive. The context managers
# close the archive and each member stream as soon as the frames are loaded, so
# no file handles stay open for the rest of the kernel session.
# `zf` remains bound afterwards, but refers to a closed archive.
with ZipFile(path + "data.zip") as zf:
    with zf.open("user_profile.csv") as profile_file:
        dfProfile = pd.read_csv(profile_file)
    with zf.open("user_transaction.csv") as transaction_file:
        dfTrasaction = pd.read_csv(transaction_file)

In [5]:
# Preview the first profile row to check which columns were loaded.
dfProfile.head(n=1)

Unnamed: 0,user_id,user_gender,user_age,user_salary
0,15624510,Male,19,19000


In [15]:
# Parse the transaction timestamps, then keep only January/February activity.
# NOTE: the filter checks the month number only, so those months from any year
# present in the data pass through.
dfTrasaction['user_transaction'] = pd.to_datetime(dfTrasaction['user_transaction'])
in_window = dfTrasaction['user_transaction'].dt.month.isin([1, 2])

# .copy() makes the filtered frame independent of the frame it was sliced from.
# Without it, re-running this cell assigns a column on a slice and pandas emits
# SettingWithCopyWarning.
dfTrasaction = dfTrasaction[in_window].copy()
dfTrasaction.head(1)

Unnamed: 0,user_id,user_transaction
0,15792818,2022-01-20


### **Labelling Data**

In [24]:
# Label convention for user_status:
#   0 -> user stays
#   1 -> user churns

# One row per user with the earliest and latest transaction timestamp.
dfTrasactionGroup = (
    dfTrasaction
    .groupby('user_id')
    .agg(
        min_transaction=('user_transaction', 'min'),
        max_transaction=('user_transaction', 'max'),
    )
    .reset_index()
)

# Span between a user's first and last transaction.
dfTrasactionGroup['diff_transaction'] = (
    dfTrasactionGroup['max_transaction'] - dfTrasactionGroup['min_transaction']
)

# A zero-length span means the first and last transaction coincide; those users
# are labelled as churned (1), everyone else as staying (0). The labels are
# stored as floats (1.0 / 0.0).
has_zero_span = dfTrasactionGroup['diff_transaction'] == pd.Timedelta(0)
dfTrasactionGroup['user_status'] = np.where(has_zero_span, 1.0, 0.0)

dfTrasactionGroup.head(1)

Unnamed: 0,user_id,min_transaction,max_transaction,diff_transaction,user_status
0,15566689,2022-01-02,2022-01-02,0 days,1.0


### **Data Preparation**

In [28]:
# Join each profile to its per-user label. The inner join keeps only the
# user_ids that appear in both frames.
df = pd.merge(dfProfile, dfTrasactionGroup, on='user_id', how='inner')

# Encode gender as a number. Any value other than 'Male' / 'Female' becomes NaN.
gender_codes = {'Male':1, 'Female':0}
df['user_gender'] = df['user_gender'].map(gender_codes)

df.head(3)

Unnamed: 0,user_id,user_gender,user_age,user_salary,min_transaction,max_transaction,diff_transaction,user_status
0,15624510,1,19,19000,2022-01-10,2022-01-10,0 days,1.0
1,15810944,1,35,20000,2022-01-26,2022-01-26,0 days,1.0
2,15668575,0,26,43000,2022-01-03,2022-01-03,0 days,1.0


### **Data Modelling**

In [29]:
# Features are the three profile attributes; the target is the churn label
# (1 = churn, 0 = stay).
feature_columns = ['user_gender', 'user_age', 'user_salary']
X = df[feature_columns]
y = df['user_status']

# Hold out 20% of the rows for evaluation; the split is seeded for repeatability.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Seed the tree as well: scikit-learn permutes the candidate features at every
# split, so an unseeded DecisionTreeClassifier can grow a different tree (and
# give a different report and exported model) on each run.
model = DecisionTreeClassifier(random_state=0)
model.fit(x_train, y_train)
y_predict = model.predict(x_test)

### **Model Evaluation**

In [31]:
# Per-class precision / recall / F1 on the held-out rows. The report is a
# multi-line string, so it is printed rather than shown as a bare expression.
evaluation_report = classification_report(y_test, y_predict)
print(evaluation_report)

              precision    recall  f1-score   support

         0.0       0.79      0.86      0.83        22
         1.0       0.95      0.91      0.93        58

    accuracy                           0.90        80
   macro avg       0.87      0.89      0.88        80
weighted avg       0.90      0.90      0.90        80



### **Model Export**

In [33]:
# Persist the fitted classifier as a binary pickle in the current working
# directory. The context manager closes the file once the dump completes.
with open('modelDecisionTreeClassifier.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)