In [2]:
# Importing Libraries
import numpy as np
import pandas as pd
import sklearn

import warnings
warnings.filterwarnings('ignore')

# Data visualization libraries
import seaborn as sns
import matplotlib
from matplotlib import pyplot as plt
import plotly
import plotly.express as px

# model definition
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Input, Dense, Dropout, concatenate

# handling outliers
from feature_engine.outliers import Winsorizer

# Split Dataset, Standardize, and Encode Features
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, OneHotEncoder

# feature selection
from sklearn.feature_selection import SelectKBest, f_classif

# create pipeline
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline

# Evaluate Classification Models
from sklearn.metrics import accuracy_score, classification_report, ConfusionMatrixDisplay, confusion_matrix

# Save and Load Model
import pickle
import json

In [3]:
# Load the saved artifacts: the fitted preprocessing pipeline, the list of
# columns to drop, and the Keras model.
# NOTE: pickle.load can execute arbitrary code; only unpickle trusted files.
with open('final_pipeline.pkl', 'rb') as pipeline_file:
    final_pipeline = pickle.load(pipeline_file)

with open('Drop_Columns.txt', 'r') as drop_columns_file:
    Drop_Columns = json.load(drop_columns_file)

model_seq2 = load_model('model_seq2.h5')

In [6]:
# data loading
# Read the inference CSV and build a working frame without the 'Unnamed: 0'
# column (it appears to be a saved row index). drop() returns a new DataFrame,
# so df_inf stays untouched and no explicit .copy() is needed beforehand.
df_inf = pd.read_csv('hh8dsft_P2M1_Satriya_Fauzan_Adhim_Inference.csv')
df_inf_copy = df_inf.drop(columns=['Unnamed: 0'])
df_inf_copy

Unnamed: 0,user_id,age,gender,region_category,membership_category,joining_date,joined_through_referral,preferred_offer_types,medium_of_operation,internet_option,...,avg_time_spent,avg_transaction_value,avg_frequency_login_days,points_in_wallet,used_special_discount,offer_application_preference,past_complaint,complaint_status,feedback,churn_risk_score
0,972706cb0db0068e,46,F,Town,Premium Membership,2015-03-27,No,Credit/Debit Card Offers,Smartphone,Mobile_Data,...,1447.387929,11839.58,29.0,727.91,Yes,No,No,Not Applicable,No reason specified,0
1,82d62cc806fb2816,25,M,Town,Premium Membership,2016-09-07,,Without Offers,Smartphone,Wi-Fi,...,114.17,46885.8,8.0,793.11,No,Yes,Yes,Unsolved,No reason specified,0
2,a596b4679c30ef3e,64,M,,Basic Membership,2017-09-07,Yes,Credit/Debit Card Offers,Smartphone,Wi-Fi,...,204.43,42224.69,26.0,510.37,Yes,No,Yes,Solved,Too many ads,1
3,8c4761dbf8df1816,52,F,City,Gold Membership,2017-12-31,Yes,,Smartphone,Wi-Fi,...,179.33,48836.04,20.0,705.0,Yes,No,Yes,Solved in Follow-up,No reason specified,0
4,8f2f492e0c25eb41,60,M,Town,Basic Membership,2015-07-08,Yes,Credit/Debit Card Offers,Smartphone,Wi-Fi,...,38.94,14104.52,21.0,677.41,Yes,No,Yes,No Information Available,Poor Customer Service,0
5,9bba07c5c386c34c,57,F,City,Silver Membership,2016-06-22,No,Gift Vouchers/Coupons,,Mobile_Data,...,0.0,8150.9,20.0,793.32,No,Yes,No,Not Applicable,No reason specified,0
6,8a205a969e23299d,17,M,,Gold Membership,2015-05-19,No,Gift Vouchers/Coupons,Desktop,Wi-Fi,...,199.94,75765.63,0.0,820.17,Yes,No,Yes,Solved,Quality Customer Care,0
7,91607fb18aafbfee,15,M,City,No Membership,2017-11-15,Yes,Gift Vouchers/Coupons,Desktop,Fiber_Optic,...,67.55,32777.77,23.0,533.44,Yes,No,No,Not Applicable,Poor Customer Service,1
8,847bddd58ff99d7e,32,M,Town,Basic Membership,2016-02-02,Yes,Without Offers,Smartphone,Mobile_Data,...,119.98,1888.81,28.0,628.93,No,Yes,No,Not Applicable,Too many ads,1
9,8c92ce118df969a1,29,M,Village,Platinum Membership,2016-10-19,No,Credit/Debit Card Offers,,Fiber_Optic,...,145.07,28873.91,19.0,715.5,No,Yes,No,Not Applicable,Too many ads,0


In [7]:
# Discard the columns listed in Drop_Columns, then order the rows by index
df_inf_final = df_inf_copy.drop(columns=Drop_Columns).sort_index()
df_inf_final

Unnamed: 0,membership_category,avg_transaction_value,avg_frequency_login_days,points_in_wallet,feedback,churn_risk_score
0,Premium Membership,11839.58,29.0,727.91,No reason specified,0
1,Premium Membership,46885.8,8.0,793.11,No reason specified,0
2,Basic Membership,42224.69,26.0,510.37,Too many ads,1
3,Gold Membership,48836.04,20.0,705.0,No reason specified,0
4,Basic Membership,14104.52,21.0,677.41,Poor Customer Service,0
5,Silver Membership,8150.9,20.0,793.32,No reason specified,0
6,Gold Membership,75765.63,0.0,820.17,Quality Customer Care,0
7,No Membership,32777.77,23.0,533.44,Poor Customer Service,1
8,Basic Membership,1888.81,28.0,628.93,Too many ads,1
9,Platinum Membership,28873.91,19.0,715.5,Too many ads,0


### Checking Missing Values

In [8]:
# Count the missing entries in each column of the inference frame
df_inf_final.isna().sum()

membership_category         0
avg_transaction_value       0
avg_frequency_login_days    0
points_in_wallet            0
feedback                    0
churn_risk_score            0
dtype: int64

### Reducing Cardinality of `feedback`

In [10]:
# Coarse feedback classes and the raw feedback labels that belong to each
feedback_groups = {
    'No reason specified': [
        'No reason specified',
    ],
    'Negative Feedback': [
        'Too many ads',
        'Poor Customer Service',
        'Poor Website',
        'Poor Product Quality',
    ],
    'Positive Feedback': [
        'User Friendly Website',
        'Quality Customer Care',
        'Products always in Stock',
        'Reasonable Price',
    ],
}

# Invert the grouping into a raw-label -> class lookup dictionary
feedback_class = {
    raw_label: group_name
    for group_name, raw_labels in feedback_groups.items()
    for raw_label in raw_labels
}

# Map every feedback value to its class; labels not in the lookup are kept as-is
df_inf_final['feedback'] = df_inf_final['feedback'].replace(feedback_class)

df_inf_final

Unnamed: 0,membership_category,avg_transaction_value,avg_frequency_login_days,points_in_wallet,feedback,churn_risk_score
0,Premium Membership,11839.58,29.0,727.91,No reason specified,0
1,Premium Membership,46885.8,8.0,793.11,No reason specified,0
2,Basic Membership,42224.69,26.0,510.37,Negative Feedback,1
3,Gold Membership,48836.04,20.0,705.0,No reason specified,0
4,Basic Membership,14104.52,21.0,677.41,Negative Feedback,0
5,Silver Membership,8150.9,20.0,793.32,No reason specified,0
6,Gold Membership,75765.63,0.0,820.17,Positive Feedback,0
7,No Membership,32777.77,23.0,533.44,Negative Feedback,1
8,Basic Membership,1888.81,28.0,628.93,Negative Feedback,1
9,Platinum Membership,28873.91,19.0,715.5,Negative Feedback,0


In [11]:
# Transform Inference-Set
# Apply the loaded pipeline's transform() to the inference frame; nothing is
# refitted here.
# NOTE(review): df_inf_final still contains churn_risk_score at this point;
# presumably the pipeline selects only its own feature columns -- confirm.

data_inf_transform = final_pipeline.transform(df_inf_final)
data_inf_transform

array([[ 1.53171334,  0.13770484,  0.585359  ,  0.        ,  0.        ,
         0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
         1.        ,  0.        ],
       [-0.70668567,  0.57488045,  0.67469855,  0.        ,  0.        ,
         0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
         1.        ,  0.        ],
       [ 1.21194205,  0.51673656,  0.28727734,  1.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  1.        ,
         0.        ,  0.        ],
       [ 0.57239948,  0.59920825,  0.55396684,  0.        ,  1.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         1.        ,  0.        ],
       [ 0.67898991,  0.16595828,  0.51616196,  1.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  1.        ,
         0.        ,  0.        ],
       [ 0.57239948,  0.09169129,  0.6749863 ,  0.        ,  0.        ,
         0.        ,  

In [12]:
# Predict using Neural Network
# Get the model's raw outputs, then cut them at 0.5 to obtain 0/1 labels
churn_scores = model_seq2.predict(data_inf_transform)
y_pred_inf = (churn_scores >= 0.5).astype(int)
y_pred_inf



array([[0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0]])

In [13]:
# Wrap the predicted labels in a one-column frame that shares the inference
# frame's index, so predictions line up row-for-row with the inputs
y_pred_inf_df = pd.DataFrame(
    {'churn_risk_score_prediction': y_pred_inf[:, 0]},
    index=df_inf_final.index,
)
y_pred_inf_df

Unnamed: 0,churn_risk_score_prediction
0,0
1,0
2,1
3,1
4,1
5,0
6,0
7,1
8,1
9,0
