In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
df = pd.read_csv("cust_satisfaction.csv")

# Data Cleaning

In [4]:
df.isnull().sum()

Gender                          0
Customer Type                   0
Type of Travel                  0
Class                           0
satisfaction                    0
Age                             0
Flight Distance                 0
Inflight entertainment          0
Baggage handling                0
Cleanliness                     0
Departure Delay in Minutes      0
Arrival Delay in Minutes      310
dtype: int64

In [5]:
# df.dropna(inplace=True). # drop rows with null values
df.isnull().sum()

df.dropna(inplace=True)


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 103594 entries, 0 to 103903
Data columns (total 12 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   Gender                      103594 non-null  object 
 1   Customer Type               103594 non-null  object 
 2   Type of Travel              103594 non-null  object 
 3   Class                       103594 non-null  object 
 4   satisfaction                103594 non-null  object 
 5   Age                         103594 non-null  int64  
 6   Flight Distance             103594 non-null  int64  
 7   Inflight entertainment      103594 non-null  int64  
 8   Baggage handling            103594 non-null  int64  
 9   Cleanliness                 103594 non-null  int64  
 10  Departure Delay in Minutes  103594 non-null  int64  
 11  Arrival Delay in Minutes    103594 non-null  float64
dtypes: float64(1), int64(6), object(5)
memory usage: 10.3+ MB


In [7]:
df.head(5)

Unnamed: 0,Gender,Customer Type,Type of Travel,Class,satisfaction,Age,Flight Distance,Inflight entertainment,Baggage handling,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes
0,Male,Loyal Customer,Personal Travel,Eco Plus,neutral or dissatisfied,13,460,5,4,5,25,18.0
1,Male,disloyal Customer,Business travel,Business,neutral or dissatisfied,25,235,1,3,1,1,6.0
2,Female,Loyal Customer,Business travel,Business,satisfied,26,1142,5,4,5,0,0.0
3,Female,Loyal Customer,Business travel,Business,neutral or dissatisfied,25,562,2,3,2,11,9.0
4,Male,Loyal Customer,Business travel,Business,satisfied,61,214,3,4,3,0,0.0


# Checking Duplicates ND Removing it

In [8]:
df.duplicated().sum()

172

In [9]:
df.drop_duplicates(inplace=True)

# Converting text into int ---> for model understanding

In [10]:
df["Gender"] = df["Gender"].map({"Male":0, "Female":1})
df["Customer Type"] = df["Customer Type"].map({"Loyal Customer":0, "disloyal Customer":1})
df["Type of Travel"] = df["Type of Travel"].map({"Personal Travel":0, "Business travel":1})
df["Class"] = df["Class"].map({"Eco":0, "Business":1, "Eco Plus":2})
df["satisfaction"] = df["satisfaction"].map({"neutral or dissatisfied":0, "satisfied":1})

In [11]:
df["Customer Type"].value_counts()


Customer Type
0    84517
1    18905
Name: count, dtype: int64

In [12]:
df["Class"].value_counts()
# df.head(2)

Class
1    49423
0    46532
2     7467
Name: count, dtype: int64

In [13]:
df.head(15)

Unnamed: 0,Gender,Customer Type,Type of Travel,Class,satisfaction,Age,Flight Distance,Inflight entertainment,Baggage handling,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes
0,0,0,0,2,0,13,460,5,4,5,25,18.0
1,0,1,1,1,0,25,235,1,3,1,1,6.0
2,1,0,1,1,1,26,1142,5,4,5,0,0.0
3,1,0,1,1,0,25,562,2,3,2,11,9.0
4,0,0,1,1,1,61,214,3,4,3,0,0.0
5,1,0,0,0,0,26,1180,1,4,1,0,0.0
6,0,0,0,0,0,47,1276,2,4,2,9,23.0
7,1,0,1,1,1,52,2035,5,5,4,4,0.0
8,1,0,1,1,0,41,853,1,1,2,0,0.0
9,0,1,1,0,0,20,1061,2,4,2,0,0.0


# Handling the inbalance data

In [14]:
Loyal_customer = df[df["Customer Type"] == 0].loc[0:20000]
Disloyal_customer = df[df["Customer Type"] == 1]

In [15]:
balance_df = pd.concat([Loyal_customer, Disloyal_customer], axis=0)
balance_df

Unnamed: 0,Gender,Customer Type,Type of Travel,Class,satisfaction,Age,Flight Distance,Inflight entertainment,Baggage handling,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes
0,0,0,0,2,0,13,460,5,4,5,25,18.0
2,1,0,1,1,1,26,1142,5,4,5,0,0.0
3,1,0,1,1,0,25,562,2,3,2,11,9.0
4,0,0,1,1,1,61,214,3,4,3,0,0.0
5,1,0,0,0,0,26,1180,1,4,1,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
103892,1,1,1,1,0,37,596,3,3,3,110,121.0
103895,1,1,1,0,0,24,1055,1,5,1,13,10.0
103899,1,1,1,0,0,23,192,2,4,2,3,0.0
103901,0,1,1,1,0,30,1995,4,4,4,7,14.0


# Data dividing X and Y 

In [16]:
x = balance_df.drop("Customer Type", axis=1)
y = balance_df["Customer Type"]

# data spliting and testing

In [17]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state= 42)

# normalise -- data range

In [18]:
from sklearn.preprocessing import StandardScaler
sd = StandardScaler()
x_train_scaled = sd.fit_transform(x_train)
x_test_scaled = sd.transform(x_test)

# ANN Architecture Designing

In [21]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# ann model definfing
model = Sequential()

#input layer
model.add(Dense(68,activation="relu",input_dim = x_train.shape[1]))

#hidden layer
model.add(Dense(32,activation="relu"))
model.add(Dense(24,activation="relu"))
model.add(Dense(12,activation="relu"))

#output Layer
model.add(Dense(1,activation="sigmoid"))

#model compile
model.compile(loss="binary_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])


model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [25]:
history = model.fit(x_train_scaled, y_train,
                    epochs=10,
                    batch_size=32,
                    validation_data=(x_test_scaled, y_test))

Epoch 1/10
[1m881/881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.8704 - loss: 0.3348 - val_accuracy: 0.9091 - val_loss: 0.2329
Epoch 2/10
[1m881/881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.9066 - loss: 0.2399 - val_accuracy: 0.9129 - val_loss: 0.2281
Epoch 3/10
[1m881/881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step - accuracy: 0.9130 - loss: 0.2214 - val_accuracy: 0.9150 - val_loss: 0.2200
Epoch 4/10
[1m881/881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.9156 - loss: 0.2230 - val_accuracy: 0.9157 - val_loss: 0.2179
Epoch 5/10
[1m881/881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.9225 - loss: 0.2076 - val_accuracy: 0.9191 - val_loss: 0.2142
Epoch 6/10
[1m881/881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.9221 - loss: 0.2075 - val_accuracy: 0.9208 - val_loss: 0.2103
Epoch 7/10
[1m881/881[0m 