# Import Libs

In [1]:
import numpy as np
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from joblib import dump
from joblib import load
from keras.models import load_model

# Load Data

In [2]:
df = pd.read_csv('customers_data.csv', sep=";")
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,no
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,no
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,no
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,no
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,no


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4521 entries, 0 to 4520
Data columns (total 17 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   age        4521 non-null   int64 
 1   job        4521 non-null   object
 2   marital    4521 non-null   object
 3   education  4521 non-null   object
 4   default    4521 non-null   object
 5   balance    4521 non-null   int64 
 6   housing    4521 non-null   object
 7   loan       4521 non-null   object
 8   contact    4521 non-null   object
 9   day        4521 non-null   int64 
 10  month      4521 non-null   object
 11  duration   4521 non-null   int64 
 12  campaign   4521 non-null   int64 
 13  pdays      4521 non-null   int64 
 14  previous   4521 non-null   int64 
 15  poutcome   4521 non-null   object
 16  y          4521 non-null   object
dtypes: int64(7), object(10)
memory usage: 600.6+ KB


# Number values of each class

In [4]:
value_counts = df['y'].value_counts()
value_counts

y
no     4000
yes     521
Name: count, dtype: int64

# Equalize Data

In [5]:
input_columns = df.drop('y', axis=1)

class_column = df['y']

oversampler = RandomOverSampler(random_state=0)

input_columns_resampled, class_column_resampled = oversampler.fit_resample(input_columns, class_column)

df = pd.concat([input_columns_resampled, class_column_resampled], axis=1)

class_distribution = df['y'].value_counts()
print(class_distribution)

y
no     4000
yes    4000
Name: count, dtype: int64


In [6]:
# Features
X = df.iloc[:,0:16].values
# Target
y = df.iloc[:,-1].values

# Encoding Target Values

In [7]:
labelencoder_y = LabelEncoder()

y = labelencoder_y.fit_transform(y)

In [8]:
print(y)

[0 0 0 ... 1 1 1]


# Get Job Column

In [9]:
X_job=X[:,[1]]

# Get unique jobs

In [10]:
print(np.unique(X_job))

['admin.' 'blue-collar' 'entrepreneur' 'housemaid' 'management' 'retired'
 'self-employed' 'services' 'student' 'technician' 'unemployed' 'unknown']


In [11]:
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [0])], sparse_threshold=0)

X_job = ct.fit_transform(X_job)
print(X_job.shape)

(8000, 12)


In [12]:
X_cat = X[:,[1,2,3,4,6,7,8,10,15]]

In [13]:
orginalNumOfCols= X_cat.shape[1]

# Encoding other columns

In [14]:
X_cat

array([['unemployed', 'married', 'primary', ..., 'cellular', 'oct',
        'unknown'],
       ['services', 'married', 'secondary', ..., 'cellular', 'may',
        'failure'],
       ['management', 'single', 'tertiary', ..., 'cellular', 'apr',
        'failure'],
       ...,
       ['management', 'married', 'secondary', ..., 'cellular', 'aug',
        'unknown'],
       ['admin.', 'single', 'secondary', ..., 'telephone', 'may',
        'success'],
       ['management', 'single', 'tertiary', ..., 'cellular', 'jan',
        'unknown']], dtype=object)

In [15]:
for i in range(X_cat.shape[1]):

    currNumOfCols = X_cat.shape[1]
    
    indexOfColumnToEncode = currNumOfCols - orginalNumOfCols + i
    
    ct = ColumnTransformer(transformers= [('encoder', OneHotEncoder(), [indexOfColumnToEncode])],
    remainder='passthrough',
    sparse_threshold=0)
    X_cat = ct.fit_transform(X_cat)

In [16]:
X_num = X[:,[0,5,9,11,12,13,14]]

In [17]:
X = np.concatenate((X_num,X_cat), axis=1)

In [18]:
X.shape

(8000, 51)

# Split Data

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 0)

# Scalling Data

In [18]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Build Model

In [19]:
classifier = Sequential()

classifier.add(Dense(units = 128, activation = 'relu'))

classifier.add(Dense(units = 64, activation = 'relu'))

classifier.add(Dense(units = 32, activation = 'relu'))

classifier.add(Dense(units = 1, activation = 'sigmoid'))

classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics =
['accuracy'])

classifierHistory = classifier.fit(X_train, y_train, epochs = 40, validation_data=(X_test,y_test))

Epoch 1/40
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.6974 - loss: 0.5537 - val_accuracy: 0.8610 - val_loss: 0.3384
Epoch 2/40
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8813 - loss: 0.2984 - val_accuracy: 0.9035 - val_loss: 0.2738
Epoch 3/40
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9134 - loss: 0.2345 - val_accuracy: 0.9115 - val_loss: 0.2403
Epoch 4/40
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9278 - loss: 0.1886 - val_accuracy: 0.9305 - val_loss: 0.2006
Epoch 5/40
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9526 - loss: 0.1381 - val_accuracy: 0.9275 - val_loss: 0.2059
Epoch 6/40
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9516 - loss: 0.1276 - val_accuracy: 0.9380 - val_loss: 0.1873
Epoch 7/40
[1m188/188[0m 

# Evaluation

In [20]:
evaluation = classifier.evaluate(X_test, y_test)
print("Loss:", evaluation[0])
print("Accuracy:", evaluation[1])

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9596 - loss: 0.2915
Loss: 0.2705594003200531
Accuracy: 0.9614999890327454


# Solve Overfitting

# Rebuild The Mode

In [21]:
classifier = Sequential()

classifier.add(Dense(units = 128, activation = 'relu'))

classifier.add(Dense(units = 64, activation = 'relu'))

classifier.add(Dropout(0.5))

classifier.add(Dense(units = 32, activation = 'relu'))

classifier.add(Dense(units = 1, activation = 'sigmoid'))

classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

classifierHistory = classifier.fit(X_train, y_train, epochs = 100, validation_data=(X_test,y_test))

Epoch 1/100
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.6507 - loss: 0.6097 - val_accuracy: 0.8510 - val_loss: 0.3733
Epoch 2/100
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8544 - loss: 0.3618 - val_accuracy: 0.8755 - val_loss: 0.3176
Epoch 3/100
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8838 - loss: 0.3115 - val_accuracy: 0.8990 - val_loss: 0.2824
Epoch 4/100
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8974 - loss: 0.2697 - val_accuracy: 0.9125 - val_loss: 0.2637
Epoch 5/100
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9104 - loss: 0.2319 - val_accuracy: 0.9200 - val_loss: 0.2336
Epoch 6/100
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9167 - loss: 0.2180 - val_accuracy: 0.9295 - val_loss: 0.2172
Epoch 7/100
[1m188/18

# Evaluation

In [22]:
evaluation = classifier.evaluate(X_test, y_test)
print("Loss:", evaluation[0])
print("Accuracy:", evaluation[1])

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9571 - loss: 0.4619
Loss: 0.37237709760665894
Accuracy: 0.9589999914169312


# Predict

In [23]:
y_pred = classifier.predict(X_test)

y_pred_binary = (y_pred > 0.5)
print(y_pred_binary)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[[False]
 [False]
 [ True]
 ...
 [ True]
 [ True]
 [False]]


# Accuracy Factores

In [24]:
accuracy = accuracy_score(y_test, y_pred_binary)
print("Accuracy:", round(100*accuracy,2))

precision = precision_score(y_test, y_pred_binary)
print("Precision:", round(100*precision,2))

recall = recall_score(y_test, y_pred_binary)
print("Recall:", round(100*recall,2))

f1 = f1_score(y_test, y_pred_binary)
print("F1 Score:", round(100*f1,2))

Accuracy: 95.9
Precision: 92.5
Recall: 100.0
F1 Score: 96.11


# Save The Model

In [25]:
classifier.save("Imported_customers_model.h5")



# Save Standard Scaler

In [26]:
dump(sc, "imported_customers_standard_scaler.pkl")

['imported_customers_standard_scaler.pkl']