## Algorithms

In [6]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
import random


### Neural Network

In [11]:
#NEURAL NETWORK ALGORITHM
print("NEURAL NETWORK ALGORITHM\n")

# Read the dataset
data = pd.read_excel("Train.xlsx")

# Fill empty columns
customer_age_default = 'unknown'
marital_default = "widowed"
balance_default = data["balance"].median()
personal_default = "unknown"
last_contact_duration_default = round(data["last_contact_duration"].mean(), 0)
num_contacts_in_campaign_default = data["num_contacts_in_campaign"].median()
days_since_prev_campaign_contact_default = 0
data.fillna({'customer_age': customer_age_default}, inplace=True)
data.fillna({'marital': marital_default}, inplace=True)
data.fillna({'balance': balance_default}, inplace=True)
data.fillna({'personal_loan': personal_default}, inplace=True)
data.fillna({'last_contact_duration': last_contact_duration_default}, inplace=True)
data.fillna({'num_contacts_in_campaign': num_contacts_in_campaign_default}, inplace=True)
data.fillna({'days_since_prev_campaign_contact': days_since_prev_campaign_contact_default}, inplace=True)

# Remove rows of our choice
data = data[(data['job_type'] != 'unknown')]
data = data[(data['personal_loan'] != 'unknown')]
data = data[(data['customer_age'] != 'unknown')]
data = data[(data['prev_campaign_outcome'] != 'other')]

# Balance the dataset
class_counts = data['term_deposit_subscribed'].value_counts()
num_zeros = class_counts[0]
num_ones = class_counts[1]
zero_indices = data[data['term_deposit_subscribed'] == 0].index
one_indices = data[data['term_deposit_subscribed'] == 1].index
count_diff = abs(num_zeros - num_ones)

if num_zeros > num_ones:
    remove_indices = np.random.choice(zero_indices, count_diff, replace=False)
else:
    remove_indices = np.random.choice(one_indices, count_diff, replace=False)

data = data.drop(remove_indices)

# Encode categorical columns
label_encoder = LabelEncoder()
for column in data.columns:
    if data[column].dtype == 'object':
        data[column] = label_encoder.fit_transform(data[column])

# Select features and target
features = data.drop(['id', 'term_deposit_subscribed'], axis=1)
target = data['term_deposit_subscribed']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Create and train the neural network model
model = MLPClassifier(hidden_layer_sizes=(100, 100), random_state=42)
model.fit(X_train, y_train)

predictions = model.predict(X_test)

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, predictions)
precision = precision_score(y_test, predictions)
recall = recall_score(y_test, predictions)
f1 = f1_score(y_test, predictions)

# Print the evaluation metrics
print("Accuracy:", accuracy )
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

NEURAL NETWORK ALGORITHM

Accuracy: 0.7800963081861958
Precision: 0.7323369565217391
Recall: 0.875
F1 Score: 0.7973372781065088


In [8]:
#SVM ALGORITHM
print("SVM ALGORITHM\n")

# Encode categorical columns
label_encoder = LabelEncoder()
for column in data.columns:
    if data[column].dtype == 'object':
        data[column] = label_encoder.fit_transform(data[column])

# Select features and target
features = data.drop(['id', 'term_deposit_subscribed'], axis=1)
target = data['term_deposit_subscribed']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Create and train the SVM model
model = SVC(random_state=42)
model.fit(X_train, y_train)

predictions = model.predict(X_test)

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, predictions)
precision = precision_score(y_test, predictions)
recall = recall_score(y_test, predictions)
f1 = f1_score(y_test, predictions)

# Print the evaluation metrics and confusion matrix
print("Accuracy:", accuracy )
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)



SVM ALGORITHM

Accuracy: 0.7552166934189406
Precision: 0.7890070921985816
Recall: 0.705229793977813
F1 Score: 0.7447698744769875


In [9]:
# Decision Tree Model
print("DECISION TREE ALGORITHM\n")

# Encode categorical columns
label_encoder = LabelEncoder()
for column in data.columns:
    if data[column].dtype == 'object':
        data[column] = label_encoder.fit_transform(data[column])

# Select features and target
features = data.drop(['id', 'term_deposit_subscribed'], axis=1)
target = data['term_deposit_subscribed']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Create and train the Decision Tree model
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
predictions = model.predict(X_test)

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, predictions)
precision = precision_score(y_test, predictions)
recall = recall_score(y_test, predictions)
f1 = f1_score(y_test, predictions)

# Print the evaluation metrics and confusion matrix
print("Accuracy:", accuracy )
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)



DECISION TREE ALGORITHM

Accuracy: 0.8210272873194222
Precision: 0.8333333333333334
Recall: 0.8082408874801902
F1 Score: 0.8205953338696702


In [10]:
#Random Algorithm
print("RANDOM ALGORITHM\n")

# Encode categorical columns
label_encoder = LabelEncoder()
for column in data.columns:
    if data[column].dtype == 'object':
        data[column] = label_encoder.fit_transform(data[column])

# Select features and target
features = data.drop(['id', 'term_deposit_subscribed'], axis=1)
target = data['term_deposit_subscribed']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

random_predictions = [random.choice([0, 1]) for _ in range(len(y_test))]

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, random_predictions)
precision = precision_score(y_test, random_predictions)
recall = recall_score(y_test, random_predictions)
f1 = f1_score(y_test, random_predictions)

# Print the evaluation metrics and confusion matrix
print("Accuracy:", accuracy )
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)


RANDOM ALGORITHM

Accuracy: 0.49518459069020865
Precision: 0.5015527950310559
Recall: 0.5118858954041204
F1 Score: 0.5066666666666666
