# Model Implementation

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [2]:
df = pd.read_csv('cwk_data_20542740.csv')
df.head(3)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,duration,campaign,pdays,previous,poutcome,y
0,29,blue-collar,single,primary,no,722,yes,no,cellular,15,78,1,359,1,failure,no
1,39,blue-collar,married,primary,no,2366,yes,no,cellular,16,1007,3,325,4,failure,no
2,30,blue-collar,single,secondary,yes,4,yes,no,unknown,23,84,3,-1,0,unknown,no


In [None]:
df_test = pd.read_csv('Your_test_file.csv')

In [None]:
X_test = df_test[:-1]
y_test = df_test[-1]

In [3]:
df['y'] = [0 if x == 'no' else 1 for x in df['y']]

In [4]:
#input features
df_input = df.iloc[:,:-1]
df_input.shape 

(4000, 15)

In [5]:
#output features
df_output = df.iloc[:,-1]
df_output.shape 

(4000,)

In [6]:
df_input = pd.get_dummies(df_input, dtype=int)
df_input.head(3)

Unnamed: 0,age,balance,day,duration,campaign,pdays,previous,job_admin.,job_blue-collar,job_entrepreneur,...,housing_yes,loan_no,loan_yes,contact_cellular,contact_telephone,contact_unknown,poutcome_failure,poutcome_other,poutcome_success,poutcome_unknown
0,29,722,15,78,1,359,1,0,1,0,...,1,1,0,1,0,0,1,0,0,0
1,39,2366,16,1007,3,325,4,0,1,0,...,1,1,0,1,0,0,1,0,0,0
2,30,4,23,84,3,-1,0,0,1,0,...,1,1,0,0,0,1,0,0,0,1


In [7]:
X = df_input
y = df_output

In [14]:
rf_clf = RandomForestClassifier()
mean_rf_cv_score = np.mean(cross_val_score(rf_clf, X, y, cv=5))

print(f"Mean Cross Validation Score for Random Forest Classifier: {mean_rf_cv_score :.2%}")

## parameters of RF
rf_param_grid = {
    'n_estimators': [10, 30, 100],
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 2, 6, 10],
    'min_samples_split': [5, 10],
    'min_samples_leaf': [3, 6]

}

Mean Cross Validation Score for Random Forest Classifier: 86.25%


In [16]:
## initiate gridsearch and print the best parameter combination
rf_grid_search = GridSearchCV(rf_clf,
                              rf_param_grid,
                              cv = 5)
rf_grid_search.fit(X, y)


print(f"Accuracy: {rf_grid_search.best_score_ :.4%}")
print("")
print(f"Optimal Parameters: {rf_grid_search.best_params_}")

Accuracy: 86.4250%

Optimal Parameters: {'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 3, 'min_samples_split': 10, 'n_estimators': 100}


In [11]:
rf_clf1 = RandomForestClassifier(criterion='gini', max_depth=None, min_samples_leaf=4, min_samples_split= 6,n_estimators= 100)
rf_clf1

RandomForestClassifier(min_samples_leaf=4, min_samples_split=6)

In [12]:
rf_clf1.fit(X, y)

RandomForestClassifier(min_samples_leaf=4, min_samples_split=6)

In [13]:
y_pred = rf_clf1.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
recall =  recall_score(y_test, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("F1 Score:", f1)
print("Recall Score:", recall)

In [None]:
recall_yes = recall_score(y_test,y_pred , pos_label=1)
recall_no = recall_score(y_test,y_pred , pos_label=0)

print(f"Recall for class 'yes': {recall_yes}")
print(f"Recall for class 'no': {recall_no}")

In [None]:
classification_rep = classification_report(y_test, y_pred)

In [None]:
import matplotlib.pyplot as plt
cm=confusion_matrix(y_test, y_pred)
print(cm)

In [None]:
# Plot the confusion matrix as a heatmap
sns.heatmap(cm, annot=True, fmt='d', cmap='Oranges', xticklabels=['Predicted No', 'Predicted Yes'], yticklabels=['Actual No', 'Actual Yes'])
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()