# **Assessment Task 5: Bank Customer**

**Objective:**
Predict whether a customer will subscribe to a term deposit based on their demographic and account information using logistic regression.

In [4]:
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

**1. Data Preprocessing**

In [6]:
data = pd.read_csv('BankCustomerData.csv')
data_dummies = pd.get_dummies(data, drop_first=True)
print(data.head())
print(data.isnull().sum())

   age           job  marital  education default  balance housing loan  \
0   58    management  married   tertiary      no     2143     yes   no   
1   44    technician   single  secondary      no       29     yes   no   
2   33  entrepreneur  married  secondary      no        2     yes  yes   
3   47   blue-collar  married    unknown      no     1506     yes   no   
4   33       unknown   single    unknown      no        1      no   no   

   contact  day month  duration  campaign  pdays  previous poutcome  \
0  unknown    5   may       261         1     -1         0  unknown   
1  unknown    5   may       151         1     -1         0  unknown   
2  unknown    5   may        76         1     -1         0  unknown   
3  unknown    5   may        92         1     -1         0  unknown   
4  unknown    5   may       198         1     -1         0  unknown   

  term_deposit  
0           no  
1           no  
2           no  
3           no  
4           no  
age             0
job     

**2. Feature Selection**

In [7]:
X = data_dummies.drop(['term_deposit_yes'], axis = 1)
Y = data_dummies['term_deposit_yes']

**3. Data Splitting**

In [30]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, train_size= .02)

**4. Model Training**

In [31]:
from sklearn.impute import SimpleImputer

imputer = SimpleImputer(strategy = 'mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_imputed)
X_test_scaled = scaler.transform(X_test_imputed)

model = LogisticRegression()
model.fit(X_train_scaled, Y_train)

**5. Model Evaluation**

In [32]:
from sklearn.metrics import f1_score, recall_score, accuracy_score, confusion_matrix

y_pred = model.predict(X_test_scaled)

f1_score = f1_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)
accuracy = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)

**6. Conclusion**

In [33]:
print("F1 Score:", f1_score)
print("Recall:", recall)
print("Accuracy:", accuracy)
print("Confusion Matrix:", conf_matrix)
print("Precision Score:", precision)

F1 Score: 0.07136150234741784
Recall: 0.05867215645908389
Accuracy: 0.8579941130016512
Confusion Matrix: [[35625  2276]
 [ 3658   228]]
Precision Score: 0.09105431309904154
