In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt

# **1. Data Preprocessing**

In [None]:
# Path of the raw bank-customer CSV. Note that `df` holds the path string,
# not a DataFrame; the loaded table lives in `dataset`.
df = '/content/BankCustomerData.csv'

# Read the whole CSV into memory and preview the first five rows.
dataset = pd.read_csv(filepath_or_buffer=df)
dataset.head(n=5)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,term_deposit
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no


In [None]:
# One-hot encode the categorical columns; drop_first=True removes the first
# level of each categorical so the dummy columns are not perfectly collinear.
df_dummies = pd.get_dummies(data=dataset, drop_first=True)

# Binary target: 1 when the customer's balance is above 50, otherwise 0.
# NOTE(review): the label is named 'subscribed' but is computed from 'balance',
# not from the 'term_deposit' column -- confirm this is the intended target.
balance_above_threshold = dataset['balance'] > 50
df_dummies['subscribed'] = balance_above_threshold.astype(int)

# **2. Feature Selection**

In [None]:
# Columns that must not be used as predictors:
#   - 'subscribed' is the target itself;
#   - 'balance' is the column the target is computed from, so keeping it
#     would leak the label into the features;
#   - 'duration' and 'pdays' are excluded as in the original analysis.
excluded_columns = ['balance', 'duration', 'pdays', 'subscribed']

# 'Unnamed: 0' appears to be the row index that was saved with the CSV
# (0, 1, 2, ... in the preview). A row counter carries no information about
# the customer, so it is excluded from the features when present.
if 'Unnamed: 0' in df_dummies.columns:
    excluded_columns.append('Unnamed: 0')

X = df_dummies.drop(columns=excluded_columns)
y = df_dummies['subscribed']

# **3.  Data Splitting**

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# **4. Model Training**

In [None]:
# Standardize the features. The scaler's mean and standard deviation are
# learned from the training split only and then reused unchanged on the test
# split. Re-fitting on the test data (fit_transform) would scale it with
# different statistics than the ones the model was trained on and would leak
# test-set information into preprocessing.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(X_train)
x_test_scaled = scaler.transform(X_test)

In [None]:
# Fit a logistic regression classifier with default hyperparameters on the
# scaled training features. fit() returns the estimator itself, so the
# construction and the fit can be chained.
model = LogisticRegression().fit(x_train_scaled, y_train)
model

# **5. Model Evaluation**

In [None]:
# Predicted class label for every row of the scaled test features.
y_pred = model.predict(X=x_test_scaled)

In [None]:
# Compare the predictions with the held-out labels using three views:
# overall accuracy, the confusion matrix (rows = true class, columns =
# predicted class), and per-class precision / recall / F1.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
class_report = classification_report(y_true=y_test, y_pred=y_pred)

print(f"Accuracy: {accuracy}")
# Each heading is printed on its own line, followed by its value.
print("Confusion Matrix: ", conf_matrix, sep="\n")
print("Classification Report: ", class_report, sep="\n")

Accuracy: 0.7716932457786116
Confusion Matrix: 
[[ 127 1912]
 [  35 6454]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.78      0.06      0.12      2039
           1       0.77      0.99      0.87      6489

    accuracy                           0.77      8528
   macro avg       0.78      0.53      0.49      8528
weighted avg       0.77      0.77      0.69      8528



# **6. Conclusion**

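*   The logistic regression model reaches about 77% accuracy on the test set when predicting whether a bank customer will subscribe to a service. However, it is poor at spotting customers who won't subscribe: only about 6% of them are correctly identified (recall of 0.06 for class 0), because the model predicts class 1 for almost every customer. Since roughly 76% of the test customers belong to class 1 (6,489 of 8,528), the 77% accuracy is only marginally better than always predicting the majority class. Note also that the `subscribed` label in this notebook is defined as `balance > 50`, so the results describe that derived label. The bank should therefore work on identifying non-subscribers better, for example by using more informative features, handling the class imbalance, or trying more advanced techniques, to improve accuracy and make marketing efforts more effective.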