# 🌍 Real-World Applications: Bank Marketing Dataset

This notebook demonstrates how to apply statistical tools to a real dataset: it fits a logistic regression classifier to predict subscription likelihood from marketing campaign data.

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Pull the semicolon-separated bank marketing CSV directly from its raw GitHub URL.
url = 'https://raw.githubusercontent.com/henriqueyamahata/bank-marketing-dataset/master/bank.csv'
df = pd.read_csv(
    url,
    sep=';',
)

# Show the first five rows as a quick sanity check that the file parsed into columns.
print(df.head(n=5))

In [None]:
# Cell-local imports: these helpers are only needed by the modelling step below.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Preprocess: select a subset of columns and one-hot encode them.
# get_dummies only expands non-numeric columns, so numeric ones pass through
# unchanged; drop_first=True keeps k-1 indicator columns per encoded column.
feature_columns = ['age', 'job', 'marital', 'education', 'balance', 'housing', 'loan']
df_encoded = pd.get_dummies(df[feature_columns], drop_first=True)

# Encode the label as 1 ('yes') / 0 ('no'). Series.map turns every label that
# is missing from the mapping into NaN, so fail here with a clear message
# instead of letting the NaN surface later as an opaque error during fitting.
df_encoded['target'] = df['y'].map({'yes': 1, 'no': 0})
unmapped = df_encoded['target'].isna()
if unmapped.any():
    unexpected = sorted(df.loc[unmapped, 'y'].astype(str).unique())
    raise ValueError("Unexpected values in column 'y': {}".format(unexpected))

# Train-test split: 70% train / 30% test with a fixed seed for reproducibility.
# stratify=y keeps the yes/no proportion the same in both splits, so the
# evaluation is not skewed by an unlucky draw.
# NOTE(review): presumably 'yes' is the minority class here - confirm with
# df['y'].value_counts().
X = df_encoded.drop('target', axis=1)
y = df_encoded['target']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Logistic regression on standardized features. The model's default L2 penalty
# and its solver are sensitive to feature scale, and the raw numeric columns
# sit on a very different scale from the 0/1 dummies. Wrapping the scaler in a
# pipeline means it is fitted on the training split only and then re-applied
# to whatever is passed to predict().
# `model` is now a Pipeline; the fitted classifier itself is `model[-1]`.
model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Results: per-class precision, recall and F1 on the held-out test split.
print(classification_report(y_test, y_pred))