# 🧠 Predictive Modeling for Marketing Campaign Response

## 1. 📦 Import Libraries

In [None]:
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder


## 2. 📂 Load and Clean the Data

In [None]:
# Read the tab-delimited campaign file from the raw-data folder.
df = pd.read_csv(
    '../data/raw/marketing_campaign.csv',
    delimiter='\t',
)

# Header names may carry stray leading/trailing whitespace; trim each one.
df.columns = df.columns.map(str.strip)

# Show the first rows as the cell output.
df.head()


## 3. 🎯 Define Features and Target Variable

In [None]:
# Split the frame into the label to predict and the model inputs.
# 'Response' is the target; every other column becomes a candidate feature.
# NOTE(review): any identifier or raw date-string columns present in the CSV
# flow into the model as features from here -- confirm that is intended.
target = df['Response']
features = df.drop(columns=['Response'])

# Route columns to a preprocessing branch by dtype. Selecting on the generic
# 'number' kind (instead of only int64/float64) keeps other numeric widths
# such as int32/float32 from ending up in neither list, and 'category'
# columns are treated as categorical alongside plain object columns.
# Columns in neither list are not handed to either preprocessing branch.
numeric_features = features.select_dtypes(include='number').columns
categorical_features = features.select_dtypes(
    include=['object', 'category']
).columns


## 4. 🛠️ Preprocessing Pipelines

In [None]:
# Numeric branch: fill missing values with the column mean, then standardize.
numeric_steps = [
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
]
numeric_transformer = Pipeline(steps=numeric_steps)

# Categorical branch: fill missing values with the most frequent level, then
# one-hot encode; handle_unknown='ignore' avoids an error when a level that
# was not seen during fit shows up at transform time.
categorical_steps = [
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
]
categorical_transformer = Pipeline(steps=categorical_steps)

# Apply each branch to its own column subset and concatenate the outputs.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)


## 5. ✂️ Split the Dataset

In [None]:
# Hold out 30% of the rows for evaluation; random_state pins the shuffle so
# the split is reproducible. Stratifying on the target keeps the class
# proportions approximately equal in the train and test partitions, which
# keeps the held-out metrics representative when one class is rarer.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.3,
    random_state=42,
    stratify=target,
)


## 6. 🤖 Train Models

In [None]:
# Logistic-regression model: preprocessing followed by the classifier.
# clone() gives this pipeline its own unfitted copy of the preprocessor.
# Pipeline.fit fits its steps in place, so embedding the shared object would
# let any other pipeline built on it overwrite this one's fitted state.
logreg_pipeline = Pipeline([
    ('preprocessor', clone(preprocessor)),
    # max_iter is raised to 1000 to give the solver more iterations.
    ('classifier', LogisticRegression(max_iter=1000)),
])
logreg_pipeline.fit(X_train, y_train)


In [None]:
# Random-forest model: preprocessing followed by the classifier.
# clone() gives this pipeline its own unfitted copy of the preprocessor.
# Pipeline.fit fits its steps in place, so embedding the shared object would
# make this fit overwrite the state of any other pipeline built on it.
rf_pipeline = Pipeline([
    ('preprocessor', clone(preprocessor)),
    # random_state pins the forest's randomness so reruns are reproducible.
    ('classifier', RandomForestClassifier(random_state=42)),
])
rf_pipeline.fit(X_train, y_train)


## 7. 📊 Evaluate Model Performance

In [None]:
# Predict labels for the held-out rows with the logistic-regression pipeline,
# then print the per-class classification report under a heading.
logreg_predictions = logreg_pipeline.predict(X_test)
print("Logistic Regression Performance:\n")
logreg_report = classification_report(
    y_true=y_test,
    y_pred=logreg_predictions,
)
print(logreg_report)


In [None]:
# Predict labels for the held-out rows with the random-forest pipeline,
# then print the per-class classification report under a heading.
rf_predictions = rf_pipeline.predict(X_test)
print("Random Forest Performance:\n")
rf_report = classification_report(
    y_true=y_test,
    y_pred=rf_predictions,
)
print(rf_report)


## 8. 🔍 Compare Accuracy Scores

In [None]:
# Score both fitted pipelines on the same held-out rows. For these
# classifiers, score() reports mean accuracy.
logreg_score, rf_score = (
    fitted_model.score(X_test, y_test)
    for fitted_model in (logreg_pipeline, rf_pipeline)
)

# Report the two accuracies side by side, rounded to two decimals.
print(f'Logistic Regression Accuracy: {logreg_score:.2f}')
print(f'Random Forest Accuracy: {rf_score:.2f}')
