# Email Marketing Campaign Optimization


In [1]:
# Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score


In [3]:
# Load datasets
email_df = pd.read_csv("email_table.csv")
opened_df = pd.read_csv("email_opened_table.csv")
clicked_df = pd.read_csv("link_clicked_table.csv")


In [5]:
# Add target variables
email_df['opened'] = email_df['email_id'].isin(opened_df['email_id']).astype(int)
email_df['clicked'] = email_df['email_id'].isin(clicked_df['email_id']).astype(int)

In [7]:
# 1. Basic Stats
open_rate = email_df['opened'].mean() * 100
click_rate = email_df['clicked'].mean() * 100
print(f"Open Rate: {open_rate:.2f}%")
print(f"Click Through Rate: {click_rate:.2f}%")

Open Rate: 10.35%
Click Through Rate: 2.12%


In [9]:
# 2. Machine Learning Model
features = ['email_text', 'email_version', 'hour', 'weekday', 'user_country', 'user_past_purchases']
X = email_df[features].copy()
y = email_df['clicked']

In [11]:
# Encode categorical variables
for col in ['email_text', 'email_version', 'weekday', 'user_country']:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])

In [13]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [15]:
# Train model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [19]:
# Evaluate model
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:,1]
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print(f"ROC-AUC Score: {roc_auc_score(y_test, y_proba):.4f}")


Classification Report:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99     19547
           1       0.06      0.01      0.02       453

    accuracy                           0.97     20000
   macro avg       0.52      0.50      0.50     20000
weighted avg       0.96      0.97      0.97     20000


Confusion Matrix:
[[19486    61]
 [  449     4]]
ROC-AUC Score: 0.5913


In [21]:
# 3. Improvement Estimation
threshold = 0.5
email_df['predicted_proba'] = model.predict_proba(X)[:, 1]
email_df['predicted_clicked'] = (email_df['predicted_proba'] >= threshold).astype(int)

predicted_ctr = email_df[email_df['predicted_clicked'] == 1]['clicked'].mean() * 100
original_ctr = email_df['clicked'].mean() * 100
print(f"\nEstimated Click Rate (If emails sent only to likely clickers): {predicted_ctr:.2f}%")
print(f"Original Click Rate: {original_ctr:.2f}%")



Estimated Click Rate (If emails sent only to likely clickers): 75.52%
Original Click Rate: 2.12%


In [23]:
# 4. Segment Analysis
print("\nSegment Analysis: Average Click Rates")
print(email_df.groupby('email_text')['clicked'].mean())
print(email_df.groupby('email_version')['clicked'].mean())
print(email_df.groupby('weekday')['clicked'].mean())
print(email_df.groupby('user_country')['clicked'].mean().sort_values(ascending=False).head(10))



Segment Analysis: Average Click Rates
email_text
long_email     0.018538
short_email    0.023872
Name: clicked, dtype: float64
email_version
generic         0.015137
personalized    0.027294
Name: clicked, dtype: float64
weekday
Friday       0.014037
Monday       0.022906
Saturday     0.017846
Sunday       0.016751
Thursday     0.024445
Tuesday      0.024889
Wednesday    0.027620
Name: clicked, dtype: float64
user_country
UK    0.024675
US    0.024360
ES    0.008327
FR    0.008004
Name: clicked, dtype: float64
