# Propensity Modeling for Cross-Sell and Upsell
This notebook demonstrates the process of merging customer transaction data, master customer profiles,
and engagement metrics to predict cross-sell/upsell opportunities using propensity modeling.

In [4]:
import os
from pathlib import Path

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder

# Load datasets
# The input directory can be overridden with the RFM_LOAN_DATA_DIR environment
# variable so the notebook is not tied to one machine's drive layout; when the
# variable is unset the original location is used.
DATA_DIR = Path(os.environ.get('RFM_LOAN_DATA_DIR', 'D:/Jupyter_projects/RFM_loan'))
customer_transactions = pd.read_csv(DATA_DIR / 'updated_customer_transactions.csv')
master_table_updated = pd.read_csv(DATA_DIR / 'master_table_updated.csv')
engagement_metrics = pd.read_csv(DATA_DIR / 'updated_engagement_metrics.csv')

# Merge datasets
# Both joins use pandas' default inner join on customer_id, so only customers
# present in all three files are kept.
merged_df = pd.merge(customer_transactions, master_table_updated, on='customer_id')
merged_df = pd.merge(merged_df, engagement_metrics, on='customer_id')

# Encode categorical columns
# Each column gets its own fitted encoder, stored in `label_encoders`, so the
# integer codes can be mapped back to the original labels later
# (label_encoders[col].inverse_transform / .classes_). Refitting one shared
# encoder would discard every mapping except the last column's.
categorical_columns = ['product_type', 'transaction_type', 'channel', 'customer_segment',
                       'income_level', 'location', 'risk_profile']
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    merged_df[col] = le.fit_transform(merged_df[col])
    label_encoders[col] = le

# Engagement rule: a row qualifies only when all four engagement metrics
# exceed their cut-offs.
balanced_engagement_threshold = (
    merged_df['website_visits'].gt(50)
    & merged_df['time_spent_on_pages'].gt(30)
    & merged_df['email_open_rate'].gt(0.8)
    & merged_df['click_through_rate'].gt(0.3)
)

# Number of distinct product types per customer, broadcast back onto each row.
products_per_customer = merged_df.groupby('customer_id')['product_type'].transform('nunique')

# Target is 1 when the customer holds more than one product type OR the row
# passes the engagement rule; otherwise 0.
merged_df['cross_sell_target'] = (
    products_per_customer.gt(1) | balanced_engagement_threshold
).astype(int)

# Artificially balance the dataset by downsampling the larger class to the
# size of the smaller one.
negative_cases = merged_df[merged_df['cross_sell_target'] == 0]
positive_cases = merged_df[merged_df['cross_sell_target'] == 1]

if negative_cases.empty or positive_cases.empty:
    # With only one class present there is nothing to balance or to learn from.
    raise ValueError(
        "cross_sell_target contains a single class "
        f"({len(positive_cases)} positive, {len(negative_cases)} negative rows); "
        "cannot build a balanced dataset"
    )

# DataFrame.sample without replacement raises when asked for more rows than
# exist, so always sample from the larger class, never the smaller one.
if len(positive_cases) >= len(negative_cases):
    positive_cases = positive_cases.sample(n=len(negative_cases), random_state=42)
else:
    negative_cases = negative_cases.sample(n=len(positive_cases), random_state=42)

balanced_df = pd.concat([positive_cases, negative_cases])

# Predictor columns and label taken from the balanced frame.
# NOTE(review): product_type and the four engagement columns are also the
# inputs used to derive cross_sell_target, so the model may largely be
# re-learning that rule — confirm this is intended.
feature_columns = [
    'age', 'income_level', 'location', 'credit_score', 'customer_lifetime_value',
    'transaction_amount', 'product_type', 'transaction_type', 'channel',
    'risk_score', 'customer_segment', 'risk_profile', 'website_visits',
    'time_spent_on_pages', 'email_open_rate', 'click_through_rate',
]
target = balanced_df['cross_sell_target']
features = balanced_df[feature_columns]

# Hold out 30% of the rows for evaluation; the seed is fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    random_state=42,
    test_size=0.3,
)

# Fit a random forest (fixed seed) on the training split and predict the
# held-out rows.
rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred = rf_model.predict(X_test)

# Summarise performance: per-class text report and overall accuracy.
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:", report)


FileNotFoundError: [Errno 2] No such file or directory: 'D:/Jupyter_projects/RFM_loan/updated_customer_transactions.csv'