# Lab 3: Contextual Bandit-Based News Article Recommendation

**`Course`:** Reinforcement Learning Fundamentals  
**`Student Name`:** Sohan  
**`Roll Number`:** U20230162  
**`GitHub Branch`:** sohan_U20230162  

# Imports and Setup

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from rlcmab_sampler import sampler
import warnings
# Silence every warning category for the rest of the notebook session.
warnings.filterwarnings(action='ignore')

# Seed NumPy's global random generator so repeated runs draw the same numbers.
np.random.seed(seed=42)

# Plot appearance: matplotlib dark-grid style first, then the seaborn palette.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette(palette="husl")

# Load Datasets

In [2]:
# Read the three CSV inputs: the article catalogue plus the train/test user tables.
news_df = pd.read_csv("data/news_articles.csv")
train_users = pd.read_csv("data/train_users.csv")
test_users = pd.read_csv("data/test_users.csv")

# Row / column counts for each table.
print("=== DATASET OVERVIEW ===")
print(f"\nNews Articles: {news_df.shape[0]} articles, {news_df.shape[1]} features")
print(f"Train Users: {train_users.shape[0]} users, {train_users.shape[1]} features")
print(f"Test Users: {test_users.shape[0]} users, {test_users.shape[1]} features")

# Frequency table of the article categories, then of the training user labels.
for banner, frame, column in (
    ("\n=== News Categories ===", news_df, 'category'),
    ("\n=== User Labels ===", train_users, 'label'),
):
    print(banner)
    print(frame[column].value_counts())

=== DATASET OVERVIEW ===

News Articles: 209527 articles, 6 features
Train Users: 2000 users, 33 features
Test Users: 2000 users, 32 features

=== News Categories ===
category
POLITICS          35602
WELLNESS          17945
ENTERTAINMENT     17362
TRAVEL             9900
STYLE & BEAUTY     9814
PARENTING          8791
HEALTHY LIVING     6694
QUEER VOICES       6347
FOOD & DRINK       6340
BUSINESS           5992
COMEDY             5400
SPORTS             5077
BLACK VOICES       4583
HOME & LIVING      4320
PARENTS            3955
THE WORLDPOST      3664
WEDDINGS           3653
WOMEN              3572
CRIME              3562
IMPACT             3484
DIVORCE            3426
WORLD NEWS         3299
MEDIA              2944
WEIRD NEWS         2777
GREEN              2622
WORLDPOST          2579
RELIGION           2577
STYLE              2254
SCIENCE            2206
TECH               2104
TASTE              2096
MONEY              1756
ARTS               1509
ENVIRONMENT        1444
FIFTY   

## Data Preprocessing

In this section:
- Handle missing values
- Encode categorical features
- Prepare data for user classification

In [4]:
# Data Preprocessing
#
# Produces the model-ready tables used by the later cells:
#   X_train / y_train / y_train_encoded  - training features and labels
#   X_test                               - test features, encoded like X_train
#   le_dict, label_encoder, label_map    - fitted encoders and the id -> label map
print("=== DATA PREPROCESSING ===")

# --- Missing values ---------------------------------------------------------
# Rows without a label cannot be used for supervised training, so drop them
# first; remaining gaps in numeric columns are filled with the column mean.
print(f"\nMissing values in train_users:\n{train_users.isnull().sum()}")
train_users_clean = train_users.dropna(subset=['label'])  # Remove rows with missing labels
train_feature_means = train_users_clean.mean(numeric_only=True)
train_users_clean = train_users_clean.fillna(train_feature_means)

# Impute the test set with the TRAINING means so both sets are prepared with
# the same statistics (the classifier only ever saw train-imputed values).
# Any numeric column the training means do not cover falls back to the test
# set's own mean.
print(f"\nMissing values in test_users:\n{test_users.isnull().sum()}")
test_users_clean = (
    test_users
    .fillna(train_feature_means)
    .fillna(test_users.mean(numeric_only=True))
)

# --- Features / labels ------------------------------------------------------
X_train = train_users_clean.drop('label', axis=1)
y_train = train_users_clean['label']

# --- Categorical features ---------------------------------------------------
# Fit one LabelEncoder per object-typed column on the training data only.
# Values are cast to str first, so a missing value becomes the category "nan".
le_dict = {}
categorical_cols = X_train.select_dtypes(include=['object']).columns.tolist()

for col in categorical_cols:
    le = LabelEncoder()
    X_train[col] = le.fit_transform(X_train[col].astype(str))
    le_dict[col] = le

# Apply the fitted encodings to the test set. LabelEncoder.transform raises on
# unseen values, so look the codes up in a plain dict instead: one vectorised
# pass per column, with unseen categories falling back to the default code 0.
X_test = test_users_clean.copy()
for col in categorical_cols:
    code_by_class = {cls: code for code, cls in enumerate(le_dict[col].classes_)}
    X_test[col] = (
        X_test[col].astype(str).map(code_by_class).fillna(0).astype("int64")
    )

# Give the test features the same columns, in the same order, as the training
# features so the classifier receives them exactly as it was fitted.
X_test = X_test[X_train.columns]

# --- Target labels ----------------------------------------------------------
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
label_map = dict(enumerate(label_encoder.classes_))  # encoded id -> original label

print(f"\nLabel Mapping: {label_map}")
print(f"\nPreprocessed data shapes:")
print(f"X_train: {X_train.shape}")
print(f"X_test: {X_test.shape}")

=== DATA PREPROCESSING ===

Missing values in train_users:
user_id                          0
age                            698
income                           0
clicks                           0
purchase_amount                  0
session_duration                 0
content_variety                  0
engagement_score                 0
num_transactions                 0
avg_monthly_spend                0
avg_cart_value                   0
browsing_depth                   0
revisit_rate                     0
scroll_activity                  0
time_on_site                     0
interaction_count                0
preferred_price_range            0
discount_usage_rate              0
wishlist_size                    0
product_views                    0
repeat_purchase_gap (days)       0
churn_risk_score                 0
loyalty_index                    0
screen_brightness                0
battery_percentage               0
cart_abandonment_count           0
browser_version                

## User Classification

Train a classifier to predict each user's category (`User1`, `User2`, `User3` — stored in the data as `user_1`, `user_2`, `user_3`),
which serves as the **context** for the contextual bandit.


In [5]:
# Hold out a stratified 20% of the labelled users for validation.
X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(
    X_train,
    y_train_encoded,
    test_size=0.2,
    stratify=y_train_encoded,
    random_state=42,
)

print("=== USER CLASSIFICATION MODEL ===")
print(f"Training set size: {X_train_split.shape[0]} ({80}%)")
print(f"Validation set size: {X_val_split.shape[0]} ({20}%)")

# Random forest: 100 trees, depth capped at 10, fixed seed for repeatability.
user_classifier = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    random_state=42,
).fit(X_train_split, y_train_split)

# Score the held-out split and build the per-class report before printing.
y_pred_val = user_classifier.predict(X_val_split)
val_accuracy = accuracy_score(y_val_split, y_pred_val)
validation_report = classification_report(
    y_val_split, y_pred_val, target_names=label_encoder.classes_
)

print(f"\nValidation Accuracy: {val_accuracy:.4f}")
print("\n=== Classification Report (Validation Set) ===")
print(validation_report)

# Predict an encoded class id for every test user, then translate the ids back
# to the original label strings.
test_user_predictions = user_classifier.predict(X_test)
test_user_contexts = np.array(label_encoder.classes_[test_user_predictions].tolist())

# How many test users were assigned to each encoded class id (0, 1, 2).
print(f"\nTest Set Predictions:")
print(f"User1: {np.sum(test_user_predictions == 0)}")
print(f"User2: {np.sum(test_user_predictions == 1)}")
print(f"User3: {np.sum(test_user_predictions == 2)}")

=== USER CLASSIFICATION MODEL ===
Training set size: 1600 (80%)
Validation set size: 400 (20%)

Validation Accuracy: 0.9000

=== Classification Report (Validation Set) ===
              precision    recall  f1-score   support

      user_1       0.89      0.86      0.87       142
      user_2       0.97      0.89      0.93       142
      user_3       0.84      0.97      0.90       116

    accuracy                           0.90       400
   macro avg       0.90      0.90      0.90       400
weighted avg       0.90      0.90      0.90       400


Test Set Predictions:
User1: 720
User2: 760
User3: 520
