<a href="https://colab.research.google.com/github/sherylmatthew/fairness-aware-RLHF/blob/main/train_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install stable_baselines3

Collecting stable_baselines3
  Downloading stable_baselines3-2.6.0-py3-none-any.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable_baselines3)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable_baselines3)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable_baselines3)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable_baselines3)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3.0,>=2.3->stable_baselines3)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (

In [5]:
import pandas as pd
import numpy as np
import gym
from gym import spaces
from stable_baselines3 import PPO
from sklearn.preprocessing import StandardScaler
from google.colab import files
import io

# Step 1: Upload the preprocessed data file manually in Google Colab
def load_preprocessed_data():
    """Prompt for a Colab file upload and load the preprocessed CSV.

    The first uploaded file whose lower-cased name contains
    'preprocessed_shopping_data' is parsed with pandas.

    Returns:
        The parsed DataFrame, or None when no matching file was uploaded or
        the file could not be read (a message is printed in both cases).
    """
    print("Please upload the 'preprocessed_shopping_data.csv' file.")
    uploads = files.upload()

    # Colab may rename the upload (e.g. "name (1).csv"), so match on a
    # substring of the name rather than on the exact filename.
    matching_name = next(
        (name for name in uploads if 'preprocessed_shopping_data' in name.lower()),
        None,
    )
    if matching_name is None:
        print("Error: No file containing 'preprocessed_shopping_data' was found in uploaded files.")
        return None

    try:
        frame = pd.read_csv(io.BytesIO(uploads[matching_name]))
        print(f"Preprocessed data loaded from '{matching_name}' with shape: {frame.shape}")
    except Exception as err:
        print(f"Error loading dataset: {err}")
        return None
    return frame

# Step 2: Define a custom Gym environment for shopping recommendations
class ShoppingEnv(gym.Env):
    """Gym environment that walks through a customer table one row per step.

    Each step the agent picks a product category for the current customer.
    The reward is the row's 'Purchase Amount (USD)' when the picked category
    matches the row's strongest 'Category_*' column (0 otherwise), plus a
    non-positive fairness penalty based on how unevenly that action has been
    issued to male vs. female customers so far.

    Args:
        data: DataFrame with a 'Gender_Male' column, a 'Purchase Amount (USD)'
            column and at least one column whose name contains 'Category_'.
        categories: Optional sequence of category names, one per action.
            Defaults to DEFAULT_CATEGORIES (the original five categories).
        fairness_weight: Multiplier applied to the gender imbalance to form
            the penalty. Defaults to 10.0 (the original hard-coded value).

    Raises:
        ValueError: If `data` is empty, `categories` is empty, or a required
            column is missing.
    """

    DEFAULT_CATEGORIES = ('Clothing', 'Footwear', 'Outerwear', 'Accessories', 'Jewelry')

    def __init__(self, data, categories=None, fairness_weight=10.0):
        super(ShoppingEnv, self).__init__()
        if len(data) == 0:
            raise ValueError("ShoppingEnv requires a non-empty dataset.")

        category_names = list(self.DEFAULT_CATEGORIES if categories is None else categories)
        if not category_names:
            raise ValueError("ShoppingEnv requires at least one category.")

        self.data = data.reset_index(drop=True)
        self.current_step = 0
        self.max_steps = len(data)
        self.fairness_weight = fairness_weight

        # For fairness: gender column used for reward adjustment
        self.gender_col = 'Gender_Male'

        # Fail at construction time instead of part-way through training.
        required = [self.gender_col, 'Purchase Amount (USD)']
        missing = [col for col in required if col not in self.data.columns]
        if missing:
            raise ValueError(f"ShoppingEnv data is missing required columns: {missing}")
        if self.data.iloc[0].filter(like='Category_').empty:
            raise ValueError("ShoppingEnv data has no 'Category_' columns.")

        # Action space: one discrete action per category name
        self.action_space = spaces.Discrete(len(category_names))
        self.category_mapping = dict(enumerate(category_names))

        # Observation space: every column except 'Customer ID' and any column
        # whose name contains 'Purchase Amount (USD)'.
        # NOTE(review): 'Category_*' columns are NOT excluded here, so the
        # observation contains the same columns step() uses to decide whether
        # the action was correct. Confirm whether this leakage is intended.
        # NOTE(review): the Box bounds are fixed at [-5, 5]; nothing here
        # checks that the features actually fall in that range.
        self.feature_cols = [col for col in data.columns if col != 'Customer ID' and 'Purchase Amount (USD)' not in col]
        self.observation_space = spaces.Box(low=-5, high=5, shape=(len(self.feature_cols),), dtype=np.float32)

        # Build the observation matrix once so each step does not pay for
        # pandas row indexing and dtype conversion.
        self._observations = self.data[self.feature_cols].to_numpy(dtype=np.float32)

        # Per-action tally of how often each gender received that action
        self.action_counts = {
            action: {'Male': 0, 'Female': 0} for action in self.category_mapping
        }

    def reset(self):
        """Rewind to the first customer and return its observation.

        The signature deliberately takes no `seed` argument.
        NOTE(review): Stable-Baselines3 is understood to inspect this
        signature to choose its gym compatibility wrapper; confirm before
        changing it.
        NOTE(review): action_counts is not cleared here, so the fairness
        tallies carry over between episodes. Confirm that is intended.
        """
        self.current_step = 0  # Reset step counter
        return self._get_observation()

    def _get_observation(self):
        """Return the float32 feature vector for the current row."""
        # Safeguard: If current_step exceeds dataset size, wrap to the start
        if self.current_step >= self.max_steps:
            self.current_step = 0
        # Copy so callers cannot mutate the cached matrix through the result.
        return self._observations[self.current_step].copy()

    def _calculate_fairness_penalty(self, gender, action):
        """Record (gender, action) and return the resulting penalty (<= 0).

        The penalty is -fairness_weight * |male_share - female_share| for the
        given action, computed over all tallies recorded so far.
        """
        # NOTE(review): only a value equal to 1 counts as 'Male'. If the
        # gender column was standardised during preprocessing, every row
        # would be tallied as 'Female'. Verify against the preprocessing step.
        gender_label = 'Male' if gender == 1 else 'Female'
        counts = self.action_counts[action]
        counts[gender_label] += 1
        # total is at least 1 here because of the increment above, so no
        # division-by-zero guard is needed.
        total = counts['Male'] + counts['Female']
        imbalance = abs(counts['Male'] - counts['Female']) / total
        return -self.fairness_weight * imbalance

    def step(self, action):
        """Score `action` for the current customer and advance one row.

        Args:
            action: Index into category_mapping; numpy scalars and size-1
                arrays are accepted and converted to a plain int.

        Returns:
            (observation, reward, done, info) in the old Gym 4-tuple form.
            `done` is True after the last row, in which case the returned
            observation is that of the first row.
        """
        # Normalise to a hashable Python int so dict lookups work even when
        # the caller passes a numpy array.
        action = int(np.asarray(action).item())

        # Get current customer data
        customer = self.data.iloc[self.current_step]
        actual_category = customer.filter(like='Category_').idxmax().replace('Category_', '')
        purchase_amount = customer['Purchase Amount (USD)']
        gender = customer[self.gender_col]

        # Base reward is the purchase amount only when the recommendation matches
        base_reward = purchase_amount if self.category_mapping[action] == actual_category else 0

        fairness_penalty = self._calculate_fairness_penalty(gender, action)
        reward = float(base_reward + fairness_penalty)

        self.current_step += 1
        done = self.current_step >= self.max_steps
        if done:
            self.current_step = 0  # Reset for the next episode

        return self._get_observation(), reward, done, {}

# Step 3: Train the RL model
def train_model(df, total_timesteps=10000, save_path="ppo_shopping_model", seed=None):
    """Train a PPO agent on ShoppingEnv and save it to disk.

    Args:
        df: Preprocessed customer DataFrame passed to ShoppingEnv.
        total_timesteps: Training budget handed to `model.learn`. The default
            (10000) is neither a multiple of the dataset length nor of the
            2048-step increments visible in the captured training log, so
            training does not stop on an episode boundary.
        save_path: Path (without extension) passed to `model.save`.
        seed: Optional seed forwarded to PPO for reproducible runs; None
            keeps the original unseeded behaviour.

    Returns:
        The trained PPO model.
    """
    env = ShoppingEnv(df)
    model = PPO("MlpPolicy", env, verbose=1, seed=seed)
    model.learn(total_timesteps=total_timesteps)
    model.save(save_path)
    print(f"Model training completed and saved as '{save_path}'.")
    return model

# Main function
def main():
    """Load the uploaded dataset and, if loading succeeded, train the model."""
    df = load_preprocessed_data()
    if df is not None:
        train_model(df)


if __name__ == "__main__":
    main()

Please upload the 'preprocessed_shopping_data.csv' file.


Saving preprocessed_shopping_data.csv to preprocessed_shopping_data (1).csv
Preprocessed data loaded from 'preprocessed_shopping_data (1).csv' with shape: (3900, 107)
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




-----------------------------
| time/              |      |
|    fps             | 418  |
|    iterations      | 1    |
|    time_elapsed    | 4    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.9e+03      |
|    ep_rew_mean          | -3.45e+04    |
| time/                   |              |
|    fps                  | 404          |
|    iterations           | 2            |
|    time_elapsed         | 10           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0026427428 |
|    clip_fraction        | 0.000195     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.61        |
|    explained_variance   | -0.00692     |
|    learning_rate        | 0.0003       |
|    loss                 | 1.19e+04     |
|    n_updates            | 10           |
|    policy_grad

In [4]:
!pip install shimmy>=2.0