In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
import matplotlib.pyplot as plt


# Read the raw dataset from disk into a DataFrame
file_path = 'DRL_PreDataset_Subset.csv'  # Replace with your file path
df = pd.read_csv(file_path)


# Numeric columns that are rescaled here and later form the first part of
# the state vector.
numeric_features = [
    "TotalDMsSent",
    "TotalEMsSent",
    "TotalSMSsSent",
    "TotalCALLsSent",
    "TimeSinceLastEmail",
    "TimeSinceLastDM",
    "TimeSinceLastCall",
    "TimeSinceLastSMS",
    "Nr_of_Sales",
    "Nr_of_Opens",
    "Nr_of_Clicks",
    "Nr_of_Unsubs",
]

# Min-max scaling is learned from, and applied to, the same full dataset.
# With MinMaxScaler's default feature_range each column ends up in [0, 1].
scaler = MinMaxScaler()
scaler.fit(df[numeric_features])
df[numeric_features] = scaler.transform(df[numeric_features])

# One-hot encode categorical features
categorical_features = ["Age_segment", "Segment"]
encoder = OneHotEncoder(sparse_output=False)  # dense array output (parameter name used by scikit-learn >= 1.2)
encoded_features = encoder.fit_transform(df[categorical_features])
encoded_columns = encoder.get_feature_names_out(categorical_features)

# Add encoded features to DataFrame.
# The encoder returns a bare array, so the new frame is given df's index
# explicitly: pd.concat(axis=1) aligns rows by index label, and a fresh
# RangeIndex would mis-align (and introduce NaN rows) whenever df's index
# is not the default 0..n-1.
encoded_df = pd.DataFrame(encoded_features, columns=encoded_columns, index=df.index)
df = pd.concat([df, encoded_df], axis=1)

# Drop original categorical columns (their information now lives in the one-hot columns)
df = df.drop(columns=categorical_features)

# Assemble the per-row state: the scaled numeric columns first, followed by
# the one-hot columns, stored as one Python list per row.
state_columns = [*numeric_features, *encoded_columns]
df["State"] = df[state_columns].to_numpy().tolist()

# Keep only the three columns of the final structure; "Description" is
# exposed under the name "Action".
rl_columns = ["State", "Description", "Reward"]
df_rl = df[rl_columns].rename(columns={"Description": "Action"})


In [None]:
# Step 2: Remove Outliers
# Keep rows whose Reward is at most 50. Rows with a missing Reward fail the
# comparison and are dropped as well.
# .copy() detaches the filtered result from the frame it was sliced from, so
# the column assignment in Step 3 writes to an independent DataFrame instead
# of triggering pandas' SettingWithCopyWarning.
df_rl = df_rl[df_rl['Reward'] <= 50].copy()

# Step 3: Rename Action "DM_Offersent" to "DM_sent"
df_rl['Action'] = df_rl['Action'].replace('DM_Offersent', 'DM_sent')

# Step 4: Remove Rare Actions
# Only the actions listed here survive; every other action value is discarded.
actions_to_keep = ['EMsent', 'DM_sent']
df_rl = df_rl[df_rl['Action'].isin(actions_to_keep)]

# Step 5: Separate Actions into Groups
action_groups = df_rl.groupby('Action')

# Step 6: Find Minimum Action Size for Balancing
min_action_size = action_groups.size().min()

# Step 7: Undersample All Actions to the Minimum Size
# Each group is sampled directly instead of through GroupBy.apply: pandas 2.2
# deprecates handing the grouping column to the applied function, and once it
# is excluded the 'Action' column would be lost after reset_index(drop=True).
# Every group gets its own random_state=42, exactly as the per-group lambda
# did, so the selected rows are unchanged. Groups are visited in sorted key
# order and the result is renumbered 0..n-1.
df_rl_balanced = pd.concat(
    [
        group.sample(n=min_action_size, random_state=42)
        for _, group in action_groups
    ],
    ignore_index=True,
)

# Step 8: Replace 0.0 Rewards with -0.01
# NOTE(review): this comment used to say -0.1 while the code uses -0.01; the
# comment has been aligned with the code — confirm -0.01 is the intended value.
df_rl_balanced['Reward'] = df_rl_balanced['Reward'].replace(0.0, -0.01)

# Step 9: Verify the Balance and Reward Distribution
# Per-action row counts should all be equal after undersampling.
action_counts = df_rl_balanced['Action'].value_counts()
print("Action Counts After Balancing:")
print(action_counts)

reward_summary = df_rl_balanced['Reward'].describe()
print("\nReward Statistics:")
print(reward_summary)

# Step 10: Visualize Reward Distribution
# Series.hist returns the Axes it drew on; label that Axes directly.
reward_ax = df_rl_balanced['Reward'].hist(bins=7)
reward_ax.set_title("Reward Distribution")
reward_ax.set_xlabel("Reward")
reward_ax.set_ylabel("Frequency")
plt.show()

# Persist the balanced dataset without the index column.
# The "State" column holds Python lists, which CSV stores as their string
# form — TODO confirm the downstream reader parses them back into lists.
output_path = 'DRL_Training_Dataset_Subset.csv'
df_rl_balanced.to_csv(output_path, index=False)
