# LIBRARIES

In [15]:
import pandas as pd
import numpy as np
import pickle

# DATA

In [16]:
userbase = pd.read_csv('userbase.csv')
sent = pd.read_csv('sent_emails.csv')
responded = pd.read_csv('responded.csv')

In [17]:
responded = responded.drop_duplicates()


# MERGE DATA
To create environment

In [20]:
responded.head(1)

Unnamed: 0,Responded_Date,Customer_ID,SubjectLine_ID
0,2016-01-25,217102,3


In [21]:
merged_data.head(1)

Unnamed: 0,Sent_Date,Customer_ID,SubjectLine_ID,Gender,Type,Email_Address,Age,Tenure
0,2016-01-28,1413,2,M,B,Jaj2NuUJneD@gmail.com,44,12


In [22]:
# sent + userbase
merged_data = pd.merge(sent, userbase, on='Customer_ID', how='left')

# Fill in additional columns based on userbase information
merged_data['Gender'] = merged_data['Gender'].fillna(userbase.set_index('Customer_ID')['Gender'])
merged_data['Type'] = merged_data['Type'].fillna(userbase.set_index('Customer_ID')['Type'])
merged_data['Email_Address'] = merged_data['Email_Address'].fillna(userbase.set_index('Customer_ID')['Email_Address'])
merged_data['Age'] = merged_data['Age'].fillna(userbase.set_index('Customer_ID')['Age'])
merged_data['Tenure'] = merged_data['Tenure'].fillna(userbase.set_index('Customer_ID')['Tenure'])

# Drop rows with missing values
merged_data = merged_data.dropna() # CHECK AGAIN LATER
merged_data.head(5)


Unnamed: 0,Sent_Date,Customer_ID,SubjectLine_ID,Gender,Type,Email_Address,Age,Tenure
0,2016-01-28,1413,2,M,B,Jaj2NuUJneD@gmail.com,44,12
1,2016-03-02,83889,2,M,C,Y1Se1qBYrUe@hotmail.com,29,23
2,2016-03-09,457832,3,M,C,vAF@gmail.com,26,14
3,2016-01-20,127772,1,M,C,QAWqTT@gmail.com,26,15
4,2016-02-03,192123,3,M,C,Qtgy0C@msn.com,33,9


In [24]:
# add responded 
merged_data = pd.merge(merged_data, responded, on=['Customer_ID'], how='left') # meger on responded date as well.

# SANITY CHECK
merged_data.head(1)

Unnamed: 0,Sent_Date,Customer_ID,SubjectLine_ID_x,Gender,Type,Email_Address,Age,Tenure,Responded_Date,SubjectLine_ID_y
0,2016-01-28,1413,2,M,B,Jaj2NuUJneD@gmail.com,44,12,2016-01-31,2.0


In [25]:
# REWARDS
merged_data['Reward'] = np.where(merged_data['Sent_Date'] == merged_data['Responded_Date'], 1, 0)

# Fix missing dates
placeholder_date = pd.to_datetime('1900-01-01') # placeholder date
merged_data['Responded_Date'].fillna(placeholder_date, inplace=True)

# Renaming columns
merged_data.rename(columns={'SubjectLine_ID_x': 'SubLine_Sent'}, inplace=True)
merged_data.rename(columns={'SubjectLine_ID_y': 'SubLine_Responded'}, inplace=True)
merged_data['SubLine_Responded'].fillna(-1, inplace=True)

merged_data.head(1)

Unnamed: 0,Sent_Date,Customer_ID,SubLine_Sent,Gender,Type,Email_Address,Age,Tenure,Responded_Date,SubLine_Responded,Reward
0,2016-01-28,1413,2,M,B,Jaj2NuUJneD@gmail.com,44,12,2016-01-31,2.0,0


# STATES

In [26]:
from itertools import product
states=list(product(merged_data['Gender'].unique(), merged_data['Type'].unique(),merged_data['Age'].unique(), merged_data['Tenure'].unique()))


In [63]:
# Initialize a dictionary to store rewards for each state
state_rewards = {}

# Iterate over each state
for state in states:
    # Calculate the hash value of the state tuple and get the corresponding index
    state_index = hash(tuple(state)) % len(states)
    
    # Retrieve the reward from the merged_data DataFrame based on the state index
    reward = merged_data.loc[state_index, 'Reward']
    
    # Store the reward in the state_rewards dictionary
    state_rewards[state] = reward




State: ('M', 'B', 44, 12), Reward: 0
State: ('M', 'B', 44, 23), Reward: 0
State: ('M', 'B', 44, 14), Reward: 1
State: ('M', 'B', 44, 15), Reward: 0
State: ('M', 'B', 44, 9), Reward: 0
State: ('M', 'B', 44, 21), Reward: 0
State: ('M', 'B', 44, 11), Reward: 0
State: ('M', 'B', 44, 25), Reward: 0
State: ('M', 'B', 44, 8), Reward: 0
State: ('M', 'B', 44, 27), Reward: 0
State: ('M', 'B', 44, 18), Reward: 1
State: ('M', 'B', 44, 32), Reward: 0
State: ('M', 'B', 44, 5), Reward: 0
State: ('M', 'B', 44, 20), Reward: 0
State: ('M', 'B', 44, 10), Reward: 0
State: ('M', 'B', 44, 13), Reward: 0
State: ('M', 'B', 44, 24), Reward: 0
State: ('M', 'B', 44, 7), Reward: 0
State: ('M', 'B', 44, 22), Reward: 0
State: ('M', 'B', 44, 17), Reward: 0
State: ('M', 'B', 44, 28), Reward: 0
State: ('M', 'B', 44, 6), Reward: 0
State: ('M', 'B', 44, 19), Reward: 1
State: ('M', 'B', 44, 26), Reward: 0
State: ('M', 'B', 44, 16), Reward: 0
State: ('M', 'B', 44, 30), Reward: 1
State: ('M', 'B', 44, 4), Reward: 0
State: 

# ACTIONS

In [27]:
actions = merged_data['SubLine_Sent'].unique()

# REWARDS

# Q-AGENT

In [70]:
class QLearningAgent:
    def __init__(self, state_size, action_size, learning_rate=0.1, discount_factor=0.9, epsilon=0.1):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.Q_table = np.zeros((state_size, action_size))  

    def map_state_to_index(self, state):
        # Map state to index
        # Assuming state is a list or a tuple
        state_hash = hash(tuple(state))
        return state_hash % self.state_size

    def map_action_to_index(self, action):
        # Map action to index
        # Assuming action is an integer
        return int(action) % self.action_size

    def choose_action(self, state):
        if np.random.rand() < self.epsilon:
            # Explore: choose a random action
            return np.random.randint(self.action_size)
        else:
            # Exploit: choose the action with the highest Q-value for the current state
            state_index = self.map_state_to_index(state)
            return np.argmax(self.Q_table[state_index])

    def update_q_table(self, state, action, reward, next_state):
        # Map state and action to indices
        state_index = self.map_state_to_index(state)
        action_index = self.map_action_to_index(action)
        
        # Q-learning update rule
        next_state_index = self.map_state_to_index(next_state)
        td_target = reward + self.discount_factor * np.max(self.Q_table[next_state_index])
        td_error = td_target - self.Q_table[state_index, action_index]
        self.Q_table[state_index, action_index] += self.learning_rate * td_error

    def train(self, state, action, reward, next_state):
        self.update_q_table(state, action, reward, next_state)

    def get_q_table(self):
        return self.Q_table


# TRAINING OF Q-AGENT

In [76]:
state_size = len(states)  # Assuming states is a list of states
action_size = len(actions)  # Assuming actions is a list of unique actions

# Initialize the Q-learning agent
agent = QLearningAgent(state_size, action_size, learning_rate=0.1, discount_factor=0.9, epsilon=0.1)

# Define your training loop
num_episodes = 1000  # Number of training episodes

for episode in range(num_episodes):
    # Iterate over state-action pairs
    for i in range(len(states) - 1):
        # Get current state and action
        state = states[i]
        state_index = hash(tuple(state)) % len(states)
        action = merged_data.loc[state_index, 'SubLine_Sent']
        reward = merged_data.loc[state_index, 'Reward']
       
        # Get next state
        next_state = states[i + 1]
        
        # Train the agent
        agent.train(state, action, reward, next_state)

# Get the learned Q-table
Q_table = agent.get_q_table()
print("Q-Table:", Q_table)


Q-Table: [[0.         0.         0.73494989]
 [0.         0.         0.        ]
 [0.5423535  0.         0.        ]
 ...
 [1.45434786 0.         0.        ]
 [0.         0.         0.        ]
 [0.         0.89727918 0.        ]]


The Q_table printed in the code represents the learned Q-values for each state-action pair after training the Q-learning agent. Each entry in the Q-table represents the expected cumulative reward the agent expects to receive when taking a specific action from a specific state.

In a Q-table, each row corresponds to a state, and each column corresponds to an action. So, the value at row i and column j (i.e., Q_table[i, j]) represents the Q-value for taking action j from state i.

When you print the Q_table, you'll see a matrix where each row corresponds to a state, and each column corresponds to an action. 

So, for instance, Q_table[0, 2]=0 indicates that the Q-value for taking action 2 from the first state is 0. Similarly, Q_table[2, 1] indicates that the Q-value for taking action 1 from the third state is 0.0. Similarly, Q_table[0,1]=1 means Q-value for taking action 1 from the first state is 1

This Q-table is what the agent has learned from its interactions with the environment during training. These Q-values guide the agent's decision-making process during the exploitation phase, where it selects actions based on the highest Q-value for each state.


In [65]:
print(f'Dimensions of Q-table: {Q_table.shape}')

Dimensions of Q-table: (6992, 3)


# NEW SCENARIOS

In [77]:
merged_data.iloc[2012:2013][['Gender', 'Type', 'Age', 'Tenure']].values.tolist()

[['F', 'C', 23, 19]]

In [91]:
merged_data.iloc[5003:5004][['Gender', 'Type', 'Age', 'Tenure']].values.tolist()

[['F', 'B', 53, 20]]

In [92]:
merged_data.iloc[3788:3789][['Gender', 'Type', 'Age', 'Tenure']].values.tolist()

[['M', 'C', 28, 18]]

In [95]:
# Assuming 'new_state' is the new state of the environment
new_state = merged_data.iloc[9053:9054][['Gender', 'Type', 'Age', 'Tenure']].values.tolist()

# Convert nested lists to tuples
new_state = [tuple(item) if isinstance(item, list) else item for item in new_state]
new_state = tuple(new_state)  # Convert the list representing the state to a tuple
new_state_index = agent.map_state_to_index(new_state)  # Map state to index
q_values = Q_table[new_state_index]  # Retrieve Q-values for the new state
action = np.argmax(q_values)  # Select action with the highest Q-value
print(action)

1


## Multiple scenarios/states

In [90]:
# List of new states
new_states = [
    merged_data.iloc[5003:5004][['Gender', 'Type', 'Age', 'Tenure']].values.tolist(),
    merged_data.iloc[3788:3789][['Gender', 'Type', 'Age', 'Tenure']].values.tolist(),
]

# Obtain action for each new state
actions = []
for new_state in new_states:
    # Convert nested lists to tuples
    new_state = [tuple(item) if isinstance(item, list) else item for item in new_state]
    new_state = tuple(new_state)  # Convert the list representing the state to a tuple
    
    # Map state to index and retrieve action
    new_state_index = agent.map_state_to_index(new_state)
    q_values = Q_table[new_state_index]
    action = np.argmax(q_values)
    
    # Append action to the list of actions
    actions.append(action)

# Now 'actions' contains the actions selected by the agent for each new state
print(actions)


[0, 2]


In [93]:
# Assuming 'actions' contains the indices of predicted actions
subject_lines = ['Subject line 1', 'Subject line 2', 'Subject line 3']

# Print the predicted subject lines
for action_index in actions:
    predicted_subject_line = subject_lines[action_index]
    print(f"The subject line predicted by the Q-agent is {predicted_subject_line}.")


The subject line predicted by the Q-agent is Subject line 1.
The subject line predicted by the Q-agent is Subject line 3.


# DOWNLOAD

In [94]:
# DOWNLOAD THE AGENT
# Assuming Q_table is your Q-table numpy array
# Save Q-table as a pickle file
with open('q_table.pkl', 'wb') as f:
    pickle.dump(Q_table, f)
