In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Load the dataset
df = pd.read_csv("crm_data_2000.csv")
df.head()

Unnamed: 0,user_id,age_group,location,previous_purchases,last_offer,response
0,2000,adult,IN,9,discount,no_click
1,2001,young,UK,2,free_shipping,no_click
2,2002,young,IN,2,discount,click
3,2003,adult,AU,0,buy1get1,no_click
4,2004,young,IN,7,buy1get1,no_click


In [5]:
# Encode categorical columns
encoders = {}
for col in ["age_group", "location", "last_offer"]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le

encoders

{'age_group': LabelEncoder(),
 'location': LabelEncoder(),
 'last_offer': LabelEncoder()}

In [6]:
df.head()

Unnamed: 0,user_id,age_group,location,previous_purchases,last_offer,response
0,2000,0,2,9,1,no_click
1,2001,2,3,2,2,no_click
2,2002,2,2,2,1,click
3,2003,0,0,0,0,no_click
4,2004,2,2,7,0,no_click


In [7]:
# Create state tuples
df["state"] = list(zip(df.age_group, df.location, df.previous_purchases))

df.head()

Unnamed: 0,user_id,age_group,location,previous_purchases,last_offer,response,state
0,2000,0,2,9,1,no_click,"(0, 2, 9)"
1,2001,2,3,2,2,no_click,"(2, 3, 2)"
2,2002,2,2,2,1,click,"(2, 2, 2)"
3,2003,0,0,0,0,no_click,"(0, 0, 0)"
4,2004,2,2,7,0,no_click,"(2, 2, 7)"


In [8]:
# Define state/action space
unique_states = df["state"].unique()
unique_actions = df["last_offer"].unique()

In [9]:
unique_states

array([(0, 2, 9), (2, 3, 2), (2, 2, 2), (0, 0, 0), (2, 2, 7), (1, 1, 8),
       (2, 4, 8), (1, 4, 2), (1, 2, 2), (2, 2, 3), (2, 1, 2), (0, 1, 1),
       (0, 2, 5), (2, 2, 6), (1, 1, 6), (2, 2, 8), (1, 3, 8), (0, 0, 6),
       (1, 4, 4), (1, 0, 10), (1, 4, 9), (0, 3, 1), (1, 3, 10), (1, 0, 9),
       (0, 3, 4), (2, 1, 6), (1, 1, 9), (0, 0, 5), (0, 0, 8), (2, 1, 3),
       (1, 2, 1), (0, 4, 10), (1, 4, 8), (0, 0, 1), (1, 0, 4), (1, 4, 7),
       (0, 2, 2), (0, 4, 0), (0, 3, 2), (1, 3, 0), (2, 3, 10), (2, 3, 8),
       (0, 2, 4), (2, 3, 7), (0, 1, 3), (2, 0, 7), (0, 0, 9), (1, 3, 6),
       (2, 4, 5), (1, 3, 1), (1, 3, 3), (0, 4, 3), (1, 3, 5), (0, 2, 0),
       (1, 1, 2), (2, 1, 10), (1, 4, 5), (0, 3, 9), (1, 0, 6), (1, 1, 5),
       (2, 1, 0), (2, 2, 1), (2, 2, 9), (2, 3, 6), (0, 1, 8), (0, 3, 7),
       (2, 0, 2), (0, 0, 7), (2, 3, 1), (2, 3, 4), (0, 1, 0), (2, 0, 0),
       (0, 1, 5), (2, 4, 10), (1, 2, 0), (2, 4, 2), (1, 2, 4), (1, 3, 9),
       (2, 4, 9), (2, 3, 3), (1, 4, 3), (1, 2

In [10]:
unique_actions

array([1, 2, 0])

In [11]:
state_to_index = {s: i for i, s in enumerate(unique_states)}
state_to_index

{(0, 2, 9): 0,
 (2, 3, 2): 1,
 (2, 2, 2): 2,
 (0, 0, 0): 3,
 (2, 2, 7): 4,
 (1, 1, 8): 5,
 (2, 4, 8): 6,
 (1, 4, 2): 7,
 (1, 2, 2): 8,
 (2, 2, 3): 9,
 (2, 1, 2): 10,
 (0, 1, 1): 11,
 (0, 2, 5): 12,
 (2, 2, 6): 13,
 (1, 1, 6): 14,
 (2, 2, 8): 15,
 (1, 3, 8): 16,
 (0, 0, 6): 17,
 (1, 4, 4): 18,
 (1, 0, 10): 19,
 (1, 4, 9): 20,
 (0, 3, 1): 21,
 (1, 3, 10): 22,
 (1, 0, 9): 23,
 (0, 3, 4): 24,
 (2, 1, 6): 25,
 (1, 1, 9): 26,
 (0, 0, 5): 27,
 (0, 0, 8): 28,
 (2, 1, 3): 29,
 (1, 2, 1): 30,
 (0, 4, 10): 31,
 (1, 4, 8): 32,
 (0, 0, 1): 33,
 (1, 0, 4): 34,
 (1, 4, 7): 35,
 (0, 2, 2): 36,
 (0, 4, 0): 37,
 (0, 3, 2): 38,
 (1, 3, 0): 39,
 (2, 3, 10): 40,
 (2, 3, 8): 41,
 (0, 2, 4): 42,
 (2, 3, 7): 43,
 (0, 1, 3): 44,
 (2, 0, 7): 45,
 (0, 0, 9): 46,
 (1, 3, 6): 47,
 (2, 4, 5): 48,
 (1, 3, 1): 49,
 (1, 3, 3): 50,
 (0, 4, 3): 51,
 (1, 3, 5): 52,
 (0, 2, 0): 53,
 (1, 1, 2): 54,
 (2, 1, 10): 55,
 (1, 4, 5): 56,
 (0, 3, 9): 57,
 (1, 0, 6): 58,
 (1, 1, 5): 59,
 (2, 1, 0): 60,
 (2, 2, 1): 61,
 (2, 2, 9): 6

In [12]:
action_space_size = len(unique_actions)
state_space_size = len(unique_states)

action_space_size, state_space_size

(3, 165)

In [13]:
# Initialize Q-table
Q = np.zeros((state_space_size, action_space_size))
Q

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0

In [14]:
# Map response to rewards
df["reward"] = df["response"].apply(lambda x: 1 if x == "click" else 0)
df.head()

Unnamed: 0,user_id,age_group,location,previous_purchases,last_offer,response,state,reward
0,2000,0,2,9,1,no_click,"(0, 2, 9)",0
1,2001,2,3,2,2,no_click,"(2, 3, 2)",0
2,2002,2,2,2,1,click,"(2, 2, 2)",1
3,2003,0,0,0,0,no_click,"(0, 0, 0)",0
4,2004,2,2,7,0,no_click,"(2, 2, 7)",0


In [18]:
# Q-learning parameters
alpha = 0.1
gamma = 0.9

# Training loop
for _, row in df.iterrows():
    print(f"row is {row}")
    state_idx = state_to_index[row["state"]]
    print(f"state_idx is {state_idx}")
    #break
    action = row["last_offer"]
    reward = row["reward"]
    next_state_idx = state_idx
    print(f"next_state_idx is {next_state_idx}")
    best_next = np.max(Q[next_state_idx])
    print(f"best_next is {best_next}")
    Q[state_idx, action] = Q[state_idx, action] + alpha * (reward + gamma * best_next - Q[state_idx, action])
    print(f"Q[state_idx, action] is {Q[state_idx, action]}")
    print("***************************************************************")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Q[state_idx, action] is 0.23463812069100004
***************************************************************
row is user_id                    3643
age_group                     1
location                      4
previous_purchases            2
last_offer                    1
response               no_click
state                 (1, 4, 2)
reward                        0
Name: 1643, dtype: object
state_idx is 7
next_state_idx is 7
best_next is 0.3943911674880452
Q[state_idx, action] is 0.2659453496519893
***************************************************************
row is user_id                    3644
age_group                     2
location                      2
previous_purchases            3
last_offer                    1
response               no_click
state                 (2, 2, 3)
reward                        0
Name: 1644, dtype: object
state_idx is 9
next_state_idx is 9
best_next is 0.4825304642560996
Q[state_

In [21]:
# Display best marketing action per state
print("\nBest offer per user segment:")
for state, idx in state_to_index.items():
    print(f"state is {state}")
    print(f"idx is {idx}")
    best_action_idx = np.argmax(Q[idx])
    age = encoders["age_group"].inverse_transform([state[0]])[0]
    loc = encoders["location"].inverse_transform([state[1]])[0]
    offer = encoders["last_offer"].inverse_transform([best_action_idx])[0]
    print(f"{(age, loc, state[2])} → {offer}")
    print("********************************************")


Best offer per user segment:
state is (0, 2, 9)
idx is 0
('adult', 'IN', 9) → buy1get1
********************************************
state is (2, 3, 2)
idx is 1
('young', 'UK', 2) → free_shipping
********************************************
state is (2, 2, 2)
idx is 2
('young', 'IN', 2) → buy1get1
********************************************
state is (0, 0, 0)
idx is 3
('adult', 'AU', 0) → discount
********************************************
state is (2, 2, 7)
idx is 4
('young', 'IN', 7) → free_shipping
********************************************
state is (1, 1, 8)
idx is 5
('senior', 'CA', 8) → buy1get1
********************************************
state is (2, 4, 8)
idx is 6
('young', 'US', 8) → free_shipping
********************************************
state is (1, 4, 2)
idx is 7
('senior', 'US', 2) → buy1get1
********************************************
state is (1, 2, 2)
idx is 8
('senior', 'IN', 2) → buy1get1
********************************************
state is (2, 2, 3)
idx is