In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Create a pandas DataFrame to represent the state space
state_space = pd.DataFrame({
    'CustomerID': [1, 2, 3],
    'LoanAmount': [10000, 5000, 15000],
    'Tenure': [12, 6, 24],
    # Add other relevant customer and product data variables
})

# Scale the numerical variables in the state space
scaler = StandardScaler()
numerical_cols = ['LoanAmount', 'Tenure']
state_space[numerical_cols] = scaler.fit_transform(state_space[numerical_cols])

# Print the state space DataFrame
print(state_space)


   CustomerID  LoanAmount    Tenure
0           1    0.000000 -0.267261
1           2   -1.224745 -1.069045
2           3    1.224745  1.336306


In [2]:
import itertools
from sklearn.preprocessing import MinMaxScaler

# Define the possible loan amounts and tenures for the offers
loan_amounts = [10000, 15000, 20000]
tenures = [12, 24, 36]

# Generate all possible combinations of loan amounts and tenures
action_space = list(itertools.product(loan_amounts, tenures))

# Scale the numerical variables in the action space
action_scaler = MinMaxScaler()
action_space = action_scaler.fit_transform(action_space)

# Print the action space
print(action_space)


[[0.  0. ]
 [0.  0.5]
 [0.  1. ]
 [0.5 0. ]
 [0.5 0.5]
 [0.5 1. ]
 [1.  0. ]
 [1.  0.5]
 [1.  1. ]]


In [3]:
def calculate_reward(acceptance):
    if acceptance == 'Accept':
        return 1  # Positive reward if the customer accepts the offer
    elif acceptance == 'Decline':
        return -1  # Negative reward if the customer declines the offer
    else:
        return 0  # Neutral reward for the default offer

# Example usage:
acceptance = 'Accept'
reward = calculate_reward(acceptance)
print(reward)


1


In [4]:
import numpy as np
import tensorflow as tf

# Define the Q-learning model using a simple feed-forward neural network
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(state_space.shape[1],)),
    tf.keras.layers.Dense(len(action_space))
])

# Define the optimizer and loss function
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
loss_fn = tf.keras.losses.MeanSquaredError()

# Define the function for Q-value prediction
@tf.function
def predict_q_values(state):
    return model(state)

# Example usage:
state = state_space.iloc[0].values  # Get the first state from the state space DataFrame
state = np.expand_dims(state, axis=0)  # Add a batch dimension
q_values = predict_q_values(state)
print(q_values)


tf.Tensor(
[[ 0.03686025  0.16195558  0.14189246 -0.07226569 -0.21703526 -0.10657915
   0.06995755  0.37288317  0.16156806]], shape=(1, 9), dtype=float32)


In [5]:
# Create a sample dataset DataFrame
dataset = pd.DataFrame({
    'CustomerID': [1, 2, 3, 4, 5],
    'LoanAmount': [10000, 5000, 15000, 8000, 12000],
    'Tenure': [12, 6, 24, 12, 18],
    'Acceptance': ['Accept', 'Decline', 'Decline', 'Accept', 'Decline']
    # Add other relevant customer and product data variables
})

# Print the sample dataset DataFrame
print(dataset)


   CustomerID  LoanAmount  Tenure Acceptance
0           1       10000      12     Accept
1           2        5000       6    Decline
2           3       15000      24    Decline
3           4        8000      12     Accept
4           5       12000      18    Decline


In [26]:
# Training loop
for epoch in range(epochs):
    # Randomly sample a batch of data from the dataset
    if len(dataset) < batch_size:
        batch_indices = np.arange(len(dataset))
    else:
        batch_indices = np.random.choice(len(dataset), size=batch_size, replace=False)
        
    print(batch_indices)
    batch_states = state_space.iloc[3].values
    print("batch_states", batch_states)
#     batch_actions = np.random.choice(len(action_space), size=batch_states.shape[0])
#     batch_rewards = np.array([calculate_reward(acceptance) for acceptance in dataset.iloc[batch_indices]['Acceptance']])
#     batch_next_states = state_space.iloc[batch_indices].values

#     # Compute the target Q-values using the Q-learning update rule
#     q_values = predict_q_values(batch_states)
#     q_targets = q_values.numpy().copy()
#     max_q_values_next = np.max(predict_q_values(batch_next_states).numpy(), axis=1)
#     for i, action in enumerate(batch_actions):
#         q_targets[i, action] = batch_rewards[i] + gamma * max_q_values_next[i]

#     # Update the Q-learning model using gradient descent
#     with tf.GradientTape() as tape:
#         q_values = predict_q_values(batch_states)
#         loss_value = loss_fn(q_targets, q_values)
#     grads = tape.gradient(loss_value, model.trainable_variables)
#     optimizer.apply_gradients(zip(grads, model.trainable_variables))

# # Save the trained model
# model.save('rl_model.h5')


[0 1 2 3 4]


IndexError: single positional indexer is out-of-bounds

In [30]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Generate sample data
customer_ids = [101, 101, 101, 104, 105]
loan_amounts = [10000, 15000, 20000, 12000, 18000]
tenures = [12, 24, 36, 48, 60]
acceptances = ["Decline", "Decline", "Accept", "Decline", "Accept"]

data = {'CustomerID': customer_ids, 'LoanAmount': loan_amounts, 'Tenure': tenures, 'Acceptance': acceptances}
df = pd.DataFrame(data)

# Define the state space and action space
state_space = df[['LoanAmount', 'Tenure']]
action_space = ['Offer1', 'Offer2', 'Offer3']

# Create a Q-learning model
model = Sequential()
model.add(Dense(32, input_shape=(state_space.shape[1],), activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(len(action_space), activation='linear'))
model.compile(loss='mse', optimizer='adam')

# Parameters
gamma = 0.8  # Discount factor
epsilon = 1.0  # Exploration rate
epsilon_decay = 0.99  # Decay rate for exploration
epochs = 100  # Number of training epochs
batch_size = 32  # Batch size

# Q-learning training
for epoch in range(epochs):
    state = state_space.values
    if np.random.rand() < epsilon:
        action = np.random.choice(len(action_space))
    else:
        q_values = model.predict(state)
        action = np.argmax(q_values, axis=1)[0]
    next_state = state
    reward = 1 if acceptances[action] == 'Accept' else 0

    target = reward + gamma * np.amax(model.predict(next_state), axis=1)
    target_full = model.predict(state)
    target_full[0, action] = target[0]

    model.fit(state, target_full, epochs=1, verbose=0)

    if epsilon > 0.01:
        epsilon *= epsilon_decay

# Make recommendations for new customers
new_customers = {'LoanAmount': [13000, 17000], 'Tenure': [24, 36]}
new_df = pd.DataFrame(new_customers)

recommendations = []
for idx, row in new_df.iterrows():
    state = row.values.reshape(1, -1)
    q_values = model.predict(state)
    action = np.argmax(q_values, axis=1)[0]
    recommendations.append(action_space[action])

new_df['Recommendation'] = recommendations
print(new_df)


   LoanAmount  Tenure Recommendation
0       13000      24         Offer3
1       17000      36         Offer3


In [31]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Generate sample data for 1000 customers
np.random.seed(123)
customer_ids = range(1001, 2001)
loan_amounts = np.random.choice(range(10000, 50000, 1000), size=1000)
tenures = np.random.choice([12, 24, 36, 48, 60], size=1000)
acceptances = np.random.choice(["Accept", "Decline"], size=1000, p=[0.6, 0.4])

data = {'CustomerID': customer_ids, 'LoanAmount': loan_amounts, 'Tenure': tenures, 'Acceptance': acceptances}
df = pd.DataFrame(data)

# Define the state space and action space
state_space = df[['LoanAmount', 'Tenure']]
action_space = ['Offer1', 'Offer2', 'Offer3']

# Create a Q-learning model
model = Sequential()
model.add(Dense(32, input_shape=(state_space.shape[1],), activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(len(action_space), activation='linear'))
model.compile(loss='mse', optimizer='adam')

# Parameters
gamma = 0.8  # Discount factor
epsilon = 1.0  # Exploration rate
epsilon_decay = 0.99  # Decay rate for exploration
epochs = 10  # Number of training epochs
batch_size = 32  # Batch size

# Q-learning training
for epoch in range(epochs):
    state = state_space.values
    if np.random.rand() < epsilon:
        action = np.random.choice(len(action_space))
    else:
        q_values = model.predict(state)
        action = np.argmax(q_values, axis=1)[0]
    next_state = state
    reward = 1 if acceptances[action] == 'Accept' else 0

    target = reward + gamma * np.amax(model.predict(next_state), axis=1)
    target_full = model.predict(state)
    target_full[0, action] = target[0]

    model.fit(state, target_full, epochs=1, verbose=0)

    if epsilon > 0.01:
        epsilon *= epsilon_decay

# Make recommendations for new customers
new_customers = {'LoanAmount': [30000, 20000, 15000], 'Tenure': [24, 36, 48]}
new_df = pd.DataFrame(new_customers)

recommendations = []
for idx, row in new_df.iterrows():
    state = row.values.reshape(1, -1)
    q_values = model.predict(state)
    action = np.argmax(q_values, axis=1)[0]
    recommendations.append(action_space[action])

new_df['Recommendation'] = recommendations
print(new_df)


   LoanAmount  Tenure Recommendation
0       30000      24         Offer1
1       20000      36         Offer1
2       15000      48         Offer1


In [32]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Generate sample data for 1000 customers
np.random.seed(123)
customer_ids = range(1001, 2001)
loan_amounts = np.random.choice(range(10000, 50000, 1000), size=1000)
tenures = np.random.choice([12, 24, 36, 48, 60], size=1000)
acceptances = np.random.choice(["Accept", "Decline"], size=1000, p=[0.6, 0.4])

data = {'CustomerID': customer_ids, 'LoanAmount': loan_amounts, 'Tenure': tenures, 'Acceptance': acceptances}
df = pd.DataFrame(data)

# Define the state space and action space
state_space = df[['LoanAmount', 'Tenure']]
action_space = ['Offer1', 'Offer2', 'Offer3']
default_offer = 'Default Offer'

# Create a Q-learning model
model = Sequential()
model.add(Dense(32, input_shape=(state_space.shape[1],), activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(len(action_space), activation='linear'))
model.compile(loss='mse', optimizer='adam')

# Parameters
gamma = 0.8  # Discount factor
epsilon = 1.0  # Exploration rate
epsilon_decay = 0.99  # Decay rate for exploration
epochs = 10  # Number of training epochs
batch_size = 32  # Batch size

# Q-learning training
for epoch in range(epochs):
    state = state_space.values
    if np.random.rand() < epsilon:
        action = np.random.choice(len(action_space))
    else:
        q_values = model.predict(state)
        action = np.argmax(q_values, axis=1)[0]
    next_state = state
    reward = 1 if acceptances[action] == 'Accept' else 0

    target = reward + gamma * np.amax(model.predict(next_state), axis=1)
    target_full = model.predict(state)
    target_full[0, action] = target[0]

    model.fit(state, target_full, epochs=1, verbose=0)

    if epsilon > 0.01:
        epsilon *= epsilon_decay

# Make recommendations for new customers
new_customers = {'LoanAmount': [30000, 20000, 15000, 25000], 'Tenure': [24, 36, 48, 60]}
new_df = pd.DataFrame(new_customers)

recommendations = []
for idx, row in new_df.iterrows():
    state = row.values.reshape(1, -1)
    q_values = model.predict(state)
    action = np.argmax(q_values, axis=1)[0]
    if np.max(q_values) == 0:
        recommendations.append(default_offer)
    else:
        recommendations.append(action_space[action])

new_df['Recommendation'] = recommendations
print(new_df)


   LoanAmount  Tenure Recommendation
0       30000      24         Offer3
1       20000      36         Offer3
2       15000      48         Offer3
3       25000      60         Offer3


In [38]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Generate sample data for 1000 customers
np.random.seed(123)
customer_ids = range(1001, 2001)
loan_amounts = np.random.choice(range(10000, 50000, 1000), size=1000)
tenures = np.random.choice([12, 24, 36, 48, 60], size=1000)
acceptances = np.random.choice(["Accept", "Decline"], size=1000, p=[0.6, 0.4])

data = {'CustomerID': customer_ids, 'LoanAmount': loan_amounts, 'Tenure': tenures, 'Acceptance': acceptances}
df = pd.DataFrame(data)

# Define the state space and action space
state_space = df[['LoanAmount', 'Tenure']]
action_space = ['Offer1', 'Offer2', 'Offer3']
default_offer = 'Default Offer'

# Create a Q-learning model
model = Sequential()
model.add(Dense(32, input_shape=(state_space.shape[1],), activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(len(action_space), activation='linear'))
model.compile(loss='mse', optimizer='adam')

# Parameters
gamma = 0.8  # Discount factor
epsilon = 1.0  # Exploration rate
epsilon_decay = 0.99  # Decay rate for exploration
epochs = 10  # Number of training epochs
batch_size = 32  # Batch size

# Q-learning training
for epoch in range(epochs):
    state = state_space.values
    if np.random.rand() < epsilon:
        action = np.random.choice(len(action_space))
    else:
        q_values = model.predict(state)
        action = np.argmax(q_values, axis=1)[0]
    next_state = state
    reward = 1 if acceptances[action] == 'Accept' else 0

    target = reward + gamma * np.amax(model.predict(next_state), axis=1)
    target_full = model.predict(state)
    target_full[0, action] = target[0]

    model.fit(state, target_full, epochs=1, verbose=0)

    if epsilon > 0.01:
        epsilon *= epsilon_decay

# Make recommendations for new customers
new_customer_ids = range(2001, 2101)
new_loan_amounts = np.random.choice(range(10000, 50000, 1000), size=100)
new_tenures = np.random.choice([12, 24, 36, 48, 60], size=100)

new_data = {'CustomerID': new_customer_ids, 'LoanAmount': new_loan_amounts, 'Tenure': new_tenures}
new_df = pd.DataFrame(new_data)

recommendations = []
for idx, row in new_df.iterrows():
    state = row[['LoanAmount', 'Tenure']].values.reshape(1, -1)
    q_values = model.predict(state)
    action = np.argmax(q_values, axis=1)[0]
    if np.max(q_values) == 0:
        recommendations.append(default_offer)
    else:
        recommendations.append(action_space[action])

new_df['Recommendation'] = recommendations
print(new_df)




KeyboardInterrupt: 

In [37]:
new_df['Recommendation'].unique()

array(['Offer1'], dtype=object)

In [51]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Generate sample data for 1000 customers
np.random.seed(123)
customer_ids = range(1001, 2001)
loan_amounts = np.random.choice(range(10000, 50000, 1000), size=1000)
tenures = np.random.choice([12, 24, 36, 48, 60], size=1000)
acceptances = np.random.choice(["Accept", "Decline"], size=1000, p=[0.6, 0.4])

data = {'CustomerID': customer_ids, 'LoanAmount': loan_amounts, 'Tenure': tenures, 'Acceptance': acceptances}
df = pd.DataFrame(data)

# Define the state space and action space
state_space = df[['LoanAmount', 'Tenure']]
action_space = ['Offer1', 'Offer2', 'Offer3']
default_offer = 'Default Offer'

# Create a Q-learning model
model = Sequential()
model.add(Dense(32, input_shape=(state_space.shape[1],), activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(len(action_space), activation='linear'))
model.compile(loss='mse', optimizer='adam')

# Parameters
gamma = 0.8  # Discount factor
epsilon = 1.0  # Exploration rate
epsilon_decay = 0.99  # Decay rate for exploration
epochs = 10  # Number of training epochs
batch_size = 32  # Batch size

# Q-learning training
for epoch in range(epochs):
    state = state_space.values
    if np.random.rand() < epsilon:
        action = np.random.choice(len(action_space))
    else:
        q_values = model.predict(state)
        action = np.argmax(q_values, axis=1)[0]
    next_state = state
    reward = 1 if acceptances[action] == 'Accept' else 0

    target = reward + gamma * np.amax(model.predict(next_state), axis=1)
    target_full = model.predict(state)
    target_full[0, action] = target[0]

    model.fit(state, target_full, epochs=1, verbose=0)

    if epsilon > 0.01:
        epsilon *= epsilon_decay

# Make recommendations for new customers
new_customer_ids = range(2001, 2101)
new_loan_amounts = np.random.choice(range(10000, 50000, 1000), size=100)
new_tenures = np.random.choice([12, 24, 36, 48, 60], size=100)

new_data = {'CustomerID': new_customer_ids, 'LoanAmount': new_loan_amounts, 'Tenure': new_tenures}
new_df = pd.DataFrame(new_data)



recommendations = []
for idx, row in new_df.iterrows():
    state = row[['LoanAmount', 'Tenure']].values.reshape(1, -1)
    q_values = model.predict(state)
    print("q_values", q_values)
    action = np.argmax(q_values, axis=1)[0]
    if np.max(q_values) == 0:
        recommendations.append(default_offer)
    else:
        recommendations.append(action_space[action])

new_df['Recommendation'] = recommendations
print(new_df)


q_values [[ -878.42645  3955.3079  -2015.9812 ]]
q_values [[-2108.4382  9493.18   -4838.3228]]
q_values [[-2722.656 12260.581 -6249.793]]
q_values [[-4217.408 18987.406 -9676.481]]
q_values [[-3776.878 17007.182 -8668.971]]
q_values [[-3162.66   14239.78   -7257.5005]]
q_values [[-2635.2864 11865.969  -6048.012 ]]
q_values [[-2546.3386 11468.288  -5846.8286]]
q_values [[-1141.0609  5140.169  -2621.1252]]
q_values [[-2370.5474 10677.019  -5443.666 ]]
q_values [[-3778.9824 17011.271  -8668.172 ]]
q_values [[-1581.065   7119.3677 -3628.8337]]
q_values [[-4041.6162 18196.13   -9273.315 ]]
q_values [[-4305.8296 19384.06   -9877.86  ]]
q_values [[-4042.1423 18197.156  -9273.118 ]]
q_values [[-4130.5635 18593.812  -9474.5   ]]
q_values [[-1668.9603  7515.0024 -3830.4155]]
q_values [[-4216.881 18986.38  -9676.679]]
q_values [[-1933.1736  8702.931  -4434.96  ]]
q_values [[-4040.5642 18194.088  -9273.716 ]]
q_values [[-1403.6959  6325.03   -3226.2688]]
q_values [[-1316.8522  5931.4385 -3024.2883

In [40]:
print(new_df)

    CustomerID  LoanAmount  Tenure Recommendation
0         2001       10000      12         Offer1
1         2002       24000      24         Offer1
2         2003       31000      48         Offer1
3         2004       48000      36         Offer1
4         2005       43000      60         Offer1
..         ...         ...     ...            ...
95        2096       14000      24         Offer1
96        2097       40000      36         Offer1
97        2098       29000      12         Offer1
98        2099       17000      60         Offer1
99        2100       31000      36         Offer1

[100 rows x 4 columns]


In [58]:
new_df['Recommendation'].unique()

array(['Offer3'], dtype=object)

In [54]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import load_model

# Generate sample data for 1000 customers
np.random.seed(123)
customer_ids = range(1001, 2001)
loan_amounts = np.random.choice(range(10000, 50000, 1000), size=1000)
tenures = np.random.choice([12, 24, 36, 48, 60], size=1000)
acceptances = np.random.choice(["Accept", "Decline"], size=1000, p=[0.6, 0.4])

data = {'CustomerID': customer_ids, 'LoanAmount': loan_amounts, 'Tenure': tenures, 'Acceptance': acceptances}
df = pd.DataFrame(data)

# Define the state space and action space
state_space = df[['LoanAmount', 'Tenure']]
action_space = ['Offer1', 'Offer2', 'Offer3']
default_offer = 'Default Offer'

# Create a Q-learning model
model = Sequential()
model.add(Dense(32, input_shape=(state_space.shape[1],), activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(len(action_space), activation='linear'))
model.compile(loss='mse', optimizer='adam')

# Parameters
gamma = 0.8  # Discount factor
epsilon = 1.0  # Exploration rate
epsilon_decay = 0.99  # Decay rate for exploration
epochs = 100  # Number of training epochs
batch_size = 32  # Batch size

# Q-learning training
for epoch in range(epochs):
    state = state_space.values
    if np.random.rand() < epsilon:
        action = np.random.choice(len(action_space))
    else:
        q_values = model.predict(state)
        action = np.argmax(q_values, axis=1)[0]
    next_state = state
    reward = 1 if acceptances[action] == 'Accept' else 0

    target = reward + gamma * np.amax(model.predict(next_state), axis=1)
    target_full = model.predict(state)
    target_full[0, action] = target[0]

    model.fit(state, target_full, epochs=1, verbose=0)

    if epsilon > 0.01:
        epsilon *= epsilon_decay

# Save the trained model
model.save('loan_offer_model.h5')





In [55]:
# Load the saved model
loaded_model = load_model('loan_offer_model.h5')

# Make recommendations for new customers
new_customer_ids = range(2001, 3001)
new_loan_amounts = np.random.choice(range(10000, 50000, 1000), size=1000)
new_tenures = np.random.choice([12, 24, 36, 48, 60], size=1000)

new_data = {'CustomerID': new_customer_ids, 'LoanAmount': new_loan_amounts, 'Tenure': new_tenures}
new_df = pd.DataFrame(new_data)

recommendations = []
for idx, row in new_df.iterrows():
    new_state = row[['LoanAmount', 'Tenure']].values.reshape(1, -1)
    q_values = loaded_model.predict(new_state)
    action = np.argmax(q_values, axis=1)[0]
    if np.max(q_values) <= 0:
        recommendations.append(default_offer)
    else:
        recommendations.append(action_space[action])

new_df['Recommendation'] = recommendations
print(new_df)


     CustomerID  LoanAmount  Tenure Recommendation
0          2001       36000      12         Offer3
1          2002       34000      12         Offer3
2          2003       43000      24         Offer3
3          2004       33000      36         Offer3
4          2005       24000      60         Offer3
..          ...         ...     ...            ...
995        2996       26000      24         Offer3
996        2997       29000      48         Offer3
997        2998       24000      60         Offer3
998        2999       35000      36         Offer3
999        3000       46000      36         Offer3

[1000 rows x 4 columns]


In [57]:
new_df.head(50)

Unnamed: 0,CustomerID,LoanAmount,Tenure,Recommendation
0,2001,36000,12,Offer3
1,2002,34000,12,Offer3
2,2003,43000,24,Offer3
3,2004,33000,36,Offer3
4,2005,24000,60,Offer3
5,2006,39000,12,Offer3
6,2007,20000,24,Offer3
7,2008,19000,24,Offer3
8,2009,37000,24,Offer3
9,2010,16000,36,Offer3
