In [5]:
import numpy as np
import random

# Function to discretize continuous features into bins
def discretize(value, feature_range, num_bins):
    return int(np.clip((value - feature_range[0]) / (feature_range[1] - feature_range[0]), 0, num_bins - 1))

# Update the recommend_offer function to consider the entire state
def recommend_offer(customer_id, amount, interest_rate, tenor):
    # Discretize the continuous features into bins
    amount_bin = discretize(amount, (0, 5000), num_bins)  # Adjust the feature_range as per your data
    interest_rate_bin = discretize(interest_rate, (0, 0.2), num_bins)  # Adjust the feature_range as per your data
    tenor_bin = discretize(tenor, (0, 36), num_bins)  # Adjust the feature_range as per your data

    state = (customer_id, amount_bin, interest_rate_bin, tenor_bin)
    action = np.argmax(q_table[state])
    return action

# Function to offer the next offer to a customer
def offer_next(customer_id, amount, interest_rate, tenor):
    action = recommend_offer(customer_id, amount, interest_rate, tenor)
    if action == 0:
        offer = "Offer 0"
        print(f"Offer 0 recommended to Customer {customer_id}: Amount={amount}, Interest Rate={interest_rate}, Tenor={tenor}")
    elif action == 1:
        offer = "Offer 1"
        print(f"Offer 1 recommended to Customer {customer_id}: Amount={amount}, Interest Rate={interest_rate}, Tenor={tenor}")
    else:
        offer = None  # Default case if no offer is recommended
    return offer

# Function to offer the default offer when the threshold is reached
def offer_default():
    print("Threshold offers reached. Offering the default offer.")

# Sample dataset (Customer ID, Amount, Interest Rate, Tenor, Decision)
# Replace this with your actual dataset
# Sample dataset (Customer ID, Amount, Interest Rate, Tenor, Decision)
# Replace this with your actual dataset
sample_dataset = [
    (0, 1000, 0.05, 12, "Accepted"),
    (0, 2000, 0.06, 18, "Declined"),
    (1, 1500, 0.07, 24, "Declined"),
    # Add more data here
]

# Define the number of bins for discretization
num_bins = 10

# Define the state and action spaces
# Ensure these variables are set correctly based on your dataset
num_customers = 2  # Update with the number of unique customers in your dataset
num_offers = 2  # Update with the number of unique offers in your dataset
num_features = 4  # Customer ID, Amount, Interest Rate, Tenor

# Initialize the Q-value table with zeros
q_table = np.zeros((num_customers, num_offers, num_bins, num_bins, num_bins))


# Define the reward function
def get_reward(decision):
    return 1 if decision == "Accepted" else -1

# Implement the Q-learning algorithm
def q_learning(dataset, num_epochs, learning_rate, discount_factor, epsilon):
    for epoch in range(num_epochs):
        for customer_id, amount, interest_rate, tenor, decision in dataset:
            current_state = (customer_id, discretize(amount, (0, 5000), num_bins), discretize(interest_rate, (0, 0.2), num_bins), discretize(tenor, (0, 36), num_bins))
            current_action = np.argmax(q_table[current_state])
            
            if random.uniform(0, 1) < epsilon:  # Exploration
                current_action = random.randint(0, num_offers - 1)
            
            reward = get_reward(decision)
            new_state = (customer_id, discretize(amount, (0, 5000), num_bins), discretize(interest_rate, (0, 0.2), num_bins), discretize(tenor, (0, 36), num_bins))
            new_action = np.argmax(q_table[new_state])
            
            q_table[current_state][current_action] += learning_rate * (
                reward + discount_factor * q_table[new_state][new_action] - q_table[current_state][current_action]
            )
    return q_table

# Train the model
num_epochs = 1000
learning_rate = 0.1
discount_factor = 0.9
epsilon = 0.1
q_learning(sample_dataset, num_epochs, learning_rate, discount_factor, epsilon)

# Example usage:
threshold_offers = 5

# Offer to the customer until the threshold is reached or the customer accepts an offer
for customer_id in range(num_customers):
    accepted_offer = None
    for _ in range(threshold_offers):
        # Sample customer features (replace with actual customer features)
        customer_amount = random.uniform(1000, 5000)
        customer_interest_rate = random.uniform(0.05, 0.15)
        customer_tenor = random.randint(6, 36)

        offer = offer_next(customer_id, customer_amount, customer_interest_rate, customer_tenor)
        if offer:  # Check if offer is not None before using it
            # Assuming the customer accepts or declines the offer
            # Update the dataset with the new decision for the next iteration
            decision = input(f"Customer {customer_id}: Do you accept the offer? (y/n): ")
            if decision.lower() == "y":
                accepted_offer = offer
                break
    if accepted_offer:
        print(f"Customer {customer_id} accepted offer: {accepted_offer}")
    else:
        offer_default()


Offer 0 recommended to Customer 0: Amount=1578.9462288903562, Interest Rate=0.10336988145480894, Tenor=34
Customer 0: Do you accept the offer? (y/n): n
Offer 0 recommended to Customer 0: Amount=3756.4554583226595, Interest Rate=0.08050824990212035, Tenor=21
Customer 0: Do you accept the offer? (y/n): n
Offer 0 recommended to Customer 0: Amount=3161.7788355763446, Interest Rate=0.08235323120193558, Tenor=6
Customer 0: Do you accept the offer? (y/n): n
Offer 0 recommended to Customer 0: Amount=4749.818063769778, Interest Rate=0.12236232000938217, Tenor=11
Customer 0: Do you accept the offer? (y/n): n
Offer 0 recommended to Customer 0: Amount=3622.8127392888377, Interest Rate=0.07090419933785905, Tenor=31
Customer 0: Do you accept the offer? (y/n): y
Customer 0 accepted offer: Offer 0
Threshold offers reached. Offering the default offer.


In [6]:
import numpy as np
import pandas as pd
import random

# Function to discretize continuous features into bins
def discretize(value, feature_range, num_bins):
    return int(np.clip((value - feature_range[0]) / (feature_range[1] - feature_range[0]), 0, num_bins - 1))

# Update the recommend_offer function to consider the entire state
def recommend_offer(customer_id, amount, interest_rate, tenor):
    # Discretize the continuous features into bins
    amount_bin = discretize(amount, (0, 5000), num_bins)  # Adjust the feature_range as per your data
    interest_rate_bin = discretize(interest_rate, (0, 0.2), num_bins)  # Adjust the feature_range as per your data
    tenor_bin = discretize(tenor, (0, 36), num_bins)  # Adjust the feature_range as per your data

    state = (customer_id, amount_bin, interest_rate_bin, tenor_bin)
    action = np.argmax(q_table[state])
    return action

# Function to offer the next offer to a customer
def offer_next(customer_id, amount, interest_rate, tenor):
    action = recommend_offer(customer_id, amount, interest_rate, tenor)
    if action == 0:
        offer = "Offer"
        print(f"Offer recommended to Customer {customer_id}: Amount={amount}, Interest Rate={interest_rate}, Tenor={tenor}")
    elif action == 1:
        offer = "Offer"
        print(f"Offer recommended to Customer {customer_id}: Amount={amount}, Interest Rate={interest_rate}, Tenor={tenor}")
    else:
        offer = None  # Default case if no offer is recommended
    return offer

# Function to offer the default offer when the threshold is reached
def offer_default():
    print("Threshold offers reached. Offering the default offer.")

# Sample dataset in Pandas DataFrame (Customer ID, Amount, Interest Rate, Tenor, Decision)
# Replace this with your actual dataset
data = {
    'Customer ID': [0, 0, 1],
    'Amount': [1000, 2000, 1500],
    'Interest Rate': [0.05, 0.06, 0.07],
    'Tenor': [12, 18, 24],
    'Decision': ["Accepted", "Declined", "Declined"]
}
sample_dataset = pd.DataFrame(data)

# Define the number of bins for discretization
num_bins = 10

# Initialize the Q-value table with zeros
num_customers = len(sample_dataset['Customer ID'].unique())
num_offers = len(sample_dataset['Decision'].unique())
num_features = 4  # Customer ID, Amount, Interest Rate, Tenor
q_table = np.zeros((num_customers, num_offers, num_bins, num_bins, num_bins))

# Define the reward function
def get_reward(decision):
    return 1 if decision == "Accepted" else -1

# Implement the Q-learning algorithm
def q_learning(dataset, num_epochs, learning_rate, discount_factor, epsilon):
    for epoch in range(num_epochs):
        for _, row in dataset.iterrows():
            customer_id = row['Customer ID']
            amount = row['Amount']
            interest_rate = row['Interest Rate']
            tenor = row['Tenor']
            decision = row['Decision']

            current_state = (customer_id, discretize(amount, (0, 5000), num_bins), discretize(interest_rate, (0, 0.2), num_bins), discretize(tenor, (0, 36), num_bins))
            current_action = np.argmax(q_table[current_state])
            
            if random.uniform(0, 1) < epsilon:  # Exploration
                current_action = random.randint(0, num_offers - 1)
            
            reward = get_reward(decision)
            new_state = (customer_id, discretize(amount, (0, 5000), num_bins), discretize(interest_rate, (0, 0.2), num_bins), discretize(tenor, (0, 36), num_bins))
            new_action = np.argmax(q_table[new_state])
            
            q_table[current_state][current_action] += learning_rate * (
                reward + discount_factor * q_table[new_state][new_action] - q_table[current_state][current_action]
            )

# Train the model
num_epochs = 1000
learning_rate = 0.1
discount_factor = 0.9
epsilon = 0.1
q_learning(sample_dataset, num_epochs, learning_rate, discount_factor, epsilon)

# Example usage:
threshold_offers = 5

# Offer to the customer until the threshold is reached or the customer accepts an offer
for customer_id in range(num_customers):
    accepted_offer = None
    for _ in range(threshold_offers):
        # Sample customer features (replace with actual customer features)
        row = sample_dataset[sample_dataset['Customer ID'] == customer_id].iloc[0]
        customer_amount = row['Amount']
        customer_interest_rate = row['Interest Rate']
        customer_tenor = row['Tenor']

        offer = offer_next(customer_id, customer_amount, customer_interest_rate, customer_tenor)
        if offer:  # Check if offer is not None before using it
            # Assuming the customer accepts or declines the offer
            # Update the dataset with the new decision for the next iteration
            decision = input(f"Customer {customer_id}: Do you accept the offer? (y/n): ")
            if decision.lower() == "y":
                accepted_offer = offer
                break
    if accepted_offer:
        print(f"Customer {customer_id} accepted offer: {accepted_offer}")
    else:
        offer_default()


Offer 0 recommended to Customer 0: Amount=1000, Interest Rate=0.05, Tenor=12
Customer 0: Do you accept the offer? (y/n): n
Offer 0 recommended to Customer 0: Amount=1000, Interest Rate=0.05, Tenor=12
Customer 0: Do you accept the offer? (y/n): n
Offer 0 recommended to Customer 0: Amount=1000, Interest Rate=0.05, Tenor=12
Customer 0: Do you accept the offer? (y/n): y
Customer 0 accepted offer: Offer 0
Threshold offers reached. Offering the default offer.


In [7]:
# Function to offer the next offer to a customer
def offer_next(customer_id, amount, interest_rate, tenor):
    action = recommend_offer(customer_id, amount, interest_rate, tenor)
    offer_name = f"Offer {action}"
    print(f"{offer_name} recommended to Customer {customer_id}: Amount={amount}, Interest Rate={interest_rate}, Tenor={tenor}")
    return offer_name

# Example usage:
threshold_offers = 5

# Offer to the customer until the threshold is reached or the customer accepts an offer
for customer_id in range(num_customers):
    accepted_offer = None
    for _ in range(threshold_offers):
        # Sample customer features (replace with actual customer features)
        row = sample_dataset[sample_dataset['Customer ID'] == customer_id].iloc[0]
        customer_amount = row['Amount']
        customer_interest_rate = row['Interest Rate']
        customer_tenor = row['Tenor']

        offer = offer_next(customer_id, customer_amount, customer_interest_rate, customer_tenor)
        if offer:  # Check if offer is not None before using it
            # Assuming the customer accepts or declines the offer
            # Update the dataset with the new decision for the next iteration
            decision = input(f"Customer {customer_id}: Do you accept the offer? (y/n): ")
            if decision.lower() == "y":
                accepted_offer = offer
                break
    if accepted_offer:
        print(f"Customer {customer_id} accepted offer: {accepted_offer}")
    else:
        offer_default()


Offer 0 recommended to Customer 0: Amount=1000, Interest Rate=0.05, Tenor=12
Customer 0: Do you accept the offer? (y/n): n
Offer 0 recommended to Customer 0: Amount=1000, Interest Rate=0.05, Tenor=12


KeyboardInterrupt: Interrupted by user

In [8]:
# Rest of the code from the previous example...

# Implement the Q-learning algorithm with offer updates
def q_learning(dataset, num_epochs, learning_rate, discount_factor, epsilon):
    for epoch in range(num_epochs):
        for _, row in dataset.iterrows():
            customer_id = row['Customer ID']
            amount = row['Amount']
            interest_rate = row['Interest Rate']
            tenor = row['Tenor']
            decision = row['Decision']

            current_state = (customer_id, discretize(amount, (0, 5000), num_bins), discretize(interest_rate, (0, 0.2), num_bins), discretize(tenor, (0, 36), num_bins))
            current_action = np.argmax(q_table[current_state])
            
            if random.uniform(0, 1) < epsilon:  # Exploration
                current_action = random.randint(0, num_offers - 1)
            
            reward = get_reward(decision)
            new_state = (customer_id, discretize(amount, (0, 5000), num_bins), discretize(interest_rate, (0, 0.2), num_bins), discretize(tenor, (0, 36), num_bins))
            new_action = np.argmax(q_table[new_state])
            
            q_table[current_state][current_action] += learning_rate * (
                reward + discount_factor * q_table[new_state][new_action] - q_table[current_state][current_action]
            )

            # Update Q-table with the new action taken by the customer
            q_table[current_state] = np.max(q_table[current_state], axis=0)

# Train the model with the updated Q-learning algorithm
q_learning(sample_dataset, num_epochs, learning_rate, discount_factor, epsilon)

# Example usage:
threshold_offers = 5

# Offer to the customer until the threshold is reached or the customer accepts an offer
for customer_id in range(num_customers):
    accepted_offer = None
    for _ in range(threshold_offers):
        # Sample customer features (replace with actual customer features)
        row = sample_dataset[sample_dataset['Customer ID'] == customer_id].iloc[0]
        customer_amount = row['Amount']
        customer_interest_rate = row['Interest Rate']
        customer_tenor = row['Tenor']

        offer = offer_next(customer_id, customer_amount, customer_interest_rate, customer_tenor)
        if offer:  # Check if offer is not None before using it
            # Assuming the customer accepts or declines the offer
            # Update the dataset with the new decision for the next iteration
            decision = input(f"Customer {customer_id}: Do you accept the offer? (y/n): ")
            if decision.lower() == "y":
                accepted_offer = offer
                break
    if accepted_offer:
        print(f"Customer {customer_id} accepted offer: {accepted_offer}")
    else:
        offer_default()


Offer 0 recommended to Customer 0: Amount=1000, Interest Rate=0.05, Tenor=12
Customer 0: Do you accept the offer? (y/n): n
Offer 0 recommended to Customer 0: Amount=1000, Interest Rate=0.05, Tenor=12


KeyboardInterrupt: Interrupted by user

In [9]:
import numpy as np
import pandas as pd
import random

# Function to discretize continuous features into bins
def discretize(value, feature_range, num_bins):
    return int(np.clip((value - feature_range[0]) / (feature_range[1] - feature_range[0]), 0, num_bins - 1))

# Update the recommend_offer function to consider the entire state
def recommend_offer(customer_id, amount, interest_rate, tenor):
    # Discretize the continuous features into bins
    amount_bin = discretize(amount, (0, 5000), num_bins)  # Adjust the feature_range as per your data
    interest_rate_bin = discretize(interest_rate, (0, 0.2), num_bins)  # Adjust the feature_range as per your data
    tenor_bin = discretize(tenor, (0, 36), num_bins)  # Adjust the feature_range as per your data

    state = (customer_id, amount_bin, interest_rate_bin, tenor_bin)
    action = np.argmax(q_table[state])
    return action

# Function to offer the next offer to a customer
def offer_next(customer_id, amount, interest_rate, tenor):
    action = recommend_offer(customer_id, amount, interest_rate, tenor)
    offer_name = f"Offer {action}"
    print(f"{offer_name} recommended to Customer {customer_id}: Amount={amount}, Interest Rate={interest_rate}, Tenor={tenor}")
    return offer_name

# Function to offer the default offer when the threshold is reached
def offer_default():
    print("Threshold offers reached. Offering the default offer.")

# Sample dataset in Pandas DataFrame (Customer ID, Amount, Interest Rate, Tenor, Decision)
# Replace this with your actual dataset
data = {
    'Customer ID': [0, 0, 1],
    'Amount': [1000, 2000, 1500],
    'Interest Rate': [0.05, 0.06, 0.07],
    'Tenor': [12, 18, 24],
    'Decision': ["Accepted", "Declined", "Declined"]
}
sample_dataset = pd.DataFrame(data)

# Define the number of bins for discretization
num_bins = 10

# Define the state and action spaces
num_customers = len(sample_dataset['Customer ID'].unique())
num_offers = len(sample_dataset['Decision'].unique())
num_features = 4  # Customer ID, Amount, Interest Rate, Tenor

# Initialize the Q-value table and offer history with zeros
q_table = np.zeros((num_customers, num_offers, num_bins, num_bins, num_bins), dtype=float)
offer_history = np.zeros((num_customers, num_offers), dtype=int)

# Define the reward function
def get_reward(decision):
    return 1 if decision == "Accepted" else -1

# Implement the Q-learning algorithm with offer updates
def q_learning(dataset, num_epochs, learning_rate, discount_factor, epsilon):
    for epoch in range(num_epochs):
        for _, row in dataset.iterrows():
            customer_id = row['Customer ID']
            amount = row['Amount']
            interest_rate = row['Interest Rate']
            tenor = row['Tenor']
            decision = row['Decision']

            current_state = (customer_id, discretize(amount, (0, 5000), num_bins), discretize(interest_rate, (0, 0.2), num_bins), discretize(tenor, (0, 36), num_bins))
            current_action = np.argmax(q_table[current_state])
            
            if random.uniform(0, 1) < epsilon:  # Exploration
                current_action = random.randint(0, num_offers - 1)
            
            reward = get_reward(decision)
            new_state = (customer_id, discretize(amount, (0, 5000), num_bins), discretize(interest_rate, (0, 0.2), num_bins), discretize(tenor, (0, 36), num_bins))
            new_action = np.argmax(q_table[new_state])
            
            q_table[current_state][current_action] += learning_rate * (
                reward + discount_factor * q_table[new_state][new_action] - q_table[current_state][current_action]
            )

            # Update Q-table with the new action taken by the customer
            offer_history[customer_id][current_action] += 1
            offers_made = offer_history[customer_id].sum()
            if offers_made >= threshold_offers:
                q_table[current_state] = np.max(q_table[current_state], axis=0)
                offer_history[customer_id] = np.zeros(num_offers, dtype=int)

# Train the model with the updated Q-learning algorithm
num_epochs = 1000
learning_rate = 0.1
discount_factor = 0.9
epsilon = 0.1
threshold_offers = 5
q_learning(sample_dataset, num_epochs, learning_rate, discount_factor, epsilon)

# Example usage:
# Rest of the code remains the same...


IndexError: index 2 is out of bounds for axis 0 with size 2