In [None]:
import numpy as np
import random

# Function to discretize continuous features into bins
def discretize(value, feature_range, num_bins):
    """Map a continuous value to an integer bin index in ``[0, num_bins - 1]``.

    ``feature_range`` is split into ``num_bins`` equal-width bins. Values
    below the lower bound fall into bin 0; values at or above the upper
    bound fall into the last bin.

    Args:
        value: The continuous feature value to discretize.
        feature_range: ``(low, high)`` bounds of the feature; ``low`` and
            ``high`` must differ.
        num_bins: Number of equal-width bins.

    Returns:
        The bin index as a plain ``int``.
    """
    low, high = feature_range[0], feature_range[1]
    fraction = (value - low) / (high - low)
    # Scale the [0, 1] fraction to bin units before clipping; without the
    # scaling every in-range value would truncate to bin 0.
    return int(np.clip(fraction * num_bins, 0, num_bins - 1))

# Recommend an offer using the full state (customer, amount, interest rate, tenor)
def recommend_offer(customer_index, amount, interest_rate, tenor, epsilon=0.1):
    """Choose an offer index for a customer with an epsilon-greedy policy.

    The three offer features are binned and combined with ``customer_index``
    to form the key into the module-level ``q_table``.

    Args:
        customer_index: Row index of the customer in ``q_table``.
        amount: Offer amount.
        interest_rate: Offer interest rate.
        tenor: Offer tenor.
        epsilon: Probability of picking a random offer instead of the
            highest-valued one.

    Returns:
        A ``(action, offer_details)`` tuple: the chosen offer index and a
        dict echoing the amount, interest rate and tenor that were passed in.
    """
    # Adjust the feature ranges below to match your data.
    raw_features = (
        (amount, (0, 5000)),
        (interest_rate, (0, 0.2)),
        (tenor, (0, 36)),
    )
    binned = tuple(discretize(raw, bounds, num_bins) for raw, bounds in raw_features)
    state = (customer_index,) + binned

    explore = random.uniform(0, 1) < epsilon
    if explore:
        # Exploration: any offer, uniformly at random
        action = random.randint(0, num_offers - 1)
    else:
        # Exploitation: offer with the highest learned value for this state
        action = np.argmax(q_table[state])

    offer_details = dict(
        zip(("Amount", "Interest Rate", "Tenor"), (amount, interest_rate, tenor))
    )
    return action, offer_details

# Sample dataset (Customer ID, Amount, Interest Rate, Tenor, Decision)
# Replace this with your actual dataset
sample_dataset = [
    (0, 1000, 0.05, 12, "Accepted"),
    (0, 2000, 0.06, 18, "Declined"),
    (1, 1500, 0.07, 24, "Declined"),
    # Add more data here
]

# Number of equal-width bins used for each continuous feature
num_bins = 10

# Customer IDs in first-seen order. dict.fromkeys de-duplicates while keeping
# insertion order, so the ID -> index mapping is reproducible from run to run
# (set iteration order is not guaranteed, e.g. for string IDs).
unique_customer_ids = list(
    dict.fromkeys(customer_id for customer_id, *_ in sample_dataset)
)
customer_id_to_index = {
    customer_id: index for index, customer_id in enumerate(unique_customer_ids)
}

# State and action space sizes
# Ensure these variables are set correctly based on your dataset
num_customers = len(unique_customer_ids)
num_offers = 2  # Update with the number of unique offers in your dataset
num_features = 3  # Amount, Interest Rate, Tenor

# Q-value table initialised to zeros, indexed by
# (customer index, amount bin, interest-rate bin, tenor bin, offer)
q_table = np.zeros((num_customers, num_bins, num_bins, num_bins, num_offers))

# Define the reward function
def get_reward(decision):
    """Return the reward for a customer decision.

    Only the exact, case-sensitive string ``"Accepted"`` yields ``1``; any
    other value yields ``-1``.
    """
    if decision == "Accepted":
        return 1
    return -1

# Implement the Q-learning algorithm
def q_learning(dataset, num_epochs, learning_rate, discount_factor, epsilon):
    """Update the module-level ``q_table`` in place from historical decisions.

    For every record the state is built from the customer index and the
    binned amount, interest rate and tenor. An action is chosen
    epsilon-greedily and its Q-value is moved toward
    ``reward + discount_factor * max_a Q(state, a)``.

    Note that the dataset carries no successor state, so the "next state"
    used for bootstrapping is the same state as the current one.

    Args:
        dataset: Iterable of ``(customer_id, amount, interest_rate, tenor,
            decision)`` records.
        num_epochs: Number of full passes over ``dataset``.
        learning_rate: Step size applied to the temporal-difference error.
        discount_factor: Weight of the bootstrapped value in the target.
        epsilon: Probability of replacing the greedy action with a random one.

    Returns:
        The module-level ``q_table`` (the same array that was updated).
    """
    for _ in range(num_epochs):
        for customer_id, amount, interest_rate, tenor, decision in dataset:
            # Records for customers without a table row are ignored
            if customer_id not in customer_id_to_index:
                continue

            state = (
                customer_id_to_index[customer_id],
                discretize(amount, (0, 5000), num_bins),
                discretize(interest_rate, (0, 0.2), num_bins),
                discretize(tenor, (0, 36), num_bins),
            )
            # Basic indexing yields a view, so writes below land in q_table
            q_values = q_table[state]

            # Greedy action before the update; it also serves as the
            # next-state action because the next state equals this state.
            greedy_action = np.argmax(q_values)

            chosen_action = greedy_action
            if random.uniform(0, 1) < epsilon:  # Exploration
                chosen_action = random.randint(0, num_offers - 1)

            reward = get_reward(decision)

            # Move the chosen action's value toward the bootstrapped target
            td_target = reward + discount_factor * q_values[greedy_action]
            q_values[chosen_action] += learning_rate * (
                td_target - q_values[chosen_action]
            )
    return q_table

# Training hyperparameters
num_epochs = 1000  # full passes over the dataset
learning_rate = 0.1  # step size of each Q-value update
discount_factor = 0.9  # weight of the bootstrapped value
epsilon = 0.1  # probability of a random (exploratory) action

# Train the model
q_table = q_learning(
    dataset=sample_dataset,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
    discount_factor=discount_factor,
    epsilon=epsilon,
)

# Walk through the historical records, recommend an offer for each one and ask
# for the customer's response.
# Records are expected as (customer_id, amount, interest_rate, tenor, decision).
_offer_labels = {0: "Offer 0", 1: "Offer 1"}

for customer_id, amount, interest_rate, tenor, decision in sample_dataset:
    # Customers missing from the ID -> index mapping cannot be looked up
    if customer_id not in customer_id_to_index:
        print(f"Customer {customer_id}: Customer not found in historical data.")
        continue

    customer_index = customer_id_to_index[customer_id]

    # Query the trained q_table for an offer and the details to display
    offer, offer_details = recommend_offer(customer_index, amount, interest_rate, tenor, epsilon)
    recommended_offer = _offer_labels.get(offer, "No offer recommended.")

    print(f"Customer {customer_id}: {recommended_offer} with the following details:")
    for key, value in offer_details.items():
        print(f"{key}: {value}")

    # Ask for the customer's response; anything other than "y"/"Y" is a decline
    decision = input(f"Customer {customer_id}: Do you accept the offer? (y/n): ")
    if decision.lower() != "y":
        print(f"Customer {customer_id} declined the offer.")
    else:
        print(f"Customer {customer_id} accepted the offer.")
