In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict

# Step 1: Sample Dataset Preparation
data = {
    'CustomerID': [1, 2, 3, 4, 5],
    'LoanAmount': [5000, 10000, 15000, 20000, 25000],
    'Tenure': [12, 24, 36, 48, 60],
    'Acceptance': ['Decline', 'Accept', 'Decline', 'Accept', 'Decline']
}

df = pd.DataFrame(data)

df.head()

Unnamed: 0,CustomerID,LoanAmount,Tenure,Acceptance
0,1,5000,12,Decline
1,2,10000,24,Accept
2,3,15000,36,Decline
3,4,20000,48,Accept
4,5,25000,60,Decline


In [2]:

# Step 2: Environment Setup
class LoanOfferEnvironment:
    def __init__(self, data):
        self.data = data
        self.num_customers = len(data)
        self.reset()

    def reset(self):
        self.current_step = 0

    def get_state(self):
        customer_data = self.data.iloc[self.current_step]
        return customer_data['LoanAmount'], customer_data['Tenure']

    def take_action(self, action):
        customer_data = self.data.iloc[self.current_step]
        accepted = customer_data['Acceptance'] == 'Accept'
        if action == 0 and accepted:
            reward = 1  # Positive reward for accepting the first offer
        elif action != 0 and accepted:
            reward = -1  # Negative reward for declining subsequent offers after accepting the first one
        else:
            reward = 0  # No reward if the offer is declined or it's the first offer
        self.current_step += 1
        done = self.current_step >= self.num_customers
        return reward, done


In [4]:
# Step 3: Action and Reward Definition
num_actions = 3  # Example: Three different offers

# Step 4: Model Training (Q-learning)
class QQModel:
    def __init__(self, num_actions, alpha=0.1, gamma=0.9):
        self.num_actions = num_actions
        self.alpha = alpha
        self.gamma = gamma
        self.q_table = defaultdict(lambda: np.zeros(num_actions))

    def update_q_table(self, state, action, reward, next_state):
        current_q = self.q_table[state][action]
        max_next_q = np.max(self.q_table[next_state])
        new_q = current_q + self.alpha * (reward + self.gamma * max_next_q - current_q)
        self.q_table[state][action] = new_q

    def choose_action(self, state, epsilon):
        if np.random.random() < epsilon:
            return np.random.choice(self.num_actions)
        return np.argmax(self.q_table[state])


# Step 5: Offer Generation
def generate_offers(env, model, epsilon):
    env.reset()
    done = False
    while not done:
        state = env.get_state()
        action = model.choose_action(state, epsilon)
        reward, done = env.take_action(action)
        next_state = env.get_state()
        model.update_q_table(state, action, reward, next_state)
        if action == 0 and reward == 1:
            print(f"Offer Accepted: Customer ID {env.current_step} | Offer Details: Amount: {state[0]}, Tenure: {state[1]}")
            # You can save the accepted offer details in a separate dataframe or data structure if desired




In [6]:
# Step 6: Output Formatting (using a separate dataframe for accepted offers)
accepted_offers = {'CustomerID': [], 'Amount': [], 'Tenure': []}

accepted_offers_df = pd.DataFrame(accepted_offers)

# Training and offer generation
env = LoanOfferEnvironment(df)
model = QQModel(num_actions)

# Training the model
epsilon = 0.5  # Exploration rate
num_episodes = 1000
for _ in range(num_episodes):
    generate_offers(env, model, epsilon)
    epsilon *= 0.99  # Reduce exploration rate over time for exploitation

# Generating offers for new customers
new_customers_data = {
    'CustomerID': [6, 7, 8],
    'LoanAmount': [3000, 15000, 20000],
    'Tenure': [12, 36, 48]
}
new_customers_df = pd.DataFrame(new_customers_data)

new_env = LoanOfferEnvironment(new_customers_df)
generate_offers(new_env, model, epsilon)

Offer Accepted: Customer ID 4 | Offer Details: Amount: 20000, Tenure: 48


IndexError: single positional indexer is out-of-bounds

In [9]:
import pandas as pd
import numpy as np
from collections import defaultdict

# Step 1: Sample Dataset Preparation
data = {
    'CustomerID': [1, 2, 3, 4, 5],
    'LoanAmount': [5000, 10000, 15000, 20000, 25000],
    'Tenure': [12, 24, 36, 48, 60],
    'Status': ['Decline', 'Accept', 'Decline', 'Accept', 'Decline']
}

df = pd.DataFrame(data)


# Step 2: Environment Setup
class LoanOfferEnvironment:
    def __init__(self, data):
        self.data = data
        self.num_customers = len(data)
        self.reset()

    def reset(self):
        self.current_step = 0

    def get_state(self):
        customer_data = self.data.iloc[self.current_step % self.num_customers]  # Wrap around the index using modulo
        return customer_data['LoanAmount'], customer_data['Tenure']

    def take_action(self, action):
        customer_data = self.data.iloc[self.current_step % self.num_customers]  # Wrap around the index using modulo
        accepted = customer_data['Status'] == 'Accept'
        if action == 0 and accepted:
            reward = 1  # Positive reward for accepting the first offer
        elif action != 0 and accepted:
            reward = -1  # Negative reward for declining subsequent offers after accepting the first one
        else:
            reward = 0  # No reward if the offer is declined or it's the first offer
        self.current_step += 1
        done = self.current_step >= self.num_customers
        return reward, done


# Step 3: Action and Reward Definition
num_actions = 3  # Example: Three different offers

# Step 4: Model Training (Q-learning)
class QQModel:
    def __init__(self, num_actions, alpha=0.1, gamma=0.9):
        self.num_actions = num_actions
        self.alpha = alpha
        self.gamma = gamma
        self.q_table = defaultdict(lambda: np.zeros(num_actions))

    def update_q_table(self, state, action, reward, next_state):
        current_q = self.q_table[state][action]
        max_next_q = np.max(self.q_table[next_state])
        new_q = current_q + self.alpha * (reward + self.gamma * max_next_q - current_q)
        self.q_table[state][action] = new_q

    def choose_action(self, state, epsilon):
        if np.random.random() < epsilon:
            return np.random.choice(self.num_actions)
        return np.argmax(self.q_table[state])


# Step 5: Offer Generation
def generate_offers(env, model, epsilon):
    env.reset()
    done = False
    while not done:
        state = env.get_state()
        action = model.choose_action(state, epsilon)
        reward, done = env.take_action(action)
        next_state = env.get_state()
        model.update_q_table(state, action, reward, next_state)
        if action == 0 and reward == 1:
            print(f"Offer Accepted: Customer ID {env.current_step + 1} | Offer Details: Amount: {state[0]}, Tenure: {state[1]}")
            # You can save the accepted offer details in a separate dataframe or data structure if desired


# Step 6: Output Formatting (using a separate dataframe for accepted offers)
accepted_offers = {'CustomerID': [], 'Amount': [], 'Tenure': []}

# Training and offer generation
env = LoanOfferEnvironment(df)
model = QQModel(num_actions)

# Training the model
epsilon = 0.5  # Exploration rate
num_episodes = 1000
for _ in range(num_episodes):
    generate_offers(env, model, epsilon)
    epsilon *= 0.99  # Reduce exploration rate over time for exploitation

# Generating offers for new customers
new_customers_data = {
    'CustomerID': [6, 7, 8],
    'LoanAmount': [3000, 15000, 20000],
    'Tenure': [12, 36, 48]
}
new_customers_df = pd.DataFrame(new_customers_data)

new_env = LoanOfferEnvironment(new_customers_df)
generate_offers(new_env, model, epsilon)


Offer Accepted: Customer ID 5 | Offer Details: Amount: 20000, Tenure: 48
Offer Accepted: Customer ID 3 | Offer Details: Amount: 10000, Tenure: 24
Offer Accepted: Customer ID 5 | Offer Details: Amount: 20000, Tenure: 48
Offer Accepted: Customer ID 3 | Offer Details: Amount: 10000, Tenure: 24
Offer Accepted: Customer ID 5 | Offer Details: Amount: 20000, Tenure: 48
Offer Accepted: Customer ID 3 | Offer Details: Amount: 10000, Tenure: 24
Offer Accepted: Customer ID 5 | Offer Details: Amount: 20000, Tenure: 48
Offer Accepted: Customer ID 3 | Offer Details: Amount: 10000, Tenure: 24
Offer Accepted: Customer ID 5 | Offer Details: Amount: 20000, Tenure: 48
Offer Accepted: Customer ID 3 | Offer Details: Amount: 10000, Tenure: 24
Offer Accepted: Customer ID 3 | Offer Details: Amount: 10000, Tenure: 24
Offer Accepted: Customer ID 5 | Offer Details: Amount: 20000, Tenure: 48
Offer Accepted: Customer ID 3 | Offer Details: Amount: 10000, Tenure: 24
Offer Accepted: Customer ID 5 | Offer Details: Amou

KeyError: 'Status'

In [10]:
import pandas as pd
import numpy as np
from collections import defaultdict

# Step 1: Sample Dataset Preparation
data = {
    'CustomerID': [1, 2, 3, 4, 5],
    'LoanAmount': [5000, 10000, 15000, 20000, 25000],
    'Tenure': [12, 24, 36, 48, 60],
    'Acceptance': ['Decline', 'Accept', 'Decline', 'Accept', 'Decline']
}

df = pd.DataFrame(data)


# Step 2: Environment Setup
class LoanOfferEnvironment:
    def __init__(self, data):
        self.data = data
        self.num_customers = len(data)
        self.reset()

    def reset(self):
        self.current_step = 0

    def get_state(self):
        customer_data = self.data.iloc[self.current_step % self.num_customers]  # Wrap around the index using modulo
        return customer_data['LoanAmount'], customer_data['Tenure']

    def take_action(self, action):
        customer_data = self.data.iloc[self.current_step % self.num_customers]  # Wrap around the index using modulo
        accepted = customer_data['Acceptance'] == 'Accept'
        if action == 0 and accepted:
            reward = 1  # Positive reward for accepting the first offer
        elif action != 0 and accepted:
            reward = -1  # Negative reward for declining subsequent offers after accepting the first one
        else:
            reward = 0  # No reward if the offer is declined or it's the first offer
        self.current_step += 1
        done = self.current_step >= self.num_customers
        return reward, done


# Step 3: Action and Reward Definition
num_actions = 3  # Example: Three different offers

# Step 4: Model Training (Q-learning)
class QQModel:
    def __init__(self, num_actions, alpha=0.1, gamma=0.9):
        self.num_actions = num_actions
        self.alpha = alpha
        self.gamma = gamma
        self.q_table = defaultdict(lambda: np.zeros(num_actions))

    def update_q_table(self, state, action, reward, next_state):
        current_q = self.q_table[state][action]
        max_next_q = np.max(self.q_table[next_state])
        new_q = current_q + self.alpha * (reward + self.gamma * max_next_q - current_q)
        self.q_table[state][action] = new_q

    def choose_action(self, state, epsilon):
        if np.random.random() < epsilon:
            return np.random.choice(self.num_actions)
        return np.argmax(self.q_table[state])


# Step 5: Offer Generation
def generate_offers(env, model, epsilon):
    env.reset()
    done = False
    while not done:
        state = env.get_state()
        action = model.choose_action(state, epsilon)
        reward, done = env.take_action(action)
        next_state = env.get_state()
        model.update_q_table(state, action, reward, next_state)
        if action == 0 and reward == 1:
            print(f"Offer Accepted: Customer ID {env.current_step + 1} | Offer Details: Amount: {state[0]}, Tenure: {state[1]}")
            # You can save the accepted offer details in a separate dataframe or data structure if desired


# Step 6: Output Formatting (using a separate dataframe for accepted offers)
accepted_offers = {'CustomerID': [], 'Amount': [], 'Tenure': []}

# Training and offer generation
env = LoanOfferEnvironment(df)
model = QQModel(num_actions)

# Training the model
epsilon = 0.5  # Exploration rate
num_episodes = 1000
for _ in range(num_episodes):
    generate_offers(env, model, epsilon)
    epsilon *= 0.99  # Reduce exploration rate over time for exploitation

# Generating offers for new customers
new_customers_data = {
    'CustomerID': [6, 7, 8],
    'LoanAmount': [3000, 15000, 20000],
    'Tenure': [12, 36, 48]
}
new_customers_df = pd.DataFrame(new_customers_data)

new_env = LoanOfferEnvironment(new_customers_df)
generate_offers(new_env, model, epsilon)


Offer Accepted: Customer ID 3 | Offer Details: Amount: 10000, Tenure: 24
Offer Accepted: Customer ID 3 | Offer Details: Amount: 10000, Tenure: 24
Offer Accepted: Customer ID 5 | Offer Details: Amount: 20000, Tenure: 48
Offer Accepted: Customer ID 3 | Offer Details: Amount: 10000, Tenure: 24
Offer Accepted: Customer ID 5 | Offer Details: Amount: 20000, Tenure: 48
Offer Accepted: Customer ID 3 | Offer Details: Amount: 10000, Tenure: 24
Offer Accepted: Customer ID 5 | Offer Details: Amount: 20000, Tenure: 48
Offer Accepted: Customer ID 3 | Offer Details: Amount: 10000, Tenure: 24
Offer Accepted: Customer ID 5 | Offer Details: Amount: 20000, Tenure: 48
Offer Accepted: Customer ID 5 | Offer Details: Amount: 20000, Tenure: 48
Offer Accepted: Customer ID 3 | Offer Details: Amount: 10000, Tenure: 24
Offer Accepted: Customer ID 3 | Offer Details: Amount: 10000, Tenure: 24
Offer Accepted: Customer ID 5 | Offer Details: Amount: 20000, Tenure: 48
Offer Accepted: Customer ID 3 | Offer Details: Amou

KeyError: 'Acceptance'