# Define Agents

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchrl.envs import EnvBase
from torchrl.data import TensorSpec, CompositeSpec
import numpy as np
import random

# Define Borrower Agent
class BorrowerAgent(nn.Module):
    def __init__(self, feature_dim):
        super(BorrowerAgent, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(feature_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, 1),  # Output proposed interest rate
            nn.Sigmoid()       # Ensure output is between 0 and 1 (interest rate)
        )

    def forward(self, x):
        return 6 + self.model(x) * 30  # Scale interest rate to 6-36%

# Define Lender Agent
class LenderAgent(nn.Module):
    def __init__(self, feature_dim):
        super(LenderAgent, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(5*feature_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 5),  # Output funding decisions for all borrowers
            nn.Softmax()       # Probability distribution of choosing borrower to fund
        )

    def forward(self, x):
        return self.model(x)

In [2]:
import torch.nn.init as init
def init_weights(m):
    if isinstance(m, nn.Linear):
        init.kaiming_normal_(m.weight)
        init.constant_(m.bias, 0)

# Load Random Sample of Borrowers from Lending Club

In [3]:
import pandas as pd
raw_data = pd.read_csv('data/LC_top20.csv')

In [4]:
# flip loan status 0 to 1 and 1 to 0
raw_data['loan_status'] = 1 - raw_data['loan_status']
# get proportion of default loans (equal to 1)
default_proportion = raw_data['loan_status'].value_counts(normalize=True)[1]

In [5]:
raw_data['loan_status'].value_counts()

loan_status
0    1076751
1     268599
Name: count, dtype: int64

In [6]:
# get mean and std of each feature
x_borrower = raw_data.drop(columns=['loan_status', 'int_rate'])
x_borrower = x_borrower.astype(np.float32)
mean_borrower = torch.tensor(x_borrower.mean().values)
std_borrower = torch.tensor(x_borrower.std().values)

x_lender = raw_data.drop(columns=['loan_status'])
x_lender = x_lender.astype(np.float32)
mean_lender = torch.tensor(x_lender.mean().values)
std_lender = torch.tensor(x_lender.std().values)

In [7]:
def normalize_borrower(x):
    return torch.div(torch.sub(x, mean_borrower), std_borrower)
def normalize_lender(x):
    return torch.div(torch.sub(x, mean_lender), std_lender)

In [8]:
# get random sample of 5 loans where 4 are paid off and 1 is defaulted
default_loans = raw_data[raw_data['loan_status'] == 0]
paid_loans = raw_data[raw_data['loan_status'] == 1]
sample = pd.concat([default_loans.sample(1), paid_loans.sample(4)])

In [9]:
sample = sample.drop(columns=['loan_status'])

In [10]:
sample = sample.drop(columns=['int_rate'])

In [11]:
sample.shape

(5, 19)

In [12]:
sample

Unnamed: 0,loan_amnt,term,dti,mo_sin_old_rev_tl_op,acc_open_past_24mths,total_bc_limit,home_ownership_RENT,annual_inc,delinq_2yrs,avg_cur_bal,num_accts_ever_120_pd,all_util,num_rev_tl_bal_gt_0,num_tl_120dpd_2m,purpose_moving,open_act_il,num_tl_op_past_12m,inq_last_6mths,num_bc_sats
51090,30000.0,36,11.85,329.0,3.0,89500.0,False,120000.0,0.0,47767.0,0.0,58.176122,3.0,0.0,False,2.79254,2.0,0.0,4.0
1013926,25000.0,36,24.13,138.0,3.0,17300.0,False,79000.0,0.0,20883.0,1.0,58.176122,9.0,0.0,False,2.79254,2.0,0.0,6.0
779042,13600.0,60,19.17,198.0,1.0,6200.0,True,36000.0,0.0,6485.0,0.0,58.176122,3.0,0.0,False,2.79254,1.0,1.0,2.0
295164,15000.0,60,28.99,288.0,10.0,29700.0,False,100000.0,1.0,4526.0,0.0,58.176122,8.0,0.0,False,2.79254,6.0,1.0,6.0
1124251,10000.0,36,13.46,134.0,3.0,12900.0,True,42000.0,0.0,1558.0,0.0,58.176122,9.0,0.0,False,2.79254,2.0,1.0,5.0


# Load XGB Model

In [13]:
"""
import xgboost as xgb
import pickle

# Load the model from the pickle file
with open('models/xgb_top20.pkl', 'rb') as file:
    xgbm = pickle.load(file)
"""

"\nimport xgboost as xgb\nimport pickle\n\n# Load the model from the pickle file\nwith open('models/xgb_top20.pkl', 'rb') as file:\n    xgbm = pickle.load(file)\n"

In [14]:
# cols_ordered = xgbm.get_booster().feature_names

# Define Environment

In [15]:
# Define the P2P Lending Environment
class P2PLendingEnv(EnvBase):
    def __init__(self, num_borrowers, num_lenders, borrower_features, xgboost_model):
        super().__init__()
        self.num_borrowers = num_borrowers
        self.num_lenders = num_lenders
        self.xgboost_model = xgboost_model
        self.feature_dim = 20
        
        # Initialize borrowers and lenders
        self.borrowers = [BorrowerAgent(self.feature_dim - 1) for _ in range(num_borrowers)]
        self.lenders = [LenderAgent(self.feature_dim) for _ in range(num_lenders)]

        # initialize borrower and lender weights randomly
        for borrower in self.borrowers:
            borrower.apply(init_weights)
        for lender in self.lenders:
            lender.apply(init_weights)

        # Borrower features and hidden default probabilities
        self.borrower_features = borrower_features
        self.default_probs = [0] * self.num_borrowers # default probabilities for each borrower

        # Action and observation specs
        observation_spec = TensorSpec(torch.Size([self.feature_dim]), device=torch.device('cpu'), space=None)
        self.observation_spec = CompositeSpec(observation=observation_spec)
        self.action_spec = TensorSpec(torch.Size([1]), device=torch.device('cpu'), space=None)

    def calculate_default_outcomes(self, int_rates):
        """
        NOT USING XGB FOR NOW TO FIX TRAINING BUGS
        Use XGB to set default outcome of each borrower based on interest rate chosen 
        """

        # default probability for each borrower is (int_rate - 6) / 30 

        # Calculate default probabilities
        default_probs = (int_rates - 6) / 30

        # Sample default outcomes based on probabilities
        self.default_probs = default_probs.squeeze(1)
    
    def _set_seed(self, seed):
        pass

    def _reset(self, **kwargs):
        return self.borrower_features

    def _step(self, borrower_actions, lender_actions):
        rewards_lenders = []
        rewards_borrowers = torch.zeros(self.num_borrowers)

        for lender_idx in range(self.num_lenders):
            borrower_actions = borrower_actions.clone()
            lender_actions = lender_actions.clone()
            funding_probs = lender_actions[lender_idx]
            loan_amnts = torch.tensor(self.borrower_features['loan_amnt'].values.astype(np.float32))

            pos_lender = (funding_probs * loan_amnts).sum() + (funding_probs * loan_amnts * borrower_actions).sum()
            pos_borrowers = funding_probs * loan_amnts
            neg_lender = (funding_probs * self.default_probs * loan_amnts).sum()
            neg_borrowers = self.default_probs * loan_amnts

            # Avoid in-place updates
            borrower_reward_update = pos_borrowers - neg_borrowers
            print(borrower_reward_update.shape)
            lender_reward_update = pos_lender - neg_lender

            rewards_borrowers = rewards_borrowers + borrower_reward_update
            rewards_lenders.append(lender_reward_update)

        return rewards_borrowers, rewards_lenders


In [16]:
# Training Loop
num_episodes = 200
num_borrowers = 5
num_lenders = 10

# Load pre-trained XGBoost model
xgboost_model = None  

env = P2PLendingEnv(num_borrowers, num_lenders, sample, xgboost_model)
borrower_optimizers = [optim.Adam(b.model.parameters(), lr=0.01) for b in env.borrowers]
lender_optimizers = [optim.Adam(l.model.parameters(), lr=0.01) for l in env.lenders]

borrower_loss = []
lender_loss = []

for episode in range(num_episodes):
    print("Episode ", episode)
    borrower_features = env._reset()
    torch.autograd.set_detect_anomaly(True)

    # Borrowers propose interest rates. borrower actions is proposed interest rate of each borrower.
    borrower_actions = torch.stack([
        b(normalize_borrower(
            torch.tensor(borrower_features.iloc[i].values.astype(np.float32), dtype=torch.float32)
        )) for i, b in enumerate(env.borrowers)
    ])

    #print(borrower_actions.shape)

    # Default outcome is predicted using XGBoost model
    env.calculate_default_outcomes(borrower_actions)

    # Lenders decide to fund. output of each lender is a 5 element vector of funding decisions for each borrower. 
    borrower_features_tensor = torch.tensor(borrower_features.values.astype(np.float32), dtype=torch.float32)
    print(borrower_features_tensor.shape)
    lender_inputs = torch.cat((borrower_features_tensor, borrower_actions), dim=1)
    lender_inputs = normalize_lender(lender_inputs)
    lender_actions = torch.stack([l(torch.flatten(lender_inputs)) for i, l in enumerate(env.lenders)]) # shape: (num_lenders, 5)

    # Step environment
    rewards_borrowers, rewards_lenders = env._step(borrower_actions, lender_actions)

    # Logging
    """
    total_lender_loss = -rewards[:num_borrowers].sum().item()
    total_borrower_loss = -rewards[num_borrowers:].sum().item()
    borrower_loss.append(total_borrower_loss)
    lender_loss.append(total_lender_loss)
    """

    # Check gradients
    
    # Update Lenders
    for i, optimizer in enumerate(lender_optimizers):
        print(f"Optimizing lender {i}")
        optimizer.zero_grad()
        loss = -rewards_lenders[i]  # Maximize lender reward
        print(f"Tensor version before backward for lender {i}: {lender_actions._version}")
        loss.backward(retain_graph=True)
        optimizer.step()

    # Update Borrowers
    for i, optimizer in enumerate(borrower_optimizers):
        optimizer.zero_grad()
        loss = -rewards_borrowers[i]  # Maximize borrower reward
        loss.backward(retain_graph=True)
        for name, param in env.borrowers[0].named_parameters():
            if param.grad is not None:
                print(f"Gradients for {name}: {param.grad.norm().item()}")
            else:
                print(f"No gradients for {name}")
        optimizer.step()

    

print("Training Complete")

  return self._call_impl(*args, **kwargs)
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/Users/simonlee/Documents/Cypher/P2P RL/.venv/lib/python3.11/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/simonlee/Documents/Cypher/P2P RL/.venv/lib/python3.11/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/Users/simonlee/Documents/Cypher/P2P RL/.venv/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 739, in start
    self.io_loop.start()
  File "/Users/simonlee/Documents/Cypher/P2P RL/.venv/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 205, in start
    self.asyncio_loop.run_forever()
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/base_events.py", line 607, in run_forever
    self._run_once()
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/pyth

Episode  0
torch.Size([5, 19])
torch.Size([5])
torch.Size([5])
torch.Size([5])
torch.Size([5])
torch.Size([5])
torch.Size([5])
torch.Size([5])
torch.Size([5])
torch.Size([5])
torch.Size([5])
Optimizing lender 0
Tensor version before backward for lender 0: 0
Optimizing lender 1
Tensor version before backward for lender 1: 0


RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [128, 5]], which is output 0 of AsStridedBackward0, is at version 3; expected version 2 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!