

```

```

## **TASK 1: EDA + Preprocessing**

# CODE

In [None]:
import pandas as pd

# Location of the raw accepted-loans export; kept in one place so it is easy to repoint.
RAW_CSV_PATH = "/appl_accepted_20072019Q3.csv"

# low_memory=False makes pandas read the file in one pass instead of chunked
# type inference, which avoids mixed-dtype columns.
df = pd.read_csv(RAW_CSV_PATH, low_memory=False)


In [None]:
# Keep only loans with a final outcome and derive the binary label from it.
# A single mapping drives both the row filter and the label so they cannot drift apart.
status_to_target = {'Fully Paid': 0, 'Charged Off': 1}

# .copy() makes the filtered frame independent of the raw one, so the column
# assignment below does not write into a slice (SettingWithCopyWarning).
df = df[df['loan_status'].isin(list(status_to_target))].copy()

# Binary Target Mapping: 0 = fully paid, 1 = charged off
df['target'] = df['loan_status'].map(status_to_target)

# Select Features
features = ['loan_amnt','int_rate','annual_inc','dti','emp_length',
            'term','home_ownership','purpose','grade','sub_grade',
            'verification_status','delinq_2yrs','revol_util','total_acc']

# Copy again: later cells assign encoded columns into this frame.
df = df[features + ['target']].copy()

# Handle Missing Values & Encode Categoricals

In [None]:
from sklearn.preprocessing import LabelEncoder

# Columns that are integer-encoded below.
cat_cols = ['home_ownership','purpose','grade','sub_grade','verification_status','term', 'emp_length']

# Integer-encode each categorical column. astype(str) turns missing values into
# the literal category "nan", so these columns have no NaN left afterwards.
# NOTE(review): LabelEncoder orders categories lexicographically; for
# 'emp_length' and 'term' the resulting codes presumably do not follow the
# natural ordering of the values — confirm whether that matters downstream.
# assign() builds a new frame instead of writing into `df` column by column,
# which avoids SettingWithCopyWarning on a frame obtained by column selection.
df = df.assign(**{c: LabelEncoder().fit_transform(df[c].astype(str)) for c in cat_cols})

# Impute remaining gaps with the per-column median. numeric_only=True keeps the
# behaviour stable across pandas versions if a non-numeric column is present.
# NOTE: the medians are computed over all rows, i.e. before the train/test
# split performed later in the notebook.
df = df.fillna(df.median(numeric_only=True))


# Train/Test Split

In [None]:
from sklearn.model_selection import train_test_split

# Separate the label from the predictor columns.
y = df['target']
X = df.drop(columns='target')

# 80/20 hold-out split, stratified on the label so both parts keep the same
# class ratio; the fixed seed makes the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Persist the fully preprocessed frame (features + target) for reuse.
df.to_csv("processed_loan_data.csv", index=False)


# **TASK 2: Deep Learning Classification**

# Code

In [None]:
import torch
import torch.nn as nn
from sklearn.metrics import roc_auc_score, f1_score

class MLP(nn.Module):
    """Feed-forward binary classifier that outputs a probability per row.

    Architecture: input standardisation -> Linear(128) -> ReLU -> Dropout(0.3)
    -> Linear(64) -> ReLU -> Dropout(0.3) -> Linear(1) -> Sigmoid.

    The first layer is a non-affine ``BatchNorm1d`` that standardises every
    input feature. Without it, features on very different raw scales are fed
    straight into the first ``Linear`` layer, which can push the final sigmoid
    to exactly 0 or 1 so that no gradient flows (the recorded run of this
    notebook plateaued at a constant loss with test AUC 0.5, which is
    consistent with that failure mode). ``momentum=None`` makes the layer keep
    a plain cumulative average of the batch statistics, so in ``eval()`` mode
    it applies the statistics seen during training.

    Args:
        input_dim: Number of input features per sample.

    Note:
        In ``train()`` mode the normalisation layer needs more than one sample
        per batch.
    """
    def __init__(self, input_dim):
        super().__init__()
        self.layers = nn.Sequential(
            # Parameter-free standardisation of the raw inputs.
            nn.BatchNorm1d(input_dim, affine=False, momentum=None),
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )
    def forward(self, x):
        """Return probabilities of shape ``(batch, 1)`` for input ``(batch, input_dim)``."""
        return self.layers(x)

# Seed PyTorch before the weights are created so that initialisation (and the
# dropout masks drawn during training) are repeatable across kernel restarts.
torch.manual_seed(42)

# One input unit per feature column.
model = MLP(X_train.shape[1])
# The network already ends in a Sigmoid, so plain BCELoss on probabilities is used.
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)



# Train Loop

In [None]:
# Build the training tensors once; converting the DataFrame inside the loop
# would copy the whole training set on every epoch.
X_train_t = torch.tensor(X_train.values, dtype=torch.float32)
y_train_t = torch.tensor(y_train.values, dtype=torch.float32)

# Full-batch training: each epoch is a single optimiser step on all rows.
for epoch in range(20):
    model.train()
    optimizer.zero_grad()
    # squeeze(1) drops only the output-unit axis, so the prediction keeps the
    # same shape as the target even for a single-row batch.
    pred = model(X_train_t).squeeze(1)
    loss = criterion(pred, y_train_t)
    loss.backward()
    optimizer.step()
    print(epoch, loss.item())


0 59.446720123291016
1 38.8918342590332
2 25.732641220092773
3 21.297805786132812
4 20.322559356689453
5 20.08755874633789
6 20.019641876220703
7 19.99201202392578
8 19.99458885192871
9 19.987287521362305
10 19.98943519592285
11 19.986858367919922
12 19.98814582824707
13 19.98814582824707
14 19.98814582824707
15 19.98814582824707
16 19.98814582824707
17 19.98814582824707
18 19.98814582824707
19 19.98814582824707


# Evaluation

In [None]:
# Switch off dropout and score the held-out rows.
model.eval()
# no_grad() avoids building an autograd graph over the whole test set, which
# only costs memory during inference.
with torch.no_grad():
    # squeeze(1) yields a 1-D vector of probabilities, the shape the metric
    # functions expect.
    pred_test = model(torch.tensor(X_test.values, dtype=torch.float32)).squeeze(1).numpy()
auc = roc_auc_score(y_test, pred_test)
# Explicit 0.5 decision threshold (strictly greater, matching what round() did).
f1 = f1_score(y_test, (pred_test > 0.5).astype(int))
print("Test AUC:", auc)
print("Test F1:", f1)


Test AUC: 0.5
Test F1: 0.0


# **TASK 3: Offline RL Agent**

# Define RL Dataset


In [None]:
import numpy as np

# Work on a copy so the reward column does not leak into the supervised frame.
df_rl = df.copy()

# Reward of the (always taken) "approve" action:
#   fully paid  (target == 0) -> + loan_amnt * int_rate
#   charged off (target == 1) -> - loan_amnt
# The previous expression returned 0 for charged-off loans and its
# `-loan_amnt` branch could never be reached, so defaults carried no penalty.
# Computed column-wise instead of with a row-wise apply().
# NOTE(review): if int_rate is stored in percent rather than as a fraction the
# interest term is 100x too large relative to the principal — confirm units.
is_default = df_rl['target'] == 1
df_rl['reward'] = np.where(is_default,
                           -df_rl['loan_amnt'],
                           df_rl['loan_amnt'] * df_rl['int_rate'])

states = df_rl[features].values
# Every historical loan in this data was approved, so the logged action is
# always 1.
actions = np.ones(len(df_rl), dtype=np.int64)
rewards = df_rl['reward'].values

In [None]:
pip install d3rlpy


Collecting d3rlpy
  Downloading d3rlpy-2.8.1-py3-none-any.whl.metadata (11 kB)
Collecting gym>=0.26.0 (from d3rlpy)
  Downloading gym-0.26.2.tar.gz (721 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m721.7/721.7 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting structlog (from d3rlpy)
  Downloading structlog-25.5.0-py3-none-any.whl.metadata (9.5 kB)
Collecting colorama (from d3rlpy)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Collecting dataclasses-json (from d3rlpy)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting gymnasium==1.0.0 (from d3rlpy)
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json->d3rlpy)
  Downloading marshmallow-3.26.1-py3-none-any.whl.met

In [None]:
from d3rlpy.algos import DiscreteCQL, DiscreteCQLConfig
from d3rlpy.dataset import MDPDataset
import torch

from d3rlpy.preprocessing import StandardObservationScaler

# Fit only on the rows of the training split. Fitting on every row would
# include the rows that are later used to evaluate the policy.
train_rl = df_rl.loc[X_train.index]

# Each loan is a one-step episode, hence every transition is terminal.
# The logged action is always 1 (approve).
dataset = MDPDataset(
    train_rl[features].values,
    np.ones(len(train_rl), dtype=np.int64),
    train_rl['reward'].values,
    terminals=np.ones(len(train_rl)),
)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Standardise observations inside the algorithm; the scaler is fitted on the
# dataset by fit() and applied automatically again at prediction time, so
# callers keep passing raw feature values.
cql_config = DiscreteCQLConfig(observation_scaler=StandardObservationScaler())
cql = cql_config.create(device=device)
cql.fit(dataset, n_steps=20000)

[2m2025-12-09 20:35.24[0m [[32m[1minfo     [0m] [1mSignatures have been automatically determined.[0m [36maction_signature[0m=[35mSignature(dtype=[dtype('int64')], shape=[(1,)])[0m [36mobservation_signature[0m=[35mSignature(dtype=[dtype('float64')], shape=[(14,)])[0m [36mreward_signature[0m=[35mSignature(dtype=[dtype('float64')], shape=[(1,)])[0m
[2m2025-12-09 20:35.24[0m [[32m[1minfo     [0m] [1mAction-space has been automatically determined.[0m [36maction_space[0m=[35m<ActionSpace.DISCRETE: 2>[0m
[2m2025-12-09 20:35.26[0m [[32m[1minfo     [0m] [1mAction size has been automatically determined.[0m [36maction_size[0m=[35m2[0m
[2m2025-12-09 20:35.26[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(14,)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], sha

Epoch 1/2:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2025-12-09 20:36.53[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20251209203526: epoch=1 step=10000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0011439874887466432, 'time_algorithm_update': 0.007227797961235047, 'loss': 72264.63454589844, 'td_loss': 72264.63454589844, 'conservative_loss': 0.0, 'time_step': 0.008606693887710572}[0m [36mstep[0m=[35m10000[0m
[2m2025-12-09 20:36.53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/DiscreteCQL_20251209203526/model_10000.d3[0m


Epoch 2/2:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2025-12-09 20:38.27[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20251209203526: epoch=2 step=20000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0012074750185012818, 'time_algorithm_update': 0.0077506826877594, 'loss': 68763.53992460937, 'td_loss': 68763.41676210938, 'conservative_loss': 0.1231626708984375, 'time_step': 0.009257170701026916}[0m [36mstep[0m=[35m20000[0m
[2m2025-12-09 20:38.27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/DiscreteCQL_20251209203526/model_20000.d3[0m


[(1,
  {'time_sample_batch': 0.0011439874887466432,
   'time_algorithm_update': 0.007227797961235047,
   'loss': 72264.63454589844,
   'td_loss': 72264.63454589844,
   'conservative_loss': 0.0,
   'time_step': 0.008606693887710572}),
 (2,
  {'time_sample_batch': 0.0012074750185012818,
   'time_algorithm_update': 0.0077506826877594,
   'loss': 68763.53992460937,
   'td_loss': 68763.41676210938,
   'conservative_loss': 0.1231626708984375,
   'time_step': 0.009257170701026916})]

# Evaluate Policy


In [None]:
import numpy as np
from d3rlpy.dataset import MDPDataset

# Restrict the RL frame to the rows of the held-out split.
df_test_rl = df_rl.loc[X_test.index]

# Observation matrix, logged actions and rewards for those rows.
states_test = df_test_rl[features].to_numpy()
rewards_test = df_test_rl['reward'].to_numpy()
# Every historical loan was approved, so the logged action is the constant 1.
actions_test = np.full(len(df_test_rl), 1)

# Wrap the held-out transitions as one-step episodes (all terminal).
test_dataset = MDPDataset(
    states_test,
    actions_test,
    rewards_test,
    terminals=np.ones(len(states_test)),
)

# Greedy action of the learned policy per state, then the Q-value the agent
# assigns to that action; the reported number is the mean of those Q-values.
predicted_actions_test = cql.predict(states_test)
q_values_test = cql.predict_value(states_test, predicted_actions_test)
estimated_policy_value_test = q_values_test.mean()
print("Estimated Policy Value on Test Set:", estimated_policy_value_test)

[2m2025-12-09 20:39.58[0m [[32m[1minfo     [0m] [1mSignatures have been automatically determined.[0m [36maction_signature[0m=[35mSignature(dtype=[dtype('int64')], shape=[(1,)])[0m [36mobservation_signature[0m=[35mSignature(dtype=[dtype('float64')], shape=[(14,)])[0m [36mreward_signature[0m=[35mSignature(dtype=[dtype('float64')], shape=[(1,)])[0m
[2m2025-12-09 20:39.58[0m [[32m[1minfo     [0m] [1mAction-space has been automatically determined.[0m [36maction_space[0m=[35m<ActionSpace.DISCRETE: 2>[0m
[2m2025-12-09 20:39.58[0m [[32m[1minfo     [0m] [1mAction size has been automatically determined.[0m [36maction_size[0m=[35m2[0m
Estimated Policy Value on Test Set: 140121.66
