<a href="https://colab.research.google.com/github/kamalydl/Shodh_ai_assessment/blob/main/Shodh_ai.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# Step 0: Setup Environment

# Install the two key libraries we need
!pip install d3rlpy

# Standard data science imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import roc_auc_score, f1_score, classification_report, confusion_matrix

# Deep Learning (TensorFlow/Keras) imports
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping

# Offline Reinforcement Learning (d3rlpy) imports
import d3rlpy
from d3rlpy.dataset import MDPDataset
from d3rlpy.algos import CQLConfig, DiscreteCQL # Corrected import
from d3rlpy.ope import FQEConfig, FQE


# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

print("Libraries installed and imported successfully.")



Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


Libraries installed and imported successfully.


In [None]:

# Step 0.1: Load Data from Kaggle

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download the dataset
!kaggle datasets download -d wordsforthewise/lending-club

# Unzip the specific file we need
!unzip -o lending-club.zip

print("Dataset downloaded and unzipped.")

N_ROWS_TO_SAMPLE = 500000

import subprocess
import os

csv_path = "accepted_2007_to_2018q4.csv/accepted_2007_to_2018Q4.csv"

total_lines = int(subprocess.check_output(f"wc -l < {csv_path}", shell=True)) - 1

skip_indices = np.random.choice(
    np.arange(1, total_lines + 1),
    total_lines - N_ROWS_TO_SAMPLE,
    replace=False
)
skip_indices.sort()

skiprows = skip_indices + 1

try:
    # Use the correct path to read the CSV
    df = pd.read_csv(csv_path, skiprows=skiprows, low_memory=False)
    print(f"Successfully loaded a sample of {len(df)} rows.")
except Exception as e:
    print(f"Error loading data: {e}")
    print(f"Please ensure '{csv_path}' is in your Colab environment.")


df.shape

Dataset URL: https://www.kaggle.com/datasets/wordsforthewise/lending-club
License(s): CC0-1.0
lending-club.zip: Skipping, found more recently modified local copy (use --force to force download)
Archive:  lending-club.zip
  inflating: accepted_2007_to_2018Q4.csv.gz  
  inflating: accepted_2007_to_2018q4.csv/accepted_2007_to_2018Q4.csv  
  inflating: rejected_2007_to_2018Q4.csv.gz  
  inflating: rejected_2007_to_2018q4.csv/rejected_2007_to_2018Q4.csv  
Dataset downloaded and unzipped.
Successfully loaded a sample of 500001 rows.


(500001, 151)

In [None]:

# Task 1.1: Define Target & Filter

print("Original loan_status values:")
print(df['loan_status'].value_counts(normalize=True))

completed_loan_statuses = ['Fully Paid', 'Charged Off']
df_filtered = df[df['loan_status'].isin(completed_loan_statuses)].copy()

df_filtered['target'] = df_filtered['loan_status'].apply(lambda x: 1 if x == 'Charged Off' else 0)

print(f"\nFiltered data to {len(df_filtered)} completed loans.")
print("New 'target' distribution (1 = Default):")
print(df_filtered['target'].value_counts(normalize=True))



Original loan_status values:
loan_status
Fully Paid                                             0.475969
Current                                                0.389171
Charged Off                                            0.118806
Late (31-120 days)                                     0.009394
In Grace Period                                        0.003594
Late (16-30 days)                                      0.001890
Does not meet the credit policy. Status:Fully Paid     0.000838
Does not meet the credit policy. Status:Charged Off    0.000320
Default                                                0.000018
Name: proportion, dtype: float64

Filtered data to 297382 completed loans.
New 'target' distribution (1 = Default):
target
0    0.80025
1    0.19975
Name: proportion, dtype: float64


In [None]:

# Task 1.2: Feature Selection

features_to_keep = [
    # Core loan info
    'loan_amnt',
    'term',
    'int_rate',
    'grade',

    # Borrower info
    'annual_inc',
    'home_ownership',
    'emp_length',
    'verification_status',

    # Credit history info
    'dti',
    'open_acc',
    'pub_rec',
    'revol_util',
    'total_acc',
    'earliest_cr_line'
]

# Our target variable
target_column = 'target'

# Keep only the columns we need
df_model = df_filtered[features_to_keep + [target_column]].copy()

#  Feature Engineering

df_model['term'] = df_model['term'].str.replace(' months', '').str.strip().astype(float)

df_model['emp_length'] = df_model['emp_length'].str.replace('< 1 year', '0 years')
df_model['emp_length'] = df_model['emp_length'].str.replace('10+ years', '10 years')
df_model['emp_length'] = df_model['emp_length'].str.replace(' years', '').str.replace(' year', '')
df_model['emp_length'] = df_model['emp_length'].replace('n/a', np.nan)
df_model['emp_length'] = df_model['emp_length'].astype(float)

df_model['earliest_cr_line'] = pd.to_datetime(df_model['earliest_cr_line'], errors='coerce')

df_model['credit_history_age'] = (pd.to_datetime('now') - df_model['earliest_cr_line']).dt.days / 365.25

df_model = df_model.drop('earliest_cr_line', axis=1)

print("Feature engineering complete.")
df_model.info()

  df_model['earliest_cr_line'] = pd.to_datetime(df_model['earliest_cr_line'], errors='coerce')


Feature engineering complete.
<class 'pandas.core.frame.DataFrame'>
Index: 297382 entries, 0 to 499999
Data columns (total 15 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   loan_amnt            297382 non-null  float64
 1   term                 297382 non-null  float64
 2   int_rate             297382 non-null  float64
 3   grade                297382 non-null  object 
 4   annual_inc           297382 non-null  float64
 5   home_ownership       297382 non-null  object 
 6   emp_length           279946 non-null  float64
 7   verification_status  297382 non-null  object 
 8   dti                  297294 non-null  float64
 9   open_acc             297382 non-null  float64
 10  pub_rec              297382 non-null  float64
 11  revol_util           297181 non-null  float64
 12  total_acc            297382 non-null  float64
 13  target               297382 non-null  int64  
 14  credit_history_age   297382 non-null  float

In [None]:

# Task 1.3: Data Cleaning & Preprocessing

# Identify Numeric and Categorical Features

numeric_features = [
    'loan_amnt', 'term', 'int_rate', 'annual_inc', 'emp_length',
    'dti', 'open_acc', 'pub_rec', 'revol_util', 'total_acc',
    'credit_history_age'
]

categorical_features = [
    'grade', 'home_ownership', 'verification_status'
]

# Handle Missing Values

for col in numeric_features:
    median_val = df_model[col].median()
    df_model[col] = df_model[col].fillna(median_val)

for col in categorical_features:
    mode_val = df_model[col].mode()[0]
    df_model[col] = df_model[col].fillna(mode_val)

print("Missing values handled.")

#Enhanced Feature Engineering

# Risk ratio features

df_model['income_to_loan_ratio'] = df_model['annual_inc'] / (df_model['loan_amnt'] + 1e-6)
df_model['interest_to_income_ratio'] = df_model['int_rate'] / (df_model['annual_inc'] + 1e-6)
df_model['revol_dti_ratio'] = df_model['revol_util'] / (df_model['dti'] + 1e-6)

# Log-transform skewed columns, handle potential zero/negative values robustly
for col in ['annual_inc', 'loan_amnt', 'revol_util', 'dti', 'total_acc']:

    df_model[f'{col}_log'] = np.log1p(df_model[col].apply(lambda x: max(x, 0) + 1e-6))


# Grade interactions
grade_map = {g: i for i, g in enumerate(sorted(df_model['grade'].unique()))}
df_model['grade_encoded'] = df_model['grade'].map(grade_map)
df_model['grade_int_combo'] = df_model['grade_encoded'] * df_model['int_rate']

# Credit age bins
df_model['credit_age_bin'] = pd.cut(
    df_model['credit_history_age'],
    bins=[0, 5, 10, 20, 40, np.inf],
    labels=[0, 1, 2, 3, 4],
    right=False
)

print("Enhanced feature engineering complete.")
df_model.info()

# Encode Categorical Variables

df_processed = pd.get_dummies(df_model, columns=categorical_features, drop_first=True)

print("Categorical features encoded.")

# Split the Data (Train/Test)

X = df_processed.drop(target_column, axis=1)
y = df_processed[target_column]

# Standard 80/20 split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print(f"Data split: {len(X_train)} train, {len(X_test)} test rows.")

# Feature Scaling

scaler = StandardScaler()

X_train_scaled = X_train.copy()
X_test_scaled = X_test.copy()

numeric_cols_after_dummies = [col for col in X_train_scaled.columns if col in numeric_features +
                              [ 'income_to_loan_ratio', 'interest_to_income_ratio', 'revol_dti_ratio',
                                'annual_inc_log', 'loan_amnt_log', 'revol_util_log', 'dti_log', 'total_acc_log',
                                'grade_encoded', 'grade_int_combo', 'credit_age_bin']]


numeric_cols_for_scaling = [col for col in numeric_cols_after_dummies if col != 'credit_age_bin']

X_train_scaled[numeric_cols_for_scaling] = scaler.fit_transform(X_train_scaled[numeric_cols_for_scaling])
X_test_scaled[numeric_cols_for_scaling] = scaler.transform(X_test_scaled[numeric_cols_for_scaling])

print("Numeric features scaled.")
print("\nFinal training features view (scaled):")
print(X_train_scaled.head())

Missing values handled.
✅ Enhanced feature engineering complete.
<class 'pandas.core.frame.DataFrame'>
Index: 297382 entries, 0 to 499999
Data columns (total 26 columns):
 #   Column                    Non-Null Count   Dtype   
---  ------                    --------------   -----   
 0   loan_amnt                 297382 non-null  float64 
 1   term                      297382 non-null  float64 
 2   int_rate                  297382 non-null  float64 
 3   grade                     297382 non-null  object  
 4   annual_inc                297382 non-null  float64 
 5   home_ownership            297382 non-null  object  
 6   emp_length                297382 non-null  float64 
 7   verification_status       297382 non-null  object  
 8   dti                       297382 non-null  float64 
 9   open_acc                  297382 non-null  float64 
 10  pub_rec                   297382 non-null  float64 
 11  revol_util                297382 non-null  float64 
 12  total_acc                 

xgboost and keras mlp best model until now

In [None]:

# Train XGBoost & Deep Learning Models (Fixed + Improved)

!pip install xgboost --quiet

import pandas as pd
import numpy as np
import xgboost as xgb
import tensorflow as tf
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score, classification_report, precision_recall_curve
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import AdamW


# Compute Class Weights for imbalance

classes = np.unique(y_train)
class_weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
class_weight_dict = dict(zip(classes, class_weights))
print("Class Weights:", class_weight_dict)


# XGBOOST BASELINE MODEL (Fixed)


# Convert NumPy arrays back to DataFrames (important fix)
if isinstance(X_train_scaled, np.ndarray):
    X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns)
    X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns)

# Drop the 'credit_age_bin' column safely
if 'credit_age_bin' in X_train_scaled.columns:
    X_train_scaled_xgb = X_train_scaled.drop('credit_age_bin', axis=1)
    X_test_scaled_xgb = X_test_scaled.drop('credit_age_bin', axis=1)
else:
    X_train_scaled_xgb = X_train_scaled.copy()
    X_test_scaled_xgb = X_test_scaled.copy()

# Replace infinite values
if np.isinf(X_train_scaled_xgb).any().any():
    print("Found infinite values in X_train_scaled_xgb. Replacing with finite max.")
    max_finite = X_train_scaled_xgb[np.isfinite(X_train_scaled_xgb)].max().max()
    X_train_scaled_xgb = X_train_scaled_xgb.replace([np.inf, -np.inf], max_finite)

if np.isinf(X_test_scaled_xgb).any().any():
    print("Found infinite values in X_test_scaled_xgb. Replacing with finite max.")
    max_finite = X_test_scaled_xgb[np.isfinite(X_test_scaled_xgb)].max().max()
    X_test_scaled_xgb = X_test_scaled_xgb.replace([np.inf, -np.inf], max_finite)

# XGBoost Hyperparameters
xgb_model = xgb.XGBClassifier(
    n_estimators=700,
    learning_rate=0.03,
    max_depth=8,
    subsample=0.9,
    colsample_bytree=0.9,
    scale_pos_weight=class_weight_dict[0] / class_weight_dict[1],
    eval_metric='auc',
    reg_lambda=1.2,
    reg_alpha=0.8,
    random_state=42,
    n_jobs=-1
)

xgb_model.fit(X_train_scaled_xgb, y_train)

# Predict probabilities
y_pred_proba_xgb = xgb_model.predict_proba(X_test_scaled_xgb)[:, 1]

# Best threshold for F1
precisions_xgb, recalls_xgb, thresholds_xgb = precision_recall_curve(y_test, y_pred_proba_xgb)
f1_scores_xgb = 2 * (precisions_xgb * recalls_xgb) / (precisions_xgb + recalls_xgb)
best_f1_xgb = np.nanmax(f1_scores_xgb)
best_thresh_xgb = thresholds_xgb[np.nanargmax(f1_scores_xgb)]

# Final predictions
y_pred_xgb = (y_pred_proba_xgb >= best_thresh_xgb).astype(int)

print("\n XGBoost Results:")
print(f"Best Threshold: {best_thresh_xgb:.3f}")
print(f"AUC: {roc_auc_score(y_test, y_pred_proba_xgb):.4f}")
print(f"Best F1: {best_f1_xgb:.4f}")
print(f"Accuracy: {accuracy_score(y_test, y_pred_xgb):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_xgb, target_names=['Fully Paid (0)', 'Defaulted (1)']))


# DEEP LEARNING MODEL (Keras MLP)

# Define Focal Loss
def focal_loss(gamma=2., alpha=0.25):
    def loss(y_true, y_pred):
        bce = tf.keras.losses.binary_crossentropy(y_true, y_pred)
        p_t = y_true * y_pred + (1 - y_true) * (1 - y_pred)
        return tf.reduce_mean(alpha * tf.pow(1 - p_t, gamma) * bce)
    return loss

# Build improved deep learning model
dl_model = Sequential([
    Input(shape=(X_train_scaled.shape[1],)),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.4),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.4),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

opt = AdamW(learning_rate=0.0008, weight_decay=1e-5)
dl_model.compile(optimizer=opt,
                 loss=focal_loss(gamma=2.0, alpha=0.25),
                 metrics=['accuracy', tf.keras.metrics.AUC(name='auc')])

early_stop = EarlyStopping(monitor='val_auc', mode='max', patience=6, restore_best_weights=True)

history = dl_model.fit(
    X_train_scaled, y_train,
    validation_data=(X_test_scaled, y_test),
    epochs=40,
    batch_size=512,
    callbacks=[early_stop],
    verbose=1
)

# Predictions
y_pred_proba_dl = dl_model.predict(X_test_scaled).ravel()
if np.isnan(y_pred_proba_dl).any():
    mean_pred = np.nanmean(y_pred_proba_dl)
    y_pred_proba_dl = np.nan_to_num(y_pred_proba_dl, nan=mean_pred)

# Find best threshold
precisions_dl, recalls_dl, thresholds_dl = precision_recall_curve(y_test, y_pred_proba_dl)
f1_scores_dl = 2 * (precisions_dl * recalls_dl) / (precisions_dl + recalls_dl)
best_f1_dl = np.nanmax(f1_scores_dl)
best_thresh_dl = thresholds_dl[np.nanargmax(f1_scores_dl)]

y_pred_dl = (y_pred_proba_dl >= best_thresh_dl).astype(int)

print("\n Deep Learning Results:")
print(f"Best Threshold: {best_thresh_dl:.3f}")
print(f"AUC: {roc_auc_score(y_test, y_pred_proba_dl):.4f}")
print(f"Best F1: {best_f1_dl:.4f}")
print(f"Accuracy: {accuracy_score(y_test, y_pred_dl):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_dl, target_names=['Fully Paid (0)', 'Defaulted (1)']))


# Step 3: Side-by-side Summary

summary = pd.DataFrame({
    'Model': ['XGBoost', 'Deep Learning (MLP)'],
    'AUC': [
        roc_auc_score(y_test, y_pred_proba_xgb),
        roc_auc_score(y_test, y_pred_proba_dl)
    ],
    'Best_F1': [best_f1_xgb, best_f1_dl],
    'Accuracy': [
        accuracy_score(y_test, y_pred_xgb),
        accuracy_score(y_test, y_pred_dl)
    ]
})

print("\n==================== Summary ====================")
display(summary)
print("=================================================")


Class Weights: {np.int64(0): np.float64(0.6248030296663585), np.int64(1): np.float64(2.5031564992319186)}

✅ XGBoost Results:
Best Threshold: 0.065
AUC: 0.7109
Best F1: 0.4304
Accuracy: 0.6648

Classification Report:
                precision    recall  f1-score   support

Fully Paid (0)       0.88      0.67      0.76     47596
 Defaulted (1)       0.33      0.63      0.43     11881

      accuracy                           0.66     59477
     macro avg       0.60      0.65      0.60     59477
  weighted avg       0.77      0.66      0.70     59477

Epoch 1/40
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 28ms/step - accuracy: 0.7775 - auc: 0.6443 - loss: 0.0209 - val_accuracy: 0.8018 - val_auc: 0.7036 - val_loss: 0.0168
Epoch 2/40
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 28ms/step - accuracy: 0.7999 - auc: 0.6911 - loss: 0.0172 - val_accuracy: 0.8022 - val_auc: 0.7062 - val_loss: 0.0167
Epoch 3/40
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━

Unnamed: 0,Model,AUC,Best_F1,Accuracy
0,XGBoost,0.710905,0.4304,0.664812
1,Deep Learning (MLP),0.710639,0.428658,0.68223




In [None]:
pip install d3rlpy  # or latest d3rlpy available




In [None]:

# Offline Reinforcement Learning Agent (Discrete CQL)


!pip install d3rlpy==2.2.0 -q

import numpy as np
import torch
import d3rlpy
from d3rlpy.datasets import MDPDataset
from d3rlpy.algos import DiscreteCQLConfig
from d3rlpy.models.q_functions import MeanQFunctionFactory
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, classification_report

print("d3rlpy version:", d3rlpy.__version__)
print("CUDA available:", torch.cuda.is_available())


# Prepare Offline Dataset (with index fix)

# Convert to NumPy arrays for correct boolean masking
X_train_scaled = np.array(X_train_scaled)
y_train = np.array(y_train)
X_test_scaled = np.array(X_test_scaled)
y_test = np.array(y_test)

# Create balanced training set (optional)
majority_mask = (y_train == 0)
minority_mask = (y_train == 1)

X_majority = X_train_scaled[majority_mask]
y_majority = y_train[majority_mask]
X_minority = X_train_scaled[minority_mask]
y_minority = y_train[minority_mask]

# Optionally oversample minority for better reward signal
X_balanced = np.concatenate([X_majority, np.repeat(X_minority, 3, axis=0)])
y_balanced = np.concatenate([y_majority, np.repeat(y_minority, 3, axis=0)])

# Define rewards (positive for fully paid, negative for default)
observations = np.array(X_balanced, dtype=np.float32)
actions = np.array(y_balanced, dtype=np.int64)
rewards = np.where(actions == 0, 1.0, -1.0).astype(np.float32)

terminals = np.zeros_like(actions, dtype=bool)
terminals[::500] = True

dataset_train = MDPDataset(
    observations=observations,
    actions=actions,
    rewards=rewards,
    terminals=terminals
)
print("Offline dataset created with", dataset_train.size(), "samples")


# Configure and Create CQL Algorithm

config = DiscreteCQLConfig(
    batch_size=512,
    gamma=0.99,
    target_update_interval=100,
    q_func_factory=MeanQFunctionFactory()
)

device = "cuda" if torch.cuda.is_available() else "cpu"
algo = config.create(device=device)
algo.conservative_weight = 10.0


# Step 3: Train the Offline Agent

print("Training Discrete CQL offline agent...")

algo.fit(
    dataset_train,
    n_steps=30000,
    n_steps_per_epoch=3000
)


# Evaluate on Test Data

test_obs = np.array(X_test_scaled, dtype=np.float32)
pred_actions = algo.predict(test_obs)
preds = pred_actions.astype(int)


# Evaluation Metrics

accuracy = accuracy_score(y_test, preds)
f1 = f1_score(y_test, preds)
try:
    auc = roc_auc_score(y_test, preds)
except:
    auc = float('nan')

print("\n Offline RL Evaluation Results:")
print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"AUC: {auc:.4f}")

print("\nClassification Report:")
print(classification_report(y_test, preds, target_names=['Fully Paid (0)', 'Defaulted (1)']))


# Estimate Policy Value (EPV)

# Reward structure: +1 for fully paid, -1 for default
policy_rewards = np.where(preds == 0, 1, -1)
epv = np.mean(policy_rewards)

print(f"\n Estimated Policy Value (EPV): {epv:.4f}")

if epv > 0:
    print("Positive EPV → Agent policy yields net positive expected reward.")
else:
    print("Negative EPV → Policy likely over-approves risky applicants.")


✅ d3rlpy version: 2.8.1
✅ CUDA available: False
[2m2025-10-29 20:58.43[0m [[32m[1minfo     [0m] [1mSignatures have been automatically determined.[0m [36maction_signature[0m=[35mSignature(dtype=[dtype('int64')], shape=[(1,)])[0m [36mobservation_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(35,)])[0m [36mreward_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(1,)])[0m
[2m2025-10-29 20:58.43[0m [[32m[1minfo     [0m] [1mAction-space has been automatically determined.[0m [36maction_space[0m=[35m<ActionSpace.DISCRETE: 2>[0m
[2m2025-10-29 20:58.43[0m [[32m[1minfo     [0m] [1mAction size has been automatically determined.[0m [36maction_size[0m=[35m2[0m
✅ Offline dataset created with 666 samples
🚀 Training Discrete CQL offline agent...
[2m2025-10-29 20:58.43[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float32')], shape=

Epoch 1/10:   0%|          | 0/3000 [00:00<?, ?it/s]

[2m2025-10-29 21:01.08[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20251029205843: epoch=1 step=3000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.01814632797241211, 'time_algorithm_update': 0.02917502474784851, 'loss': 1.3122941099802652, 'td_loss': 0.6021419643561046, 'conservative_loss': 0.710152146478494, 'time_step': 0.04757251787185669}[0m [36mstep[0m=[35m3000[0m
[2m2025-10-29 21:01.08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/DiscreteCQL_20251029205843/model_3000.d3[0m


Epoch 2/10:   0%|          | 0/3000 [00:00<?, ?it/s]

[2m2025-10-29 21:03.17[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20251029205843: epoch=2 step=6000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.016262041012446087, 'time_algorithm_update': 0.026131717681884766, 'loss': 1.70131507452329, 'td_loss': 0.999771522740523, 'conservative_loss': 0.7015435509284338, 'time_step': 0.042592597802480064}[0m [36mstep[0m=[35m6000[0m
[2m2025-10-29 21:03.17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/DiscreteCQL_20251029205843/model_6000.d3[0m


Epoch 3/10:   0%|          | 0/3000 [00:00<?, ?it/s]

[2m2025-10-29 21:05.18[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20251029205843: epoch=3 step=9000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.015184528907140097, 'time_algorithm_update': 0.0247641445795695, 'loss': 1.9389546328783036, 'td_loss': 1.2393256282607714, 'conservative_loss': 0.6996290038029352, 'time_step': 0.04013282982508341}[0m [36mstep[0m=[35m9000[0m
[2m2025-10-29 21:05.18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/DiscreteCQL_20251029205843/model_9000.d3[0m


Epoch 4/10:   0%|          | 0/3000 [00:00<?, ?it/s]

[2m2025-10-29 21:07.27[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20251029205843: epoch=4 step=12000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.01618043303489685, 'time_algorithm_update': 0.026024441639582317, 'loss': 2.1908353580236435, 'td_loss': 1.4918391520778338, 'conservative_loss': 0.6989962061842283, 'time_step': 0.042397143522898355}[0m [36mstep[0m=[35m12000[0m
[2m2025-10-29 21:07.27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/DiscreteCQL_20251029205843/model_12000.d3[0m


Epoch 5/10:   0%|          | 0/3000 [00:00<?, ?it/s]

[2m2025-10-29 21:09.33[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20251029205843: epoch=5 step=15000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.015707767009735106, 'time_algorithm_update': 0.02564649852116903, 'loss': 2.4565874586105347, 'td_loss': 1.7585443563858667, 'conservative_loss': 0.6980431011120478, 'time_step': 0.04155013839403788}[0m [36mstep[0m=[35m15000[0m
[2m2025-10-29 21:09.33[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/DiscreteCQL_20251029205843/model_15000.d3[0m


Epoch 6/10:   0%|          | 0/3000 [00:00<?, ?it/s]

[2m2025-10-29 21:11.39[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20251029205843: epoch=6 step=18000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.01573836064338684, 'time_algorithm_update': 0.025629783471425374, 'loss': 2.6024184861977897, 'td_loss': 1.9043631616036096, 'conservative_loss': 0.6980553226669629, 'time_step': 0.04156168532371521}[0m [36mstep[0m=[35m18000[0m
[2m2025-10-29 21:11.39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/DiscreteCQL_20251029205843/model_18000.d3[0m


Epoch 7/10:   0%|          | 0/3000 [00:00<?, ?it/s]

[2m2025-10-29 21:13.43[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20251029205843: epoch=7 step=21000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.015607053200403849, 'time_algorithm_update': 0.025261588255564373, 'loss': 2.6906464047431946, 'td_loss': 1.9922297084728877, 'conservative_loss': 0.6984166966080666, 'time_step': 0.04105500459671021}[0m [36mstep[0m=[35m21000[0m
[2m2025-10-29 21:13.43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/DiscreteCQL_20251029205843/model_21000.d3[0m


Epoch 8/10:   0%|          | 0/3000 [00:00<?, ?it/s]

[2m2025-10-29 21:15.46[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20251029205843: epoch=8 step=24000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.01522535785039266, 'time_algorithm_update': 0.025043222586313883, 'loss': 2.8683822389443714, 'td_loss': 2.170914914806684, 'conservative_loss': 0.6974673253099124, 'time_step': 0.040453273057937625}[0m [36mstep[0m=[35m24000[0m
[2m2025-10-29 21:15.46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/DiscreteCQL_20251029205843/model_24000.d3[0m


Epoch 9/10:   0%|          | 0/3000 [00:00<?, ?it/s]

[2m2025-10-29 21:17.46[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20251029205843: epoch=9 step=27000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.014939372698465982, 'time_algorithm_update': 0.02453925824165344, 'loss': 2.9758197515805564, 'td_loss': 2.2782468688488007, 'conservative_loss': 0.6975728851358096, 'time_step': 0.03965910037358602}[0m [36mstep[0m=[35m27000[0m
[2m2025-10-29 21:17.46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/DiscreteCQL_20251029205843/model_27000.d3[0m


Epoch 10/10:   0%|          | 0/3000 [00:00<?, ?it/s]

[2m2025-10-29 21:20.01[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20251029205843: epoch=10 step=30000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.017114166577657065, 'time_algorithm_update': 0.02725493041674296, 'loss': 2.9932286473910015, 'td_loss': 2.2947842930555344, 'conservative_loss': 0.6984443541963895, 'time_step': 0.044560258309046426}[0m [36mstep[0m=[35m30000[0m
[2m2025-10-29 21:20.01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/DiscreteCQL_20251029205843/model_30000.d3[0m

✅ Offline RL Evaluation Results:
Accuracy: 0.8032
F1 Score: 0.1245
AUC: 0.5281

Classification Report:
                precision    recall  f1-score   support

Fully Paid (0)       0.81      0.99      0.89     47596
 Defaulted (1)       0.56      0.07      0.12     11881

      accuracy                           0.80     59477
     macro avg       0.68      0.53      0.51     59477
  weighted avg       0.76      0.80      0.74     594