In [1]:
from platform import python_version

# Report the version of the interpreter that executed this notebook.
interpreter_version = python_version()
print(interpreter_version)

3.12.9


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
from Feature_Engineering import test, rest

class FTTransformer(nn.Module):
    """Transformer-encoder classifier over a flat numeric feature vector.

    Each sample's feature vector is linearly projected to ``d_model``
    dimensions, passed through a stack of Transformer encoder layers as a
    single-token sequence, and mapped to class logits.

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of output logits.
        depth: Number of stacked encoder layers.
        heads: Number of attention heads per layer (must divide ``d_model``).
        dropout: Dropout probability inside each encoder layer.
        d_model: Width of the embedding / encoder hidden state.
    """

    def __init__(self, input_dim, num_classes, depth=6, heads=8, dropout=0.1,
                 d_model=128):
        super().__init__()
        self.embedding = nn.Linear(input_dim, d_model)
        # batch_first=True is required: forward() feeds (batch, 1, d_model).
        # With the default (seq, batch, d_model) layout the batch axis would
        # be treated as the sequence axis and self-attention would mix
        # different samples of the same mini-batch.
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=heads,
            dropout=dropout,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=depth)
        self.fc = nn.Linear(d_model, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for ``x``.

        Args:
            x: Float tensor of shape (batch, input_dim).
        """
        x = self.embedding(x)
        # Add a length-1 sequence axis for the encoder, then drop it again.
        x = self.transformer_encoder(x.unsqueeze(1)).squeeze(1)
        return self.fc(x)

class LoanStatusModel:
    """End-to-end pipeline: load a CSV, train an FTTransformer, plot labels.

    Expected call order is ``load_and_preprocess_data`` ->
    ``initialize_model`` -> ``train_model``. ``plot_data_distribution`` only
    needs the data to be loaded.
    """

    def __init__(self, file_path):
        """Store the CSV location; no I/O happens until data is loaded.

        Args:
            file_path: Path of the CSV file containing the features and a
                ``loan_status`` column.
        """
        self.file_path = file_path
        self.df = None
        self.model = None
        self.features = []
        self.train_loader = None
        self.test_loader = None
        self.scaler = StandardScaler()

    def load_and_preprocess_data(self):
        """Read the CSV, split 80/20, standardize, and build the DataLoaders.

        Every column other than ``loan_status`` is used as a feature and is
        cast to float32; the target is cast to int64.
        """
        self.df = pd.read_csv(self.file_path)
        target_col = "loan_status"
        self.features = self.df.drop(columns=[target_col]).columns.tolist()

        X = self.df[self.features].values.astype(np.float32)
        y = self.df[target_col].values.astype(np.int64)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Fit the scaler on the training split only so that no statistics of
        # the held-out split leak into training, then reuse it for the test
        # split. The fitted scaler stays on self.scaler because any later
        # inference input must be transformed the same way.
        X_train = self.scaler.fit_transform(X_train).astype(np.float32, copy=False)
        X_test = self.scaler.transform(X_test).astype(np.float32, copy=False)

        train_dataset = TensorDataset(torch.tensor(X_train), torch.tensor(y_train))
        test_dataset = TensorDataset(torch.tensor(X_test), torch.tensor(y_test))

        self.train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
        self.test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

    def initialize_model(self):
        """Create a two-class FTTransformer sized to the loaded feature set.

        Raises:
            RuntimeError: If no features are known yet (data not loaded).
        """
        if not self.features:
            raise RuntimeError(
                "No features available; call load_and_preprocess_data() first."
            )
        self.model = FTTransformer(input_dim=len(self.features), num_classes=2)

    def train_model(self, epochs=10, lr=2e-5,
                    save_path="./ft_transformer_loan_model.pth"):
        """Train with Adam + cross-entropy and save the model's state_dict.

        Args:
            epochs: Number of passes over the training loader.
            lr: Adam learning rate.
            save_path: Destination file for the trained ``state_dict``.

        Raises:
            RuntimeError: If the model or the training loader is missing.
        """
        if self.model is None or self.train_loader is None:
            raise RuntimeError(
                "Model and data must be ready; call load_and_preprocess_data() "
                "and initialize_model() before train_model()."
            )

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(self.model.parameters(), lr=lr)

        for epoch in range(epochs):
            self.model.train()
            total_loss = 0
            for X_batch, y_batch in self.train_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                optimizer.zero_grad()
                output = self.model(X_batch)
                loss = criterion(output, y_batch)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            # Mean of the per-batch losses for this epoch.
            print(f"Epoch {epoch+1}, Loss: {total_loss/len(self.train_loader):.4f}")

        torch.save(self.model.state_dict(), save_path)

    def plot_data_distribution(self):
        """Show a bar chart of ``loan_status`` value counts.

        Raises:
            RuntimeError: If the data has not been loaded yet.
        """
        if self.df is None:
            raise RuntimeError(
                "No data loaded; call load_and_preprocess_data() first."
            )
        plt.figure(figsize=(8, 6))
        self.df['loan_status'].value_counts().plot(kind='bar', color=['blue', 'orange'])
        plt.xlabel('Loan Status')
        plt.ylabel('Count')
        plt.title('Loan Status Distribution')
        plt.show()

if __name__ == "__main__":
    from pathlib import Path

    # Exercise the helpers imported from Feature_Engineering before the
    # pipeline runs (presumably an import smoke check; confirm with that
    # module).
    test()
    rest('abc')

    # Build the path from components so the script also runs on platforms
    # whose separator is not a backslash; on Windows this yields the same
    # string as before.
    data_path = Path("..") / "ChatGPT" / "processed_data" / "feature_engineered.csv"

    loan_model = LoanStatusModel(str(data_path))
    loan_model.load_and_preprocess_data()
    loan_model.initialize_model()
    loan_model.train_model()
    loan_model.plot_data_distribution()


Checking import
Taking rest for { abc } seconds
