In [1]:
import pandas as pd
import numpy as np
from openpyxl.styles.builtins import output
from scipy.stats import zscore

In [10]:
def load_data(file_path):
    """
    Load data from a CSV file.
    
    Args:
        file_path (str): Path to the CSV file.

    Returns:
        pd.DataFrame: Loaded data as a pandas DataFrame.
    """
    return pd.read_csv(file_path)

def convert_numeric_columns(df, columns):
    """
    Convert specified columns to numeric values by removing special characters.

    Args:
        df (pd.DataFrame): Dataset containing the columns to convert.
        columns (list): List of column names to convert.

    Returns:
        pd.DataFrame: Dataset with numeric columns converted.
    """
    for column in columns:
        df[column] = df[column].replace({r'\$': '', r',': ''}, regex=True).astype(float)
    return df

def filter_data(df, product_type, loan_amount, loan_term, credit_score, income):
    """
    Filter the dataset based on user input criteria.

    Args:
        df (pd.DataFrame): Dataset to filter.
        product_type (str): Desired product type (e.g., "Personal - Secured").
        loan_amount (int): Desired loan amount.
        loan_term (int): Desired loan term in months.
        credit_score (int): User's credit score.
        income (int): User's income.

    Returns:
        pd.DataFrame: Filtered dataset matching the criteria.
    """
    filtered_df = df[
        (df['Product_Type'].str.contains(product_type, case=False, na=False)) &
        (df['MinAmount'] <= loan_amount) &
        (df['MaxAmount'] >= loan_amount) &
        (df['MinTerm'] <= loan_term) &
        (df['MaxTerm'] >= loan_term) &
        (df['MinCreditScore'] <= credit_score) &
        (df['MinIncome'] <= income)
    ]
    return filtered_df

def get_best_options(filtered_df, top_n=3):
    """
    Get the top N best loan options sorted by minimum interest rate.

    Args:
        filtered_df (pd.DataFrame): Filtered dataset.
        top_n (int): Number of top options to return.

    Returns:
        pd.DataFrame: Top N loan options.
    """
    return filtered_df.sort_values(by='MinInterestRate').head(top_n)

def main(user_input):
    """
    Main function to process user input and return the best loan options as a numpy array.

    Args:
        user_input (list): List containing user criteria in the following order:
                          [product_type, loan_amount, loan_term, credit_score, income]

    Returns:
        np.ndarray: The best loan options as a numpy array.
    """
    # Load data
    file_path = '/Users/aliozsayin/Desktop/Axio_Prediction_Model_1-Demo/Book1.csv'
    data = load_data(file_path)

    # Convert necessary columns to numeric
    numeric_columns = ['MinIncome', 'MinAmount', 'MaxAmount', 'MinInterestRate', 'MaxInterestRate']
    data = convert_numeric_columns(data, numeric_columns)

    # Unpack user input
    product_type, loan_amount, loan_term, credit_score, income = user_input

    # Filter data
    filtered_data = filter_data(data, product_type, loan_amount, loan_term, credit_score, income)

    if filtered_data.empty:
        print("No options available based on your criteria.")
        return np.array([])  # Return an empty array if no options are available
    else:
        # Get best options
        best_options = get_best_options(filtered_data)
        return best_options.to_numpy()  # Return the best options as a numpy array

In [11]:
user_input = ["Personal - Secured", 10000, 36, 700, 50000] #Will get 
nplist = main(user_input)
nplist

array([['NF_PLS', 'NowFinance', 'Personal - Secured', 420, 35000.0, 6.75,
        26.95, 24, 84, 5000.0, 100000.0, '$990 ', '$0 ', '$0 ', '$0 '],
       ['Plenti_PLS', 'Plenti', 'Personal - Secured', 525, 35000.0, 9.49,
        24.99, 24, 84, 5000.0, 150000.0, '$990 ', '$499 ', '$9 ', '$0 '],
       ['Money3_PLS', 'Money3', 'Personal - Secured', 200, 20000.0,
        12.95, 27.99, 24, 36, 5000.0, 50000.0, '$990 ', '$330 ', '$0 ',
        '$0 ']], dtype=object)

# Working on the actual model

# Imports


In [2]:
import torch.nn as nn
import torch
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

import numpy as np
from statistics import mean

import pandas
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split 

In [3]:
# Load the dataset
data = pd.read_csv('synthetic_data.csv')
label = data['interest_rate']  # Extract target variable

# Drop the target column from the dataset
data = data.drop('interest_rate', axis=1)

# Convert data and labels to numpy arrays
label = np.array(label)
data = np.array(data)

In [4]:
# Normalize the data and labels using z-score normalization
data = zscore(data)
label = zscore(label)

# Convert numpy arrays to PyTorch tensors
label = torch.tensor(label)
data = torch.tensor(data)

# Ensure tensors are of float type for compatibility with PyTorch models
data = data.float()
label = label.float()

In [5]:
train_d , test_d, train_l, test_l = train_test_split(data, label, test_size=0.05,shuffle=True)
train_l = train_l.unsqueeze(dim=1)
test_l = test_l.unsqueeze(dim=1)

train_set = TensorDataset(train_d,train_l)
test_set = TensorDataset(test_d,test_l)

train_split = DataLoader(train_set, batch_size=8, shuffle=True)

# ModelMaker

In [6]:
class ModelMaker(nn.Module):
    def __init__(self):
        super().__init__()
        self.input = nn.Linear(13, 32)
        self.hidden = nn.Linear(32, 64)
        self.output = nn.Linear(64, 1)
        self.dropout = nn.Dropout(p=0.0)  # Regularization with dropout

    def forward(self, x):
        x = F.relu(self.input(x))
        x = self.dropout(x)  # Apply dropout after the first layer
        x = F.relu(self.hidden(x))
        x = self.dropout(x)  # Apply dropout after the second layer
        x = self.output(x)  # No activation for regression
        return x



## Creating the model and setting parameters

In [7]:
test_model = ModelMaker() #The model we will use throughout
optimizer = torch.optim.SGD(test_model.parameters(), lr=0.01) #optimizer
lossfunc = nn.MSELoss() 

# Trainer Function:

In [8]:

epoch = 700
lossarr = np.zeros(epoch)

def trainer():
    # Loop through the specified number of epochs
    for e in range(epoch):
        # Iterate over batches in the training set
        for x, y in train_set:
            yHat = test_model(x)  # Forward pass
            loss = lossfunc(yHat, y)  # Compute loss
            
            optimizer.zero_grad()  # Reset gradients
            loss.backward()  # Backpropagation
            optimizer.step()  # Update model parameters
        
        print(loss)  # Print loss after each epoch


In [9]:
trainer() #Training our model

tensor(1.6895, grad_fn=<MseLossBackward0>)
tensor(1.3627, grad_fn=<MseLossBackward0>)
tensor(1.1479, grad_fn=<MseLossBackward0>)
tensor(1.0062, grad_fn=<MseLossBackward0>)
tensor(0.9777, grad_fn=<MseLossBackward0>)
tensor(0.9063, grad_fn=<MseLossBackward0>)
tensor(0.9828, grad_fn=<MseLossBackward0>)
tensor(0.8687, grad_fn=<MseLossBackward0>)
tensor(1.0627, grad_fn=<MseLossBackward0>)
tensor(0.9706, grad_fn=<MseLossBackward0>)
tensor(1.0535, grad_fn=<MseLossBackward0>)
tensor(1.1365, grad_fn=<MseLossBackward0>)
tensor(0.8789, grad_fn=<MseLossBackward0>)
tensor(0.9757, grad_fn=<MseLossBackward0>)
tensor(0.8387, grad_fn=<MseLossBackward0>)
tensor(0.9864, grad_fn=<MseLossBackward0>)
tensor(0.5930, grad_fn=<MseLossBackward0>)
tensor(0.3670, grad_fn=<MseLossBackward0>)
tensor(1.1048, grad_fn=<MseLossBackward0>)
tensor(0.6756, grad_fn=<MseLossBackward0>)
tensor(0.3183, grad_fn=<MseLossBackward0>)
tensor(0.4544, grad_fn=<MseLossBackward0>)
tensor(0.2142, grad_fn=<MseLossBackward0>)
tensor(0.20

# Saving our model:

In [59]:
def save_model(test_model, path="Axio_Model.pth"):
    # Save the entire model, including architecture and weights
    torch.save(test_model, path)
    print(f"Model saved to {path}")

save_model(test_model, 'Axio_Model.pth')

Model saved to Axio_Model.pth


# Test Accuracy:

In [53]:
correct = 0
total = 0
for x, y in train_set:
    yHat = test_model(x)
    loss = lossfunc(yHat, y)
    
    tolerance = 0.1
    correct += ((yHat - y).abs() <= tolerance).sum().item()
    total += y.size(0)

accuracy = (correct / total) * 100 if total > 0 else 0
print(f"Overall Accuracy: {accuracy:.2f}%")

correct = 0
total = 0

for x, y in test_set:
    yHat = test_model(x)
    loss = lossfunc(yHat, y)

    tolerance = 0.3
    correct += ((yHat - y).abs() <= tolerance).sum().item()
    total += y.size(0)

accuracy = (correct / total) * 100 if total > 0 else 0
print(f"Overall Accuracy: {accuracy:.2f}%")



Overall Accuracy: 99.68%
Overall Accuracy: 16.00%


# <span style="color:red;">CODE FOR THE SYNTHETIC DATA</span>


In [1]:
import pandas as pd
import random

# Defining patterns with some relationships
patterns = {
    "ProductType": [0, 1],
    "term": [12, 24, 36, 48, 60, 72, 84],
    "amount": (5000, 15000),
    "Secured": [0, 1],
    "coApplicant": [0, 1],
    "CreditScore": (300, 850),  # More realistic credit score range
    "Residency": [0, 1, 2],
    "CitizenOrNot": [0, 1],
    "visaSubclass": (0, 100),
    "visaTimeLeftInMonths": (0, 36),
    "age": (18, 70),
    "LoanPurpose": [0, 1, 2, 3, 4, 5, 6],
    "RepaymentFrequency": [0, 1, 2],
    "interest_rate": (15.0, 25.0),
}

# Generating 1000 rows with relationships
rows = []
for _ in range(1000):
    credit_score = random.randint(*patterns["CreditScore"])
    term = random.choice(patterns["term"])
    amount = random.randint(*patterns["amount"])

    # Relationships: lower credit score -> higher interest rate
    interest_rate = round(random.uniform(15.0, 25.0) - (credit_score / 1000 * 5), 2)

    # Longer terms for higher loan amounts
    if amount > 12000:
        term = random.choice([48, 60, 72, 84])

    # Younger applicants more likely to have co-applicants
    age = random.randint(*patterns["age"])
    co_applicant = 1 if age < 30 else random.choice(patterns["coApplicant"])

    row = {
        "ProductType": random.choice(patterns["ProductType"]),
        "term": term,
        "amount": amount,
        "Secured": random.choice(patterns["Secured"]),
        "coApplicant": co_applicant,
        "CreditScore": credit_score,
        "Residency": random.choice(patterns["Residency"]),
        "CitizenOrNot": random.choice(patterns["CitizenOrNot"]),
        "visaSubclass": random.randint(*patterns["visaSubclass"]),
        "visaTimeLeftInMonths": random.randint(*patterns["visaTimeLeftInMonths"]),
        "age": age,
        "LoanPurpose": random.choice(patterns["LoanPurpose"]),
        "RepaymentFrequency": random.choice(patterns["RepaymentFrequency"]),
        "interest_rate": interest_rate,
    }
    rows.append(row)

# Create a DataFrame
synthetic_data = pd.DataFrame(rows)

# Display the first few rows of the DataFrame
synthetic_data.head()

Unnamed: 0,ProductType,term,amount,Secured,coApplicant,CreditScore,Residency,CitizenOrNot,visaSubclass,visaTimeLeftInMonths,age,LoanPurpose,RepaymentFrequency,interest_rate
0,1,12,6917,1,1,424,1,1,92,29,49,6,0,14.68
1,1,36,5331,0,1,387,1,1,31,5,25,1,1,14.77
2,1,60,9778,0,1,631,1,0,43,20,41,3,0,20.12
3,0,84,14900,0,0,385,0,0,14,28,64,3,2,18.8
4,1,60,14688,1,0,700,1,1,56,26,53,1,1,20.57
