In [1]:
# Imports
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression

from Utils.prep import Preprocessor as Prep

In [2]:
# Read the loan applications that will be scored further down
loans_csv_path = 'Data/SBA_loan_applications.csv'
df_loans = pd.read_csv(loans_csv_path)
df_loans

Unnamed: 0,Loan,Name,City,Date,Loan amount requested,SBA portion guaranteed,Secured by real estate?
0,1,Carmichael Realty,"Carmichael, CA",Current (not recession),"$1,000,000","$750,000",Yes
1,2,SV Consulting,"San Leandro, CA",Current (not recession),"$100,000","$40,000",No


In [3]:
# PREPARING DATA FOR MODEL
# Prep.unsign is a project helper; judging by the division below it hands back
# the two dollar columns in numeric form -- TODO confirm against Utils.prep.
df_loans_prep = Prep.unsign(df_loans, ['Loan amount requested', 'SBA portion guaranteed'])

# Series.map leaves NaN for any value that is not a key of the dict, so an
# unexpected answer (e.g. 'yes', 'Y', blank) would otherwise slip through here
# and only fail later, far from its cause. Fail early with a clear message.
real_estate_flag = df_loans['Secured by real estate?'].map({'Yes': 1, 'No': 0})
if real_estate_flag.isna().any():
    unexpected = (
        df_loans.loc[real_estate_flag.isna(), 'Secured by real estate?'].unique().tolist()
    )
    raise ValueError(
        f"'Secured by real estate?' must be 'Yes' or 'No'; got {unexpected}"
    )
df_loans_prep['RealEstate'] = real_estate_flag

# Share of the requested amount that the SBA guarantees
df_loans_prep['Portion'] = (
    df_loans_prep['SBA portion guaranteed'] / df_loans_prep['Loan amount requested']
)

# Hard-coded: every application is scored as a non-recession loan.
# NOTE(review): this is not derived from the 'Date' column -- confirm that is intended.
df_loans_prep['Recession'] = 0

# Identifier columns plus the three model inputs
df_loans_input = df_loans_prep[['Loan', 'Name', 'RealEstate', 'Portion', 'Recession']]
df_loans_input

Unnamed: 0,Loan,Name,RealEstate,Portion,Recession
0,1,Carmichael Realty,1,0.75,0
1,2,SV Consulting,0,0.4,0


In [4]:
# Historical SBA case data used to fit the model
df_sba_case = pd.read_csv('Data/SBAcase.csv')

# Model inputs, in the column order used for both fitting and scoring
predictors = ['RealEstate', 'Portion', 'Recession']

# Keep only the rows flagged Selected == 1; .copy() detaches the subset from df_sba_case
is_selected = df_sba_case['Selected'].eq(1)
df_train = df_sba_case.loc[is_selected].copy()

# Design matrix and response
X_train, y_train = df_train[predictors], df_train['Default']

In [5]:
# Fit a logistic regression on the training subset; every constructor
# argument is left at the library default. fit() returns the estimator itself.
sk_logreg = LogisticRegression().fit(X_train, y_train)
sk_logreg

In [6]:
# Feature matrix for the new applications, same column order as in training
X = df_loans_input[predictors]

# predict_proba returns one column per class; column index 1 is taken as the
# probability of default (presumably Default == 1 -- confirm via sk_logreg.classes_)
class_proba = sk_logreg.predict_proba(X)
df_loans['Estimated probability of default'] = class_proba[:, 1]

# A loan is approved when its estimated default probability is at most 0.5
df_loans['Approve?'] = df_loans['Estimated probability of default'].le(0.5)

In [7]:
# Format probability to 2 decimal places. Rounding an already-rounded value
# changes nothing, so this statement is safe to execute more than once.
df_loans['Estimated probability of default'] = (
    df_loans['Estimated probability of default'].astype(float).round(2)
)

# Map boolean to Yes/No. The column is overwritten in place, so on a second
# execution it already holds 'Yes'/'No' strings; mapping those through the
# {True, False} dict would turn every value into NaN. Only map while the
# column is still boolean, which makes re-running this cell a no-op.
if pd.api.types.is_bool_dtype(df_loans['Approve?']):
    df_loans['Approve?'] = df_loans['Approve?'].map({True: 'Yes', False: 'No'})

df_loans

Unnamed: 0,Loan,Name,City,Date,Loan amount requested,SBA portion guaranteed,Secured by real estate?,Estimated probability of default,Approve?
0,1,Carmichael Realty,"Carmichael, CA",Current (not recession),"$1,000,000","$750,000",Yes,0.06,Yes
1,2,SV Consulting,"San Leandro, CA",Current (not recession),"$100,000","$40,000",No,0.52,No


In [10]:
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
import onnx

# Define input type for ONNX: a single tensor named 'float_input' with a free
# batch dimension. The width is read from the fitted estimator instead of a
# second hand-maintained copy of the predictor list, so the exported signature
# cannot drift from what the model was actually trained on.
# NOTE(review): FloatTensorType is single precision per the skl2onnx docs, so
# ONNX scores may differ from scikit-learn in the last digits -- confirm acceptable.
initial_type = [('float_input', FloatTensorType([None, int(sk_logreg.n_features_in_)]))]

# Convert Scikit-learn model to ONNX
onnx_model = convert_sklearn(sk_logreg, initial_types=initial_type)

# Save ONNX model to file (written to the current working directory)
with open("sba_sk_logreg.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())