In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split

from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Read the loan records from the CSV in the working directory and display the frame.
df = pd.read_csv(filepath_or_buffer="loan_data.csv")
df

Unnamed: 0,credit.policy,purpose,int.rate,installment,log.annual.inc,dti,fico,days.with.cr.line,revol.bal,revol.util,inq.last.6mths,delinq.2yrs,pub.rec,not.fully.paid
0,1,debt_consolidation,0.1189,829.10,11.350407,19.48,737,5639.958333,28854,52.1,0,0,0,0
1,1,credit_card,0.1071,228.22,11.082143,14.29,707,2760.000000,33623,76.7,0,0,0,0
2,1,debt_consolidation,0.1357,366.86,10.373491,11.63,682,4710.000000,3511,25.6,1,0,0,0
3,1,debt_consolidation,0.1008,162.34,11.350407,8.10,712,2699.958333,33667,73.2,1,0,0,0
4,1,credit_card,0.1426,102.92,11.299732,14.97,667,4066.000000,4740,39.5,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9573,0,all_other,0.1461,344.76,12.180755,10.39,672,10474.000000,215372,82.1,2,0,0,1
9574,0,all_other,0.1253,257.70,11.141862,0.21,722,4380.000000,184,1.1,5,0,0,1
9575,0,debt_consolidation,0.1071,97.81,10.596635,13.09,687,3450.041667,10036,82.9,8,0,0,1
9576,0,home_improvement,0.1600,351.58,10.819778,19.18,692,1800.000000,0,3.2,5,0,0,1


#### Converting log annual income back to annual income and renaming "fico" to "credit_scr"

In [3]:
# Undo the log transform on income, then give both columns clearer names.
# The guard keeps the cell re-runnable: once 'log.annual.inc' has been renamed,
# indexing it again would raise KeyError.
if 'log.annual.inc' in df.columns:
    df['log.annual.inc'] = np.exp(df['log.annual.inc'])

# rename() skips labels that are not present (errors='ignore' by default),
# so this line is a no-op when the cell is executed a second time.
df = df.rename(columns={'log.annual.inc': 'annual_income', 'fico': 'credit_scr'})
df

Unnamed: 0,credit.policy,purpose,int.rate,installment,annual_income,dti,credit_scr,days.with.cr.line,revol.bal,revol.util,inq.last.6mths,delinq.2yrs,pub.rec,not.fully.paid
0,1,debt_consolidation,0.1189,829.10,85000.000385,19.48,737,5639.958333,28854,52.1,0,0,0,0
1,1,credit_card,0.1071,228.22,65000.000073,14.29,707,2760.000000,33623,76.7,0,0,0,0
2,1,debt_consolidation,0.1357,366.86,31999.999943,11.63,682,4710.000000,3511,25.6,1,0,0,0
3,1,debt_consolidation,0.1008,162.34,85000.000385,8.10,712,2699.958333,33667,73.2,1,0,0,0
4,1,credit_card,0.1426,102.92,80799.999636,14.97,667,4066.000000,4740,39.5,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9573,0,all_other,0.1461,344.76,195000.000479,10.39,672,10474.000000,215372,82.1,2,0,0,1
9574,0,all_other,0.1253,257.70,68999.999753,0.21,722,4380.000000,184,1.1,5,0,0,1
9575,0,debt_consolidation,0.1071,97.81,39999.999876,13.09,687,3450.041667,10036,82.9,8,0,0,1
9576,0,home_improvement,0.1600,351.58,49999.999779,19.18,692,1800.000000,0,3.2,5,0,0,1


In [4]:
base_savings_rate = 0.20


def calculate_investment(row):
    """Estimate the investment amount for a single borrower record.

    Disposable income is ``annual_income`` reduced by the ``dti`` percentage.
    The fraction of it that is invested is ``base_savings_rate`` plus a bonus
    picked from ``credit_scr``: 0.05 above 700, 0.025 from 650 to 700
    inclusive, and no bonus otherwise.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys ``annual_income``, ``dti`` and ``credit_scr``.

    Returns
    -------
    The disposable income multiplied by the adjusted savings rate.
    """
    income_after_debt = row['annual_income'] * (1 - row['dti'] / 100)

    score = row['credit_scr']
    # Adding 0.0 leaves the base rate untouched for scores outside both tiers.
    bonus = 0.05 if score > 700 else (0.025 if 650 <= score <= 700 else 0.0)

    return income_after_debt * (base_savings_rate + bonus)

# Run the per-row formula over every record and store the result as a new column.
df['calculated_investment'] = df.apply(calculate_investment, axis='columns')

# Narrow the frame to the two model inputs plus the computed target.
dt = df.loc[:, ['annual_income', 'credit_scr', 'calculated_investment']]
dt

Unnamed: 0,annual_income,credit_scr,calculated_investment
0,85000.000385,737,17110.500077
1,65000.000073,707,13927.875016
2,31999.999943,682,6362.639989
3,85000.000385,712,19528.750088
4,80799.999636,667,15458.453930
...,...,...,...
9573,195000.000479,672,39316.387596
9574,68999.999753,722,17213.774938
9575,39999.999876,687,7821.899976
9576,49999.999779,692,9092.249960


In [5]:
# One seed for the split and for every estimator that accepts one, so that a
# fresh kernel reproduces the same scores.
RANDOM_STATE = 42

# Features and target. The target column was computed from annual_income, dti
# and credit_scr, but dti is not among the features used here.
X = dt[['annual_income', 'credit_scr']]
y = dt['calculated_investment']

# Splitting the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Defining the models (individual names are kept because later cells use them)
ridge = Ridge()
xgboost_model = xgb.XGBRegressor(random_state=RANDOM_STATE)
decision_tree = DecisionTreeRegressor(random_state=RANDOM_STATE)
# NOTE(review): SVR is fitted on the raw, unscaled columns; it is generally
# scale-sensitive, so confirm whether standardising is wanted before reading
# much into its score.
svr = SVR()

# Display label -> estimator; insertion order fixes the row order of `results`.
models = {
    'Ridge Regression': ridge,
    'XGBoost': xgboost_model,
    'Decision Tree': decision_tree,
    'SVR': svr,
}

# Training the models
for estimator in models.values():
    estimator.fit(X_train, y_train)

# Making predictions
predictions = {label: estimator.predict(X_test) for label, estimator in models.items()}

# Evaluating the models: one row per model with its test-set MSE and R2
results = pd.DataFrame(
    [
        {
            'Model': label,
            'MSE': mean_squared_error(y_test, preds),
            'R2 Score': r2_score(y_test, preds),
        }
        for label, preds in predictions.items()
    ]
)

# Per-model names from the original cell, kept in case other cells refer to them.
ridge_preds, xgboost_preds, decision_tree_preds, svr_preds = predictions.values()
ridge_mse, xgboost_mse, decision_tree_mse, svr_mse = results['MSE']
ridge_r2, xgboost_r2, decision_tree_r2, svr_r2 = results['R2 Score']

# Finding the best model based on MSE (lowest test error wins)
best_model = results.loc[results['MSE'].idxmin(), 'Model']

print("The best model is:", best_model)
results

The best model is: Ridge Regression


Unnamed: 0,Model,MSE,R2 Score
0,Ridge Regression,2778526.0,0.989646
1,XGBoost,7159654.0,0.973319
2,Decision Tree,8189260.0,0.969482
3,SVR,261138000.0,0.026853


In [8]:
# Ask for the two model inputs interactively.
annual_income = float(input("Enter annual income: "))
credit_score = int(input("Enter credit score: "))

# Single-row frame whose column names match the features the models were trained on.
new_data = pd.DataFrame([
    {'annual_income': annual_income, 'credit_scr': credit_score}
])

# Score the row with the fitted Ridge model and attach the estimate as a column.
predicted_investment = ridge.predict(new_data.loc[:, ['annual_income', 'credit_scr']])
new_data = new_data.assign(predicted_investment=predicted_investment)

print("\nPredicted Investment:")
new_data.loc[:, ['annual_income', 'credit_scr', 'predicted_investment']]

Enter annual income: 1200000
Enter credit score: 750

Predicted Investment:


Unnamed: 0,annual_income,credit_scr,predicted_investment
0,1200000.0,750,272110.321189


In [9]:
# Spread the predicted amount over 12 months. The value is read from
# `predicted_investment` rather than pasted from an earlier output, so it
# stays in step with whatever income and credit score were entered.
float(predicted_investment[0]) / 12

22675.860099083333