# Final Model

In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

# ===============================
# TRAINING PHASE: Create Synthetic Dataset and Train Model
# ===============================

np.random.seed(42)
n_samples = 200

# --- Personal Financial Data (varying across training samples) ---
average_monthly_income = np.random.randint(3000, 15000, n_samples)
average_monthly_expenses = np.random.randint(1000, 8000, n_samples)
total_assets_value = np.random.randint(1000, 50000, n_samples)
target_savings = np.random.randint(5000, 50000, n_samples)
time_horizon = np.random.choice([6, 12, 18, 24], n_samples)

# --- Loan Option Details ---
min_amount = np.random.randint(1000, 10000, n_samples)
max_amount = min_amount + np.random.randint(1000, 15000, n_samples)
interest_rate = np.random.uniform(5, 25, n_samples)
term_months = np.random.choice([6, 12, 18, 24, 36], n_samples)
# Loan names
loan_names = np.random.choice(
    ['Starter Loan', 'AgriFlex', 'Business Boost', 'SmartSaver', 'GreenGrowth', 'HomeEase', 'QuickFund', 'EduPlan'],
    n_samples
)

# --- Derived Calculations ---
# Loan amount is set as the midpoint between min and max amounts:
loan_amount = (min_amount + max_amount) / 2

# Estimate monthly loan payment using a simple approximation:
monthly_loan_payment = loan_amount * (interest_rate / 100) / 12

# Calculate monthly surplus:
monthly_surplus = average_monthly_income - average_monthly_expenses

# Calculate achievable savings:
achievable_savings = (monthly_surplus - monthly_loan_payment) * time_horizon + loan_amount

# Savings margin is the difference between achievable savings and target savings:
savings_margin = achievable_savings - target_savings

# The regression target will be the continuous savings margin:
y = savings_margin

# Assemble the training features into a DataFrame
df_train = pd.DataFrame({
    'average_monthly_income': average_monthly_income,
    'average_monthly_expenses': average_monthly_expenses,
    'total_assets_value': total_assets_value,
    'interest_rate': interest_rate,
    'min_amount': min_amount,
    'max_amount': max_amount,
    'term_months': term_months,
    'target_savings': target_savings,
    'time_horizon': time_horizon,
    "loan_names": loan_names,
})

print("Training DataFrame (first 5 rows):")
df_train.head()





Training DataFrame (first 5 rows):


Unnamed: 0,average_monthly_income,average_monthly_expenses,total_assets_value,interest_rate,min_amount,max_amount,term_months,target_savings,time_horizon,loan_names
0,10270,4445,1699,22.891045,1055,9251,24,29052,18,AgriFlex
1,3860,4743,1190,20.997105,9717,22442,12,43513,18,EduPlan
2,8390,6727,11492,13.50427,9150,21709,6,31092,6,EduPlan
3,8191,2495,36743,5.449386,3125,7493,6,16338,6,AgriFlex
4,14964,4304,7102,10.373547,1364,6161,36,5412,6,SmartSaver


In [6]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# --- Preprocessing: One-Hot Encoding for loan_names ---
# Convert loan_names to one-hot encoded columns
column_transformer = ColumnTransformer(
    transformers=[
        ('loan_names', OneHotEncoder(), ['loan_names'])  # Apply OneHotEncoder to loan_names column
    ], 
    remainder='passthrough'  # Keep the rest of the columns as they are
)

# --- Define the model pipeline ---
pipeline = Pipeline(steps=[
    ('preprocessor', column_transformer),  # Preprocess the data
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))  # Train using RandomForest
])



y = df_train['target_savings']
X = df_train.drop(columns=['target_savings'])


# --- Train the model ---
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
pipeline.fit(X_train, y_train)

# --- Evaluate the model ---
y_pred = pipeline.predict(X_test)

# --- Calculate Metrics ---
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")


Mean Absolute Error: 14094.752000000002
Mean Squared Error: 263681516.01436996
R-squared: -0.24884435895993584


In [7]:
# ===============================
# PREDICTION PHASE: Using the Trained Model
# ===============================

# --- Step 1: Define the Constant User Financial Profile ---
user_profile = {
    'total_assets_value': 25000,
    'average_monthly_income': 12000,
    'average_monthly_expenses': 4000,
    'target_savings': 30000,
    'time_horizon': 12  # Example: 12 months
}

# --- Step 2: Define Multiple Loan Options to Evaluate ---
# Each row represents one loan option candidate.
loan_options = pd.DataFrame({
    'min_amount': [5000, 7000, 6000],
    'interest_rate': [10.0, 12.5, 11.0],
    'max_amount': [8000, 10000, 9000],
    'term_months': [12, 24, 18],
    'loan_names': ['Business Boost', 'SmartSaver', 'AgriFlex']  # Example loan names
})

# --- Step 3: Calculate the Derived Loan Amount and Monthly Payment ---
loan_options['loan_amount'] = (loan_options['min_amount'] + loan_options['max_amount']) / 2
loan_options['monthly_loan_payment'] = loan_options['loan_amount'] * (loan_options['interest_rate'] / 100) / 12

# --- Step 4: Calculate Monthly Surplus ---
monthly_surplus = user_profile['average_monthly_income'] - user_profile['average_monthly_expenses']

# --- Step 5: Calculate Achievable Savings ---
loan_options['achievable_savings'] = (monthly_surplus - loan_options['monthly_loan_payment']) * user_profile['time_horizon'] + loan_options['loan_amount']

# --- Step 6: Calculate Savings Margin ---
loan_options['savings_margin'] = loan_options['achievable_savings'] - user_profile['target_savings']

# --- Step 7: Prepare the Prediction Data ---
# Create a DataFrame of user profile information and loan options
X_predict = pd.DataFrame({
    'total_assets_value': [user_profile['total_assets_value']] * len(loan_options),
    'average_monthly_income': [user_profile['average_monthly_income']] * len(loan_options),
    'average_monthly_expenses': [user_profile['average_monthly_expenses']] * len(loan_options),
    'interest_rate': loan_options['interest_rate'],
    'min_amount': loan_options['min_amount'],
    'max_amount': loan_options['max_amount'],
    'term_months': loan_options['term_months'],
    'target_savings': [user_profile['target_savings']] * len(loan_options),
    'time_horizon': [user_profile['time_horizon']] * len(loan_options),
    'loan_names': loan_options['loan_names']
})

# --- Step 8: Preprocess and Make Predictions ---
# Use the trained pipeline to predict the savings margin
predicted_savings_margin = pipeline.predict(X_predict)

# --- Step 9: Display the Predictions ---
loan_options['predicted_savings_margin'] = predicted_savings_margin
print("Loan Options with Predicted Savings Margin:")
print(loan_options[['loan_names', 'predicted_savings_margin']])


Loan Options with Predicted Savings Margin:
       loan_names  predicted_savings_margin
0  Business Boost                  24899.04
1      SmartSaver                  23501.90
2        AgriFlex                  26316.77
