In [46]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [50]:
# Load the dataset from the CSV file into a DataFrame
# (the path is relative, so the file must sit in the working directory)
data=pd.read_csv('JAVA_CAC2_Project_data.csv')

In [51]:
# Preview the first rows to check the column names and value formats
data.head()

Unnamed: 0,Monthly Income,Monthly Expenses,Existing Savings,Debt Obligations,Risk Factors,Household Size,Emergency Funds
0,88268,28118,64707,2156,2,4,101736.201076
1,63567,16540,82214,907,2,5,91697.678596
2,62613,13418,155062,5026,0,2,142498.053194
3,65891,18778,63195,91,2,4,83421.037108
4,41243,31430,164382,1841,0,5,143975.890219


In [52]:
# Check each column's dtype and non-null count
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 7 columns):
Monthly Income      500 non-null int64
Monthly Expenses    500 non-null int64
Existing Savings    500 non-null int64
Debt Obligations    500 non-null int64
Risk Factors        500 non-null int64
Household Size      500 non-null int64
Emergency Funds     500 non-null float64
dtypes: float64(1), int64(6)
memory usage: 27.5 KB


In [53]:
# Summary statistics (count, mean, std, min, quartiles, max) per column
data.describe()

Unnamed: 0,Monthly Income,Monthly Expenses,Existing Savings,Debt Obligations,Risk Factors,Household Size,Emergency Funds
count,500.0,500.0,500.0,500.0,500.0,500.0,500.0
mean,60905.066,24967.344,123571.022,4946.622,1.052,3.014,120906.396466
std,23418.912047,8748.29981,44078.49886,2894.2935,0.816474,1.404903,36827.278117
min,20093.0,10062.0,50739.0,43.0,0.0,1.0,37943.543935
25%,40174.0,17401.0,84892.5,2534.0,0.0,2.0,90109.73356
50%,61597.0,24556.5,123731.0,4879.0,1.0,3.0,121828.46878
75%,81725.0,32960.25,162368.5,7400.0,2.0,4.0,148426.190783
max,99983.0,39946.0,199925.0,9993.0,2.0,5.0,218405.972201


In [54]:
# Separate the target ('Emergency Funds') from the predictors, then hold out
# 20% of the rows for evaluation.
y = data['Emergency Funds']
X = data.drop(columns='Emergency Funds')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # fixed seed keeps the split reproducible
)

In [55]:
# Standardise the features. The scaler is fitted on the training split only
# and the same fitted scaler is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [56]:
# Fit a linear regression on the scaled training features.
# fit() is left as the cell's last expression, so the notebook displays the
# fitted estimator's repr as the cell output.
regression_model = LinearRegression()
regression_model.fit(X_train_scaled, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [57]:
# Predict the target for the held-out rows, using the scaled test features
y_pred = regression_model.predict(X_test_scaled)

In [58]:
# Score the held-out predictions against the true values with R-squared
r2 = r2_score(y_test, y_pred)
print(f"R-squared Score: {r2}")

R-squared Score: 0.915633545727743


In [61]:
# Function to get user input and predict emergency fund
def predict_emergency_fund():
    """Prompt for one household's details and predict its emergency fund.

    Six values are read from standard input. The risk factor is entered as
    text and encoded as Low=0, Medium=1, High=2; matching ignores letter case
    and surrounding whitespace. The single-row frame is then scaled with the
    module-level ``scaler`` and passed to the module-level
    ``regression_model``, so both must exist and be fitted before this
    function is called.

    Returns:
        The predicted emergency fund, or ``None`` (after printing a message)
        when the risk factor is not Low, Medium or High.

    Raises:
        ValueError: if a numeric answer cannot be parsed by ``float``/``int``.
    """
    # Get user input for features
    monthly_income = float(input("Enter Monthly Income: "))
    monthly_expenses = float(input("Enter Monthly Expenses: "))
    existing_savings = float(input("Enter Existing Savings: "))
    debt_obligations = float(input("Enter Debt Obligations: "))
    risk_factor = input("Enter Risk Factor (Low/Medium/High): ")
    household_size = int(input("Enter Household Size: "))

    # Encode risk factor. strip() tolerates stray spaces around the answer;
    # capitalize() makes the lookup case-insensitive ("HIGH" -> "High").
    risk_factor_encoder = {'Low': 0, 'Medium': 1, 'High': 2}
    risk_factor_encoded = risk_factor_encoder.get(risk_factor.strip().capitalize())

    if risk_factor_encoded is None:
        print("Invalid Risk Factor. Please enter Low, Medium, or High.")
        return None

    # Prepare input data for prediction. Column names and order follow the
    # feature columns of the training frame.
    user_input = pd.DataFrame({
        'Monthly Income': [monthly_income],
        'Monthly Expenses': [monthly_expenses],
        'Existing Savings': [existing_savings],
        'Debt Obligations': [debt_obligations],
        'Risk Factors': [risk_factor_encoded],
        'Household Size': [household_size]
    })

    # Apply the scaler that was fitted on the training data
    user_input_scaled = scaler.transform(user_input)

    # predict() returns one value per row; there is exactly one row here
    predicted_emergency_fund = regression_model.predict(user_input_scaled)[0]
    print(f"Predicted Emergency Fund: {predicted_emergency_fund:.2f}")

    return predicted_emergency_fund

# Prompt for the inputs and keep the prediction for the next step.
# The result is None when the risk factor entered is not recognised.
predicted_emergency_fund = predict_emergency_fund()

# Function to suggest insurance type based on predicted emergency funds
def suggest_insurance_type(predicted_emergency_fund):
    """Map a predicted emergency fund to an insurance recommendation.

    The amount is checked against descending minimums and the label of the
    first tier it reaches is returned. Amounts below every minimum fall
    through to 'No Health Insurance Needed'.

    Args:
        predicted_emergency_fund: numeric amount to classify.

    Returns:
        The recommendation label as a string.
    """
    # NOTE(review): describe() reports a minimum 'Emergency Funds' of about
    # 37,944, which is above the top threshold here, so predictions within the
    # training range all land in the top tier. Confirm the intended thresholds.
    tiers = (
        (15000, 'Comprehensive Health Insurance'),
        (10000, 'Standard Health Insurance'),
        (5000, 'Basic Health Insurance'),
    )
    for minimum, label in tiers:
        if predicted_emergency_fund >= minimum:
            return label
    return 'No Health Insurance Needed'

# Suggest an insurance type only when a prediction was actually produced.
# predict_emergency_fund() returns None after an invalid risk factor, and
# comparing None with the numeric thresholds would raise TypeError.
if predicted_emergency_fund is not None:
    insurance_type = suggest_insurance_type(predicted_emergency_fund)
    print(f"Predicted Insurance Type: {insurance_type}")


Enter Monthly Income: 24000
Enter Monthly Expenses: 12000
Enter Existing Savings: 34000
Enter Debt Obligations: 2000
Enter Risk Factor (Low/Medium/High): Low
Enter Household Size: 4
Predicted Emergency Fund: 37371.03
Predicted Insurance Type: Comprehensive Health Insurance
