In [1]:
# task5
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import PolynomialFeatures

In [3]:
df = pd.read_csv('loan dataset_cleaned.csv')

In [4]:
df.fillna(0, inplace=True) 

In [5]:
features = ['Balance', 'Interest_Rate', 'Loan_Amount', 'EMI_Amount', 
            'cibil_score', 'Salary', 'Age', 'income', 'tenure', 'loan_amount']
target = 'Loan_Amount'

In [6]:
X = df[features]
y = df[target]


In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
y_pred_linear = linear_model.predict(X_test)

linear_mse = mean_squared_error(y_test, y_pred_linear)
linear_rmse = np.sqrt(linear_mse)
linear_mae = mean_absolute_error(y_test, y_pred_linear)
linear_r2 = r2_score(y_test, y_pred_linear)

print("\n--- Linear Regression Performance ---")
print(f"Mean Squared Error (MSE): {linear_mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {linear_rmse:.4f}")
print(f"Mean Absolute Error (MAE): {linear_mae:.4f}")
print(f"R-Squared (R²): {linear_r2:.4f}")




--- Linear Regression Performance ---
Mean Squared Error (MSE): 0.0000
Root Mean Squared Error (RMSE): 0.0000
Mean Absolute Error (MAE): 0.0000
R-Squared (R²): 1.0000


In [9]:
poly = PolynomialFeatures(degree=2) 
X_train_poly = poly.fit_transform(X_train[['Balance']])  
X_test_poly = poly.transform(X_test[['Balance']])

poly_model = LinearRegression()
poly_model.fit(X_train_poly, y_train)
y_pred_poly = poly_model.predict(X_test_poly)

poly_mse = mean_squared_error(y_test, y_pred_poly)
poly_r2 = r2_score(y_test, y_pred_poly)

print("\n--- Polynomial Regression Performance ---")
print(f"Polynomial Regression (Degree 2) -> MSE: {poly_mse:.4f}, R²: {poly_r2:.4f}")


--- Polynomial Regression Performance ---
Polynomial Regression (Degree 2) -> MSE: 722338710387.2926, R²: 0.0099


In [10]:
coefficients = linear_model.coef_
coefficients_df = pd.DataFrame({'Feature': features, 'Coefficient': coefficients})

coefficients_df = coefficients_df.sort_values(by='Coefficient', ascending=False)

print("\n--- Feature Importance (Linear Model) ---")
print(coefficients_df)



--- Feature Importance (Linear Model) ---
         Feature   Coefficient
2    Loan_Amount  1.000000e+00
1  Interest_Rate  1.871481e-10
4    cibil_score  1.953539e-13
3     EMI_Amount  1.165734e-15
0        Balance -4.339632e-17
7         income -9.714451e-17
5         Salary -1.110223e-16
9    loan_amount -1.110223e-16
6            Age -6.416343e-13
8         tenure -3.563224e-12
