In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

## Importing the dataset

In [None]:
# Load the loan sample from disk.
dataset = pd.read_csv('loan_samp1.csv')

# Features: every column from position 1 up to (not including) the
# second-to-last one. Target: the second-to-last column.
X = dataset.iloc[:, 1:-2].to_numpy()
y = dataset.iloc[:, -2].to_numpy()

In [None]:
# Show the loaded frame to eyeball column names and value types.
dataset

Unnamed: 0,Loan_ID,Gender,Age,Married,No_Of_Dependents,Edu_Qualification,Employment_Status,Applicant_Annual_Income,Co_Applicant_Annual_Income,Applicant_Credit_History,Credit_Rating,Existing_No_of_Loans,Loan_Amount_Availed,Car Purchased
0,55769,M,59,No,1,Graduate,Priv Sector,2314491,219621,571,Average,1,23144.92,Hyundai i10
1,13302,M,57,No,1,Graduate,Agriculturist,1074953,0,770,Excellent,1,107495.38,Hyundai i10
2,23921,F,37,Yes,2,Not Graduate,Not-Employed,2232122,1095821,560,Average,1,223212.23,Tata Indigo
3,86901,M,32,No,3,Not Graduate,Agriculturist,1598704,0,774,Excellent,0,559546.46,Mahinddra Bolero
4,94393,F,61,Yes,2,Not Graduate,Agriculturist,1719946,0,759,Excellent,0,429986.55,Hyundai i10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,41138,F,48,No,3,Graduate,Not-Employed,1980231,790447,656,Good,0,297034.74,Maruti Suzuki Desire
996,92291,F,59,Yes,3,Not Graduate,Agriculturist,2071673,0,762,Excellent,0,517918.44,Tata Indigo
997,73340,F,18,Yes,3,Not Graduate,Not-Employed,1498143,1255224,540,Poor,1,0.00,No Vehicle
998,65542,M,24,No,3,Not Graduate,Self-Employed,811224,863150,551,Average,1,121683.66,Tata Indigo


In [None]:
# Print the raw (not yet encoded or scaled) feature matrix.
print(X)

[['M' 59 'No' ... 571 'Average' 1]
 ['M' 57 'No' ... 770 'Excellent' 1]
 ['F' 37 'Yes' ... 560 'Average' 1]
 ...
 ['F' 18 'Yes' ... 540 'Poor' 1]
 ['M' 24 'No' ... 551 'Average' 1]
 ['F' 34 'No' ... 558 'Average' 0]]


In [None]:
# Print a single feature row so the position of each column is visible.
print(X[0])

['M' 59 'No' 1 'Graduate' 'Priv Sector' 2314491 219621 571 'Average' 1]


In [None]:
# Print the target vector taken from the second-to-last dataset column.
print(y)

[  23144.92  107495.38  223212.23  559546.46  429986.55  129865.43
   87561.88  136702.6   116183.25  647466.88  152996.73       0.
   37606.86  181470.09    8058.38  547153.28   18503.51  106670.63
  575037.31  109780.98       0.    143965.55       0.    218262.5
  441485.1    39491.33  260041.43  191486.58       0.         0.
  184669.06  205622.37  101314.8  1132340.02  530346.15  281072.26
  412244.47  564017.91  435218.43  859867.89  831390.48  899595.9
  388020.35       0.    208675.16  197274.47  200620.26       0.
  148792.72       0.    320717.94  188868.08  104773.69  228392.41
       0.         0.    305485.04       0.    483029.16  148872.14
       0.   1139035.59  196612.56  535650.28  218833.49  867834.22
  772898.02  172468.23  313530.15  225577.95       0.     80331.11
       0.    870373.28  354380.99   37741.88       0.     21117.56
       0.    475136.36  295505.85   93044.55       0.     44414.7
  449877.27       0.    295068.39  552528.85  303569.76  726705.63
  50

In [None]:
# Column positions below refer to one raw feature row, for example:
# ['M' 59 'No' 1 'Graduate' 'Priv Sector' 2314491 219621 571 'Average' 1]
#   0   1   2   3     4           5          6       7     8      9     10

ct = ColumnTransformer(
    [
        # Standardise the numeric columns.
        ("scaling", StandardScaler(), [1, 3, 6, 7, 8, 10]),
        # One-hot encode the string-valued columns.
        ("onehot", OneHotEncoder(), [0, 2, 4, 5, 9]),
    ],
    # Always return a dense array. This replaces OneHotEncoder(sparse=False),
    # whose `sparse` keyword was renamed to `sparse_output` in scikit-learn 1.2
    # and later removed, so this form works on both old and new releases.
    sparse_threshold=0,
)

# NOTE: X is overwritten with the transformed matrix, so this cell must run
# exactly once after loading; re-running it on the transformed X would apply
# the column indices above to the wrong columns.
# NOTE(review): the transformer is fitted on all rows before the train/test
# split, so test rows influence the scaling statistics.
X = ct.fit_transform(X)



In [None]:
# Print the feature matrix after scaling and one-hot encoding.
print(X)

[[ 1.2818569  -0.47504504  0.42039914 ...  0.          0.
   0.        ]
 [ 1.13694882 -0.47504504 -0.68656251 ...  1.          0.
   0.        ]
 [-0.31213201  0.42980266  0.34684002 ...  0.          0.
   0.        ]
 ...
 [-1.68875879  1.33465036 -0.30863533 ...  0.          0.
   1.        ]
 [-1.25403455  1.33465036 -0.92208405 ...  0.          0.
   0.        ]
 [-0.52949413 -1.37989274 -0.08274885 ...  0.          0.
   0.        ]]


In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Turn the target into a single-column 2-D array, the layout the
# StandardScaler applied to it afterwards works on.
y = y.reshape(-1, 1)

In [None]:
# Learn the scaling statistics from the training split only, so the held-out
# rows do not influence the mean/variance used to standardise the data.
sc_X = StandardScaler().fit(X_train)
# y_train is reshaped to one column because StandardScaler needs 2-D input.
sc_y = StandardScaler().fit(y_train.reshape(-1, 1))

# Apply the fitted scalers to the full feature matrix and target.
# NOTE: X and y are overwritten here, so run this cell once per kernel session.
X = sc_X.transform(X)
y = sc_y.transform(y)

In [None]:
# Train the RBF-kernel SVR on the training split only. Fitting on the full
# X / y would let the model see the rows later used for evaluation and
# inflate the test score.
X_train_scaled = sc_X.transform(X_train)
# Scale the training target as a column, then flatten it: SVR expects a 1-D
# target and otherwise emits a column_or_1d conversion warning.
y_train_scaled = sc_y.transform(y_train.reshape(-1, 1)).ravel()

regressor = SVR(kernel='rbf')
regressor.fit(X_train_scaled, y_train_scaled)

  y = column_or_1d(y, warn=True)


In [None]:
# X_test was split off before sc_X was applied, so bring it into the same
# scaled feature space the regressor was fitted in before predicting.
X_test_scaled = sc_X.transform(X_test)

# Predictions come out in scaled target units; map them back to the original
# target scale with the target scaler (which needs a 2-D column).
y_pred = sc_y.inverse_transform(regressor.predict(X_test_scaled).reshape(-1, 1))

In [None]:
# Coefficient of determination of the SVR predictions against the held-out targets.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print('R² score:', r2)

R² score: 0.9823565533047057


In [None]:
# Print predictions (left column) beside the true test targets (right column),
# with arrays rendered at two decimals of precision.
np.set_printoptions(precision=2)
print(np.hstack((y_pred.reshape(-1, 1), y_test.reshape(-1, 1))))

[[ 6.22e+05  7.20e+05]
 [ 1.34e+04  0.00e+00]
 [ 1.15e+05  1.20e+05]
 [ 8.72e+04  1.08e+05]
 [ 1.92e+03  0.00e+00]
 [ 5.26e+05  5.23e+05]
 [ 1.95e+05  1.91e+05]
 [ 1.89e+05  2.01e+05]
 [ 4.34e+05  4.09e+05]
 [ 2.73e+04  0.00e+00]
 [ 4.79e+05  5.11e+05]
 [ 6.43e+05  6.10e+05]
 [ 3.14e+05  2.84e+05]
 [ 4.46e+05  4.33e+05]
 [-1.15e+04  0.00e+00]
 [ 2.08e+03  0.00e+00]
 [ 3.37e+05  3.25e+05]
 [ 1.21e+04  0.00e+00]
 [-5.64e+03  8.06e+03]
 [ 3.86e+04  2.11e+04]
 [ 2.26e+05  2.06e+05]
 [ 2.18e+04  1.69e+04]
 [ 8.59e+04  8.95e+04]
 [ 2.43e+05  2.13e+05]
 [ 5.40e+05  5.10e+05]
 [ 2.47e+05  2.39e+05]
 [ 2.13e+05  1.97e+05]
 [ 2.91e+05  2.78e+05]
 [-9.35e+02  0.00e+00]
 [ 8.82e+04  1.07e+05]
 [ 1.22e+05  9.47e+04]
 [ 4.15e+05  3.78e+05]
 [ 3.25e+05  3.15e+05]
 [ 1.79e+05  1.95e+05]
 [-1.95e+03  2.12e+04]
 [ 1.91e+05  2.19e+05]
 [ 2.90e+05  3.16e+05]
 [ 5.35e+05  5.57e+05]
 [ 8.35e+04  8.29e+04]
 [ 5.54e+05  5.14e+05]
 [ 4.39e+05  4.44e+05]
 [ 5.01e+05  5.18e+05]
 [ 1.49e+05  1.51e+05]
 [ 1.84e+05

In [None]:
# Polynomial regression: expand the training features to degree-2 terms and
# fit an ordinary least-squares model on the expanded matrix.
# PolynomialFeatures comes from sklearn.preprocessing and must be imported in
# the notebook's import cell for this cell to run on a fresh kernel.
poly_features = PolynomialFeatures(degree=2)

# The expansion is fitted on the training split; the same fitted object is
# reused later to transform the test split.
X_train_poly = poly_features.fit_transform(X_train)

poly_model = LinearRegression()
poly_model.fit(X_train_poly, y_train)

In [None]:
# Expand the test features with the already-fitted polynomial transformer,
# then predict the target for them.
# Note: this reassigns y_pred, replacing any predictions stored under that name.
X_test_poly = poly_features.transform(X_test)
y_pred = poly_model.predict(X_test_poly)

In [None]:
# Score the polynomial model on the held-out split:
# R² (explained variance) and RMSE (error in target units).
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print("RMSE:", rmse)
print("R² score:", r2)

RMSE: 46291.79683239393
R² score: 0.9703013316360823


In [None]:
# Print polynomial-model predictions (left column) beside the true test
# targets (right column), with arrays rendered at two decimals of precision.
np.set_printoptions(precision=2)
print(np.hstack((y_pred.reshape(-1, 1), y_test.reshape(-1, 1))))

[[ 644450.25  719676.46]
 [  19938.25       0.  ]
 [ 131570.25  120031.13]
 [ 105090.25  107813.25]
 [ -22589.75       0.  ]
 [ 541570.25  522939.45]
 [ 222370.25  191486.58]
 [ 207570.25  201265.26]
 [ 412130.25  409407.33]
 [  32706.25       0.  ]
 [ 473346.25  510588.08]
 [ 631266.25  609933.06]
 [ 281698.25  284024.39]
 [ 452514.25  433425.32]
 [  12626.25       0.  ]
 [ -28381.75       0.  ]
 [ 328946.25  324512.48]
 [ -72797.75       0.  ]
 [  -1501.75    8058.38]
 [   7842.25   21117.56]
 [ 246978.25  205622.37]
 [  59426.25   16880.53]
 [  79458.25   89481.68]
 [ 257714.25  213403.38]
 [ 542866.25  510356.04]
 [ 229634.25  239438.94]
 [ 233298.25  197274.47]
 [ 268066.25  277723.22]
 [ -88221.75       0.  ]
 [  99330.25  107495.38]
 [ 144130.25   94723.93]
 [ 388386.25  377721.02]
 [ 313618.25  315485.  ]
 [ 197866.25  194775.47]
 [ -14253.75   21207.45]
 [ 182546.25  219240.79]
 [ 262322.25  315526.3 ]
 [ 607986.25  557193.72]
 [  64290.25   82893.94]
 [ 628538.25  514148.56]
