In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv('credit.csv')

print(data.head())

X = data[[ 'Income','Limit', 'Rating', 'Cards', 'Age', 'Education']].values

y = data['Balance'].values

    Income  Limit  Rating  Cards  Age  Education  Balance
0   14.891   3606     283      2   34         11      333
1  106.025   6645     483      3   82         15      903
2  104.593   7075     514      4   71         11      580
3  148.924   9504     681      3   36         11      964
4   55.882   4897     357      2   68         16      331


In [3]:
X

array([[1.48910e+01, 3.60600e+03, 2.83000e+02, 2.00000e+00, 3.40000e+01,
        1.10000e+01],
       [1.06025e+02, 6.64500e+03, 4.83000e+02, 3.00000e+00, 8.20000e+01,
        1.50000e+01],
       [1.04593e+02, 7.07500e+03, 5.14000e+02, 4.00000e+00, 7.10000e+01,
        1.10000e+01],
       ...,
       [5.78720e+01, 4.17100e+03, 3.21000e+02, 5.00000e+00, 6.70000e+01,
        1.20000e+01],
       [3.77280e+01, 2.52500e+03, 1.92000e+02, 1.00000e+00, 4.40000e+01,
        1.30000e+01],
       [1.87010e+01, 5.52400e+03, 4.15000e+02, 5.00000e+00, 6.40000e+01,
        7.00000e+00]])

In [4]:
y

array([ 333,  903,  580,  964,  331, 1151,  203,  872,  279, 1350, 1407,
          0,  204, 1081,  148,    0,    0,  368,  891, 1048,   89,  968,
          0,  411,    0,  671,  654,  467, 1809,  915,  863,    0,  526,
          0,    0,  419,  762, 1093,  531,  344,   50, 1155,  385,  976,
       1120,  997, 1241,  797,    0,  902,  654,  211,  607,  957,    0,
          0,  379,  133,  333,  531,  631,  108,    0,  133,    0,  602,
       1388,  889,  822, 1084,  357, 1103,  663,  601,  945,   29,  532,
        145,  391,    0,  162,   99,  503,    0,    0, 1779,  815,    0,
        579, 1176, 1023,  812,    0,  937,    0,    0, 1380,  155,  375,
       1311,  298,  431, 1587, 1050,  745,  210,    0,    0,  227,  297,
         47,    0, 1046,  768,  271,  510,    0, 1341,    0,    0,    0,
        454,  904,    0,    0,    0, 1404,    0, 1259,  255,  868,    0,
        912, 1018,  835,    8,   75,  187,    0, 1597, 1425,  605,  669,
        710,   68,  642,  805,    0,    0,    0,  5

In [5]:
class MLR:
  def __init__(self):
    self.coef = None
    self.intercept=None
  def fit(self,X_train,y_train):
    X_train = np.insert(X_train,0,1,axis=1)

    betas=np.linalg.inv(np.dot(X_train.T,X_train)).dot(X_train.T).dot(y_train)
    self.intercept = betas[0]
    self.coef =betas[1:]

  def predict(self,X_test):
    y_pred = np.dot(X_test,self.coef) + self.intercept
    return y_pred


In [6]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=2)

In [7]:
mlr=MLR()

In [8]:
mlr.fit(X_train,y_train)


In [9]:
X_train.shape

(320, 6)

In [10]:
y_train.shape

(320,)

In [11]:
X_test.shape

(80, 6)

In [13]:

y_pred=mlr.predict(X_test)

In [14]:
y_pred.shape

(80,)

In [15]:
mlr.coef

array([-7.61146433,  0.11606681,  2.208801  , 13.01792739, -0.95182104,
        1.87314114])

In [16]:
mlr.intercept

-477.09824186260346

In [17]:
mse = np.mean((y_test - y_pred) ** 2)
rmse = np.sqrt(mse)
r_squared = 1 - (np.sum((y_test - y_pred) ** 2) / np.sum((y_test - np.mean(y_test)) ** 2))

print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared (R^2) score:", r_squared)

Mean Squared Error (MSE): 22509.774790793454
Root Mean Squared Error (RMSE): 150.0325790979861
R-squared (R^2) score: 0.8857803994520866


In [18]:
X2 = data[[ 'Income', 'Rating', 'Cards', 'Age', 'Education']].values

y2 = data['Balance'].values

X_train2,X_test2,y_train2,y_test2 = train_test_split(X2,y2,test_size=0.2,random_state=2)
mlr2=MLR()
mlr2.fit(X_train2,y_train2)
y_pred2=mlr2.predict(X_test2)

mlr2.intercept

-518.3648649492288

In [19]:
mlr2.coef

array([-7.56803953,  3.93258227,  4.51957773, -0.97760584,  2.15972987])

In [20]:
mse2 = np.mean((y_test2 - y_pred2) ** 2)
rmse2 = np.sqrt(mse)
r_squared2 = 1 - (np.sum((y_test2 - y_pred2) ** 2) / np.sum((y_test2 - np.mean(y_test2)) ** 2))

print("Mean Squared Error (MSE):", mse2)
print("Root Mean Squared Error (RMSE):", rmse2)
print("R-squared (R^2) score:", r_squared2)

Mean Squared Error (MSE): 23045.682028574218
Root Mean Squared Error (RMSE): 150.0325790979861
R-squared (R^2) score: 0.8830610870112048


In [22]:


data['Rating_Age'] = data['Rating'] * data['Age']

X1= data[['Income', 'Rating', 'Cards', 'Age', 'Education', 'Rating_Age']].values
y1 = data['Balance'].values
X_train1,X_test1,y_train1,y_test1 = train_test_split(X1,y1,test_size=0.2,random_state=2)





In [23]:
mlr1=MLR()
mlr1.fit(X_train1,y_train1)
y_pred1=mlr1.predict(X_test1)

In [25]:
mlr1.coef

array([-7.54413535e+00,  3.99638055e+00,  4.39895369e+00, -5.77780006e-01,
        2.19583714e+00, -1.12052284e-03])

In [26]:
mlr1.intercept

-542.0295652291595

In [24]:
mse1 = np.mean((y_test1 - y_pred1) ** 2)
rmse1 = np.sqrt(mse1)
r_squared1 = 1 - (np.sum((y_test1 - y_pred1) ** 2) / np.sum((y_test1 - np.mean(y_test1)) ** 2))

print("Mean Squared Error (MSE):", mse1)
print("Root Mean Squared Error (RMSE):", rmse1)
print("R-squared (R^2) score:", r_squared1)

Mean Squared Error (MSE): 22926.183192536282
Root Mean Squared Error (RMSE): 151.4139464928389
R-squared (R^2) score: 0.8836674506663301
