In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the placement dataset; the path is relative to the notebook's working directory.
placement_df = pd.read_csv('../data/placement.csv')

In [3]:
from sklearn.model_selection import train_test_split

# Every column except the last is treated as a feature; the last column is the target.
X, y = placement_df.iloc[:, :-1], placement_df.iloc[:, -1]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [4]:
# Preview the first rows to check the columns that were loaded.
placement_df.head()

Unnamed: 0,cgpa,package
0,6.89,3.26
1,5.12,1.98
2,7.82,3.25
3,7.42,3.67
4,6.94,3.57


In [5]:
# Dimensions of the training features produced by the split.
X_train.shape

(160, 1)

In [6]:
# Dimensions of the training target produced by the split.
y_train.shape

(160,)

- Formulas
- $m = \dfrac{\sum_i (x_i - \bar{x})(y_i - \bar{y})}{\sum_i (x_i - \bar{x})^2}$
- $b = \bar{y} - m\,\bar{x}$

In [7]:
class CustomLR:
    """Single-feature linear regression fitted by ordinary least squares.

    The fitted line is ``y = m * x + b`` with

        m = sum((x - mean(x)) * (y - mean(y))) / sum((x - mean(x)) ** 2)
        b = mean(y) - m * mean(x)

    ``m`` and ``b`` are ``None`` until :meth:`fit` has been called.
    """

    def __init__(self):
        self.m = None  # slope of the fitted line
        self.b = None  # intercept of the fitted line

    def fit(self, X_train, y_train):
        """Estimate the slope ``m`` and intercept ``b`` from training data.

        Parameters
        ----------
        X_train : array-like
            Values of the single feature. Any shape is accepted and flattened
            to 1-D (e.g. a one-column DataFrame of shape ``(n, 1)``).
        y_train : array-like
            Target values; flattened to 1-D. Must contain exactly as many
            values as ``X_train``.

        Raises
        ------
        ValueError
            If the inputs are empty, differ in length after flattening, or if
            every feature value is identical (the slope is then undefined).
        """
        x = np.asarray(X_train, dtype=float).ravel()
        y = np.asarray(y_train, dtype=float).ravel()

        if x.size == 0:
            raise ValueError("CustomLR.fit requires at least one sample")
        if x.size != y.size:
            raise ValueError(
                "X_train and y_train must have the same number of values after "
                f"flattening, got {x.size} and {y.size}"
            )

        # Compute the means once; recomputing them per sample makes the fit O(n^2).
        x_mean = x.mean()
        y_mean = y.mean()
        x_centered = x - x_mean

        den = np.dot(x_centered, x_centered)
        if den == 0:
            raise ValueError(
                "X_train has zero variance; the slope is undefined for a constant feature"
            )

        self.m = np.dot(x_centered, y - y_mean) / den
        self.b = y_mean - self.m * x_mean

    def predict(self, X_test):
        """Return ``m * x + b`` for every value in ``X_test`` as a 1-D array.

        Raises
        ------
        RuntimeError
            If called before :meth:`fit`.
        """
        if self.m is None or self.b is None:
            raise RuntimeError("CustomLR.predict called before fit")

        X_test = np.asarray(X_test, dtype=float).ravel()
        return self.m * X_test + self.b

In [8]:
from sklearn.metrics import r2_score

# Fit the hand-written regressor on the training split, then score its
# predictions on the held-out split with R^2.
cus_lr = CustomLR()
cus_lr.fit(X_train, y_train)
ycus_pred = cus_lr.predict(X_test)

custom_r2 = r2_score(y_test, ycus_pred)
print(custom_r2)

0.7730984312051673


In [9]:
from sklearn.linear_model import LinearRegression

# Reference implementation: fit scikit-learn's LinearRegression on the same
# split and report R^2 on the held-out data.
lr = LinearRegression().fit(X_train, y_train)  # fit() returns the estimator itself
y_pred = lr.predict(X_test)

sklearn_r2 = r2_score(y_test, y_pred)
print(sklearn_r2)

0.7730984312051673
