In [1]:
import pandas as pd
import numpy as np

class LinearRegression:
    """Multiple linear regression trained with full-batch gradient descent.

    The model is ``y = X @ coeff + intercept`` and every iteration takes one
    gradient step on the mean squared error over the whole training set.

    Parameters
    ----------
    max_iteration : int
        Hard upper bound on the number of gradient steps. It applies in every
        mode, so training always terminates (at least one step is taken).
    max_mse : float or None
        When not ``None``, early stopping is enabled: training stops as soon
        as ``check_for_break()`` returns True.
    patience : int
        Window length (in iterations) used by the plateau test.
    learning_rate : float
        Step size of each gradient update.
    threshold : float
        The plateau test fires when the MSE changed by at most this much
        between the start and the end of the ``patience`` window.

    Attributes set by ``fit``
    -------------------------
    n : int
        Number of training samples.
    coeff : numpy.ndarray, shape (n_features,)
        Learned feature weights.
    intercept : float
        Learned bias term.
    mse : list of float
        MSE measured at the start of each iteration.
    n_iteration : int
        Number of gradient steps actually performed.
    """

    def __init__(self, max_iteration=10000, max_mse=None, patience=5, learning_rate=0.001, threshold=10):
        self.max_iteration = max_iteration
        self.max_mse = max_mse
        self.patience = patience
        self.learning_rate = learning_rate
        self.threshold = threshold

    @staticmethod
    def _as_feature_matrix(X, flat_is_single_sample):
        """Coerce X to a 2-D float array of shape (n_samples, n_features).

        A pandas Series is always one feature column. Any other 1-D input is
        one sample (a row) when ``flat_is_single_sample`` is True, otherwise
        one feature column.
        """
        if isinstance(X, pd.Series):
            X = X.to_frame()
        # dtype=float also converts bool / mixed-dtype frames (e.g. one-hot
        # columns) that would otherwise turn into slow object arrays.
        matrix = np.asarray(X, dtype=float)
        if matrix.ndim == 1:
            matrix = matrix.reshape(1, -1) if flat_is_single_sample else matrix.reshape(-1, 1)
        if matrix.ndim != 2:
            raise ValueError("X must be one- or two-dimensional")
        return matrix

    def fit(self, X, Y):
        """Estimate ``coeff`` and ``intercept`` by gradient descent.

        Parameters
        ----------
        X : DataFrame, Series, ndarray or nested list
            Training features, one row per sample. 1-D input is treated as a
            single feature column.
        Y : Series, ndarray, list or single-column DataFrame
            One target value per sample.

        Raises
        ------
        ValueError
            If X and Y differ in length, the data is empty, or Y does not
            hold exactly one value per sample.
        """
        if len(X) != len(Y):
            raise ValueError("Data and Label Size Must Be Same")
        features = self._as_feature_matrix(X, flat_is_single_sample=False)
        self.n = features.shape[0]
        if self.n == 0:
            raise ValueError("Cannot fit on empty data")
        targets = np.asarray(Y, dtype=float)
        if targets.size != self.n:
            raise ValueError("Y must hold exactly one target value per sample")
        targets = targets.reshape(self.n, 1)

        self.coeff = np.zeros(features.shape[1])
        self.intercept = 0.0
        self.mse = []
        self.n_iteration = 0
        while True:
            predicted = (features @ self.coeff + self.intercept).reshape(self.n, 1)
            residual = targets - predicted
            # MSE is recorded for the parameters *before* this step's update.
            self.mse.append(np.square(residual).mean())
            # Gradients of mean((y - (X @ w + c))**2) with respect to w and c.
            grad_coeff = -2 * (features * residual).mean(axis=0)
            grad_intercept = -2 * residual.mean()
            self.coeff = self.coeff - grad_coeff * self.learning_rate
            self.intercept = self.intercept - grad_intercept * self.learning_rate
            self.n_iteration += 1
            # The iteration cap is enforced even when early stopping is on;
            # otherwise a diverging (NaN) or never-plateauing run loops forever.
            if self.n_iteration >= self.max_iteration:
                break
            if self.max_mse is not None and self.check_for_break():
                break

    def check_for_break(self):
        """Return True when the early-stopping criteria are met.

        Training should stop when either
        * the latest MSE is at or below ``max_mse``, or
        * at least ``patience`` MSE values exist and the MSE moved by no more
          than ``threshold`` between ``mse[-patience]`` and ``mse[-1]``.

        Returns False when no MSE has been recorded yet.
        """
        if not self.mse:
            return False
        latest = abs(self.mse[-1])
        if self.max_mse is not None and latest <= self.max_mse:
            return True
        if len(self.mse) < self.patience:
            return False
        window_start = abs(self.mse[-self.patience])
        return bool(abs(window_start - latest) <= self.threshold)

    def predict(self, X):
        """Predict targets for X and return them as an ``(n_samples, 1)`` array.

        Parameters
        ----------
        X : DataFrame, Series, ndarray or list
            A DataFrame / 2-D array / list of rows holds one sample per row.
            A flat list or 1-D array is a single sample. A Series is a column
            of single-feature samples.

        Raises
        ------
        ValueError
            If the number of features in X differs from the fitted model.
        """
        features = self._as_feature_matrix(X, flat_is_single_sample=True)
        weights = np.asarray(self.coeff, dtype=float)
        if features.shape[1] != weights.shape[0]:
            raise ValueError(
                "X has {} features, but the model was fitted with {}".format(
                    features.shape[1], weights.shape[0]
                )
            )
        return (features @ weights + self.intercept).reshape(features.shape[0], 1)

In [2]:
# Synthetic toy data: 100 rows with two features drawn uniformly from [0, 5)
# and a target ("class") drawn uniformly from [1000, 5000). The features and
# the target are sampled independently of each other.
X = np.random.uniform(0, 5, size=100).tolist()
X1 = np.random.uniform(0, 5, size=100).tolist()
Y = np.random.uniform(1000, 5000, size=100).tolist()

df = pd.DataFrame({"X": X, "X1": X1, "class": Y})

# Separate the target column from the feature columns.
x_train = df.drop(columns="class")
y_train = df["class"]

In [3]:
from sklearn.model_selection import train_test_split
# Load the raw data set and replace its header with short, code-friendly names.
df = pd.read_csv("MLData.csv")
columns = ["Timestamp","Level","Class","Scale","Gender","Age","Residence","RelationshipStatus","FinanceState","CopeWithInstitute","RelationWithFamily","Pressure","AcademicResult","LivingPlace","SupportedBy","SocialMediaIn6","InferiorityComplex","MealSatisfaction","Health","OtherPositiveActivity","SleepTime"]
df.columns = columns

# Shuffle the rows. The seed is fixed so that a fresh "Restart & Run All"
# reproduces the same split; without it the seeded train_test_split below
# still receives differently ordered rows on every run.
df = df.sample(frac=1, random_state=1).reset_index(drop=True)

# Keep the two label columns aside; "Scale" is the regression target used below.
Class_Status = df["Class"]
Class_Scale = df["Scale"]
df = df.drop(columns=["Class", "Scale", "Timestamp"])

# One-hot encode the listed columns. "Age" and "SleepTime" are not in the list
# and are passed through unchanged. dtype=float keeps the encoded matrix purely
# numeric (recent pandas versions would otherwise emit bool indicator columns).
columns = ["Level","Gender","Residence","RelationshipStatus","FinanceState","CopeWithInstitute","RelationWithFamily","Pressure","AcademicResult","LivingPlace","SupportedBy","SocialMediaIn6","InferiorityComplex","MealSatisfaction","Health","OtherPositiveActivity"]
df_Enc = pd.get_dummies(df, columns=columns, dtype=float)

# Hold out 20% of the rows for testing.
x_train, x_test, y_train, y_test = train_test_split(df_Enc, Class_Scale, test_size=0.2, random_state=1)

In [4]:
# Train the gradient-descent regressor on the training split:
# 1000 iterations with a step size of 0.001 (max_mse is left unset).
lr = LinearRegression(learning_rate=0.001, max_iteration=1000)
lr.fit(x_train, y_train)

In [5]:
from sklearn.metrics import r2_score, mean_squared_error
# Score the fitted model on the training split.
y_pred = lr.predict(x_train)
train_r2 = r2_score(y_train, y_pred)
train_mse = mean_squared_error(y_train, y_pred)
print("Training R2 Score: {}".format(train_r2))
print("Training mse Score: {}".format(train_mse))

# Score the same model on the held-out test split.
y_pred = lr.predict(x_test)
test_r2 = r2_score(y_test, y_pred)
test_mse = mean_squared_error(y_test, y_pred)
print("Testing r2 Score: {}".format(test_r2))
print("Testing mse Score: {}".format(test_mse))

# Show the learned parameters: feature weights first, then the intercept.
print(lr.coeff, lr.intercept, sep="\n")

Training R2 Score: 0.5826471131124226
Training mse Score: 346.50706064814904
Testing r2 Score: 0.6495406990028209
Testing mse Score: 268.9328164111436
[ 1.81976418  0.59513472 -0.82525936  1.54450654 -1.08022666  0.42246914
  0.06148966 -0.12266002 -2.75470665  2.93885632  0.31663785 -0.25514819
 -1.95942669  2.02091635 -0.31581309 -3.53009061 -4.71840864  8.24802398
  0.37777803 -0.36057478  0.42206444  3.0644485  -3.00295884 -4.49538284
 -0.34964196  4.90651447  0.4658092  -0.40431954  0.37777803 -0.24198252
 -0.07430585  1.07017528 -1.00868562  4.57102094 -4.50953128  0.17208406
  0.79018046 -0.90077486  3.0638027  -3.00231304 -6.75855794  6.8200476 ]
0.06148965996993739
