In [None]:
import numpy as np
import pandas as pd
from sklearn import model_selection, preprocessing
from sklearn.linear_model import LinearRegression

In [None]:
class MyLinearRegression:
    """Linear regression trained with a per-sample gradient-descent loop.

    Parameters
    ----------
    learning_rate : float, default 0
        Step size applied on every per-sample weight update.
    num_of_iterations : int, default 0
        Number of full passes over the training samples.

    Attributes
    ----------
    coef_ : numpy.ndarray
        One weight per feature column; filled in by ``fit``.
    intercept_ : float
        Bias term; filled in by ``fit``.
    min_cost : float
        Mean squared error on the training data after the last pass.
    M, n : int
        Number of rows / feature columns seen by the last ``fit`` call.
    """

    def __init__(self, learning_rate=0, num_of_iterations=0):
        self.learning_rate = learning_rate
        self.num_of_iterations = num_of_iterations
        self.min_cost = 0
        self.M = 0
        self.n = 0
        self.coef_ = np.zeros(self.n)
        self.intercept_ = 0

    def __cost(self, X, Y, m):
        """Return the mean squared error of weights ``m`` on the first ``self.M`` rows.

        ``X`` is expected to already carry the trailing column of ones, so the
        last entry of ``m`` acts as the intercept.
        """
        if self.M == 0:
            # Nothing to average over; an empty sum is zero.
            return 0
        residuals = Y[:self.M] - X[:self.M] @ m
        return np.sum(residuals ** 2) / self.M

    def __sto_step_gradient(self, X, Y, m):
        """Run one pass over the samples, updating the weights after each sample.

        NOTE(review): ``slope`` is not reset between samples, so every update
        uses the running sum of the per-sample gradients seen so far in this
        pass rather than the current sample's gradient alone. This is neither
        plain stochastic nor plain batch gradient descent. It is kept as-is
        because existing learning rates may be tuned to it -- confirm intent
        before changing.
        """
        slope = np.zeros(X.shape[1])
        for i in range(self.M):
            x = X[i, :]
            y = Y[i]
            slope += (-2 / self.M) * (y - (m * x).sum()) * x
            m = m - self.learning_rate * slope
        return m

    def __sto_gradient_descent(self, points):
        """Fit weights for ``points = (X, Y)`` and return ``(coefficients, intercept)``.

        ``X`` must be 2-D (rows x features); ``Y`` is flattened to 1-D. Both are
        coerced with ``np.asarray`` so lists and DataFrames are accepted.
        The cost is printed after every pass and the final cost is stored in
        ``self.min_cost``.

        Any exception is printed and a zero model is returned instead of
        propagating, so callers should inspect the output when a fit looks
        suspicious.
        """
        try:
            X = np.asarray(points[0], dtype=float)
            self.M = X.shape[0]
            self.n = X.shape[1]
            Y = np.asarray(points[1], dtype=float).ravel()
            if Y.shape[0] < self.M:
                raise ValueError(
                    f"Y has {Y.shape[0]} samples but X has {self.M} rows."
                )
            # A trailing column of ones lets the last weight act as the intercept.
            X_n = np.append(X, np.ones(self.M).reshape(-1, 1), axis=1)
            m = np.zeros(self.n + 1)
            for i in range(self.num_of_iterations):
                m = self.__sto_step_gradient(X_n, Y, m)
                print(f"Cost after {i}th iterations is: {self.__cost(X_n, Y, m)}.")
            self.min_cost = self.__cost(X_n, Y, m)
            return m[:-1], m[-1]
        except Exception as e:
            print(e)
            return np.zeros(self.n), 0

    def fit(self, X, Y):
        """Train on features ``X`` (2-D) and targets ``Y``; sets ``coef_`` and ``intercept_``."""
        self.coef_, self.intercept_ = self.__sto_gradient_descent((X, Y))

    def predict(self, X_test):
        """Return one prediction per row of the 2-D input ``X_test``.

        Computes ``X_test @ coef_ + intercept_`` in a single matrix product.
        """
        X_test = np.asarray(X_test, dtype=float)
        return X_test @ self.coef_ + self.intercept_

    def score(self, Y_truth, Y_pred):
        """Return the coefficient of determination (R^2) of ``Y_pred`` against ``Y_truth``.

        When ``Y_truth`` is constant the usual ratio is undefined; in that case
        1.0 is returned for a perfect prediction and 0.0 otherwise, instead of
        dividing by zero.
        """
        y_true = np.asarray(Y_truth, dtype=float)
        y_pred = np.asarray(Y_pred, dtype=float)
        u = np.sum((y_true - y_pred) ** 2)
        v = np.sum((y_true - y_true.mean()) ** 2)
        if v == 0:
            return 1.0 if u == 0 else 0.0
        return 1 - (u / v)

In [None]:
# Load the training table from CSV; further down, the last column is used as
# the target and all other columns as features.
df= pd.read_csv("training_ccpp_x_y_train.csv")

In [None]:
# Preview the first rows of the loaded table.
df.head()

In [None]:
# Split the table: every column except the last is a feature,
# the last column is the target.
X_train, Y_train = df.values[:, :-1], df.values[:, -1]

In [None]:
# Create a StandardScaler and fit it on the training features; the fitted
# scaler is reused below to transform both the training and the test inputs.
scaler=preprocessing.StandardScaler()
scaler.fit(X_train)

In [None]:
# Replace X_train with its scaled version.
# NOTE: X_train is overwritten, so re-running this cell on its own scales
# already-scaled data; re-run the cell that builds X_train first.
X_train= scaler.transform(X_train)

In [None]:
# Hyperparameters for the custom gradient-descent model:
# step size and number of passes over the training set.
learning_rate, num_iterations = 1e-4, 76

In [None]:
# Build the custom model with the chosen hyperparameters and train it
# on the scaled training data.
alg1 = MyLinearRegression(
    learning_rate=learning_rate,
    num_of_iterations=num_iterations,
)
alg1.fit(X_train, Y_train)

In [None]:
# Show the weights and intercept learned by the custom model.
alg1.coef_, alg1.intercept_

In [None]:
# Read the test features and transform them with the scaler that was
# fitted on the training features.
X_test = scaler.transform(np.loadtxt("test_ccpp_x_test.csv", delimiter=","))

In [None]:
# Custom-model predictions for the test inputs.
y_test_pred = alg1.predict(X_test=X_test)

# Custom-model predictions for the training inputs, scored against the
# known training targets.
y_train_pred = alg1.predict(X_test=X_train)
print("Train Score: ", alg1.score(Y_truth=Y_train, Y_pred=y_train_pred))

In [None]:
#np.savetxt("predictions_ccpp.csv",y_test_pred,delimiter=",",fmt="%.18f")

# Sklearn Linear Regression

In [None]:
# Fit scikit-learn's LinearRegression on the same scaled training data,
# as a reference for the custom model.
alg2=LinearRegression()
alg2.fit(X_train, Y_train)

In [None]:
# Show the weights and intercept found by scikit-learn, for comparison
# with the custom model's values.
alg2.coef_, alg2.intercept_

In [None]:
# scikit-learn predictions for the test inputs.
y_test_pred_skl = alg2.predict(X=X_test)

# scikit-learn predictions for the training inputs, plus the estimator's
# own score on the training set.
y_train_pred_skl = alg2.predict(X=X_train)
print("Train Score: ", alg2.score(X=X_train, y=Y_train))

# Plotting Graphs

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Compare the two models' test predictions. Points lying on the red y = x
# line are inputs for which both models predict the same value.
fig, ax = plt.subplots()
ax.scatter(y_test_pred, y_test_pred_skl)

# Span the reference diagonal over the actual prediction range; fall back to
# the previous fixed 400-500 window only when there is nothing to plot.
both = np.concatenate([np.ravel(y_test_pred), np.ravel(y_test_pred_skl)])
lo, hi = (both.min(), both.max()) if both.size else (400, 500)
x = np.array([lo, hi])
ax.plot(x, x, color="r", label="y = x")

ax.set_xlabel("MyLinearRegression prediction")
ax.set_ylabel("sklearn LinearRegression prediction")
ax.set_title("Test-set predictions: custom model vs. scikit-learn")
ax.legend()
plt.show()