### Importing the Libraries

In [1]:
import numpy as np # Creating numpy nd arrays
from sklearn.model_selection import train_test_split # For random splitting of dataset
from sklearn import linear_model # For using inbuilt Linear Regression Model
from sklearn.preprocessing import MinMaxScaler # For Scaling the features

### Fetching data from Train and Test Set

In [2]:
# Training data: each row holds the feature columns followed by the target value in the last column.
train_set = np.loadtxt(fname='ccpp_train.csv', delimiter=',')
train_set

array([[   8.58,   38.38, 1021.03,   84.37,  482.26],
       [  21.79,   58.2 , 1017.21,   66.74,  446.94],
       [  16.64,   48.92, 1011.55,   78.76,  452.56],
       ...,
       [  29.8 ,   69.34, 1009.36,   64.74,  437.65],
       [  16.37,   54.3 , 1017.94,   63.63,  459.97],
       [  30.11,   62.04, 1010.69,   47.96,  444.42]])

In [3]:
# Test data: feature columns only, in the same order as the training features.
test_set = np.loadtxt(fname='ccpp_test.csv', delimiter=',')
test_set

array([[  11.95,   42.03, 1017.58,   90.89],
       [  12.07,   38.25, 1012.67,   81.66],
       [  26.91,   74.99, 1005.64,   78.98],
       ...,
       [  24.32,   66.25, 1009.09,   91.89],
       [  23.49,   42.8 , 1013.96,   65.31],
       [  21.76,   60.27, 1018.96,   85.06]])

In [4]:
# Every column except the last is a feature; the last column is the value to predict.
x, y = train_set[:, :-1], train_set[:, -1]

### Feature Scaling

In [5]:
# Learn the per-feature min/max from the training features only, then apply
# that same mapping to both the training and the test features.
scaler = MinMaxScaler().fit(x)
x = scaler.transform(x)
test_set = scaler.transform(test_set)

### Creating a Class for Linear Regression

In [6]:
# Linear Regression for n features, trained with batch gradient descent
class LinearRegression:
    """Least-squares linear model fitted by batch gradient descent.

    The model is ``y ~ x . constants`` with no separate bias term, so the
    caller is expected to supply the intercept as a column of ones.  ``fit``
    treats the LAST column of ``x`` as that bias column when it fills in
    ``coeff`` and ``intercept``.

    Attributes set by ``fit``:
        coeff: weights of every column except the last.
        intercept: weight of the last column.

    Invalid input raises an exception instead of printing a message and
    returning ``None``, so a failed step cannot be mistaken for a result.
    """

    # Class-level default so that "not fitted yet" can be detected reliably.
    __constants = None

    def __init__(self):
        print('Algorithm Initiated')

    # Cost function
    def __cost(self, x, y):
        """Return the mean squared error of the current weights on (x, y)."""
        residual = y - x.dot(self.__constants)
        return (residual ** 2).mean()

    # One gradient-descent update
    def __step_gradient_descent(self, x, y, alpha):
        """Move the weights one step of size ``alpha`` against the MSE gradient."""
        # The residual is the same for every column, so compute it once per
        # step instead of once per (row, column) pair.
        residual = y - x.dot(self.__constants)
        # d(MSE)/dw = -(2/M) * X^T (y - Xw), all columns at once.
        costderivative = (-2.0 / self.__M) * x.T.dot(residual)
        self.__constants = self.__constants - alpha * costderivative

    # Repeated updates
    def __gradient_descent(self, x, y, alpha, iterations):
        """Run ``iterations`` consecutive gradient-descent updates."""
        for _ in range(iterations):
            self.__step_gradient_descent(x, y, alpha)

    def fit(self, x, y, alpha, iterations):
        """Fit the weights to (x, y), starting from all zeros.

        Args:
            x: 2-D array-like of shape (rows, columns); the last column is
                treated as the bias column (see class docstring).
            y: array-like with one target per row of ``x``; it is flattened,
                so shapes (rows,) and (rows, 1) are both accepted.
            alpha: learning rate (step size of each update).
            iterations: number of gradient-descent updates to run.

        Raises:
            ValueError: if ``x`` is not a non-empty 2-D array or ``y`` does
                not have exactly one value per row of ``x``.
        """
        x = np.asarray(x, dtype=float)
        y = np.asarray(y, dtype=float).reshape(-1)
        if x.ndim != 2 or x.shape[0] == 0 or x.shape[1] == 0:
            raise ValueError(
                'x must be a non-empty 2-D array, got shape %s' % (x.shape,))
        if y.shape[0] != x.shape[0]:
            raise ValueError(
                'x has %d rows but y has %d values' % (x.shape[0], y.shape[0]))

        self.__M = x.shape[0]  # No of rows or values in the dataset
        self.__N = x.shape[1]  # No of columns or features in the dataset
        self.__constants = np.zeros(self.__N)  # One weight per column
        self.__gradient_descent(x, y, alpha, iterations)
        self.coeff = self.__constants[:-1]     # Separating coefficients from intercept
        self.intercept = self.__constants[-1]  # Y-intercept

    def predict(self, test):
        """Return one prediction per row of ``test``.

        Args:
            test: 2-D array-like with the same number of columns (bias column
                included) as the data passed to ``fit``.

        Returns:
            1-D float array of length ``test.shape[0]``.

        Raises:
            RuntimeError: if called before ``fit``.
            ValueError: if ``test`` is not 2-D or its column count differs
                from the fitted weights.
        """
        weights = self.__constants
        if weights is None:
            raise RuntimeError('predict() called before fit()')
        test = np.asarray(test, dtype=float)
        if test.ndim != 2 or test.shape[1] != weights.shape[0]:
            raise ValueError(
                'test must have shape (rows, %d), got %s'
                % (weights.shape[0], test.shape))
        return test.dot(weights)

    # Score function
    def score(self, x, y):
        """Return the coefficient of determination (R^2) of the fit on (x, y).

        Computed as ``1 - u/v`` where ``u`` is the residual sum of squares and
        ``v`` the total sum of squares of ``y``.  When ``y`` is constant ``v``
        is zero and the result is not a finite number.

        Raises:
            RuntimeError: if called before ``fit``.
            ValueError: if the shapes of ``x`` and ``y`` are inconsistent.
        """
        # Flatten so that a (rows, 1) target cannot broadcast against the
        # (rows,) predictions into a (rows, rows) matrix.
        y = np.asarray(y, dtype=float).reshape(-1)
        y_pred = self.predict(x)
        if y.shape[0] != y_pred.shape[0]:
            raise ValueError(
                'x has %d rows but y has %d values' % (y_pred.shape[0], y.shape[0]))
        u = ((y - y_pred) ** 2).sum()
        v = ((y - y.mean()) ** 2).sum()
        return 1 - u / v

In [7]:
# Add a trailing column of ones to both feature matrices; the weight learned for it is the intercept.
# NOTE: x and test_set are overwritten, so running this cell again appends one more column each time.
ones1 = np.ones((x.shape[0], 1))
x = np.concatenate((x, ones1), axis=1)
ones2 = np.ones((test_set.shape[0], 1))
test_set = np.concatenate((test_set, ones2), axis=1)
print(x.shape)
print(test_set.shape)

(7176, 5)
(2392, 5)


### Testing the algorithm

In [8]:
# Split (x, y) into a training part and a held-out part, then fit the hand-written model
# on the training part. The score and predictions printed here are computed on that same
# training part; x_test and y_test are not used in this cell.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)
algo = LinearRegression()
algo.fit(x_train, y_train, alpha=0.4, iterations=1000)
print('Self Made Algo Score:', algo.score(x_train, y_train))
print('Self Made Algo Predictions:')
print(algo.predict(x_train))

Algorithm Initiated
Self Made Algo Score: 0.9258705178942184
Self Made Algo Predictions:
[482.40260472 460.51674958 447.18818506 ... 473.41896103 449.4671688
 445.15473579]


In [9]:
# Learned feature weights on the first line, intercept on the second.
print(algo.coeff, algo.intercept, sep='\n')

[-58.90274912 -17.8876033    6.96413074  -8.04051343]
496.1128722532661


In [10]:
# Reference fit with scikit-learn's LinearRegression on the same training split, for comparison.
inbuilt_algo = linear_model.LinearRegression().fit(x_train, y_train)
print('Inbuilt algo score:', inbuilt_algo.score(x_train, y_train))
print('Inbuilt algo predictions:')
print(inbuilt_algo.predict(x_train))

Inbuilt algo score: 0.9282823477634228
Inbuilt algo predictions:
[484.39621611 460.91793105 448.07928111 ... 473.44893106 449.3921115
 444.41006337]


### Predicting Output for test data

In [12]:
# Fit a fresh instance of the hand-written model on the full training data.
algo1 = LinearRegression()
algo1.fit(x, y, alpha=0.4, iterations=1000)
print(algo1.score(x, y))  # Score on the data the model was trained on
# Predict the test set, round to 5 decimal places and write one prediction per line.
y_pred = np.round(algo1.predict(test_set), decimals=5)
np.savetxt('ans_ccpp.csv', y_pred, delimiter=',')

Algorithm Initiated
0.9262753082044313
