# Testing Our Linear Regression Algorithm

## Importing Libraries

In [1]:
import numpy as np
import gradient_descent
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import time
import math

## Creating a 2D Dataset of Random Values

In [14]:
# Build a synthetic, noise-free linear dataset: M rows, each with N random features.
N = 10         # Number of features (columns)
M = 1000       # Number of rows (samples)
x = np.random.rand(M, N)

# Ground-truth coefficients and intercept that the regression should recover.
# Change N, or replace these with fixed values, as required; actual_m must
# always contain exactly one coefficient per feature (length N).
actual_m = np.random.rand(N)
actual_c = np.random.rand(1)[0]

# Targets: y[i] = sum_j(actual_m[j] * x[i, j]) + actual_c.
# A single matrix-vector product computes every row at once instead of
# looping over the rows in Python.
y = x @ actual_m + actual_c

## Testing Our Algorithm

In [15]:
# Train the algorithm on a random training split, then score the fitted model
# on the training data (how well it learned that data) and on the held-out
# test data (how well it generalises).
#
# The procedure can be repeated n times so that the average training score
# and the average test score can be reported afterwards.

total_Training_Score = 0
total_Test_Score = 0

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() is wall-clock time and can jump if the system
# clock is adjusted while the loop is running.
start_Time = time.perf_counter()
n = 1   # number of train/test repetitions to average over
for count in range(n):
    # x and y hold the complete dataset, so split them into a training part
    # (used to fit the algorithm) and a test part (used to evaluate it).
    #
    # With its default arguments train_test_split divides the rows 3:1,
    # e.g. 1000 rows -> 750 for training and 250 for testing, and the rows
    # are selected at random.
    x_train, x_test, y_train, y_test = train_test_split(x, y)

    # NOTE(review): `stoppage` and `iterations` are defined by the local
    # gradient_descent module; presumably a convergence threshold and an
    # iteration cap -- confirm against that module.
    algo = gradient_descent.LinearRegression(stoppage = 1e-15, iterations = 2000)
    algo.fit(x_train, y_train)
    total_Training_Score += algo.score(x_train, y_train)
    total_Test_Score += algo.score(x_test, y_test)

end_Time = time.perf_counter()

In [16]:
# Some Analysis
print('Average Training Score:', total_Training_Score/n)
print('Average Test Score:', total_Test_Score/n)
print('Total Time:', (end_Time - start_Time)/60)

Average Training Score: 0.9999999999997862
Average Test Score: 0.9999999999997627
Total Time: 0.6475985010464986


In [17]:
# Parameters learned by the model fitted in the last run of the training loop.
fitted_coefficients = algo.coef_
fitted_intercept = algo.intercept_

print('Predicted Coefficients:', fitted_coefficients)
print('Predicted Intercept:', fitted_intercept)

Predicted Coefficients: [0.28595185 0.94449151 0.74079598 0.19033311 0.28495338 0.16561867
 0.65564073 0.40379673 0.06295421 0.38295455]
Predicted Intercept: 0.8093172977141329


In [18]:
# Ground-truth parameters used to generate y, for comparison with the
# predicted values.
true_coefficients, true_intercept = actual_m, actual_c

print('Actual Coefficients:', true_coefficients)
print('Actual Intercept:', true_intercept)

Actual Coefficients: [0.28595164 0.94449128 0.74079582 0.19033288 0.28495323 0.16561841
 0.6556405  0.40379642 0.06295391 0.38295428]
Actual Intercept: 0.8093184896033505


In [19]:
# Print every entry the fitted model recorded in its `logs` attribute,
# one entry per line.
for entry in algo.logs:
    print(entry)

Iteration: 1, Previous Cost: 8.447820892626064, Current Cost: 310.96327879171093, Difference: 302.5154578990849, Alpha: 1
Iteration: 2, Previous Cost: 8.447820892626064, Current Cost: 310.96327879171093, Difference: 302.5154578990849, Alpha: 1.0
Iteration: 3, Previous Cost: 8.447820892626064, Current Cost: 54.33775961306526, Difference: 45.8899387204392, Alpha: 0.5
Iteration: 4, Previous Cost: 8.447820892626064, Current Cost: 5.050842695569851, Difference: 3.396978197056213, Alpha: 0.25
Iteration: 5, Previous Cost: 5.050842695569851, Current Cost: 3.025745472169186, Difference: 2.025097223400665, Alpha: 0.25
Iteration: 6, Previous Cost: 3.025745472169186, Current Cost: 1.8180115899022355, Difference: 1.2077338822669506, Alpha: 0.25
Iteration: 7, Previous Cost: 1.8180115899022355, Current Cost: 1.0973000229814451, Difference: 0.7207115669207904, Alpha: 0.25
Iteration: 8, Previous Cost: 1.0973000229814451, Current Cost: 0.666814503304735, Difference: 0.4304855196767101, Alpha: 0.25
Itera