# Testing Our Linear Regression Algorithm

## Importing Libraries

In [1]:
import numpy as np
import gradient_descent
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import time
import math

## Creating a 2D Dataset of Random Values

In [8]:
# Synthetic regression dataset: M rows of N features, every value drawn
# uniformly from [0, 1).
N = 10           # No of features
M = 100000       # No of rows
SEED = 42        # Fixed seed so a fresh "Restart & Run All" rebuilds the same data
rng = np.random.default_rng(SEED)
x = rng.random((M, N))

# Ground-truth coefficients (one per feature) and intercept that the
# regression is expected to recover. To use hand-picked values instead,
# make sure the number of coefficients equals the number of features N.
actual_m = rng.random(N)
actual_c = rng.random(1)[0]

# Noise-free targets y = x . actual_m + actual_c, computed for all rows in a
# single matrix-vector product rather than a Python loop over the M rows.
y = x @ actual_m + actual_c

## Testing Our Algorithm

In [9]:
# Fit the algorithm on a training split, then score it twice: on the training
# rows (how well it fitted the data it saw) and on the held-out test rows
# (how well it does on data it has not seen).
# The whole procedure can be repeated n times so that the average training
# score and average test score can be reported.

total_Training_Score = 0
total_Test_Score = 0

# perf_counter() is a monotonic clock intended for measuring elapsed time;
# time.time() can jump if the system clock is adjusted mid-run.
start_Time = time.perf_counter()
n = 1
for count in range(n):
    # x and y hold the complete dataset, so split them into a training part
    # (used to fit the algorithm) and a test part (used to evaluate it).
    # With its default settings train_test_split puts 75% of the rows in the
    # training set and 25% in the test set, e.g. 750 / 250 for 1000 rows.
    # Rows are picked at random; seeding with the repetition index gives each
    # repetition a different split while keeping every run reproducible.
    x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=count)
    # NOTE(review): stoppage / iterations are presumably the convergence
    # tolerance and the iteration cap - confirm against gradient_descent.
    algo = gradient_descent.LinearRegression(stoppage = 1e-15, iterations = 2000)
    algo.fit(x_train, y_train)
    total_Training_Score += algo.score(x_train, y_train)
    total_Test_Score += algo.score(x_test, y_test)

end_Time = time.perf_counter()

In [10]:
# Some Analysis
print('Average Training Score:', total_Training_Score/n)
print('Average Test Score:', total_Test_Score/n)
print('Total Time:', (end_Time - start_Time)/60)

Average Training Score: 0.9999999999998893
Average Test Score: 0.9999999999998892
Total Time: 0.07958566347757975


In [11]:
# Parameters learned by the most recently fitted model, shown so they can be
# compared with the values used to generate the data.
predicted_parameters = (
    ('Predicted Coefficients:', algo.coef_),
    ('Predicted Intercept:', algo.intercept_),
)
for label, value in predicted_parameters:
    print(label, value)

Predicted Coefficients: [0.83305748 0.8086154  0.87185522 0.65515963 0.67542715 0.12145348
 0.88255248 0.01934143 0.83021482 0.03651281]
Predicted Intercept: 0.7891258319236325


In [12]:
# Ground-truth parameters that were used to generate y.
actual_parameters = (
    ('Actual Coefficients:', actual_m),
    ('Actual Intercept:', actual_c),
)
for label, value in actual_parameters:
    print(label, value)

Actual Coefficients: [0.83305769 0.80861562 0.87185544 0.65515985 0.67542738 0.1214537
 0.88255271 0.01934165 0.83021503 0.03651304]
Actual Intercept: 0.7891247007734952


In [13]:
# Dump the log entries recorded on the fitted model, one per line.
for entry in algo.logs:
    print(entry)

Iteration: 1, Previous Cost: 13.730006739671877, Current Cost: 508.9970629822431, Difference: 495.26705624257124, Alpha: 1
Iteration: 2, Previous Cost: 13.730006739671877, Current Cost: 508.9970629822431, Difference: 495.26705624257124, Alpha: 1.0
Iteration: 3, Previous Cost: 13.730006739671877, Current Cost: 89.08104328839973, Difference: 75.35103654872785, Alpha: 0.5
Iteration: 4, Previous Cost: 13.730006739671877, Current Cost: 8.334902120896373, Difference: 5.395104618775504, Alpha: 0.25
Iteration: 5, Previous Cost: 8.334902120896373, Current Cost: 5.069667414537376, Difference: 3.2652347063589975, Alpha: 0.25
Iteration: 6, Previous Cost: 5.069667414537376, Current Cost: 3.0926821010676493, Difference: 1.9769853134697266, Alpha: 0.25
Iteration: 7, Previous Cost: 3.0926821010676493, Current Cost: 1.894957198277913, Difference: 1.1977249027897363, Alpha: 0.25
Iteration: 8, Previous Cost: 1.894957198277913, Current Cost: 1.1686649348070428, Difference: 0.7262922634708702, Alpha: 0.25
