In [1]:
import ols_regression

In [2]:
ols_regression.main()

Running OLS Regression test...
0.397060294305394, 0.39706029430539047
33.36980039565309, 33.369800395653094
Test complete.


In [6]:
from collections.abc import Sequence


In [1]:
from collections.abc import Sequence
from numbers import Number
from typing import Tuple
import unittest

def covar(x: Sequence, y: Sequence, mean_x: Number=None, mean_y: Number=None) -> Number:
    """
    Computes the sample covariance between two sequences of numerics. 
    The two sequences must be of same length.
    
    @param x: sequence of numeric type
    @param y: sequence of numeric type
    @param mean_x: optional, expectation of sequence x
    @param mean_y: optional, expectation of sequence y
    @return: covariance of x and y
    @raise Exception: if x and y are not of equi-length
    """
    # check inputs are of equi-length
    n = len(x)
    if n != len(y):
        raise Exception("Inputs x and y should be of same length.")
    
    mean_x = sum(x)/n if mean_x is None else mean_x
    mean_y = sum(y)/n if mean_y is None else mean_y
    
    return sum([(x[i] - mean_x)*(y[i] - mean_y) for i in range(0,n)])/(n-1)


def ols_estimator(x: Sequence, y: Sequence) -> Tuple[float, float]:
    """
    Estimate univariate OLS linear regression coefficients for y = a + b*x.
    
    @param x: regressor, sequence of numeric type
    @param y: regressand, sequence of numeric type
    @return: a tuple of (slope, intercept)
    @raise Exception: if input sequences are ill defined
    """
    # check inputs are of equi-length
    n = len(x)
    if n != len(y):
        raise Exception("Inputs x and y should be of same length.")
    
    if n < 2:
        raise Exception("Input sequence should have length greater than one.")
        
        
    mean_x = sum(x)/n
    mean_y = sum(y)/n
    var_x = covar(x,x,mean_x,mean_x)
    
    if var_x == 0:
        raise Exception("OLS Regressor cannot have zero variance.")
    
    beta = covar(x,y,mean_x,mean_y)/var_x
    alpha = mean_y - beta*mean_x
    
    return (beta, alpha)


# test case 1:
# a = range(0,10)
# b = range(1,11)
# ols_estimator(a,b)


# test case 2:
# a = [10,3,5,9]
# b = [7,5,6,7]
# ols_estimator(a,b)

In [2]:
import ols_regression

In [1]:
a = [10,3,5,9]
b = [7,5,6,7]

In [2]:
import numpy as np

In [8]:
c = np.array(a).reshape(-1,1)

In [10]:
isinstance(a, Sequence)

True

In [11]:
fit = LinearRegression().fit(np.array(a).reshape(-1,1), np.array(b).reshape(-1,1))

In [26]:
round(fit.intercept_[0],6)

4.343511

In [6]:
ols_regression.ols_estimator(a,b)

(0.2824427480916031, 4.343511450381679)

In [31]:
x = [random.random() for _ in range(10)]

In [34]:
        y = [random.random() + random.randrange(1, 50, 1)*v for v in x]


In [35]:
x

[0.31371464443581065,
 0.2025528296337511,
 0.533276039982301,
 0.058253013659730346,
 0.2549287359702317,
 0.6890614720286445,
 0.9901674846586829,
 0.9367285584307641,
 0.3624460829427989,
 0.6755277976279365]

In [36]:
y

[5.183754158860053,
 2.0118868805172374,
 16.053386374963768,
 1.6105893871793842,
 6.057856156549194,
 17.328613963906843,
 28.71539893671797,
 46.81366991896193,
 5.538485003315605,
 19.168656878060865]

In [40]:
import numpy as np
import random
from sklearn.linear_model import LinearRegression

class TestOLSRegression(unittest.TestCase):
    def test_ols_regression(self):
        x = [random.random() for _ in range(10)]
        seed = random.randrange(1, 50, 1)
        y = [random.random() + seed*v for v in x]
        
        fit = LinearRegression().fit(np.array(x).reshape(-1,1), np.array(y).reshape(-1,1))
        actual_beta = fit.coef_[0][0]
        actual_alpha = fit.intercept_[0]
        
        test_beta, test_alpha = ols_regression.ols_estimator(x,y)
        print(actual_beta)
        print(test_beta)
        self.assertEqual(round(actual_alpha,6), round(test_alpha, 6), 'Intercept estimate incorrect.')
        self.assertEqual(round(actual_beta,6), round(test_beta, 6), 'Slope estimate incorrect.')


In [43]:
test = TestOLSRegression()

AttributeError: 'TestOLSRegression' object has no attribute 'main'

In [94]:
def ols_estimator(x: Sequence, y: Sequence) -> Tuple[float, float]:
    """
    Estimate univariate OLS linear regression coefficients for y = a + b*x.
    
    @param x: regressor, sequence of numeric type
    @param y: regressand, sequence of numeric type
    @return: a tuple of (slope, intercept)
    @raise Exception: if x and y are not of equi-length
    """
    # check inputs are of equi-length
    n = len(x)
    if n != len(y):
        raise Exception("Inputs x and y should be of same length.")
        
    mean_x = sum(x)/n
    mean_y = sum(y)/n
    
    beta = covar(x,y,mean_x,mean_y)/covar(x,x,mean_x,mean_x)
    alpha = mean_y - beta*mean_x
    
    return (beta, alpha)
    

(1.0, 1.0)

In [122]:
a = [10,3,5,9]
b = [7, 5, 6, 7]

ols_estimator(a,b)

(0.2824427480916031, 4.343511450381679)