# Linear Regression

In [1]:
import math
from statistics import mean

In [6]:
def calc_pearsonr(x,y):
    """
    Calculate pearson correlation coefficient (PCC) a.k.a. Pearson's r of a linear regression

    r = sum((x[i]-mean_x)*(y[i]-mean_y))
        / (sqrt(sum((x[i]-mean_x)^2)) * sqrt(sum((y[i]-mean_y)^2)))

    @args: x and y, equal-length sequences of numbers
    @returns: Pearson's r
    @raises: ValueError if x and y differ in length;
             ZeroDivisionError if x or y is constant (zero deviation);
             statistics.StatisticsError (from mean()) if the inputs are empty
    """

    # Pairing by index only makes sense for equal lengths; without this check
    # a longer y would be silently truncated while still skewing mean_y.
    if len(x) != len(y):
        raise ValueError(
            "x and y must have the same length (got %d and %d)" % (len(x), len(y))
        )

    # mean of x and y
    mean_x = mean(x)
    mean_y = mean(y)

    # The 1/n factors of the covariance and of both standard deviations
    # cancel in the ratio, so plain sums of (squared) deviations suffice.
    covar, dev_x, dev_y = 0, 0, 0
    for xi, yi in zip(x, y):
        dx = xi - mean_x
        dy = yi - mean_y
        covar += dx * dy
        dev_x += dx ** 2
        dev_y += dy ** 2

    return covar / (math.sqrt(dev_x) * math.sqrt(dev_y))

In [8]:
# Sample data: compute Pearson's r for the paired observations and print it
# rounded to three decimal places.
x = [15, 12, 8, 8, 7, 7, 7, 6, 5, 3]
y = [10, 25, 17, 11, 13, 17, 20, 13, 9, 15]
r = calc_pearsonr(x, y)
print('{0:.3f}'.format(r))

0.145


In [9]:
def calc_slope(x,y):
    """
    Calculate the slope of a linear regression (least-squares line of y on x)

    slope = r * (stddev_y / stddev_x), which simplifies to
    slope = sum((x[i]-mean_x)*(y[i]-mean_y)) / sum((x[i]-mean_x)^2)

    @args: x and y, equal-length sequences of numbers
    @returns: the slope as a float
    @raises: ValueError if x and y differ in length;
             ZeroDivisionError if x is constant (zero variance);
             statistics.StatisticsError (from mean()) if the inputs are empty
    """

    # Pairing by index only makes sense for equal lengths.
    if len(x) != len(y):
        raise ValueError(
            "x and y must have the same length (got %d and %d)" % (len(x), len(y))
        )

    # mean of x and y
    mean_x = mean(x)
    mean_y = mean(y)

    # Compute covariance / variance of x directly. Going through r and the
    # two standard deviations is algebraically identical, but it divides by
    # the deviation of y and therefore fails for a constant y, whose slope
    # is simply 0.
    covar, var_x = 0, 0
    for xi, yi in zip(x, y):
        dx = xi - mean_x
        covar += dx * (yi - mean_y)
        var_x += dx ** 2

    return float(covar) / var_x

In [10]:
# Same sample data as above: print the regression slope to three decimals.
x = [15, 12, 8, 8, 7, 7, 7, 6, 5, 3]
y = [10, 25, 17, 11, 13, 17, 20, 13, 9, 15]
result = calc_slope(x, y)
print("%.3f" % result)

0.208


In [13]:
def calc_intercept(x,y):
    """
    Calculate the y-intercept of a linear regression

    Computed as mean_y - slope * mean_x, with the slope taken from
    calc_slope(x, y).

    @args: x and y, arrays
    @returns: the y-intercept
    """
    x_bar, y_bar = mean(x), mean(y)
    return y_bar - calc_slope(x, y) * x_bar

In [15]:
# Fit the regression line on the sample data, then evaluate it at x = score.
x = [15, 12, 8, 8, 7, 7, 7, 6, 5, 3]
y = [10, 25, 17, 11, 13, 17, 20, 13, 9, 15]
slope = calc_slope(x, y)
y_intercept = calc_intercept(x, y)

# Predicted y for the given x value, printed to three decimals.
score = 10
result = slope * score + y_intercept
print("%.3f" % result)

15.458


# Polynomial Regression

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Sample rows for the polynomial regression section; each row holds three
# numbers (presumably two features followed by the observed value -- confirm
# against the code that consumes m).
m = [
    [0.44, 0.68, 511.14],
    [0.99, 0.23, 717.1],
    [0.84, 0.29, 607.91],
    [0.28, 0.45, 270.4],
]