# Linear Regression

In [1]:
import math
from statistics import mean

In [6]:
def calc_pearsonr(x,y):
    """
    Calculate the Pearson correlation coefficient (PCC), a.k.a. Pearson's r,
    of two paired samples.

    r = S_xy / (sqrt(S_xx) * sqrt(S_yy)), where
        S_xy = sum((x[i]-mean_x) * (y[i]-mean_y))
        S_xx = sum((x[i]-mean_x)**2)
        S_yy = sum((y[i]-mean_y)**2)

    @args: x and y, arrays of numbers with the same length
    @returns: Pearson's r
    @raises: ValueError if x and y differ in length
             ZeroDivisionError if x or y has no spread (all values equal)
             statistics.StatisticsError (raised by mean) if the input is empty
    """
    # Without this check a longer y is silently truncated in the sums while
    # still contributing fully to mean_y, which yields a wrong coefficient.
    if len(x) != len(y):
        raise ValueError("x and y must have the same length")

    # mean of x and y
    mean_x = mean(x)
    mean_y = mean(y)

    # sums of products / squares of the deviations from the means
    covar, dev_x, dev_y = 0, 0, 0
    for x_i, y_i in zip(x, y):
        diff_x = x_i - mean_x
        diff_y = y_i - mean_y
        covar += diff_x * diff_y
        dev_x += diff_x ** 2
        dev_y += diff_y ** 2

    # r is undefined when either sample is constant; keep the exception type
    # the plain division would raise, but say why it happened.
    if dev_x == 0 or dev_y == 0:
        raise ZeroDivisionError(
            "Pearson's r is undefined when x or y is constant")

    return covar / (math.sqrt(dev_x) * math.sqrt(dev_y))

In [9]:
def calc_slope(x,y):
    """
    Calculate the slope of a (least-squares) linear regression of y on x.

    slope = S_xy / S_xx, where
        S_xy = sum((x[i]-mean_x) * (y[i]-mean_y))
        S_xx = sum((x[i]-mean_x)**2)

    This is algebraically the same as r * (stddev_y / stddev_x), but the
    direct form never divides by the spread of y, so a constant y gives a
    slope of 0 instead of a division-by-zero error.

    @args: x and y, arrays of numbers with the same length
    @returns: the slope, as a float
    @raises: ValueError if x and y differ in length
             ZeroDivisionError if x has no spread (all values equal)
             statistics.StatisticsError (raised by mean) if the input is empty
    """
    # Without this check a longer y is silently truncated in the sums while
    # still contributing fully to mean_y, which yields a wrong slope.
    if len(x) != len(y):
        raise ValueError("x and y must have the same length")

    # mean of x and y
    mean_x = mean(x)
    mean_y = mean(y)

    # sums of products / squares of the deviations from the means
    covar, var_x = 0, 0
    for x_i, y_i in zip(x, y):
        diff_x = x_i - mean_x
        covar += diff_x * (y_i - mean_y)
        var_x += diff_x ** 2

    # A vertical line has no finite slope; keep the exception type the plain
    # division would raise, but say why it happened.
    if var_x == 0:
        raise ZeroDivisionError("slope is undefined when x is constant")

    return float(covar) / float(var_x)

In [13]:
def calc_intercept(x,y):
    """
    Calculate the y-intercept of a linear regression.

    The fitted line passes through the point (mean of x, mean of y), so the
    intercept is mean_y - slope * mean_x, with the slope taken from
    calc_slope(x, y).

    @args: x and y, arrays
    @returns: the y-intercept
    """
    # centre point of the data
    x_bar, y_bar = mean(x), mean(y)

    gradient = calc_slope(x, y)

    # shift the line back from the centre point to x = 0
    return y_bar - gradient * x_bar