# Assessments
YW
* write a function to find the mean of a list of numbers
* write a function that calculates a dot product
* write a function that centers an array on the mean 
* write a function to calculate the standard deviation of a list of numbers (preferably using a dot product)
* write a function to calculate the correlation and covariance of two lists
* write a function to calculate the cost between ytrue and ypred

In [2]:
from math import sqrt

def array_mean(X):
    """
    takes in a list of numbers X
    and returns its arithmetic mean

    raises TypeError if X is not a list
    raises ValueError if X is empty
    """
    if not isinstance(X, list):
        raise TypeError(f"X={repr(X)} is not a list.")
    if not X:
        raise ValueError(f"array X={repr(X)} is empty.")
    # sum() starts at 0 and adds left to right, exactly like a manual loop
    return sum(X) / len(X)

def mean_center(X):
    """
    takes in a list of numbers X
    and returns a new list centered on the mean

    in other words x_i -> (x_i - x_bar)

    raises TypeError if X is not a list
    raises ValueError if X is empty
    """
    if not isinstance(X, list):
        raise TypeError(f"X={repr(X)} is not a list.")
    if not X:
        raise ValueError(f"array X={repr(X)} is empty.")
    x_bar = array_mean(X)
    # build a new list; the input X is left untouched
    return [x - x_bar for x in X]


def dot_product(X1, X2):
    """
    takes in lists of numbers X1 and X2
    and returns their dot product

    raises ValueError if either input is empty
    or if the two inputs have different lengths
    """
    # len() comparisons (rather than truthiness) keep this usable with
    # any sized sequence, since no list type check is done here
    if len(X1) == 0:
        raise ValueError(f"array X1={repr(X1)} is empty.")
    if len(X2) == 0:
        raise ValueError(f"array X2={repr(X2)} is empty.")
    if len(X1) != len(X2):
        raise ValueError("arrays X1 and X2 have different lengths.")
    # zip is consumed lazily; no intermediate list of pairs is needed
    return sum(x1 * x2 for x1, x2 in zip(X1, X2))

# Standard Deviation Formula
![](images/standard-deviation.png)

In [5]:
def variance(X):
    """
    takes in a list of numbers X
    and returns its population variance
    (sum of squared deviations from the mean, divided by N)

    raises TypeError if X is not a list
    raises ValueError if X is empty
    """
    if not isinstance(X, list):
        raise TypeError(f"X={repr(X)} is not a list.")
    if not X:
        raise ValueError(f"array X={repr(X)} is empty.")

    X_center = mean_center(X)
    # dot product of the centered list with itself = sum of squared deviations
    sum_sq_dev = dot_product(X_center, X_center)
    return sum_sq_dev / len(X)

def standard_deviation(X):
    """
    takes in a list of numbers X
    and returns the square root of variance(X)
    """
    var_X = variance(X)
    return sqrt(var_X)

# Covariance Formula
![](images/covariance.png)

In [6]:
def covariance(X, Y):
    """
    takes in two lists of numbers X and Y
    and returns their population covariance
    (dot product of the mean-centered lists, divided by N)

    raises TypeError if X or Y is not a list
    raises ValueError if X or Y is empty
    or if they have different lengths
    """
    if not isinstance(X, list):
        raise TypeError(f"X={repr(X)} is not a list.")
    if not isinstance(Y, list):
        raise TypeError(f"Y={repr(Y)} is not a list.")
    if not X:
        raise ValueError(f"array X={repr(X)} is empty.")
    if not Y:
        raise ValueError(f"array Y={repr(Y)} is empty.")
    if len(X) != len(Y):
        raise ValueError("arrays X and Y have different lengths.")

    X_center = mean_center(X)
    Y_center = mean_center(Y)
    return dot_product(X_center, Y_center) / len(X)

# Correlation Formula
![](images/correlation.png)

In [7]:
def correlation(X, Y):
    """
    takes in two lists of numbers X and Y
    and returns their correlation, computed as
    dot(Xc, Yc) / sqrt(dot(Xc, Xc) * dot(Yc, Yc))
    where Xc and Yc are the mean-centered lists

    raises TypeError if X or Y is not a list
    raises ValueError if X or Y is empty
    or if they have different lengths
    raises ZeroDivisionError if the denominator is zero
    (e.g. every value in X, or every value in Y, is the same)
    """
    if not isinstance(X, list):
        raise TypeError(f"X={repr(X)} is not a list.")
    if not isinstance(Y, list):
        raise TypeError(f"Y={repr(Y)} is not a list.")
    if not X:
        raise ValueError(f"array X={repr(X)} is empty.")
    if not Y:
        raise ValueError(f"array Y={repr(Y)} is empty.")
    if len(X) != len(Y):
        raise ValueError("arrays X and Y have different lengths.")

    X_center = mean_center(X)
    Y_center = mean_center(Y)
    dot_Xc_Yc = dot_product(X_center, Y_center)
    dot_Xc_Xc = dot_product(X_center, X_center)
    dot_Yc_Yc = dot_product(Y_center, Y_center)

    correlation_denom = sqrt(dot_Xc_Xc * dot_Yc_Yc)
    if correlation_denom == 0:
        # same exception type the bare division would raise, but with a
        # message that explains the cause instead of "float division by zero"
        raise ZeroDivisionError(
            "correlation is undefined: X or Y has zero variance."
        )
    return dot_Xc_Yc / correlation_denom

# RMSE Formula
![](images/rmse.png)

In [9]:
def rss(ytrue, ypred):
    """
    takes in lists of numbers ytrue and ypred
    and returns their residual sum of squares,
    i.e. the sum over i of (ypred_i - ytrue_i)^2

    raises TypeError if ytrue or ypred is not a list
    raises ValueError if ytrue or ypred is empty
    or if they have different lengths
    """
    if not isinstance(ytrue, list):
        raise TypeError(f"ytrue={repr(ytrue)} is not a list.")
    if not isinstance(ypred, list):
        raise TypeError(f"ypred={repr(ypred)} is not a list.")
    if not ytrue:
        raise ValueError(f"array ytrue={repr(ytrue)} is empty.")
    if not ypred:
        raise ValueError(f"array ypred={repr(ypred)} is empty.")
    if len(ytrue) != len(ypred):
        raise ValueError("arrays ytrue and ypred have different lengths.")

    residuals = [yp - yt for yp, yt in zip(ypred, ytrue)]
    # dot product of the residuals with themselves = sum of squared residuals
    return dot_product(residuals, residuals)

def mse(ytrue, ypred):
    """
    takes in ytrue and ypred
    and returns their mean squared error:
    rss(ytrue, ypred) divided by the number of entries in ytrue
    """
    # rss runs first and rejects an empty ytrue, so the divisor is non-zero
    return rss(ytrue, ypred) / len(ytrue)

def rmse(ytrue, ypred):
    """
    takes in ytrue and ypred
    and returns their root mean squared error:
    the square root of mse(ytrue, ypred)
    """
    mean_sq_err = mse(ytrue, ypred)
    return sqrt(mean_sq_err)


# RSS Formula 
![](images/rss.png)

In [10]:
# see the rss function defined in the code block above (In [9]).