## Supervised Learning Algorithm

---

### Import Relevant Libraries

In [1]:
import pandas as pd # for data handling
import numpy as np # for numerical operations

---

### Data

In [2]:
# Sample observations, one (X, Y) pair per row.
data_array = np.array([
    (-20.00, -69.00),
    (-10.00, -36.00),
    (-5.00, -20.00),
    (0.00, -8.00),
    (4.00, 6.00),
    (9.00, 21.00),
    (15.00, 35.00),
    (30.00, 81.00),
])
# Wrap the 2-D array in a DataFrame so the columns carry the names X and Y.
data = pd.DataFrame(data=data_array, columns=['X', 'Y'])
print(data)

      X     Y
0 -20.0 -69.0
1 -10.0 -36.0
2  -5.0 -20.0
3   0.0  -8.0
4   4.0   6.0
5   9.0  21.0
6  15.0  35.0
7  30.0  81.0


---

### Supervised Learning Algorithm (SLA) Function
#### Prints (the function has no return value):
- sorted data
- pairwise slopes and intercepts
- model parameters
- prediction/actual discrepancies
- mean squared error

#### Equations Implemented:
##### slope-intercept form of a line:
$$
y = mx + b
$$
##### y_hat:
$$
\hat{y} = mx + b
$$
##### mean squared error:
$$
MSE = \frac{1}{n} \lVert \hat{y} - y \rVert_{2}^{2} = \frac{1}{n} \sum_{i = 0}^{n - 1} (m \cdot x_i + b - y_i)^2
$$

In [5]:
def SLA(data): # supervised learning algorithm function
    """Fit a line from pairwise slopes and intercepts, then print a report.

    The rows are sorted by their first value (x). A line is drawn through
    each pair of neighbouring points, and the model slope and intercept are
    the means of those pairwise slopes and intercepts. The function prints
    the sorted points, the pairwise values, the model parameters, every
    prediction next to its actual value, and the mean squared error.

    Parameters
    ----------
    data : pandas.DataFrame
        Each row is unpacked as ``x, y``, so the frame needs exactly two
        numeric columns with x first.

    Returns
    -------
    None
        Every result is written to standard output.

    Raises
    ------
    ValueError
        If fewer than two points are given, or if two points share the same
        x value (the slope between them is undefined).
    """
    n = len(data) # n is the number of points
    if n < 2: # a pairwise slope needs two points; an empty frame used to fail on an unbound MSE
        raise ValueError(f"SLA needs at least two data points, got {n}")

    # sorted data section
    points = sorted(data.to_numpy(), key=lambda point: point[0]) # rows ordered by x; the caller's frame is left untouched

    # pairwise slopes and intercepts section
    slopes_list = [] # list to hold the slope values
    intercepts_list = [] # list to hold the intercept values
    for (x0, y0), (x1, y1) in zip(points, points[1:]): # each point paired with its right-hand neighbour
        if x1 == x0: # a vertical pair would divide by zero and poison every later result
            raise ValueError(f"SLA needs distinct X values, but {x0} appears more than once")
        m = (y1 - y0) / (x1 - x0) # slope
        b = ((y1 * x0) - (y0 * x1)) / (x0 - x1) # intercept
        # the pairwise values are rounded to two decimals before they are stored,
        # so the model parameters below are means of the rounded values
        slopes_list.append(round(m, 2))
        intercepts_list.append(round(b, 2))

    # model parameters section
    slope_model_parameters = np.mean(slopes_list) # mean of the pairwise slopes
    intercept_model_parameters = np.mean(intercepts_list) # mean of the pairwise intercepts
    slope_model_parameters_rounded = round(slope_model_parameters, 2) # rounded only for display
    intercept_model_parameters_rounded = round(intercept_model_parameters, 2) # rounded only for display

    # yhat section: predictions use the unrounded model parameters
    yhats = [round(slope_model_parameters * x + intercept_model_parameters, 2) for x, _ in points]

    # mean squared error section
    error = 0 # running sum of squared errors
    for x, y in points: # loop through the x values and the y values
        error += ((slope_model_parameters * x) + intercept_model_parameters - y) ** 2 # square the individual errors then add them
    MSE = round(error / n, 6) # mean of the squared errors, rounded to six decimal places

    # report section: sorted data, slopes and intercepts, model parameters, discrepancies, and MSE
    print(f"\nAfter sorting the dataset of {n} points:\n")

    for i, (x, y) in enumerate(points): # loop through the data by index
        print(f"X[{i}] = {x} Y[{i}] = {y}")

    print("\nThe pairwise slopes and intercepts:\n")

    for i, (m, b) in enumerate(zip(slopes_list, intercepts_list)): # slopes and intercepts share an index
        print(f"m[{i}] = {m} b[{i}] = {b}")

    print("\nThe model parameters obtained by the program:\n")
    print(f"Slope Estimate = {slope_model_parameters_rounded}")
    print(f"Intercept Estimate = {intercept_model_parameters_rounded}")

    print("\nThe discrepancies between the model predictions and the actual values of the dataset:\n")

    for i, (yhat, (_, y)) in enumerate(zip(yhats, points)): # prediction next to the actual y of the same point
        print(f"yhat[{i}] = {yhat}, y[{i}] = {y}")

    print(f"\nMean Squared Error = {MSE}")

In [6]:
# Run SLA on the sample DataFrame; the function prints its report to the cell output.
SLA(data)


After sorting the dataset of 8 points:

X[0] = -20.0 Y[0] = -69.0
X[1] = -10.0 Y[1] = -36.0
X[2] = -5.0 Y[2] = -20.0
X[3] = 0.0 Y[3] = -8.0
X[4] = 4.0 Y[4] = 6.0
X[5] = 9.0 Y[5] = 21.0
X[6] = 15.0 Y[6] = 35.0
X[7] = 30.0 Y[7] = 81.0

The pairwise slopes and intercepts:

m[0] = 3.3 b[0] = -3.0
m[1] = 3.2 b[1] = -4.0
m[2] = 2.4 b[2] = -8.0
m[3] = 3.5 b[3] = -8.0
m[4] = 3.0 b[4] = -6.0
m[5] = 2.33 b[5] = -0.0
m[6] = 3.07 b[6] = -11.0

The model parameters obtained by the program:

Slope Estimate = 2.97
Intercept Estimate = -5.71

The discrepancies between the model predictions and the actual values of the dataset:

yhat[0] = -65.14, y[0] = -69.0
yhat[1] = -35.43, y[1] = -36.0
yhat[2] = -20.57, y[2] = -20.0
yhat[3] = -5.71, y[3] = -8.0
yhat[4] = 6.17, y[4] = 6.0
yhat[5] = 21.03, y[5] = 21.0
yhat[6] = 38.86, y[6] = 35.0
yhat[7] = 83.43, y[7] = 81.0

Mean Squared Error = 5.195102


---