EM 538-001: Practical Machine Learning for Engineering Analytics (Spring 2025)  
Instructor: Fred Livingston (fjliving@ncsu.edu) 

### Load and Prepare Datasets

In [8]:
import pandas as pd
# Load the taxi training data from a CSV file in the current working directory.
# Alternative source kept for reference (uncomment to read the CSV by URL
# instead; presumably the same data as the local copy -- verify before relying on it):
# chicago_taxi_dataset = pd.read_csv("https://download.mlcc.google.com/mledu-datasets/chicago_taxi_train.csv")
chicago_taxi_dataset = pd.read_csv("chicago_taxi_train.csv")

In [None]:
# Preview the first rows to confirm the CSV loaded and to inspect its columns.
chicago_taxi_dataset.head()

In [None]:
import seaborn as sns
# Visual check of the relationship being modelled: trip distance on the
# horizontal axis, fare on the vertical axis.
sns.scatterplot(
    x="TRIP_MILES",
    y="FARE",
    data=chicago_taxi_dataset,
)

In [11]:
from sklearn.model_selection import train_test_split

# Single predictor (trip distance) and the regression target (fare).
X, y = chicago_taxi_dataset["TRIP_MILES"], chicago_taxi_dataset["FARE"]

# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffled split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=123,
)

In [12]:
import matplotlib.pyplot as plt
import matplotlib.axes as ax
from matplotlib.animation import FuncAnimation
import numpy as np

class LinearRegression:
    """Single-feature linear model ``y = w * x + b`` fitted with batch gradient descent.

    Attributes:
        parameters: dict holding the current slope under ``'w'`` and the
            intercept under ``'b'``; empty until :meth:`train` is called.
        loss: list with the mean squared error measured at the start of each
            gradient step of the most recent :meth:`train` call.
    """

    def __init__(self):
        self.parameters = {}
        self.loss = []

    def forward_propagation(self, train_input):
        """Return the predictions ``w * train_input + b`` for the current parameters."""
        w = self.parameters['w']
        b = self.parameters['b']
        return np.multiply(w, train_input) + b

    def cost_function(self, predictions, train_output):
        """Return the mean squared error between ``train_output`` and ``predictions``."""
        return np.mean((train_output - predictions) ** 2)

    def backward_propagation(self, train_input, train_output, predictions):
        """Return the gradients of the mean squared error.

        Returns:
            dict with ``'dw'`` (gradient w.r.t. the slope) and ``'db'``
            (gradient w.r.t. the intercept).
        """
        residuals = predictions - train_output
        return {
            # d(MSE)/dw = 2 * mean((prediction - actual) * input)
            'dw': 2 * np.mean(np.multiply(train_input, residuals)),
            # d(MSE)/db = 2 * mean(prediction - actual)
            'db': 2 * np.mean(residuals),
        }

    def update_parameters(self, derivatives, learning_rate):
        """Apply one gradient-descent step to ``w`` and ``b`` in ``self.parameters``."""
        self.parameters['w'] = self.parameters['w'] - learning_rate * derivatives['dw']
        self.parameters['b'] = self.parameters['b'] - learning_rate * derivatives['db']

    def train(self, train_input, train_output, learning_rate, iters, animate=True):
        """Fit ``w`` and ``b`` with exactly ``iters`` gradient-descent steps.

        Training runs in a plain loop and the animation is rendered afterwards
        from the recorded parameter history. Doing the gradient step inside
        the animation frame callback is unreliable: matplotlib calls that
        callback for the initial draw as well (when no ``init_func`` is given)
        and again when the figure is shown, so the number of updates would not
        match ``iters``.

        Args:
            train_input: 1-D array-like of feature values (e.g. a pandas Series).
            train_output: array-like of targets with the same shape as
                ``train_input``.
            learning_rate: step size of each gradient-descent update.
            iters: number of gradient-descent steps to perform.
            animate: when True (default) save the fitting process to
                ``linear_regression_A.gif`` in the working directory and show
                the final figure; when False only train (no plotting backend
                or file output needed).

        Returns:
            Tuple ``(parameters, loss)``: the fitted parameter dict and the
            list of per-iteration losses (length ``iters``).

        Raises:
            ValueError: if the inputs are empty or their shapes differ.
        """
        # Work on plain float arrays so arithmetic is positional (no pandas
        # index alignment) and min/max are vectorised.
        x = np.asarray(train_input, dtype=float)
        y = np.asarray(train_output, dtype=float)
        if x.size == 0:
            raise ValueError("train_input must not be empty")
        if x.shape != y.shape:
            raise ValueError(
                "train_input and train_output must have the same shape, got {} and {}".format(
                    x.shape, y.shape))

        # Initialize random parameters in (-1, 0]
        self.parameters['w'] = np.random.uniform(0, 1) * -1
        self.parameters['b'] = np.random.uniform(0, 1) * -1

        # Reset the loss log for this run
        self.loss = []

        # (w, b) snapshots: the initial guess followed by one entry per step.
        history = [(self.parameters['w'], self.parameters['b'])]

        for step in range(iters):
            predictions = self.forward_propagation(x)
            cost = self.cost_function(predictions, y)
            derivatives = self.backward_propagation(x, y, predictions)
            self.update_parameters(derivatives, learning_rate)

            self.loss.append(cost)
            history.append((self.parameters['w'], self.parameters['b']))
            print("Iteration = {}, Loss = {}, w = {}, b = {}".format(step + 1, cost, self.parameters['w'], self.parameters['b']))

        if animate:
            self._animate_training(x, y, history)

        return self.parameters, self.loss

    def _animate_training(self, x, y, history):
        """Save the recorded ``(w, b)`` history as a GIF and show the final figure."""
        fig, plot_ax = plt.subplots()
        x_vals = np.linspace(np.min(x), np.max(x), 100)

        first_w, first_b = history[0]
        line, = plot_ax.plot(x_vals, first_w * x_vals + first_b,
                             color='red', label='Regression Line')
        plot_ax.scatter(x, y, marker='o', color='green', label='Training Data')

        # Set y-axis limits to exclude negative values
        plot_ax.set_ylim(0, np.max(y) + 1)

        # Decorate before saving so the labels and legend appear in the GIF too.
        plot_ax.set_xlabel('Input')
        plot_ax.set_ylabel('Output')
        plot_ax.set_title('Linear Regression')
        plot_ax.legend()

        def draw_frame(frame):
            # Pure redraw: safe no matter how often matplotlib calls it.
            w, b = history[frame]
            line.set_ydata(w * x_vals + b)
            return line,

        # One frame per snapshot: the initial guess plus every training step.
        ani = FuncAnimation(fig, draw_frame, frames=len(history),
                            interval=200, blit=True)

        # Save as an animated GIF; the Pillow writer needs no external ffmpeg binary.
        ani.save('linear_regression_A.gif', writer='pillow')

        plt.show()

In [None]:
# Example usage: fit the model on the training split and keep the learned
# parameters together with the per-iteration loss values.
linear_reg = LinearRegression()
parameters, loss = linear_reg.train(
    train_input=X_train,
    train_output=y_train,
    learning_rate=0.001,
    iters=80,
)

In [14]:
# Try adjusting the learning rate and number of iterations to see how it affects the loss and the regression line
# Try adjusting the number of epochs to see how it affects the loss and the regression line