In [None]:
# Generate the data: samples of the cubic y = x^3 - 3x + 2

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Inputs: evenly spaced samples starting at -3 in steps of 0.1, stopping before 3.
x_data = np.arange(-3, 3, .1)

# Targets: the cubic x^3 - 3x + 2, built up term by term.
cubic_part = x_data ** 3
linear_part = 3 * x_data
y_numeric = cubic_part - linear_part + 2


In [None]:
# Plot original data

# Extra positional numbers given to plt.plot are read as another (x, y) data
# set, not as axis limits, so the x-range (presumably what the trailing
# "-3, 3" was meant to be) is set explicitly with plt.xlim instead.
plt.plot(x_data, y_numeric)
plt.xlim(-3, 3)
plt.xlabel("x")
plt.ylabel("y")
plt.title("Cubic training data")
plt.show()

# Neural Network 1: Regression, no hidden layer

In [None]:
# Network 1 setup: a single linear unit with no hidden layer, so fitting it
# for regression should reproduce ordinary linear regression.
#
# Output activation: identity (not sigmoid).
# Loss: squared error.

# Inputs as a column matrix (one row per sample, one feature per row),
# paired with the cubic targets.
X_train = np.array(x_data)[:, np.newaxis]
y_train = y_numeric

# Starting parameters: a 1x1 weight matrix and a length-1 bias vector.
W, b = np.array([[.1]]), np.array([.2])

def relu(z):
    """Rectified linear unit: element-wise maximum of 0 and ``z``."""
    floor = 0
    rectified = np.maximum(floor, z)
    return rectified

def relu_deriv(z):
    """Derivative of ReLU with respect to its input.

    Uses the convention that the derivative at exactly 0 is 1.

    Parameters
    ----------
    z : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    int or numpy.ndarray
        A plain ``int`` (1 or 0) for scalar input, otherwise an integer
        array of the same shape as ``z``.
    """
    # A bare ``if z >= 0`` raises "truth value of an array is ambiguous" as
    # soon as z holds more than one element, so compare element-wise instead.
    deriv = (np.asarray(z) >= 0).astype(int)
    # Keep returning a plain int for scalar input, as before.
    return int(deriv) if deriv.ndim == 0 else deriv

def compute_loss(a, y):
    """Half squared error between a prediction ``a`` and a target ``y``."""
    residual = a - y
    return .5 * residual ** 2

def compute_cost(X_data, y_data, W, b):
    """Mean per-sample loss of the no-hidden-layer model over a data set.

    Parameters
    ----------
    X_data : numpy.ndarray
        Inputs, one sample per row.
    y_data : sequence
        Targets, one per row of ``X_data``.
    W, b
        Model parameters, passed straight through to ``run_model``.

    Returns
    -------
    The average of ``compute_loss(run_model(x_i, W, b), y_i)`` over all
    samples (same array shape as a single ``run_model`` output).
    """
    m = X_data.shape[0]  # number of data points

    total_loss = 0
    for i in range(m):
        y_hat = run_model(X_data[i], W, b)
        total_loss += compute_loss(y_hat, y_data[i])

    # compute_loss already carries the 1/2 of the squared error, so the cost
    # is the plain mean; dividing by 2m here would halve it a second time and
    # no longer match the gradient given by loss_deriv.
    return total_loss / m

def run_model(x, W, b):
    """Forward pass of the linear model: ``x`` times ``W`` plus the bias ``b``."""
    weighted_input = np.matmul(x, W)
    return weighted_input + b

def loss_deriv(a, y):
    """Difference between the prediction ``a`` and the target ``y``."""
    error = a - y
    return error

def backward_prop(x, y, W, b):
    """Gradient of the half-squared-error loss for one training sample.

    The model is the single linear unit ``a = x @ W + b`` with one input
    feature, and the loss is ``0.5 * (a - y) ** 2``.

    Parameters
    ----------
    x : array-like, shape (1,)
        Feature vector of one sample (only the first feature is used).
    y : float
        Target for that sample.
    W : numpy.ndarray, shape (1, 1)
        Weight matrix.
    b : numpy.ndarray, shape (1,)
        Bias vector.

    Returns
    -------
    (dL_dw0, dL_db) : tuple of float
        Partial derivatives of the loss with respect to ``W[0][0]`` and
        ``b[0]``.
    """
    x = np.asarray(x, dtype=float).ravel()

    # Forward pass (same computation as run_model).
    a = np.ravel(x @ W + b)[0]

    # dL/da for L = 0.5 * (a - y)^2.
    delta = float(a - y)

    # Chain rule: da/dw0 = x0 and da/db = 1.
    dL_dw0 = delta * float(x[0])
    dL_db = delta
    return dL_dw0, dL_db

In [None]:
# Sanity check: forward pass on the first training sample with the current W and b.
run_model(X_train[0], W, b)

In [None]:
# Sanity check: cost over the whole training set with the current W and b.
compute_cost(X_train, y_train, W, b)

In [None]:
# Sanity check: run backward_prop on the first training sample and its target.
backward_prop(X_train[0], y_train[0], W, b)

In [None]:
# gradient descent:

# Re-initialise the parameters so re-running this cell restarts training.
W = np.array([[-.1]])
b = np.array([.2])

# Step size applied to the *mean* gradient over the training set.  This is
# the same effective step as the earlier .0006 applied to the gradient
# *summed* over the 60 samples (.0006 * 60 = .036).
ALPHA = .036
N_ITERATIONS = 6000

J_sequence = []  # cost after every parameter update, for the convergence plot

m = X_train.shape[0]  # number of training samples

for ctr in range(N_ITERATIONS):

    # Accumulate the per-sample gradients over the whole training set.
    grad_w0 = 0.0
    grad_b = 0.0
    for i in range(m):
        dL_dw0, dL_db = backward_prop(X_train[i], y_train[i], W, b)
        grad_w0 += dL_dw0
        grad_b += dL_db

    # Average over the number of samples (shape[0]); shape[1] is the number
    # of features and is not the right normaliser for a batch gradient.
    W[0, 0] -= ALPHA * grad_w0 / m
    b[0] -= ALPHA * grad_b / m

    J_sequence.append(compute_cost(X_train, y_train, W, b))

print("Final W:", W, b)
print("Final cost:", compute_cost(X_train, y_train, W, b))

In [None]:
# Let's plot the cost as a function of number of iterations of the
# gradient descent algorithm.

# np.ravel flattens the recorded costs (each may be a length-1 array) into a
# plain 1-D sequence that lines up with the iteration index.
plt.scatter(range(0, len(J_sequence)), np.ravel(J_sequence))
plt.xlabel("Gradient descent iteration")
plt.ylabel("Cost")
plt.title("Network 1: cost during training")
plt.show()

In [None]:
# Recreate original plot, with the fitted model drawn on top

# One forward pass per sample; each input is wrapped as a length-1 feature
# vector, and the length-1 outputs are flattened to one value per sample.
y_predicted = np.ravel([run_model(np.array([x]), W, b) for x in x_data])

# Extra positional numbers given to plt.plot are read as another (x, y) data
# set, not as axis limits, so the x-range is set with plt.xlim instead.
plt.plot(x_data, y_numeric, label="cubic data")
plt.plot(x_data, y_predicted, label="network 1 prediction")
plt.xlim(-3, 3)
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.show()

In [None]:
# Verify this matches linear regression

# Download and install scikit-learn if not already done:
%pip install scikit-learn

# Import the linear regression functionality from scikit-learn
from sklearn.linear_model import LinearRegression

# Create a linear regression model (with an intercept term) and train it on our training data:
model = LinearRegression(fit_intercept=True).fit(X_train, y_train)

# model.coef_ contains the w vector that this linear regression model was able to find;
# model.intercept_ is its fitted intercept, to compare against b from gradient descent.

print("w found through scikit-learn:", model.coef_, model.intercept_)

# Neural Network 2, 1 hidden layer with 2 neurons, regression



In [None]:
# Neural network 2: one hidden layer with 2 neurons, regression

# Hidden activation is ReLU; output activation is the identity function (not sigmoid)
# Loss function is squared error

X_train = np.array(x_data).reshape((-1, 1))
y_train = y_numeric

# Initial parameters.  Shapes: W1 (1 input, 2 hidden), b1 (2,),
# W2 (2 hidden, 1 output), b2 (1,).  The two hidden units start with
# different weights so that they do not receive identical gradients and
# stay copies of each other during training.
W1 = np.array([[.5, -.5]])
W2 = np.array([[.5], [-.5]])
b1 = np.array([.1, .1])
b2 = np.array([.2])

def relu(z):
    """ReLU activation: the element-wise larger of 0 and ``z``."""
    lower_bound = 0
    activated = np.maximum(lower_bound, z)
    return activated

def relu_deriv(z):
    """Derivative of ReLU with respect to its input.

    Uses the convention that the derivative at exactly 0 is 1.

    Parameters
    ----------
    z : scalar or array-like
        Pre-activation value(s), e.g. the hidden layer's weighted inputs.

    Returns
    -------
    int or numpy.ndarray
        A plain ``int`` (1 or 0) for scalar input, otherwise an integer
        array of the same shape as ``z``.
    """
    # The hidden layer has more than one unit, so z is generally an array
    # and ``if z >= 0`` would raise "truth value of an array is ambiguous";
    # compare element-wise instead.
    deriv = (np.asarray(z) >= 0).astype(int)
    # Keep returning a plain int for scalar input, as before.
    return int(deriv) if deriv.ndim == 0 else deriv

def compute_loss(a, y):
    """Half of the squared difference between prediction ``a`` and target ``y``."""
    diff = a - y
    squared = diff ** 2
    return .5 * squared

def compute_cost(X_data, y_data, W1, b1, W2, b2):
    """Mean per-sample loss of the one-hidden-layer model over a data set.

    Parameters
    ----------
    X_data : numpy.ndarray
        Inputs, one sample per row.
    y_data : sequence
        Targets, one per row of ``X_data``.
    W1, b1, W2, b2
        Model parameters, passed straight through to ``run_model``.

    Returns
    -------
    The average of ``compute_loss(run_model(x_i, ...), y_i)`` over all
    samples (same array shape as a single ``run_model`` output).
    """
    m = X_data.shape[0]  # number of data points

    total_loss = 0
    for i in range(m):
        y_hat = run_model(X_data[i], W1, b1, W2, b2)
        total_loss += compute_loss(y_hat, y_data[i])

    # compute_loss already carries the 1/2 of the squared error, so the cost
    # is the plain mean; dividing by 2m here would halve it a second time and
    # no longer match the gradient given by loss_deriv.
    return total_loss / m

def run_model(x, W1, b1, W2, b2):
    """Forward pass of the one-hidden-layer regression network.

    Parameters
    ----------
    x : array-like, shape (n_in,) or (n_samples, n_in)
        Input feature vector(s).
    W1 : numpy.ndarray, shape (n_in, n_hidden)
        Input-to-hidden weights.
    b1 : numpy.ndarray, shape (n_hidden,)
        Hidden-layer biases.
    W2 : numpy.ndarray, shape (n_hidden, 1)
        Hidden-to-output weights.
    b2 : numpy.ndarray, shape (1,)
        Output bias.

    Returns
    -------
    numpy.ndarray
        The prediction(s): shape (1,) for a single sample, (n_samples, 1)
        for a batch.
    """
    # Hidden layer: affine map followed by ReLU (np.maximum(0, z)).
    hidden = np.maximum(0, np.asarray(x) @ W1 + b1)
    # Output layer: affine map with identity activation (regression).
    return hidden @ W2 + b2

def loss_deriv(a, y):
    """Prediction ``a`` minus target ``y``."""
    gap = a - y
    return gap

def backward_prop(x, y, W1, b1, W2, b2):
    """Backpropagation for one sample through the one-hidden-layer network.

    The network is ``y_hat = relu(x @ W1 + b1) @ W2 + b2`` and the loss is
    ``0.5 * (y_hat - y) ** 2``.  The ReLU derivative at exactly 0 is taken
    to be 1.

    Parameters
    ----------
    x : array-like, shape (n_in,)
        Feature vector of one sample.
    y : float
        Target for that sample.
    W1 : numpy.ndarray, shape (n_in, n_hidden)
    b1 : numpy.ndarray, shape (n_hidden,)
    W2 : numpy.ndarray, shape (n_hidden, 1)
    b2 : numpy.ndarray, shape (1,)

    Returns
    -------
    (dL_dW1, dL_db1, dL_dW2, dL_db2) : tuple of numpy.ndarray
        Gradients of the loss, each with the same shape as the parameter it
        belongs to.
    """
    x = np.asarray(x, dtype=float).ravel()

    # Forward pass, keeping the intermediate values needed below.
    z1 = x @ W1 + b1            # hidden pre-activations, shape (n_hidden,)
    a1 = np.maximum(0, z1)      # hidden activations (ReLU)
    y_hat = a1 @ W2 + b2        # network output, shape (1,)

    # Output layer: identity activation, so dL/dz2 = y_hat - y.
    delta2 = y_hat - y
    dL_dW2 = np.outer(a1, delta2)
    dL_db2 = delta2

    # Hidden layer: push the error back through W2, then gate it by the
    # ReLU derivative (1 where z1 >= 0, else 0).
    delta1 = (W2 @ delta2) * (z1 >= 0)
    dL_dW1 = np.outer(x, delta1)
    dL_db1 = delta1

    return dL_dW1, dL_db1, dL_dW2, dL_db2

In [None]:
# Sanity check: forward pass on the first training sample with the current parameters.
run_model(X_train[0], W1, b1, W2, b2)

In [None]:
# Sanity check: cost over the whole training set with the current parameters.
compute_cost(X_train, y_train, W1, b1, W2, b2)

In [None]:
# Sanity check: run backward_prop on the first training sample and its target.
backward_prop(X_train[0], y_train[0], W1, b1, W2, b2)

In [None]:
# gradient descent:

# Re-initialise the parameters so re-running this cell restarts training.
# Shapes: W1 (1 input, 2 hidden), b1 (2,), W2 (2 hidden, 1 output), b2 (1,).
# The two hidden units start with different weights so they do not receive
# identical gradients.
W1 = np.array([[.5, -.5]])
b1 = np.array([.1, .1])
W2 = np.array([[.5], [-.5]])
b2 = np.array([.2])

# Step size applied to the *mean* gradient over the training set (the same
# effective step as .0006 on the gradient summed over the 60 samples).
# NOTE(review): not tuned for this network - lower it if the cost curve
# oscillates or blows up.
ALPHA = .036
N_ITERATIONS = 6000

J_sequence = []  # cost after every parameter update, for the convergence plot

m = X_train.shape[0]  # number of training samples

for ctr in range(N_ITERATIONS):

    # Accumulate the per-sample gradients over the whole training set.
    grad_W1 = np.zeros_like(W1)
    grad_b1 = np.zeros_like(b1)
    grad_W2 = np.zeros_like(W2)
    grad_b2 = np.zeros_like(b2)
    for i in range(m):
        # backward_prop is expected to return one gradient per parameter,
        # in the order (dL/dW1, dL/db1, dL/dW2, dL/db2).
        dL_dW1, dL_db1, dL_dW2, dL_db2 = backward_prop(
            X_train[i], y_train[i], W1, b1, W2, b2
        )
        grad_W1 += dL_dW1
        grad_b1 += dL_db1
        grad_W2 += dL_dW2
        grad_b2 += dL_db2

    # Step every parameter of the network along its averaged gradient.
    W1 -= ALPHA * grad_W1 / m
    b1 -= ALPHA * grad_b1 / m
    W2 -= ALPHA * grad_W2 / m
    b2 -= ALPHA * grad_b2 / m

    J_sequence.append(compute_cost(X_train, y_train, W1, b1, W2, b2))

print("Final W:", W1, b1, W2, b2)
print("Final cost:", compute_cost(X_train, y_train, W1, b1, W2, b2))

In [None]:
# Let's plot the cost as a function of number of iterations of the
# gradient descent algorithm.

# np.ravel flattens the recorded costs (each may be a length-1 array) into a
# plain 1-D sequence that lines up with the iteration index.
plt.scatter(range(0, len(J_sequence)), np.ravel(J_sequence))
plt.xlabel("Gradient descent iteration")
plt.ylabel("Cost")
plt.title("Network 2: cost during training")
plt.show()

In [None]:
# Recreate original plot, with the fitted network drawn on top
print(W1, b1, W2, b2)

# One forward pass per sample through run_model.  Each input is a length-1
# feature vector (the model has a single input and separate bias terms, so
# no constant 1 is prepended); the length-1 outputs are flattened to one
# value per sample.
y_predicted = np.ravel(
    [run_model(np.array([x]), W1, b1, W2, b2) for x in x_data]
)

# Extra positional numbers given to plt.plot are read as another (x, y) data
# set, not as axis limits, so the x-range is set with plt.xlim instead.
plt.plot(x_data, y_numeric, label="cubic data")
plt.plot(x_data, y_predicted, label="network 2 prediction")
plt.xlim(-3, 3)
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.show()