# Project: SimplesML
Given 
- Equation: $ax^2 + bx + c = y$
- Dataset: $x_i, y_i$ in dataset_v1.json

Find
- $a, b, c$ that fit the equation

Solution
- $a = 2, b = 4, c = 5$

## Libraries

In [46]:
import numpy as np
import json
import itertools
import random
import time

## Variables and data

In [47]:
def equation(x, a, b, c):
    """Evaluate the quadratic a*x^2 + b*x + c at x."""
    quadratic_term = a * x ** 2
    linear_term = b * x
    return quadratic_term + linear_term + c
def solution_equation(x):
    """Evaluate the target quadratic 2*x^2 + 4*x + 5 that training should recover."""
    quadratic_term = 2 * x ** 2
    linear_term = 4 * x
    return quadratic_term + linear_term + 5

In [48]:
# Load the data: a JSON object whose keys are x (stored as strings) and whose
# values are the matching y. The context manager closes the file promptly.
with open('dataset_v1.json') as dataset_file:
    dataset = json.load(dataset_file)

# Show the first `num_obs` samples next to the value produced by the known
# solution. islice stops after exactly num_obs items and simply yields fewer
# if the dataset is smaller.
num_obs = 10
for x, y in itertools.islice(dataset.items(), num_obs):
    y_solution = solution_equation(int(x))
    print("x {} || y  {} || {}  y_solution".format(x, y, y_solution))

x -654 || y  852821 || 852821  y_solution
x -2832 || y  16029125 || 16029125  y_solution
x 2702 || y  14612421 || 14612421  y_solution
x 4494 || y  40410053 || 40410053  y_solution
x 101 || y  20811 || 20811  y_solution
x 2015 || y  8128515 || 8128515  y_solution
x 4892 || y  47882901 || 47882901  y_solution
x -775 || y  1198155 || 1198155  y_solution
x -3660 || y  26776565 || 26776565  y_solution
x -334 || y  221781 || 221781  y_solution
x -3370 || y  22700325 || 22700325  y_solution


In [49]:
# Randomly initialize the parameters
def get_random_parameters():
    """Draw the initial (a, b, c), each independently uniform on [-10, 10)."""
    # Three successive scalar draws, in the order a, b, c.
    return tuple(np.random.uniform(low=-10, high=10) for _ in range(3))

## Loss function

$loss = ((ax^2 + bx + c) - y_t)^2$

$grad_a = 2(ax^2 + bx + c - y_t) * x^2$

$grad_b = 2(ax^2 + bx + c - y_t) * x$

$grad_c = 2(ax^2 + bx + c - y_t)$

In [50]:
# Loss function and its gradient with respect to the parameters
def loss_function(y_true, y_pred):
    """Return the squared error between a prediction and its target.

    y_true is the value taken from the dataset; y_pred is the value the
    model produced with the current parameters.
    """
    error = y_pred - y_true
    return error ** 2

def gradient_loss_function(y_true, y_pred, x):
    """Return (grad_a, grad_b, grad_c) of the squared error for a*x^2 + b*x + c.

    The derivative is written out by hand via the chain rule: the common
    factor d(loss)/d(y_pred) = 2 * (y_pred - y_true) is multiplied by
    d(y_pred)/d(parameter), which is x^2 for a, x for b and 1 for c.
    """
    d_loss_d_pred = 2 * (y_pred - y_true)
    return d_loss_d_pred * (x ** 2), d_loss_d_pred * (x), d_loss_d_pred

## Try to find the best parameters

In [53]:
# Stochastic gradient descent on the quadratic model, one sample per step.
# Start from random parameters
a, b, c = get_random_parameters()
# Learning rate
lr = 1e-3
# Upper bound of the step counter; range(1, steps) below performs steps - 1 updates
steps = 100000
# Endless iterator that keeps cycling over the (x, y) pairs of the dataset
data_generator = itertools.cycle(dataset.items())
# Normalisation constants: divide by the largest x and the largest y.
# The dataset contains negative x values, so scaled x is not confined to
# [0, 1]; x_max is the largest x, not the largest |x|.
x_max = max([float(x) for x in dataset.keys()])
y_max = max([float(y) for y in dataset.values()])
for step in range(1,steps):
    # Get the next sample; keys and values are converted to float
    x, y_true = next(data_generator)
    x, y_true = float(x), float(y_true)
    x_scaled = x / x_max
    y_true_scaled = y_true / y_max
    # Predict in the scaled space with the current parameters
    y_predict = equation(x_scaled, a, b, c)
    # Squared error of this single sample (used for logging only)
    loss = loss_function(y_true_scaled, y_predict)
    # Gradient of the loss with respect to a, b and c
    grad_a, grad_b, grad_c = gradient_loss_function(y_true_scaled, y_predict, x_scaled)
    # Gradient-descent update. a, b, c are fitted to the scaled data, so they
    # are coefficients of the scaled problem, not the original a, b, c.
    a = a - (lr * grad_a)
    b = b - (lr * grad_b)
    c = c - (lr * grad_c)
    # Log every 1000 steps, then pause for one second
    if step % 1000 == 0:
        print("Step {} | Loss {:.15f} | a {:.10f} | b {:.10f} | c {:.10f}".format(step, loss, a, b, c))
        time.sleep(1)

Step 1000 | Loss 3.368866032265830 | a 4.4763760236 | b 0.9729171043 | c 0.0050826952
Step 2000 | Loss 0.173027542705219 | a 3.6664069301 | b 0.5273870280 | c -0.8546877752
Step 3000 | Loss 0.024330083009278 | a 3.2478414234 | b 0.2883283514 | c -0.8215657023
Step 4000 | Loss 0.005262836759447 | a 2.9176623019 | b 0.1630291699 | c -0.7119643310
Step 5000 | Loss 0.001252000149521 | a 2.6377805559 | b 0.0968579261 | c -0.6095227299
Step 6000 | Loss 0.000294295643783 | a 2.3985609448 | b 0.0611914872 | c -0.5209545212
Step 7000 | Loss 0.000064871729325 | a 2.1940118052 | b 0.0413407254 | c -0.4451182124
Step 8000 | Loss 0.000012450640131 | a 2.0191631798 | b 0.0297838384 | c -0.3802833082
Step 9000 | Loss 0.000001716386064 | a 1.8697407215 | b 0.0226581293 | c -0.3248760155
Step 10000 | Loss 0.000000063766672 | a 1.7420674822 | b 0.0179679651 | c -0.2775337350
Step 11000 | Loss 0.000000050639282 | a 1.6329882742 | b 0.0146718777 | c -0.2370864209
Step 12000 | Loss 0.000000173376892 | a 1.

## Inference

In [54]:
def inference_equation(x, a, b, c, x_max, y_max):
    """Predict y for a raw (unscaled) x using parameters fitted on scaled data.

    Training divided x by x_max and y by y_max, so the same scaling is
    applied to the input here and the model output is multiplied by y_max
    to return to the original units.
    """
    return equation(x / x_max, a, b, c) * y_max

In [58]:
# Pick a random positive integer x no larger than the biggest x in the dataset.
# x_max is a float (it is built with float(...)), and random.randint needs
# integer bounds: non-integer arguments raise TypeError on Python 3.12+,
# hence the explicit int() conversion.
x = random.randint(1, int(x_max))
# Compare the known solution with the prediction of the fitted model
y_solution = solution_equation(x)
y_predict = inference_equation(x, a, b, c, x_max, y_max)
y_solution, y_predict

(8963381, 8963376.283888731)

# Project: A little bit more complex
Given 
- Equation: 

$a_1x_1 + a_2x_2 + b_1 = y_1$ 

$a_3x_1 + a_4x_2 + b_2 = y_2$

$y = (y_1 + y_2) / 2$
- Dataset: $(x_{1i}, x_{2i}), y_i$ in dataset_v2.json

Find
- $a_1, a_2, b_1$
- $a_3, a_4, b_2$ that fit the equation

Solution
- $a_1 = 2, a_2 = 4, b_1 = 9$
- $a_3 = 4, a_4 = 3, b_2 = 5$

In [59]:
import numpy as np
import json
import itertools
import random
import time

## Variables and data

In [60]:
def equation(inputs, weights, bias):
    """Return y = (y1 + y2) / 2 for the two-output linear model.

    Parameter layout:
        weights = [[a1, a2],     bias = [[b1],
                   [a3, a4]]             [b2]]
    so that y1 = a1*x1 + a2*x2 + b1 and y2 = a3*x1 + a4*x2 + b2.
    Each ROW of `weights` holds the coefficients of one output.

    inputs may be a single sample (x1, x2) or a batch of shape (n, 2); the
    result is a scalar or an array of n values respectively.
    """
    # inputs @ weights.T contracts x with each row of weights (W x). The
    # previous inputs @ weights contracted with the columns instead, i.e. it
    # evaluated the transposed parameter matrix.
    # The bias is flattened so it is added per output; a (2, 1) bias would
    # otherwise broadcast the sum to a (2, 2) matrix.
    outputs = np.dot(inputs, np.transpose(weights)) + np.ravel(bias)
    # Average the two outputs of each sample
    return np.mean(outputs, axis=-1)
def solution_equation(inputs):
    """Evaluate the target model on a pair (x1, x2) and return (y1 + y2) / 2.

    The reference coefficients are y1 = 2*x1 + 4*x2 + 9 and
    y2 = 4*x1 + 3*x2 + 5.
    """
    first_input, second_input = inputs
    first_output = 2*first_input + 4*second_input + 9
    second_output = 4*first_input + 3*second_input + 5
    return (first_output + second_output) / 2

In [117]:
# Load the data: a JSON object with parallel lists under the keys 'x' and 'y'.
# The context manager closes the file promptly.
with open('dataset_v2.json') as dataset_file:
    dataset = json.load(dataset_file)
x_set, y_set = dataset['x'], dataset['y']
# Show the first `num_obs` samples next to the value produced by the known
# solution. islice over zip stops early instead of raising IndexError when
# the dataset holds fewer than num_obs samples.
num_obs = 10
for x, y in itertools.islice(zip(x_set, y_set), num_obs):
    y_solution = solution_equation(x)
    print("x {} || y  {} || {}  y_solution".format(x, y, y_solution))

x [-2418, -2318] || y  -15360.0 || -15360.0  y_solution
x [-1990, -1890] || y  -12578.0 || -12578.0  y_solution
x [-2086, -1986] || y  -13202.0 || -13202.0  y_solution
x [-2059, -1959] || y  -13026.5 || -13026.5  y_solution
x [-4095, -3995] || y  -26260.5 || -26260.5  y_solution
x [1755, 1855] || y  11764.5 || 11764.5  y_solution
x [-2579, -2479] || y  -16406.5 || -16406.5  y_solution
x [3623, 3723] || y  23906.5 || 23906.5  y_solution
x [97, 197] || y  987.5 || 987.5  y_solution
x [-4441, -4341] || y  -28509.5 || -28509.5  y_solution


In [118]:
# Randomly initialize the parameters
def get_random_parameters():
    """Draw initial parameters uniformly from [0, 1).

    Returns a (2, 2) weight matrix followed by a (2, 1) bias column; the
    weights are drawn first.
    """
    return np.random.rand(2, 2), np.random.rand(2, 1)

## Loss function

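$loss = (y - y_t)^2$, where $y = (y_1 + y_2) / 2$

$grad_{a_1} = grad_{a_3} = 2(y - y_t) * x_1$

$grad_{a_2} = grad_{a_4} = 2(y - y_t) * x_2$

$grad_{b_1} = grad_{b_2} = 2(y - y_t)$

(The exact derivatives carry an extra factor $1/2$ from the averaging in $y$; the code drops this constant, which only rescales the learning rate.)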

In [119]:
# Loss function and its gradient with respect to the parameters
def loss_function(y_true, y_pred):
    """Return the squared error between a prediction and its target.

    y_true is the value taken from the dataset; y_pred is the value the
    model produced with the current parameters.
    """
    error = y_pred - y_true
    return error ** 2

def gradient_loss_function(y_true, y_pred, x):
    """Return (grads_weights, grads_bias) of the squared error for the linear model.

    The parameters are laid out as
        [a1 a2] [b1]
        [a3 a4] [b2]
    and each entry gets its own derivative through the chain rule:
        loss = (y_pred - y_true) ** 2
        d(loss)/d(y_pred) = 2 * (y_pred - y_true)
        d(loss)/d(a1) = d(loss)/d(y_pred) * d(y_pred)/d(a1)
                      = 2 * (y_pred - y_true) * x1
    grads_weights has shape (2, 2) with both rows equal to
    [g * x[0], g * x[1]], and grads_bias has shape (2, 1) with both entries
    equal to g, where g = 2 * (y_pred - y_true).

    NOTE(review): d(y_pred)/d(a1) is taken as x1 here. If y_pred is the mean
    of the two outputs, the exact derivative carries an extra factor 1/2;
    presumably that constant is left to the learning rate - confirm.
    """
    d_loss_d_pred = 2 * (y_pred - y_true)

    # a1 and a3 both multiply x[0]; a2 and a4 both multiply x[1], so the two
    # rows of the weight gradient are the same.
    weight_row = [d_loss_d_pred * x[0], d_loss_d_pred * x[1]]
    grads_weights = np.array([weight_row, weight_row])

    # b1 and b2 enter the prediction in the same way
    grads_bias = np.array([[d_loss_d_pred], [d_loss_d_pred]])
    return grads_weights, grads_bias

## Try to find the best parameters

In [140]:
# Stochastic gradient descent on the two-output linear model, one sample per step.
# Start from random parameters
weights, bias = get_random_parameters()
# Learning rate
lr = 1e-3
# Upper bound of the step counter; range(1, steps) below performs steps - 1 updates
steps = 100000
# Endless iterator that keeps cycling over the (x, y) samples
data_generator = itertools.cycle(zip(x_set, y_set))
# Normalisation constants: the largest entry of x_set (np.max without an
# axis reduces over every element) and the largest y. The data contains
# negative values, so the scaled values are not confined to [0, 1].
x_max = np.max(x_set)
y_max = np.max(y_set)
for step in range(1,steps):
    # Get the next sample
    x, y_true = next(data_generator)
    # Convert the sample to numpy arrays
    x, y_true = np.array(x), np.array(y_true)
    # Divide by the normalisation constants computed above
    x_scaled = x / x_max
    y_true_scaled = y_true / y_max
    # Predict in the scaled space with the current parameters
    y_predict = equation(x_scaled, weights, bias)
    # Squared error of this single sample (used for logging only)
    loss = loss_function(y_true_scaled, y_predict)
    # Gradient of the loss with respect to the weights and the bias
    grad_weights, grads_bias = gradient_loss_function(y_true_scaled, y_predict, x_scaled)
    # Gradient-descent update
    weights = weights - (lr * grad_weights)
    bias = bias - (lr * grads_bias)
    # Periodic logging is disabled: the print below sits outside the loop, so
    # only the state after the last step is reported.
print("Step {} | Loss {:.15f} | a_0 {:.10f} | b_0 {:.10f}".format(step, loss, weights[0][0], bias[0][0]))

Step 99999 | Loss 0.000000000000000 | a_0 0.0135797524 | b_0 0.1946643906


## Inference

In [141]:
def inference_equation(inputs, weights, bias, x_max, y_max):
    """Predict y for a raw (unscaled) input using parameters fitted on scaled data.

    Training divided the inputs by x_max and the targets by y_max, so the
    same scaling is applied to `inputs` here and the model output is
    multiplied by y_max to return to the original units.

    inputs may be a numpy array or a plain sequence such as [x1, x2].
    """
    # Scale the argument itself. The previous code read the global `x`
    # instead of `inputs`, so the result did not depend on what was passed in.
    x_scaled = np.asarray(inputs) / x_max
    # Evaluate the model in the scaled space
    y_scaled = equation(x_scaled, weights, bias)
    # Undo the target scaling
    return y_scaled * y_max

In [150]:
# Take the next sample from the cycling generator and turn it into numpy arrays
x, y_true = (np.array(item) for item in next(data_generator))
# Reference value from the known solution
y_solution = solution_equation(x)
# Prediction of the fitted model, mapped back to the original units
y_predict = inference_equation(x, weights, bias, x_max, y_max)
y_solution, float(y_true), y_predict

(23906.5, 23906.5, 23906.500000000007)