# Assignment 1

In this assignment, you will investigate the precision issues in computing the gradient. You will also implement a simple linear regression model using the custom autograd engine.

## Task 1: Precision issues

In [1]:
def f(x):
    return 3 * x ** 2 - 4 * x + 5

def gradient(f, x, h=0.0001):
    return (f(x + h) - f(x)) / h

gradient(f, 2)

8.000300000023941

In [4]:
# What happens if we keep decreasing h?
gradient(f, 2, h=0.0000000000000001)

################################################################################
# TODO:                                                                        #
# Why is the gradient 0?                                                       #
# If you don't know, google it!                                                #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
# ANSWER:

# The gradient is 0 because of floating-point precision errors.
# When h becomes extremely small, the difference (f(x + h) - f(x)) may be too tiny to be accurately represented, causing it to round to zero.
# This leads to the computed gradient being 0 instead of the actual derivative.

# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

0.0

## Task 2: Linear Regression

Let's review the training loop of a simple linear regression model.

### Importing Libraries

In [5]:
import math
import random

In [6]:
class Tensor:
    def __init__(self, data, _children=(), _operation=''):
        self.data = data
        self._prev = set(_children)
        self.gradient = 0
        self._backward = lambda: None

    def __repr__(self):
        return f"tensor=({self.data})"

    def __add__(self, other):  # self + other
        output = Tensor(self.data + other.data, (self, other), '+')
        def _backward():
            self.gradient = 1 * output.gradient
            other.gradient = 1 * output.gradient
        output._backward = _backward
        return output

    def __mul__(self, other):  # self * other
        output = Tensor(self.data * other.data, (self, other), '*')
        def _backward():
            self.gradient = other.data * output.gradient
            other.gradient = self.data * output.gradient
        output._backward = _backward
        return output

    def tanh(self):  # tanh(self)
        output = Tensor(math.tanh(self.data), (self,), 'tanh')
        def _backward():
            self.gradient = (1.0 - math.tanh(self.data) ** 2) * output.gradient
        output._backward = _backward
        return output

    def __pow__(self, power):  # self ** power
        assert isinstance(power, (int, float)), "Power must be an int or a float"
        output = Tensor(self.data ** power, (self,), f'**{power}')
        def _backward():
            self.gradient = power * (self.data ** (power - 1)) * output.gradient
        output._backward = _backward
        return output

    def backward(self):
        topo = []
        visited = set()
        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)
        build_topo(self)
        self.gradient = 1
        for node in reversed(topo):
            node._backward()

    def __neg__(self): # -self
        return self * Tensor(-1.0)

    def __sub__(self, other): # self - other
        return self + (-other)

### Data

**GOAL: Find the best line that fits the following data.**

![Data](https://github.com/qorjiwon/LLM101n/blob/master/assets/linear.png?raw=1)

(Image credit: MIT 18.06)

(1, 1), (2, 2), (3, 2)

In [37]:
# Input, Target data
input = [Tensor(1), Tensor(2), Tensor(3)]
target = [Tensor(1), Tensor(2), Tensor(2)]

### Model

In [95]:
# Linear regression model
class Linear:
    def __init__(self):
        self.a = Tensor(random.uniform(-1, 1))
        self.b = Tensor(random.uniform(-1, 1))

    def __call__(self, x):
        y = self.a * x + self.b
        return y

    def parameters(self):
        return self.a, self.b

In [109]:
# Initialize the model
model = Linear()

# Example forward pass
print(f"Output: {model(input[0])}")

Output: tensor=(-0.14676618555264231)


### Training

Implement the training loop for the linear regression model.

Choose an appropriate learning rate.

In [160]:
lr = 2e-4  # learning rate

# Training loop
for step in range(10):
    total_loss = Tensor(0)

    # Forward pass
    for x, y in zip(input, target):
        ################################################################################
        # TODO:                                                                        #
        # Implement the forward pass.                                                  #
        ################################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        loss = (model(x) - y) ** 2
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        total_loss += loss

    # Backward pass
    ################################################################################
    # TODO:                                                                        #
    # Implement the backward pass.                                                 #
    ################################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    total_loss.backward()
    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    # Update weights
    ################################################################################
    # TODO:                                                                        #
    # Update the weights of the model using the gradients.                         #
    ################################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    for param in model.parameters():
        param.data = param.data - lr * param.gradient
    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    print(f"Step: {step}, Loss: {total_loss.data}")

Step: 0, Loss: 0.1752271399811644
Step: 1, Loss: 0.17528633643702055
Step: 2, Loss: 0.1751106680516117
Step: 3, Loss: 0.17516957943355327
Step: 4, Loss: 0.17522862834526534
Step: 5, Loss: 0.17528781125719772
Step: 6, Loss: 0.17534312449305675
Step: 7, Loss: 0.17540257482020352
Step: 8, Loss: 0.1754621488832028
Step: 9, Loss: 0.1755218433005984


In [161]:
# Print the final weights of the model
print(f"y = {model.a.data}*x + {model.b.data}")

y = 0.4423618540228212*x + 0.7559280547894417


## Extra Credit

Linear regression is the simplest form of neural networks. It actually does not require gradient descent to solve for the weights.

**Find a way to get the weights of the linear regression model without using gradient descent.**

In [162]:
################################################################################
# TODO:                                                                        #
# y = ax + b                                                                   #
# x, y = (1, 1), (2, 2), (3, 2)                                                #
# Find the values of a and b without using gradient descent.                   #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
# Given points
x = [1, 2, 3]
y = [1, 2, 2]

# Calculate the sums
sum_x = sum(x)
sum_y = sum(y)
sum_xy = sum(xi * yi for xi, yi in zip(x, y))
sum_x_squared = sum(xi ** 2 for xi in x)
N = len(x)

# Calculate a and b using the normal equation
a = (N * sum_xy - sum_x * sum_y) / (N * sum_x_squared - sum_x ** 2)
b = (sum_y - a * sum_x) / N
loss = 0
for i in range(3):
  loss += (a*x[i] - b - y[i])**2

# Print the results
print(f"a = {a}")
print(f"b = {b}")
print(f"lost = {loss}")

# a = ??
# b = ??
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

a = 0.5
b = 0.6666666666666666
lost = 5.499999999999998
