# 

# Gradient descent with minibatches

# Libraries and helper functions

In [1]:
import random
from typing import TypeVar, List, Iterator

In [2]:
# Type alias: a vector is represented as a plain list of floats.
Vector = List[float]

In [3]:
def add(vector1: Vector, vector2: Vector) -> Vector:
    """Return the element-wise sum of two vectors.

    Both vectors must have the same length; an AssertionError is raised
    otherwise.
    """
    assert len(vector1) == len(vector2)

    totals = []
    for left, right in zip(vector1, vector2):
        totals.append(left + right)
    return totals

In [4]:
def vector_sum(vectors: List[Vector]) -> Vector:
    """Return the element-wise sum of a non-empty list of vectors.

    Raises AssertionError when `vectors` is empty or when the vectors do
    not all share the length of the first one.
    """
    assert vectors

    width = len(vectors[0])
    assert all(len(candidate) == width for candidate in vectors)

    # Start from a zero vector and fold every input into it in order.
    running_total = [0] * width
    for current in vectors:
        running_total = add(running_total, current)
    return running_total

In [5]:
def scalar_multiply(c: float, vector: Vector) -> Vector:
    """Return a new vector with every component of `vector` multiplied by `c`."""
    scaled = []
    for component in vector:
        scaled.append(c * component)
    return scaled

In [6]:
def vector_mean(vectors: List[Vector]) -> Vector:
    """Return the element-wise mean of a list of vectors.

    The weight 1/len(vectors) is computed before the vectors are summed,
    so an empty list raises ZeroDivisionError.
    """
    weight = 1 / len(vectors)
    totals = vector_sum(vectors)
    return scalar_multiply(weight, totals)

In [7]:
def gradient_step(v: Vector, gradient: Vector, step_size: float) -> Vector:
    """Return `v` moved by `step_size` times `gradient`.

    The scaled gradient is added to `v`, so pass a negative `step_size`
    to move against the gradient.
    """
    return add(v, scalar_multiply(step_size, gradient))


In [8]:
def linear_gradient(x: float, y: float, theta: Vector) -> Vector:
    """Return the gradient of the squared error for a single point.

    `theta` is unpacked as [slope, intercept]. The model predicts
    slope * x + intercept, and the result is the derivative of
    (prediction - y) ** 2 with respect to [slope, intercept].
    """
    slope, intercept = theta
    residual = slope * x + intercept - y
    doubled_residual = 2 * residual
    return [doubled_residual * x, doubled_residual]

## Minibatch gradient

In [9]:
# Generic element type, used to annotate minibatches for datasets of any element type.
T = TypeVar('T')

In [10]:
def minibatches(dataset: List[T], batch_size: int, shuffle: bool = True) -> Iterator[List[T]]:
    """Yield consecutive slices of `dataset` containing `batch_size` items each.

    The final batch is shorter when len(dataset) is not a multiple of
    `batch_size`. When `shuffle` is true the order in which the batches
    are yielded is randomized; the items inside each batch keep their
    dataset order.

    Raises ValueError (on first iteration) if `batch_size` is not positive.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    batch_starts = list(range(0, len(dataset), batch_size))

    if shuffle:
        # Only the start offsets are shuffled, so the dataset itself is untouched.
        random.shuffle(batch_starts)

    for start in batch_starts:
        yield dataset[start:start + batch_size]

In [11]:
# Synthetic, noise-free dataset: (x, y) pairs on the line y = 20x + 5 for x in -50..49.
inputs = [(x, 20 * x + 5) for x in range(-50, 50)]
inputs[:10]  # preview the first ten points

[(-50, -995),
 (-49, -975),
 (-48, -955),
 (-47, -935),
 (-46, -915),
 (-45, -895),
 (-44, -875),
 (-43, -855),
 (-42, -835),
 (-41, -815)]

In [12]:
# With shuffle=False the batches are printed in dataset order, five points each.
for batch in minibatches(inputs, batch_size=5, shuffle=False):
    print(batch)

[(-50, -995), (-49, -975), (-48, -955), (-47, -935), (-46, -915)]
[(-45, -895), (-44, -875), (-43, -855), (-42, -835), (-41, -815)]
[(-40, -795), (-39, -775), (-38, -755), (-37, -735), (-36, -715)]
[(-35, -695), (-34, -675), (-33, -655), (-32, -635), (-31, -615)]
[(-30, -595), (-29, -575), (-28, -555), (-27, -535), (-26, -515)]
[(-25, -495), (-24, -475), (-23, -455), (-22, -435), (-21, -415)]
[(-20, -395), (-19, -375), (-18, -355), (-17, -335), (-16, -315)]
[(-15, -295), (-14, -275), (-13, -255), (-12, -235), (-11, -215)]
[(-10, -195), (-9, -175), (-8, -155), (-7, -135), (-6, -115)]
[(-5, -95), (-4, -75), (-3, -55), (-2, -35), (-1, -15)]
[(0, 5), (1, 25), (2, 45), (3, 65), (4, 85)]
[(5, 105), (6, 125), (7, 145), (8, 165), (9, 185)]
[(10, 205), (11, 225), (12, 245), (13, 265), (14, 285)]
[(15, 305), (16, 325), (17, 345), (18, 365), (19, 385)]
[(20, 405), (21, 425), (22, 445), (23, 465), (24, 485)]
[(25, 505), (26, 525), (27, 545), (28, 565), (29, 585)]
[(30, 605), (31, 625), (32, 645), 

In [13]:
# Loop variable left over from the loop above: the last minibatch it yielded.
batch

[(45, 905), (46, 925), (47, 945), (48, 965), (49, 985)]

In [14]:
# Random starting guess for [slope, intercept], each drawn uniformly between -1 and 1.
theta = [random.uniform(-1, 1), random.uniform(-1, 1)]
# Mean of the per-point gradients over `batch` at this starting guess.
vector_mean([linear_gradient(x, y, theta) for x, y in batch])

[-93150.48112529561, -1980.142091882065]

In [15]:
# Fit [slope, intercept] to points on y = 20x + 5 using minibatch gradient descent.
inputs = [(x, 20 * x + 5) for x in range(-50, 50)]
# Random starting guess for [slope, intercept].
theta = [random.uniform(-1, 1), random.uniform(-1, 1)]
learning_rate = 0.001

for epoch in range(1000):
    # minibatches shuffles by default, so batch order differs between epochs.
    for batch in minibatches(inputs, batch_size=20):
        grad = vector_mean([linear_gradient(x, y, theta) for x, y in batch])
        # Negative step size: move against the gradient to reduce the squared error.
        theta = gradient_step(theta, grad, -learning_rate)
    print(epoch, theta)

0174439, 4.987695719934339]
542 [19.999981615582296, 4.9877593372925615]
543 [20.000000596031473, 4.988289930402195]
544 [19.99986600291718, 4.9883239135764414]
545 [19.99499138463032, 4.988639932226969]
546 [19.999106275087044, 4.9889237404077695]
547 [19.999498300298526, 4.988854260750452]
548 [20.000271229385476, 4.988978006337807]
549 [20.000789860846815, 4.989009143704562]
550 [20.004410855748826, 4.989321208985817]
551 [20.001955283508405, 4.99015464962172]
552 [20.0001065591595, 4.990301335875948]
553 [19.999684782625707, 4.990416463383617]
554 [19.998480665319182, 4.990629232891488]
555 [19.999335716817637, 4.990639007902794]
556 [20.000209184834983, 4.990763925347079]
557 [20.00033959027654, 4.99081923331454]
558 [19.999487304052245, 4.990849736132178]
559 [20.00058632993606, 4.990893751553599]
560 [19.9996341266611, 4.9908995554867275]
561 [19.999047782377176, 4.990976456020754]
562 [19.999816091389658, 4.990944596978783]
563 [19.997947199881725, 4.991045100709673]
564 [20.00

In [16]:
# Fitted [slope, intercept]; the data was generated with slope 20 and intercept 5.
theta

[19.999998311807346, 4.99998961815273]