In [7]:
#* get autorelod for the notesbook
%load_ext autoreload
%autoreload 3

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
import numpy as np
from minitorch.tensor.tensor import Tensor
from minitorch.train.training import CosineSchedule, clip_grad_norm

In [2]:
def unit_tests_cosine_scheduler():
    print('Unit Tests: Testing the cosine scheduler ....')
    scheduler = CosineSchedule()
    tolerance = 1e-6
    
    #* Test basic schedule
    lr_start = scheduler.get_lr(0)
    lr_quarter = scheduler.get_lr(25)
    lr_middle = scheduler.get_lr(50)
    lr_end = scheduler.get_lr(100)
    
    print(f"Learning rate at epoch 0: {lr_start:.4f}")
    print(f"Learning rate at epoch 25: {lr_middle:.4f}")
    print(f"Learning rate at epoch 50: {lr_middle:.4f}")
    print(f"Learning rate at epoch 100: {lr_end:.4f}")
    
    #* validate behavior
    assert abs(lr_start - 0.1) < tolerance, f'Expected 0.1 at start, got {lr_start}'
    assert abs(lr_end - 0.01) < tolerance, f'Expected 0.01 at the end, got {lr_end}'
    assert 0.01 < lr_middle < 0.1, f'Expected middle lr to be between 0.01 and 0.1, got {lr_middle}'
    
    #* monotonic test
    assert lr_quarter > lr_middle, 'Lr should decrease monotonically in first half'
    
    print('Cosine Scheduler works perfectly')
    
unit_tests_cosine_scheduler()
    

Unit Tests: Testing the cosine scheduler ....
Learning rate at epoch 0: 0.1000
Learning rate at epoch 25: 0.0550
Learning rate at epoch 50: 0.0550
Learning rate at epoch 100: 0.0100
Cosine Scheduler works perfectly


In [40]:
def unit_tests_clip_grad_norm():
    print('Unit Tests: Testing the clip_grad_norm function ....')
    x = Tensor(np.array([[2.0, 3.0, 4.6,7.0],
                        [4.0,5.0,8.0,10.0],
                        [5.6,7.0, 11.1,1.0],
                        [2.0, 3.0,0.0,-1.0],
                        [4.0,5.0,-2.0, -10.0],
                        [5.6,7.0, 11.9,12.0]], dtype='float32'), requires_grad=True)
    y = Tensor(np.array([1.0, 2.0, 3.0, 3.0, 4.0,5.0], dtype='float32'), requires_grad=True)
    
    
    x.grad = np.random.randint(x.shape[0], size=x.shape, dtype='int32').astype('float32')
    y.grad = np.random.randint(y.shape[0], size=y.shape, dtype='int32').astype('float32')
    
    #* clip manually to verify
    x_grad = np.sum(x.grad ** 2)
    y_grad = np.sum(y.grad ** 2)
    total_norm = np.sqrt(x_grad + y_grad)
    
    if total_norm > 1.0:
        clip_coef = 1.0 / (total_norm + 1e-6)
        x.grad *= clip_coef
        y.grad *= clip_coef
        
    #* now use the function to verify it does the same thing
    x_copy = x.copy()
    y_copy = y.copy()
    x_copy.grad = x.grad.copy()
    y_copy.grad = y.grad.copy()
    
    norm = clip_grad_norm([x_copy,y_copy], max_norm=1.0)
    
    
    #* compare manual clipping with function clipping
    assert np.allclose(x_copy.grad, x.grad), "x grad should be the same"
    assert np.allclose(y_copy.grad, y.grad), "y grad should be the same"
    # assert norm > 1.0, f"Total norm should be greater than 1.0 before clipping, got {norm:.4f}"
    print('clip_grad_norm works perfectly')
    
    
unit_tests_clip_grad_norm()

Unit Tests: Testing the clip_grad_norm function ....
clip_grad_norm works perfectly
