In [2]:
import random
import numpy as np
from minitorch.tensor.tensor import Tensor
from minitorch.dataloaders.dataloader import TensorDataset, DataLoader

In [3]:
def test_units_tensordataset():
    print('=' *50)
    features = Tensor(np.array([[1,2,3,4], [5,6,7,8]]))
    labels = Tensor([0,1])
    
    ds = TensorDataset(features, labels)
    
    # test length
    ds_length = len(ds)
    assert ds_length == 2, f'Expected 2, got {ds_length}'
    print('Length test works perfectly')
    
    # test indexing
    sample = ds[0]
    assert len(sample) == 2, f'Expects two samples, got {len(sample)}'
    assert np.array_equal(sample[0].data,[1,2,3,4]),\
        f'Wrong features, expected [1,2,3,4], got {sample[0].data}'
    assert np.array_equal(sample[1].data, 0),\
        f'Wrong label, expected 0, got {sample[1].data}'
    
    sample = ds[1]
    assert np.array_equal(sample[0].data, [5,6,7,8]),\
        f'Wrong features at index 1, expected [5,6,7,8], got {sample[1].data}'
    assert np.array_equal(sample[1].data, 1),\
        f'Wrong labels at index 1, expected 1, got {sample[1].data}'
    print('Tensor indexing works perfectly')
    
    # test error handling
    try:
        ds[10]
        assert False, 'Index out of bounds'
    except IndexError:
        pass
    print('Errror testing passed successfully')
    
    # test feature and label mismathc
    bad_features = features
    bad_labels = Tensor([1,2,3,4])
    try:
        TensorDataset(bad_features, bad_labels)
        assert False, 'Dimension mismatch'
    except AssertionError:
        pass
    print('Dimension mismatch test passed successfully')
    print('TensorDataset works perfectly')
    print('=' *50)
    
# Run the TensorDataset unit test defined in this cell as soon as the cell executes.
test_units_tensordataset()

Length test works perfectly
Tensor indexing works perfectly
Errror testing passed successfully
Dimension mismatch test passed successfully
TensorDataset works perfectly


In [4]:
def test_unit_dataloader():
    print('=' *50)
    print("ðŸ”¬ Unit Test: DataLoader...")
    
    # create dataset
    features = Tensor(np.array([
        [1,2,3,4],
        [5,6,7,8],
        [20,30,40,2],
        [10,11,1,2],
        [90,70,10,11],
        [100,200,300,400],
        [0.5,1.0,2.1,11.1]
        ]))
    labels = Tensor([0,1,0,1,0,1,0])

    ds = TensorDataset(features, labels)
    
    # test basic batching (No shuffling)
    loader = DataLoader(dataset=ds, batch_size=2, shuffle=False)
    
    # test length calculation
    assert len(loader) == 4, f'Expected 4 batches, got {len(loader)}'
    print('Length calculation works perfectly')
    
    batches = list(loader)
    assert len(batches) == 4, f'Expected 4 batches, got {len(batches)}'
    print('Iteration works perfectly')
    
    #test first batch
    batch_features, batch_labels = batches[0]
    expected_features = np.array([[1,2,3,4],[5,6,7,8]])
    expected_labels = np.array([0,1])
    assert np.array_equal(batch_features.data, expected_features),\
        f'Wrong features in batch 1, expected {expected_features}, got {batch_features.data}'
    assert np.array_equal(batch_labels.data, expected_labels),\
        f'Wrong labels in batch 1, expected {expected_labels}, got {batch_labels.data}'
    print('First batch works perfectly')
    
    # test the last batch (should have only one sample)
    batch_features, batch_labels = batches[-1]
    expected_features = np.array([[0.5,1.0,2.1,11.1]])
    expected_labels = np.array([0])
    assert np.array_equal(batch_features.data, expected_features),\
        f'Wrong features in batch 4, expected {expected_features}, got {batch_features.data}'
    assert np.array_equal(batch_labels.data, expected_labels),\
        f'Wrong labels in batch 4, expected {expected_labels}, got {batch_labels.data}'
    print('Last batch works perfectly')
    
    # test shuffling
    loader_shuffled = DataLoader(dataset=ds, batch_size=2, shuffle=True)
    batches_shuffled = list(loader_shuffled)
    first_batch_shuffled = batches_shuffled[0]
    if np.array_equal(first_batch_shuffled[0].data, np.array([[1,2,3,4],[5,6,7,8]])):
        assert False, 'Shuffling did not work, first batch is same as non-shuffled'
    print('Shuffling works perfectly')
    
    # test error handling for invalid batch size
    try:
        DataLoader(dataset=ds, batch_size=0, shuffle=False)
        assert False, 'Batch size zero should raise error'
    except AssertionError:
        pass
    print('Error handling for invalid batch size works perfectly')
    
    # test error handling for non-TensorDataset
    try:
        DataLoader(dataset=[1,2,3], batch_size=2, shuffle=False)
        assert False, 'Non-TensorDataset should raise error'
    except AssertionError:
        pass

    print('âœ… DataLoader works correctly!')
    print('=' *50)
    
# Run the DataLoader unit test defined in this cell as soon as the cell executes.
test_unit_dataloader()

ðŸ”¬ Unit Test: DataLoader...
Length calculation works perfectly
Iteration works perfectly
First batch works perfectly
Last batch works perfectly
Shuffling works perfectly
Error handling for invalid batch size works perfectly
âœ… DataLoader works correctly!


In [5]:
def test_unit_dataloader_edge_cases():
    print('=' *50)
    print("ðŸ”¬ Unit Test: DataLoader Edge Cases...")
    
    # create dataset with 7 samples
    features = Tensor(np.array([
    [1,2,3,4],
    [5,6,7,8],
    [20,30,40,2],
    [10,11,1,2],
    [90,70,10,11],
    [100,200,300,400],
    [0.5,1.0,2.1,11.1],
    [3,6,9,12]
    ]))
    labels = Tensor([0,1,0,1,0,1,0,0])
    ds = TensorDataset(features, labels)
    
    # batch size larger than dataset
    loader_large_batch = DataLoader(dataset=ds, batch_size=10, shuffle=False)
    batches_large = list(loader_large_batch)
    batch_features, batch_labels = batches_large[0]
    
    assert len(batches_large) == 1, \
        f'Expected 1 batch, got {len(batches_large)}'
    assert batch_features.shape[0] == 8,\
        f'Expected 8 samples in batch, got {batch_features.shape[0]}'
    print('Batch size larger than dataset works perfectly')
    
    # batch size of 1
    loader_batch_one = DataLoader(dataset=ds, batch_size=1, shuffle=False)
    batches_one = list(loader_batch_one)
    assert len(batches_one) == 8,\
        f'Expected 8 batches, got {len(batches_one)}'
    for i, (batch_features, batch_labels) in enumerate(batches_one):
        assert batch_features.shape[0] == 1,\
        f'Expected 1 sample in batch {i}, got {batch_features.shape[0]}'
    print('Batch size of 1 works perfectly')
    
    print('âœ… DataLoader Edge Cases work correctly!')
    print("=" * 50)
    
# Run the DataLoader edge-case test defined in this cell as soon as the cell executes.
test_unit_dataloader_edge_cases()

ðŸ”¬ Unit Test: DataLoader Edge Cases...
Batch size larger than dataset works perfectly
Batch size of 1 works perfectly
âœ… DataLoader Edge Cases work correctly!


In [6]:
def unit_test_dataloader_randomness():
    """Check that ``DataLoader`` shuffling is reproducible and seed-dependent.

    Seeds Python's global ``random`` module before building each shuffling
    loader over an eight-sample ``TensorDataset`` and verifies that

    * two runs with the same seed (42) yield identical batches;
    * runs with different seeds (42 vs 99) yield at least one differing batch.

    The test therefore relies on ``DataLoader`` taking its shuffle order from
    the global ``random`` generator, since that is the only generator seeded
    here. The generator's previous state is restored before returning so the
    fixed seeds do not leak into later cells.

    Raises:
        AssertionError: if any expectation is not met.
    """
    print('=' *50)
    print("🔬 Unit Test: DataLoader Randomness...")

    # create dataset
    features = Tensor(np.array([
    [1,2,3,4],
    [5,6,7,8],
    [20,30,40,2],
    [10,11,1,2],
    [90,70,10,11],
    [100,200,300,400],
    [0.5,1.0,2.1,11.1],
    [3,6,9,12]
    ]))
    labels = Tensor([0,1,0,1,0,1,0,0])

    ds = TensorDataset(features, labels)

    def _shuffled_epoch(seed):
        """Seed ``random``, then build and drain one shuffling loader."""
        # Seed first so the result is the same whether the loader shuffles
        # when it is constructed or when it is iterated.
        random.seed(seed)
        loader = DataLoader(dataset=ds, batch_size=2, shuffle=True)
        return list(loader)

    # Seeding mutates the process-wide generator; remember its state so this
    # test does not silently make later cells deterministic.
    saved_state = random.getstate()
    try:
        #* test with the same seed
        batches1 = _shuffled_epoch(42)
        batches2 = _shuffled_epoch(42)

        assert batches1, 'Shuffled loader produced no batches'
        # zip() stops at the shorter input, so a differing batch count has to
        # be caught explicitly or the comparison below would pass vacuously.
        assert len(batches1) == len(batches2),\
            f'Batch counts differ with same seed: {len(batches1)} vs {len(batches2)}'
        for (feat1, label1), (feat2, label2) in zip(batches1, batches2):
            assert np.array_equal(feat1.data, feat2.data),\
                'Features do not match between two shuffles with same seed'
            assert np.array_equal(label1.data, label2.data), \
                'Labels do not match between two shuffles with same seed'
        print('✅ DataLoader shuffling with same seed works correctly!')

        #* test with different seeds
        batches3 = _shuffled_epoch(42)
        batches4 = _shuffled_epoch(99)

        # Any differing batch (or a differing batch count) is enough.
        different = len(batches3) != len(batches4) or any(
            not np.array_equal(feat3.data, feat4.data)
            or not np.array_equal(label3.data, label4.data)
            for (feat3, label3), (feat4, label4) in zip(batches3, batches4)
        )
        assert different, 'Shuffled batches are the same with different seeds'
    finally:
        random.setstate(saved_state)

    print('✅ DataLoader Randomness works correctly!')
    print("=" * 50)
    
# Run the DataLoader randomness test defined in this cell as soon as the cell executes.
unit_test_dataloader_randomness()

ðŸ”¬ Unit Test: DataLoader Randomness...
âœ… DataLoader shuffling with same seed works correctly!
âœ… DataLoader Randomness works correctly!
