In [6]:
# In the context of neural networks, an epoch refers to one complete pass through the entire training dataset during the training phase of 
# the network. During each epoch, the neural network processes the entire training dataset, calculates the error (the difference between the 
# predicted output and the actual output), and updates its weights to improve its performance.


In [3]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

In [11]:
# gradient computation etc. not efficient for whole data set
# -> divide dataset into small batches

'''
# training loop
for epoch in range(num_epochs):
    # loop over all batches
    for i in range(total_batches):
        batch_x, batch_y = ...
'''

# epoch = one forward and backward pass of ALL training samples
# batch_size = number of training samples used in one forward/backward pass
# number of iterations = number of passes, each pass (forward+backward) using [batch_size] number of samples
# e.g.: 100 samples, batch_size=20 -> 100/20=5 iterations for 1 epoch

# --> DataLoader can do the batch computation for us

# Implement a custom Dataset:
# inherit Dataset
# implement __init__ , __getitem__ , and __len__

class WineDataset(Dataset):
    """Map-style dataset that serves rows of a comma-separated wine file.

    The file is read with one header row skipped. In every remaining row the
    first column is taken as the class label and all other columns as the
    features.

    Args:
        path: Location of the CSV file. Defaults to 'wine.csv', so calling
            ``WineDataset()`` with no arguments behaves as it always did.
    """

    def __init__(self, path='wine.csv'):
        # Initialize data, download, etc.
        # read with numpy or pandas
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single data row; without it loadtxt hands back a 1-D array and the
        # column slicing below raises IndexError.
        xy = np.loadtxt(path, delimiter=',', dtype=np.float32, skiprows=1, ndmin=2)
        self.n_samples = xy.shape[0]

        # here the first column is the class label, the rest are the features
        self.x_data = torch.from_numpy(xy[:, 1:]) # size [n_samples, n_features]
        self.y_data = torch.from_numpy(xy[:, [0]]) # size [n_samples, 1]

    # support indexing such that dataset[i] can be used to get i-th sample
    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    # we can call len(dataset) to return the size
    def __len__(self):
        """Return the number of data rows that were loaded."""
        return self.n_samples


# create dataset
# WineDataset.__init__ reads 'wine.csv' with np.loadtxt, so that file must be
# reachable from the notebook's working directory.
dataset = WineDataset()

# get first sample and unpack
# dataset[0] goes through WineDataset.__getitem__, which returns the pair
# (x_data[0], y_data[0]): the feature row and its class label.
first_data = dataset[0]
features, labels = first_data
print(features, labels)

# Wrap the dataset in a DataLoader so that batching is handled for us.
#   shuffle     - shuffle the data, which is good for training
#   num_workers - multiple subprocesses can make loading faster
# !!! IF YOU GET AN ERROR DURING LOADING, SET num_workers TO 0 !!!
train_loader = DataLoader(dataset=dataset, batch_size=4, shuffle=True, num_workers=0)

# Turn the loader into an iterator and pull one batch from it
# (a batch of 4 samples here, not a single sample).
dataiter = iter(train_loader)
data = next(dataiter)
features, labels = data
print(features, labels)

# Dummy Training loop
num_epochs = 2
total_samples = len(dataset)
# Take the batch size from the loader itself instead of repeating the literal
# 4, so the reported step count stays right if the loader's batch_size changes.
n_iterations = math.ceil(total_samples / train_loader.batch_size)
print(total_samples, n_iterations)
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(train_loader):
        # here: 178 samples, batch_size = 4, n_iters=178/4=44.5 -> 45 iterations
        # Run your training process
        # Only report every 5th step to keep the output short.
        if (i+1) % 5 == 0:
            print(f'Epoch: {epoch+1}/{num_epochs}, Step {i+1}/{n_iterations}| Inputs {inputs.shape} | Labels {labels.shape}')

# torchvision.datasets ships ready-made loaders for well-known datasets,
# e.g. MNIST, Fashion-MNIST, CIFAR10, COCO
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

# NOTE: this rebinds train_loader, replacing the wine loader created earlier.
train_loader = DataLoader(dataset=train_dataset, batch_size=3, shuffle=True)

# Pull one shuffled batch (3 items here) and unpack it into inputs and targets
dataiter = iter(train_loader)
data = next(dataiter)
inputs, targets = data
print(inputs.shape, targets.shape)

tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03]) tensor([1.])
tensor([[1.2290e+01, 1.4100e+00, 1.9800e+00, 1.6000e+01, 8.5000e+01, 2.5500e+00,
         2.5000e+00, 2.9000e-01, 1.7700e+00, 2.9000e+00, 1.2300e+00, 2.7400e+00,
         4.2800e+02],
        [1.2770e+01, 2.3900e+00, 2.2800e+00, 1.9500e+01, 8.6000e+01, 1.3900e+00,
         5.1000e-01, 4.8000e-01, 6.4000e-01, 9.9000e+00, 5.7000e-01, 1.6300e+00,
         4.7000e+02],
        [1.2080e+01, 2.0800e+00, 1.7000e+00, 1.7500e+01, 9.7000e+01, 2.2300e+00,
         2.1700e+00, 2.6000e-01, 1.4000e+00, 3.3000e+00, 1.2700e+00, 2.9600e+00,
         7.1000e+02],
        [1.3770e+01, 1.9000e+00, 2.6800e+00, 1.7100e+01, 1.1500e+02, 3.0000e+00,
         2.7900e+00, 3.9000e-01, 1.6800e+00, 6.3000e+00, 1.1300e+00, 2.9300e+00,
         1.3750e+03]]) tensor([[2.],
        [3.],
        [2.],
        [1.]])
178 45
Epoch

In [4]:
'''
__getitem__(self, index) is a special method in Python classes, and it's used to implement the behavior of indexing, slicing, and iteration 
for objects. When you define this method in a class, you enable instances of that class to use square bracket notation (`[]`) to access 
elements.

Here's a simple example to illustrate how `__getitem__` works:

```python
class MyList:
    def __init__(self, data):
        self.data = data

    def __getitem__(self, index):
        return self.data[index]

my_list = MyList([1, 2, 3, 4, 5])

# Now, you can use square bracket notation to access elements
print(my_list[2])  # Output: 3
```

In this example, `__getitem__` is defined to return the element at the specified index in the `data` list. By doing this, instances of the
 `MyList` class can be treated like regular Python lists when it comes to indexing.

This method is commonly used in classes that represent collections of data (such as lists or arrays) to provide a way to access elements in 
a manner similar to built-in Python sequences.
'''


'''
The `__len__` method is another special method in Python classes. It is used to define the behavior of the built-in `len()` function 
when applied to an object. By implementing `__len__` in a class, you specify the number of elements or the size of the object, allowing 
instances of that class to be used with the `len()` function.

Here's a simple example:

```python
class MyList:
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

my_list = MyList([1, 2, 3, 4, 5])

# Now, you can use the len() function with instances of MyList
print(len(my_list))  # Output: 5
```

In this example, the `__len__` method is defined to return the length of the `data` list. As a result, when you use the `len()` function 
with an instance of the `MyList` class, it returns the length of the underlying data.

Implementing `__len__` is particularly useful when you create custom classes that represent collections, as it allows you to leverage 
Python's built-in functions and conventions for working with such objects.
'''

"\nIt looks like there's a small typo in your question. You probably meant `__getitem__(self, index)`. This method is a special method in \nPython classes, and it's used to implement the behavior of indexing, slicing, and iteration for objects. When you define this method in \na class, you enable instances of that class to use square bracket notation (`[]`) to access elements.\n\nHere's a simple example to illustrate how `__getitem__` works:\n\n```python\nclass MyList:\n    def __init__(self, data):\n        self.data = data\n\n    def __getitem__(self, index):\n        return self.data[index]\n\nmy_list = MyList([1, 2, 3, 4, 5])\n\n# Now, you can use square bracket notation to access elements\nprint(my_list[2])  # Output: 3\n```\n\nIn this example, `__getitem__` is defined to return the element at the specified index in the `data` list. By doing this, instances of the\n `MyList` class can be treated like regular Python lists when it comes to indexing.\n\nThis method is commonly used i