In [11]:
import imageio
import torch
import numpy as np

### Loading an image

In [2]:
# Decode the sample JPEG into an array; the displayed shape is read as
# (height, width, channels).
img_arr = imageio.imread(
    './img/sample1.jpeg',
)
np.shape(img_arr)

(3799, 5698, 3)

### Changing the layout 

In [3]:
# Wrap the NumPy image in a tensor, then bring the channel axis to the front:
# H x W x C -> C x H x W.
img = torch.from_numpy(img_arr)
out = img.permute((2, 0, 1))
out.size()

torch.Size([3, 3799, 5698])

### Load all images into a batch

In [4]:
batch_size = 3
# Zero-filled uint8 holder for the whole batch, laid out as N x C x H x W
# (3 channels, 256 x 256 pixels per image).
batch = torch.zeros((batch_size, 3, 256, 256), dtype=torch.uint8)

In [13]:
import os

data_dir = "./img/"
# os.listdir returns entries in arbitrary order; sort so that each image lands
# in the same batch position on every run.
file_names = sorted(os.listdir(data_dir))

for i, file_name in enumerate(file_names):
    # Read the image; assumed to decode to an H x W x C array (colour image)
    # -- a 2-D grayscale array would fail at the permute below.
    img_arr = imageio.imread(os.path.join(data_dir, file_name))
    img_t = torch.from_numpy(img_arr)  # convert to tensor
    img_t = img_t.permute(2, 0, 1)  # change dim to C x H x W
    # Keep only the first 3 channels (drops e.g. an alpha channel). This must
    # happen before resizing, while the channel axis still holds real channels.
    img_t = img_t[:3]
    # Resample to the spatial size of the batch. np.resize must not be used
    # for this: it only truncates/repeats the flattened data and does not
    # scale the picture. interpolate expects a float N x C x H x W input;
    # "area" averages source pixels, which suits downscaling.
    img_t = torch.nn.functional.interpolate(
        img_t.unsqueeze(0).float(),
        size=tuple(batch.shape[-2:]),
        mode="area",
    ).squeeze(0)
    # Round back to integer pixel values before storing in the batch holder.
    batch[i] = img_t.round().clamp(0, 255).to(batch.dtype)  # assign to position i in batch

### Normalizing the data

In [15]:
# Cast the batch to float, then divide by 255 in place so that 8-bit pixel
# values end up in [0, 1].
# Note: re-running this cell divides the already-scaled values again.
batch = batch.float().div_(255.0)
batch.size()

torch.Size([3, 3, 256, 256])

### Load data from a CSV file

In [16]:
import csv

wine_path = "../data/tabular-wine/tabular-wine.csv"
# Parse the semicolon-separated file into a float32 matrix; the first row is
# skipped rather than parsed as numbers.
wine_numpy = np.loadtxt(
    wine_path,
    delimiter=";",
    skiprows=1,
    dtype=np.float32,
)

wine_numpy

array([[ 7.  ,  0.27,  0.36, ...,  0.45,  8.8 ,  6.  ],
       [ 6.3 ,  0.3 ,  0.34, ...,  0.49,  9.5 ,  6.  ],
       [ 8.1 ,  0.28,  0.4 , ...,  0.44, 10.1 ,  6.  ],
       ...,
       [ 6.5 ,  0.24,  0.19, ...,  0.46,  9.4 ,  6.  ],
       [ 5.5 ,  0.29,  0.3 , ...,  0.38, 12.8 ,  7.  ],
       [ 6.  ,  0.21,  0.38, ...,  0.32, 11.8 , 61.  ]], dtype=float32)

In [17]:
# (rows, columns) of the loaded table.
np.shape(wine_numpy)

(4898, 12)

In [18]:
# Read just the first row of the CSV to get the column names.
# The file is opened in a `with` block so the handle is closed as soon as the
# header has been read; newline="" is what the csv module recommends for
# files handed to csv.reader.
with open(wine_path, newline="") as csv_file:
    col_list = next(csv.reader(csv_file, delimiter=";"))

col_list

['fixed acidity',
 'volatile acidity',
 'citric acid',
 'residual sugar',
 'chlorides',
 'free sulfur dioxide',
 'total sulfur dioxide',
 'density',
 'pH',
 'sulphates',
 'alcohol',
 'quality']

In [19]:
# Tensor view of the NumPy table; from_numpy shares memory with wine_numpy
# instead of copying it.
wine_tensor = torch.from_numpy(wine_numpy)

wine_tensor.size()

torch.Size([4898, 12])

### Representing scores

In [20]:
# Features: every row, every column except the final one.
data = wine_tensor[..., :-1]

(data, data.size())

(tensor([[ 7.0000,  0.2700,  0.3600,  ...,  3.0000,  0.4500,  8.8000],
         [ 6.3000,  0.3000,  0.3400,  ...,  3.3000,  0.4900,  9.5000],
         [ 8.1000,  0.2800,  0.4000,  ...,  3.2600,  0.4400, 10.1000],
         ...,
         [ 6.5000,  0.2400,  0.1900,  ...,  2.9900,  0.4600,  9.4000],
         [ 5.5000,  0.2900,  0.3000,  ...,  3.3400,  0.3800, 12.8000],
         [ 6.0000,  0.2100,  0.3800,  ...,  3.2600,  0.3200, 11.8000]]),
 torch.Size([4898, 11]))

In [28]:
# Labels: the final column of every row.
target = wine_tensor[..., -1]
# Overwrite the last label with 6.0 (the loaded array displays 61. there --
# presumably a data-entry glitch; confirm against the source file).
# target is a view, so this write also changes wine_tensor.
target[target.shape[0] - 1] = 6.0
(target, target.size())

(tensor([6., 6., 6.,  ..., 6., 7., 6.]), torch.Size([4898]))

In [29]:
# Cast the labels to 64-bit integers so they can be used as indices.
target = target.to(torch.int64)

target

tensor([6, 6, 6,  ..., 6, 7, 6])

### One-hot encoding

In [30]:
# One row per sample and ten score slots, all zero to begin with.
target_onehot = torch.zeros((len(target), 10))
# For each row i, write 1.0 into column target[i] (in place). Labels must lie
# in 0..9 to index the ten columns. scatter_ returns the same tensor, which is
# what the cell displays.
target_onehot.scatter_(1, target[:, None], 1.0)


tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])