In [2]:
from torchvision.datasets import CIFAR10
import torch

### Data Loading

In [2]:
# Fetch the CIFAR-10 training split (train=True); download=True pulls the
# archive into ./train/ (see the log printed below).
train_data = CIFAR10(root="./train/", train=True, download=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./train/cifar-10-python.tar.gz


100.0%


Extracting ./train/cifar-10-python.tar.gz to ./train/


In [3]:
# Echo the dataset summary (number of datapoints, root location, split).
train_data

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ./train/
    Split: Train

In [7]:
# Check which container type holds the image data.
type(train_data.data)

numpy.ndarray

In [8]:
# Shape of the image array; per the output below the channel axis comes last.
train_data.data.shape

(50000, 32, 32, 3)

In [11]:
# Integer class labels of the first five samples.
train_data.targets[:5]

[6, 9, 9, 4, 1]

In [12]:
# Human-readable class names (compare with class_to_idx for the label indices).
train_data.classes

['airplane',
 'automobile',
 'bird',
 'cat',
 'deer',
 'dog',
 'frog',
 'horse',
 'ship',
 'truck']

In [13]:
# Mapping from class name to the integer label stored in `targets`.
train_data.class_to_idx

{'airplane': 0,
 'automobile': 1,
 'bird': 2,
 'cat': 3,
 'deer': 4,
 'dog': 5,
 'frog': 6,
 'horse': 7,
 'ship': 8,
 'truck': 9}

In [17]:
# Indexing the dataset yields an (image, label) pair.
train_data[0]

(<PIL.Image.Image image mode=RGB size=32x32>, 6)

In [21]:
# `Image` is not needed for the display below, but a later cell calls
# Image.open, so the import is kept.
from PIL import Image

# Display the first training image inline. As the cell's last expression the
# PIL image is rendered by the notebook itself, whereas `.show()` hands the
# image to an external viewer program and leaves nothing in the notebook
# (and cannot work on a remote or headless kernel).
train_data[0][0]

In [22]:
# Fetch the CIFAR-10 test split (train=False) into its own root, ./test/,
# then echo the dataset summary.
# NOTE(review): the log below shows the same cifar-10-python.tar.gz URL as the
# training cell, so the archive is downloaded a second time here.
test_data = CIFAR10(root='./test/', train=False, download=True)
test_data

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./test/cifar-10-python.tar.gz


100.0%


Extracting ./test/cifar-10-python.tar.gz to ./test/


Dataset CIFAR10
    Number of datapoints: 10000
    Root location: ./test/
    Split: Test

### Data Transforms

In [30]:
from torchvision import transforms

# Training-time pipeline, applied in order to every image:
#   1. pad by 4 px and take a random 32x32 crop,
#   2. random horizontal flip (p=0.5, see the repr below),
#   3. convert to a tensor,
#   4. normalize each channel with the given mean / std.
train_steps = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=(.4914, .4822, .4465),
                         std=(.2023, .1994, .2010)),
]
train_transforms = transforms.Compose(train_steps)

# Re-create the training dataset from the files already in ./train/
# (download=False), this time with the transform pipeline attached.
train_data = CIFAR10(root="./train/", train=True, download=False,
                     transform=train_transforms)

train_data

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ./train/
    Split: Train
    StandardTransform
Transform: Compose(
               RandomCrop(size=(32, 32), padding=4)
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.201))
           )

In [31]:
# Evaluation pipeline: no random augmentation, only tensor conversion and the
# same per-channel mean / std used for the training data.
test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=(.4914, .4822, .4465),
                         std=(.2023, .1994, .2010)),
]
test_transforms = transforms.Compose(test_steps)

# Re-create the test dataset from ./test/ with the transform attached
# (no `download` argument is passed here).
test_data = CIFAR10(root='./test/', train=False, transform=test_transforms)

test_data

Dataset CIFAR10
    Number of datapoints: 10000
    Root location: ./test/
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.201))
           )

### Data Batching

In [34]:
# Wrap the training set in a loader that yields shuffled mini-batches of 16.
trainloader = torch.utils.data.DataLoader(train_data, batch_size=16, shuffle=True)

In [36]:
# Pull one mini-batch from the training loader and check the image tensor's shape.
batch_iter = iter(trainloader)
data_batch, labels_batch = next(batch_iter)
data_batch.shape

torch.Size([16, 3, 32, 32])

In [37]:
# Test loader: same batch size of 16, but created with shuffle=False.
testloader = torch.utils.data.DataLoader(test_data, batch_size=16, shuffle=False)


### General Data Preparation

Loading image data into a tensor

In [48]:
# Read an image file from disk and convert it into a tensor.
from torchvision import transforms

transform = transforms.ToTensor()
img = Image.open('coffee.jpg')
img_tensor = transform(img)
img_tensor.shape  # laid out in C*H*W format

torch.Size([3, 1200, 1600])

In [55]:
# Create a batch of images in N*C*H*W format.
N = 1
# ToTensor() yields a floating-point tensor, so the batch buffer must use the
# image's own dtype: writing it into a uint8 buffer would truncate the
# fractional pixel values. Sizing the buffer from N and img_tensor.shape also
# avoids repeating the hard-coded image dimensions.
batch = torch.empty(N, *img_tensor.shape, dtype=img_tensor.dtype)
batch[0] = img_tensor
batch.shape  # this is in N*C*H*W format

torch.Size([1, 3, 1200, 1600])

Tabular Data

In [10]:
import pandas as pd

# The file is read with ';' as the field separator; afterwards list each
# column's dtype.
wine_csv = 'winequality_data.csv'
df = pd.read_csv(wine_csv, sep=';')
df.dtypes

fixed acidity           float64
volatile acidity        float64
citric acid             float64
residual sugar          float64
chlorides               float64
free sulfur dioxide     float64
total sulfur dioxide    float64
density                 float64
pH                      float64
sulphates               float64
alcohol                 float64
quality                   int64
dtype: object

In [23]:
# Separate the target column ('quality') from the remaining feature columns.
target_col = 'quality'
X = df.drop(columns=target_col)
y = df[target_col]

In [15]:
# Sanity-check the split: both should have the same number of rows.
y.shape , X.shape

((4898,), (4898, 11))

In [24]:
# Convert the target and the feature columns to tensors.
# They are built from `df` rather than from the current `y` / `X`, so this
# cell can be re-run safely: rebinding `y` from `y.values` only works while
# `y` is still a pandas object and fails once it has become a tensor.
y = torch.tensor(df['quality'].to_numpy())
X = torch.tensor(df.drop('quality', axis=1).to_numpy())

# NOTE(review): X keeps the DataFrame's float64 dtype (see the printed
# output); cast with X.float() if it will be fed to a float32 model.
print(y)
print(X)

tensor([6, 6, 6,  ..., 6, 7, 6])
tensor([[ 7.0000,  0.2700,  0.3600,  ...,  3.0000,  0.4500,  8.8000],
        [ 6.3000,  0.3000,  0.3400,  ...,  3.3000,  0.4900,  9.5000],
        [ 8.1000,  0.2800,  0.4000,  ...,  3.2600,  0.4400, 10.1000],
        ...,
        [ 6.5000,  0.2400,  0.1900,  ...,  2.9900,  0.4600,  9.4000],
        [ 5.5000,  0.2900,  0.3000,  ...,  3.3400,  0.3800, 12.8000],
        [ 6.0000,  0.2100,  0.3800,  ...,  3.2600,  0.3200, 11.8000]],
       dtype=torch.float64)


In [25]:
# First row of the feature tensor.
X[0]

tensor([7.0000e+00, 2.7000e-01, 3.6000e-01, 2.0700e+01, 4.5000e-02, 4.5000e+01,
        1.7000e+02, 1.0010e+00, 3.0000e+00, 4.5000e-01, 8.8000e+00],
       dtype=torch.float64)

In [27]:
# Same row in the source DataFrame (still includes 'quality') for comparison.
df.iloc[0]

fixed acidity             7.000
volatile acidity          0.270
citric acid               0.360
residual sugar           20.700
chlorides                 0.045
free sulfur dioxide      45.000
total sulfur dioxide    170.000
density                   1.001
pH                        3.000
sulphates                 0.450
alcohol                   8.800
quality                   6.000
Name: 0, dtype: float64

In [28]:
# Shape of the feature tensor: (rows, feature columns).
X.shape

torch.Size([4898, 11])