# Téléchargement et manipulation des données

Trois datasets :
- CIFAR-10
- MNIST
- Fashion-MNIST

In [13]:
import torch
from torchvision import datasets
from torchvision import transforms
import numpy as np

# CIFAR-10

In [2]:
cifar_train = datasets.CIFAR10(root='data/cifar_10_train', download=True, train=True, transform=transforms.ToTensor())
cifar_test = datasets.CIFAR10(root='data/cifar_10_test', download=True, train=False, transform=transforms.ToTensor())

Files already downloaded and verified
Files already downloaded and verified


# MNIST

In [3]:
mnist_train = datasets.MNIST(root='data/mnist_train', download=True, train=True, transform=transforms.ToTensor())
mnist_test = datasets.MNIST(root='data/mnist_test', download=True, train=False, transform=transforms.ToTensor())

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


# Fashion-MNIST

In [5]:
fashion_train = datasets.FashionMNIST(root='data/mnist_fashion_train', download=True, train=True, transform=transforms.ToTensor())
fashion_test = datasets.FashionMNIST(root='data/mnist_fashion_test', download=True, train=False, transform=transforms.ToTensor())

# Suite
Il faudrait maintenant faire en sorte que nos données soient enregistrés comme des Numpy arrays. Pourquoi ? Afin de les récupérer dans un format générique, à la fois pour PyTorch et pour TensorFlow.

Pour se faire, nous pouvons directement récupérer chacune des valeurs, créer une liste et à la fin les stacker dans un tenseur et les convertir en array et sauvegarder ce fichier sous format npy.

In [31]:
# On factorise le code.
def create_final_tensor(dataset, train=True):
    list_imgs = []
    list_lbls = []

    for i in range(len(dataset)):
        img, lbl = dataset[i]
        list_imgs.append(img)
        list_lbls.append(lbl)
        print(f"\rStacking {'train' if train else 'test'} : {(i+1) / len(dataset) * 100:.2f}%", end='')
    
    tensor_imgs = torch.vstack(list_imgs)
    tensor_lbls = torch.tensor(list_lbls)
    print('')

    return tensor_imgs, tensor_lbls

In [32]:
print('CIFAR-10')
cifar_img_train, cifar_lbl_train = create_final_tensor(cifar_train, train=True)
cifar_img_test, cifar_lbl_test = create_final_tensor(cifar_test, train=False)
print('Fin')

print('MNIST')
mnist_img_train, mnist_lbl_train = create_final_tensor(mnist_train, train=True)
mnist_img_test, mnist_lbl_test = create_final_tensor(mnist_test, train=False)
print('Fin')

print('Fashion MNIST')
fashion_img_train, fashion_lbl_train = create_final_tensor(fashion_train, train=True)
fashion_img_test, fashion_lbl_test = create_final_tensor(fashion_test, train=False)
print('Fin')

CIFAR-10
Stacking train : 100.00%
Stacking test : 100.00%
Fin
MNIST
Stacking train : 100.00%
Stacking test : 100.00%
Fin
Fashion MNIST
Stacking train : 100.00%
Stacking test : 100.00%
Fin


# Vérification des outpus et conversion en np.array

In [33]:
cifar_img_train.shape, cifar_lbl_train.shape, cifar_img_test.shape, cifar_lbl_test.shape

(torch.Size([150000, 32, 32]),
 torch.Size([50000]),
 torch.Size([30000, 32, 32]),
 torch.Size([10000]))

In [34]:
mnist_img_train.shape, mnist_lbl_train.shape, mnist_img_test.shape, mnist_lbl_test.shape

(torch.Size([60000, 28, 28]),
 torch.Size([60000]),
 torch.Size([10000, 28, 28]),
 torch.Size([10000]))

In [35]:
fashion_img_train.shape, fashion_lbl_train.shape, fashion_img_test.shape, fashion_lbl_test.shape

(torch.Size([60000, 28, 28]),
 torch.Size([60000]),
 torch.Size([10000, 28, 28]),
 torch.Size([10000]))

Tout est ok.

In [43]:
# CIFAR-10
np.save('data/cifar_images_train.npy', cifar_img_train.numpy())
np.save('data/cifar_labels_train.npy', cifar_lbl_train.numpy())
np.save('data/cifar_images_test.npy', cifar_img_test.numpy())
np.save('data/cifar_labels_test.npy', cifar_lbl_test.numpy())

# MNIST
np.save('data/mnist_images_train.npy', mnist_img_train.numpy())
np.save('data/mnist_labels_train.npy', mnist_lbl_train.numpy())
np.save('data/mnist_images_test.npy', mnist_img_test.numpy())
np.save('data/mnist_labels_test.npy', mnist_lbl_test.numpy())

# Fashion MNIST
np.save('data/fashion_images_train.npy', fashion_img_train.numpy())
np.save('data/fashion_labels_train.npy', fashion_lbl_train.numpy())
np.save('data/fashion_images_test.npy', fashion_img_test.numpy())
np.save('data/fashion_labels_test.npy', fashion_lbl_test.numpy())