# Pretraitement des données


In [4]:
import tensorflow as tf
import tensorflow.keras
import numpy as np
from matplotlib import pyplot as plt

import random

In [2]:
# Chargement du jeu de données
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
assert x_train.shape == (50000, 32, 32, 3)
assert x_test.shape == (10000, 32, 32, 3)
assert y_train.shape == (50000, 1)
assert y_test.shape == (10000, 1)

In [5]:
image = random.choice(x_train)

print(f"Nombre d'images : {len(x_train)}")
print(f"Forme d'une image : {image.shape}")
print(f"Un pixel : {image[0][0]}")
print(f"Frome d'un pixel : {image[0][0].shape}")

Nombre d'images : 50000
Forme d'une image : (32, 32, 3)
Un pixel : [ 92 121  52]
Frome d'un pixel : (3,)


In [10]:
# Normalizatoion
tmp = x_train.astype("float32") / np.amax(x_train)

In [11]:
image = random.choice(tmp)

print(f"Nombre d'images : {len(x_train)}")
print(f"Forme d'une image : {image.shape}")
print(f"Un pixel : {image[0][0]}")
print(f"Frome d'un pixel : {image[0][0].shape}")

Nombre d'images : 50000
Forme d'une image : (32, 32, 3)
Un pixel : [0.61960787 0.25882354 0.03921569]
Frome d'un pixel : (3,)


In [14]:
label = random.choice(y_train)

print("Label: ", label)
print(f"Nombre de label: {len(y_train)}")
print(f"Forme des labels : {y_train.shape}")

Label:  [7]
Nombre de label: 50000
Forme des labels : (50000, 1)


In [19]:
# one hot encoding
unique_y = np.sort(np.unique(y_train))
y_tmp = np.array(list(map(lambda x: [1 if x == k else 0 for k in unique_y], y_train)))

In [20]:
label = random.choice(y_tmp)

print("Label: ", label)
print(f"Nombre de label: {len(y_train)}")
print(f"Forme des labels : {y_train.shape}")

Label:  [0 1 0 0 0 0 0 0 0 0]
Nombre de label: 50000
Forme des labels : (50000, 1)


In [21]:
def normalize_dataset(x_train, y_train):
    # Scale images to the [0, 1] range
    x_train = x_train.astype("float32") / np.amax(x_train)
    # Flatten the images.
    #x_train = x_train.reshape((-1, len(x_train[0]) * len(x_train[0][0])))
    # One hot encoding
    unique_y = np.sort(np.unique(y_train))
    y_train = np.array(list(map(lambda x:  [1 if x == k else 0 for k in unique_y], y_train)))

    return x_train , y_train