<a href="https://colab.research.google.com/github/qamtam/Hands-on-machine-learning/blob/main/CH13_SET10_with_notes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
# NOTE(review): the version is compared against a string literal, so the
# ordering is lexicographic rather than numeric — confirm this is acceptable.
import sklearn
assert sklearn.__version__ >= "0.20"

try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
    !pip install -q -U tfx==0.21.2
    print("You can safely ignore the package incompatibility errors.")
except Exception:
    # Any exception raised by the setup lines above is swallowed.
    pass

# TensorFlow ≥2.0 is required
# NOTE(review): same lexicographic string comparison as the Scikit-Learn check.
import tensorflow as tf
from tensorflow import keras
assert tf.__version__ >= "2.0"

# Common imports
import numpy as np
import os


[K     |████████████████████████████████| 1.1MB 2.9MB/s 
[K     |████████████████████████████████| 1.5MB 16.4MB/s 
[K     |████████████████████████████████| 112kB 26.8MB/s 
[K     |████████████████████████████████| 59.2MB 67kB/s 
[K     |████████████████████████████████| 153kB 47.3MB/s 
[K     |████████████████████████████████| 1.9MB 38.9MB/s 
[K     |████████████████████████████████| 245kB 53.3MB/s 
[K     |████████████████████████████████| 3.0MB 43.1MB/s 
[K     |████████████████████████████████| 4.9MB 48.5MB/s 
[K     |████████████████████████████████| 2.4MB 41.3MB/s 
[K     |████████████████████████████████| 276kB 50.0MB/s 
[K     |████████████████████████████████| 204kB 49.1MB/s 
[K     |████████████████████████████████| 153kB 49.9MB/s 
[K     |████████████████████████████████| 225kB 51.0MB/s 
[K     |████████████████████████████████| 51kB 6.2MB/s 
[K     |████████████████████████████████| 61kB 6.2MB/s 
[K     |████████████████████████████████| 1.2MB 36.6MB/s 
[K

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow import data
# Short aliases for the protobuf message classes used to build tf.train.Example records.
BytesList, FloatList, Int64List = (
    tf.train.BytesList,
    tf.train.FloatList,
    tf.train.Int64List,
)
Feature, Features, Example = tf.train.Feature, tf.train.Features, tf.train.Example

# Load Fashion MNIST and hold out the first 5000 training samples for validation.
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()
X_valid, y_valid = X_train_full[:5000], y_train_full[:5000]
X_train, y_train = X_train_full[5000:], y_train_full[5000:]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [None]:
def create_example(image, label):
    """Wrap one (image, label) pair in a tf.train.Example protobuf.

    The image is serialized with tf.io.serialize_tensor and stored as a single
    bytes value under the "image" key; the label is stored as a single int64
    value under the "label" key.

    Returns the Example object itself; it is not serialized to bytes here.
    """
    # serialize_tensor yields one long flat byte string for the whole image.
    serialized_image = tf.io.serialize_tensor(image)
    image_feature = Feature(bytes_list=BytesList(value=[serialized_image.numpy()]))
    label_feature = Feature(int64_list=Int64List(value=[label]))
    feature_map = {"image": image_feature, "label": label_feature}
    return Example(features=Features(feature=feature_map))

In [None]:
# First attempt at a parsing schema for reading the serialized Examples back.
# The image is declared as a scalar string because it is read from one flat
# serialized string; reshaping happens later.
feature_description = dict(
    image=tf.io.FixedLenFeature([], tf.string, default_value=""),
    label=tf.io.FixedLenFeature([], tf.int64, default_value=0),
)

In [None]:
# This function writes the given dataset to several TFRecord files.

# contextlib.ExitStack is a class that makes sure every file writer is closed properly
from contextlib import ExitStack
def write_tfrecords(name, dataset, n_shards=10):
  """Write `dataset` round-robin into `n_shards` TFRecord files.

  Files are named "<name>.tfrecord-<index>-of-<n_shards>", both numbers
  zero-padded to five digits (e.g. name.tfrecord-00000-of-00010).

  Args:
    name: prefix used for every shard file name.
    dataset: object whose .enumerate() yields (index, (image, label)) items.
    n_shards: number of files the records are spread over.

  Returns:
    The list of shard file paths, in shard order.
  """
  paths = []
  for shard_index in range(n_shards):
    paths.append("{}.tfrecord-{:05d}-of-{:05d}".format(name, shard_index, n_shards))
  # ExitStack closes every registered writer on exit, like one `with` per file.
  with ExitStack() as stack:
    writers = []
    for path in paths:
      writers.append(stack.enter_context(tf.io.TFRecordWriter(path)))
    for position, (image, label) in dataset.enumerate():
      serialized = create_example(image, label).SerializeToString()
      writers[position % n_shards].write(serialized)
  return paths

# Training split: shuffled over the whole set before being written to shards.
train_set = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_set = train_set.shuffle(len(X_train))
train_filepaths = write_tfrecords("my_fashion_mnist.train", train_set)

# Validation split, written in its original order.
valid_set = tf.data.Dataset.from_tensor_slices((X_valid, y_valid))
valid_filepaths = write_tfrecords("my_fashion_mnist.valid", valid_set)

# Test split, written in its original order.
test_set = tf.data.Dataset.from_tensor_slices((X_test, y_test))
test_filepaths = write_tfrecords("my_fashion_mnist.test", test_set)

In [None]:
# Display the shard file paths returned for the training split.
train_filepaths

['my_fashion_mnist.train.tfrecord-00000-of-00010',
 'my_fashion_mnist.train.tfrecord-00001-of-00010',
 'my_fashion_mnist.train.tfrecord-00002-of-00010',
 'my_fashion_mnist.train.tfrecord-00003-of-00010',
 'my_fashion_mnist.train.tfrecord-00004-of-00010',
 'my_fashion_mnist.train.tfrecord-00005-of-00010',
 'my_fashion_mnist.train.tfrecord-00006-of-00010',
 'my_fashion_mnist.train.tfrecord-00007-of-00010',
 'my_fashion_mnist.train.tfrecord-00008-of-00010',
 'my_fashion_mnist.train.tfrecord-00009-of-00010']

In [None]:
# "Preprocessing" here means decoding an encoded, serialized Example.
def preprocess(tfrecord):
  """Parse one serialized Example into an (image, label) pair.

  The "image" feature is read as a scalar string (the serialized tensor) and
  the "label" feature as a scalar int64 defaulting to -1. The image string is
  then decoded to uint8 and reshaped to 28x28.
  """
  schema = {
    # Scalar string: the image is stored as one flat serialized tensor.
    "image": tf.io.FixedLenFeature([], tf.string, default_value=""),
    "label": tf.io.FixedLenFeature([], tf.int64, default_value=-1),
  }
  # After parsing we hold two tensors: "image" (string) and "label" (int64).
  parsed = tf.io.parse_example(tfrecord, schema)
  # Turn the stringified tensor back into a uint8 tensor.
  pixels = tf.io.parse_tensor(parsed["image"], out_type=tf.uint8)
  return tf.reshape(pixels, shape=[28, 28]), parsed["label"]

def mnist_dataset(filepaths, n_read_threads=5, shuffle_buffer_size=None,
                  n_parse_threads=5, batch_size=32, cache=True):
    """Build a batched dataset of (image, label) pairs from TFRecord files.

    Args:
        filepaths: TFRecord file path(s) to read.
        n_read_threads: passed as num_parallel_reads to TFRecordDataset.
        shuffle_buffer_size: if truthy, shuffle with this buffer size.
        n_parse_threads: passed as num_parallel_calls when mapping preprocess.
        batch_size: number of examples per batch.
        cache: if true, cache the raw records before shuffling and parsing.

    Returns:
        The dataset, batched and with prefetch(1) applied.
    """
    records = tf.data.TFRecordDataset(filepaths, num_parallel_reads=n_read_threads)
    # Optional stages operate on the still-serialized records.
    records = records.cache() if cache else records
    if shuffle_buffer_size:
        records = records.shuffle(shuffle_buffer_size)
    # Unpack each serialized Example into (image, label).
    examples = records.map(preprocess, num_parallel_calls=n_parse_threads)
    return examples.batch(batch_size).prefetch(1)
# Each split reads its own shard files, so validation and test metrics are
# not computed on the training data.
train_set = mnist_dataset(train_filepaths, shuffle_buffer_size=60000)
valid_set = mnist_dataset(valid_filepaths)
test_set = mnist_dataset(test_filepaths)
# Training data in batches of 2 images, for inspecting normalization statistics by hand.
train_set_micro = mnist_dataset(train_filepaths, batch_size=2)

In [None]:
# Exploration: compare the statistics learned by keras Normalization layers
# with means computed by hand with NumPy on a single small batch.
sample = train_set_micro.take(1).map(lambda image, label: image) # only the image remains (label dropped)
for x in sample:
  print(x.shape)
#sample_numpy = sample.as_numpy_iterator()
#x = sample_numpy
import numpy
# Print arrays in full: no summarizing threshold and no line wrapping.
numpy.set_printoptions(threshold=sys.maxsize)
numpy.set_printoptions(linewidth=np.inf)
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras import layers
# Default Normalization layer, adapted on the sample batch.
normalizer = preprocessing.Normalization()
normalizer.adapt(sample)
print(normalizer.mean)
# Second layer with axis=(1,2), adapted on the same batch.
normalizer2 = preprocessing.Normalization(axis=(1,2))
normalizer2.adapt(sample)
norma_mean = normalizer2.mean.numpy() # mean over the batch for every cell (pixel position)
norma0_mean = normalizer.mean.numpy() # mean over the batch for whole columns

print(normalizer2.mean.numpy())
print(numpy.sum(normalizer2.mean[:,1].numpy())/28) # per-cell means of column 1 converted into a single column mean
for x in sample:
  print(x)
  mean0 = np.mean(x, axis=0)
  mean1 = np.mean(x, axis=1)
  mean2 = np.mean(x, axis=2)
  mean01 = np.mean(x, axis=(0,1)) # also a per-column mean (whole column of image 1 plus whole column of image 2)
  # the result holds one mean per column
  mean = np.mean(x)
  #   print(mean)  # mean of everything, a single number
  #print(mean0)
  # Differences between the hand-computed means and the layers' means.
  print(mean0-norma_mean)
  print("####")
  print(norma0_mean-mean01)
  print(norma0_mean)
  print(mean01)

(2, 28, 28)
<tf.Variable 'mean:0' shape=(28,) dtype=float32, numpy=array([10.660714 , 17.928572 , 14.178572 , 41.607143 , 63.857143 , 56.75     , 61.357143 , 62.910713 , 63.589287 , 60.214287 , 58.232143 , 55.5      , 48.19643  , 47.482143 , 48.875    , 48.30357  , 48.232143 , 51.464287 , 51.92857  , 52.607143 , 46.339287 , 48.67857  , 48.089287 , 20.321428 , 13.625    ,  7.25     ,  4.821429 ,  2.5714285], dtype=float32)>
[[  0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0. ]
 [  0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0. ]
 [  0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0. ]
 [  0.    0.    0.    0.    0.    0.    0.    0.    0.    0.

In [None]:
# A tf.data.Dataset is directly iterable; there is no need to call __iter__ explicitly.
for x in sample:
  print(np.mean(x, axis=(0)).shape)  # mean taken across axis 0, value by value
  #print(np.mean(x, axis=(0,1)))

(28, 28)


In [None]:
# Try standardization.
# Axis 0 is the batch axis,
# so for example in Fashion MNIST we have tables (tensors) of shape 32*28*28,
# or 32 units * 784 features.
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras import layers
# Default Normalization layer versus one created with axis=(1,2), both adapted on `sample`.
normalizer = preprocessing.Normalization()
normalizer.adapt(sample)
print(normalizer.mean)
normalizer2 = preprocessing.Normalization(axis=(1,2))
normalizer2.adapt(sample)
print(normalizer2.mean)


<tf.Variable 'mean:0' shape=(28,) dtype=float32, numpy=
array([  2.1149554,   3.794643 ,   6.9162946,  14.970983 ,  27.837053 ,
        47.78125  ,  59.515625 ,  66.97768  ,  73.967636 ,  93.720985 ,
       116.345985 , 120.08259  , 120.737724 , 111.03571  , 106.71429  ,
       114.97321  , 124.54576  , 119.69196  , 115.32701  ,  91.44196  ,
        77.36384  ,  71.69308  ,  61.27902  ,  46.71317  ,  27.46317  ,
        16.015625 ,  11.436384 ,   4.051339 ], dtype=float32)>
<tf.Variable 'mean:0' shape=(28, 28) dtype=float32, numpy=
array([[0.0000000e+00, 3.1250000e-02, 0.0000000e+00, 3.1250000e-02,
        1.2500000e-01, 1.5625000e-01, 1.0000000e+00, 2.9062500e+00,
        5.6250000e+00, 1.9937500e+01, 3.9531250e+01, 6.4687500e+01,
        6.4031250e+01, 5.4843750e+01, 5.3875000e+01, 6.1437500e+01,
        6.2593750e+01, 5.5968750e+01, 2.4500000e+01, 5.2500000e+00,
        2.2500000e+00, 1.8437500e+00, 1.8750000e-01, 1.2500000e-01,
        3.1250000e-02, 0.0000000e+00, 3.1250000e-02, 0

In [None]:

# Normalization-layer example on CIFAR-10 with every image flattened to one feature vector.
# NOTE(review): this rebinds y_train, which earlier held the Fashion MNIST training labels.
(x_train, y_train), _ = keras.datasets.cifar10.load_data()
print(x_train.shape)
x_train = x_train.reshape((len(x_train), -1))
print(x_train.shape)
input_shape = x_train.shape[1:] # [1:] skips the first (sample) axis, i.e. keeps all the features
print(input_shape)
classes = 10

# Create a Normalization layer and set its internal state using the training data
normalizer = preprocessing.Normalization()
normalizer.adapt(x_train)

# Create a model that includes the normalization layer
inputs = keras.Input(shape=input_shape)
x = normalizer(inputs)
outputs = layers.Dense(classes, activation="softmax")(x)
model = keras.Model(inputs, outputs)

# Train the model
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
model.fit(x_train, y_train)

(50000, 32, 32, 3)
(50000, 3072)
(3072,)


<tensorflow.python.keras.callbacks.History at 0x7f6ad356cd30>

In [None]:
# Take the images (labels dropped) from the first 100 training batches ...
sample_image_batches = train_set.take(100).map(lambda image, _label: image)
# ... and stack them along axis 0 into a single float32 array.
sample_images = np.concatenate(
    list(sample_image_batches.as_numpy_iterator()), axis=0)
sample_images = sample_images.astype(np.float32)
class Standardization(keras.layers.Layer):
    """Layer that standardizes its inputs with statistics set by adapt().

    adapt() must be called before the layer is used, because call() reads
    the means_ and stds_ attributes that adapt() assigns.
    """

    def adapt(self, data_sample):
        """Store the mean and standard deviation of data_sample over axis 0 (axis kept with size 1)."""
        reduce_kwargs = dict(axis=0, keepdims=True)
        self.means_ = np.mean(data_sample, **reduce_kwargs)
        self.stds_ = np.std(data_sample, **reduce_kwargs)

    def call(self, inputs):
        """Return (inputs - means_) / (stds_ + epsilon)."""
        centered = inputs - self.means_
        # Keras' epsilon is added to the standard deviation before dividing.
        scale = self.stds_ + keras.backend.epsilon()
        return centered / scale

# Custom layer: statistics computed over axis 0 of the sample images.
standardization = Standardization(input_shape=[28, 28])
standardization.adapt(sample_images)

# axis=(1,2) lists the axes that keep a separate mean/variance per index,
# so the statistics are reduced over the remaining axis (axis 0) only.
normalizer = preprocessing.Normalization(axis=(1,2))
normalizer.adapt(sample_images)

# Disabled sanity checks, kept as notes:
# print(normalizer.mean-standardization.means_)  # the same!! but the axes are given differently
# # layers Normalization requires passing the axes that will NOT be reduced
# print(tf.math.sqrt(normalizer.variance)- standardization.stds_)
# # also the same. So why are the results so different?

# Images from the first 20 training batches as one float32 array (a micro train set).
train_batches = train_set.take(20).map(lambda image, label: image)
train_ = np.concatenate(list(train_batches.as_numpy_iterator()),
                        axis=0).astype(np.float32)


In [None]:
standardization(train_)-normalizer(train_) # looks good, the values are practically equal to 0

In [None]:


def _build_classifier(input_layer):
    """Return a compiled classifier whose first layer is `input_layer`.

    Architecture: input_layer -> Flatten -> Dense(100, relu) -> Dense(10, softmax),
    compiled with sparse categorical cross-entropy, the "nadam" optimizer and
    the accuracy metric.
    """
    classifier = keras.models.Sequential([
        input_layer,
        keras.layers.Flatten(),
        keras.layers.Dense(100, activation="relu"),
        keras.layers.Dense(10, activation="softmax")
    ])
    classifier.compile(loss="sparse_categorical_crossentropy",
                       optimizer="nadam", metrics=["accuracy"])
    return classifier


# Same architecture twice; only the input-scaling layer differs.
model = _build_classifier(standardization)
model2 = _build_classifier(normalizer)

history = model.fit(train_set, epochs=5, validation_data=valid_set)

print("#####")

history2 = model2.fit(train_set, epochs=5, validation_data=valid_set)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
#####
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Reset the Keras backend state, then fix the NumPy and TensorFlow random seeds.
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

# NOTE(review): duplicates the Standardization class defined earlier in this notebook.
class Standardization(keras.layers.Layer):
    """Layer that standardizes its inputs with statistics set by adapt().

    call() reads the means_ and stds_ attributes, so adapt() has to run first.
    """

    def adapt(self, data_sample):
        """Record the mean and standard deviation of data_sample over axis 0 (axis kept with size 1)."""
        along_first_axis = dict(axis=0, keepdims=True)
        self.means_ = np.mean(data_sample, **along_first_axis)
        self.stds_ = np.std(data_sample, **along_first_axis)

    def call(self, inputs):
        """Return (inputs - means_) / (stds_ + epsilon)."""
        shifted = inputs - self.means_
        denominator = self.stds_ + keras.backend.epsilon()
        return shifted / denominator

standardization = Standardization(input_shape=[28, 28])
# or perhaps soon:
#standardization = keras.layers.Normalization()
from tensorflow.keras.layers.experimental import preprocessing
# axis=(1,2) keeps a separate mean/variance for every index on axes 1 and 2.
normalizer = preprocessing.Normalization(axis=(1,2))
# Adapt both scaling layers on the images of the first 100 training batches.
sample_image_batches = train_set.take(100).map(lambda image, label: image)
sample_images = np.concatenate(list(sample_image_batches.as_numpy_iterator()),
                               axis=0).astype(np.float32)
standardization.adapt(sample_images)
normalizer.adapt(sample_images)

# First model: inputs scaled by the custom Standardization layer, so that the
# adapted layer is actually used and the two runs compare different scalers.
model = keras.models.Sequential([
    standardization,
    keras.layers.Flatten(),
    keras.layers.Dense(100, activation="relu"),
    keras.layers.Dense(10, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy",
              optimizer="nadam", metrics=["accuracy"])

model.fit(train_set, epochs=5, validation_data=valid_set,
          callbacks=[])


# Second model: identical architecture, inputs scaled by the Keras Normalization layer.
model2 = keras.models.Sequential([
    normalizer,
    keras.layers.Flatten(),
    keras.layers.Dense(100, activation="relu"),
    keras.layers.Dense(10, activation="softmax")
])
model2.compile(loss="sparse_categorical_crossentropy",
              optimizer="nadam", metrics=["accuracy"])

model2.fit(train_set, epochs=5, validation_data=valid_set,
          callbacks=[])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f6ad371acc0>