# Tutorial V: Transfer Learning

<p>
Bern Winter School on Machine Learning, 2024<br>
Prepared by Mykhailo Vladymyrov and Matthew Vowels.
</p>

This work is licensed under a <a href="http://creativecommons.org/licenses/by-nc-sa/4.0/">Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License</a>.

In this session we will use the pretrained Inception model to build our own image classifier. We will also learn how to save our trained models.

## 1. Load necessary libraries

In [None]:
colab = True # set to True if running on Google Colab

In [20]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" 

import sys
import shutil
import tarfile

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.hub import download_url_to_file
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter

from PIL import Image

%load_ext tensorboard

#physical_devices = tf.config.experimental.list_physical_devices('GPU')
#tf.config.experimental.set_memory_growth(physical_devices[0], True)

### Download libraries

In [10]:
# Download the tutorial material archive into the working directory and unpack it there.
#if colab:
# Directory and file name are passed as separate arguments so that os.path.join
# inserts the path separator ('<cwd>/material.tgz', not '<cwd>material.tgz').
path = os.path.join(os.path.abspath('.'), 'material.tgz')
url = 'https://github.com/neworldemancer/BMLWS/raw/main/tut_files/tpub0320.tgz'

# Fetch the compressed file with the torch hub utility
download_url_to_file(url=url, dst=path)

# The context manager closes the archive even if extraction fails.
with tarfile.open(path, "r:gz") as tar:
    tar.extractall()

100%|██████████| 77.4M/77.4M [01:22<00:00, 988kB/s] 


AssertionError: 

## 2. Transfer learning

We load first an inception model with pretrained weights, without the final classifier:

In [15]:
# Print the docstring of the hub entry point to see which arguments it accepts.
print(torch.hub.help('pytorch/vision:v0.7.0', 'inception_v3'))


    Inception v3 model architecture from
    `Rethinking the Inception Architecture for Computer Vision <http://arxiv.org/abs/1512.00567>`_.

    .. note::
        **Important**: In contrast to the other models the inception_v3 expects tensors with a size of
        N x 3 x 299 x 299, so ensure your images are sized accordingly.

    Args:
        weights (:class:`~torchvision.models.Inception_V3_Weights`, optional): The
            pretrained weights for the model. See
            :class:`~torchvision.models.Inception_V3_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.Inception3``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/inception.py>`

Using cache found in C:\Users\newor/.cache\torch\hub\pytorch_vision_v0.7.0


In [5]:
# Load Inception v3 together with its pretrained weights.
# The entry point uses no pretrained weights by default (see its help text), so
# they are requested explicitly - a randomly initialised backbone would provide
# no features to transfer.
base_model = torch.hub.load('pytorch/vision:v0.7.0', 'inception_v3', pretrained=True)

Using cache found in C:\Users\newor/.cache\torch\hub\pytorch_vision_v0.7.0
Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to C:\Users\newor/.cache\torch\hub\checkpoints\inception_v3_google-0cc3c7bd.pth
100%|██████████| 104M/104M [03:42<00:00, 489kB/s]  


In [11]:
# List the attributes of the loaded model; its sub-modules appear among them.
dir(base_model)

['AuxLogits',
 'Conv2d_1a_3x3',
 'Conv2d_2a_3x3',
 'Conv2d_2b_3x3',
 'Conv2d_3b_1x1',
 'Conv2d_4a_3x3',
 'Mixed_5b',
 'Mixed_5c',
 'Mixed_5d',
 'Mixed_6a',
 'Mixed_6b',
 'Mixed_6c',
 'Mixed_6d',
 'Mixed_6e',
 'Mixed_7a',
 'Mixed_7b',
 'Mixed_7c',
 'T_destination',
 '__annotations__',
 '__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_apply',
 '_backward_hooks',
 '_backward_pre_hooks',
 '_buffers',
 '_call_impl',
 '_compiled_call_impl',
 '_forward',
 '_forward_hooks',
 '_forward_hooks_always_called',
 '_forward_hooks_with_kwargs',
 '_forward_pre_hooks',
 '_forward_pre_hooks_with_kwargs',
 '_get_backward_hooks

In [16]:
# Display the module tree of the network.
base_model

Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stri

In [21]:
# Log the model graph for TensorBoard by running the model once on a dummy input.
writer = SummaryWriter('runs/inspect_inception_v3')
xs = torch.zeros(1, 3, 299, 299)  # all-zero batch with a single 3 x 299 x 299 image
writer.add_graph(base_model, xs)
writer.close()

In [22]:
# Open TensorBoard on the log directory written by the cell above.
%tensorboard --logdir=runs/inspect_inception_v3

Launching TensorBoard...

And build a new model using it:

In [24]:
def fc_head(in_features, n_classes):
    """Build the small classification head placed on top of the backbone.

    Args:
        in_features: size of the feature vector fed into the head.
        n_classes: number of output classes (size of the logit vector).

    Returns:
        nn.Sequential: Linear(in_features, 64) -> Sigmoid -> Linear(64, n_classes).
    """
    hidden_width = 64
    layers = [
        nn.Linear(in_features, hidden_width),
        nn.Sigmoid(),
        nn.Linear(hidden_width, n_classes),
    ]
    return nn.Sequential(*layers)

In [25]:
# Freeze the backbone: none of its current parameters will receive gradients.
base_model.requires_grad_(False)

# Swap the existing fc layer for our 2-layer classification head.
# The head is created after freezing, so its parameters stay trainable.
n_classes = 2
in_features = base_model.fc.in_features
base_model.fc = fc_head(in_features, n_classes)

In [None]:
# Loss function: cross-entropy computed from the raw outputs (logits) of the head
criterion = nn.CrossEntropyLoss()
# Optimizer (e.g., Adam). All parameters are handed over; those frozen earlier
# (requires_grad=False) never get gradients and are therefore left untouched.
optimizer = optim.Adam(base_model.parameters(), lr=0.001)

## 3. Dataset

The Inception network is trained on natural images: things we see around every day, like sky, flowers, animals, buildings, cars.
It builds a hierarchy of features to describe what it sees.
These features can be used to train quickly on different classes of objects. E.g. [here](https://www.tensorflow.org/tutorials/image_retraining) are more examples of transfer learning.

Here you will see that these features can even be used to detect things very different from natural images. Namely, we will try to use them to distinguish German text from Italian. We will use 100 samples, taken from 5 German and 5 Italian books, 10 samples each.

In [None]:
def prepare_training_img(img, crop=True, resize=True, img_size=(256, 256)):
    """Turn an image array into a float32 RGB array scaled to the range [-1, 1].

    Args:
        img: numpy image of shape (H, W), (H, W, 1), (H, W, 3) or (H, W, 4).
            uint8 data is used as is; any other dtype is multiplied by 255
            (presumably float images with values in [0, 1] - verify against caller).
        crop: if True, cut out the largest centered square.
        resize: if True, resample to `img_size` with PIL (Lanczos filter).
        img_size: target (width, height) used when `resize` is True.

    Returns:
        np.ndarray: float32 array of shape (H, W, 3), channels last, with values
        `pixel / 127.5 - 1`. The input array is never modified.
        Note that the torch Inception model expects N x 3 x H x W tensors, so
        the channel axis has to be moved before feeding the result to it.
    """
    if img.dtype != np.uint8:
        # Scale a copy: an in-place `*=` would silently alter the caller's array.
        img = img.astype(np.float32) * 255.0

    if crop:
        side = min(img.shape[:2])
        r = (img.shape[0] - side) // 2
        c = (img.shape[1] - side) // 2
        cropped = img[r: r + side, c: c + side]
    else:
        cropped = img

    if resize:
        pil_input = cropped
        if pil_input.dtype != np.uint8:
            # PIL cannot build a multi-channel image from float data.
            pil_input = np.clip(np.rint(pil_input), 0, 255).astype(np.uint8)
        if pil_input.ndim == 3 and pil_input.shape[2] == 1:
            # PIL expects single-channel images without the trailing axis.
            pil_input = pil_input[..., 0]
        img_pil = Image.fromarray(pil_input)
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        img_pil = img_pil.resize(img_size, Image.LANCZOS)
        resized = np.array(img_pil.convert('RGB'))
    else:
        resized = cropped.copy()

    # Bring every input layout to exactly three channels.
    if resized.ndim == 2:
        resized = resized[..., np.newaxis]
    if resized.shape[2] == 4:
        resized = resized[..., :3]  # drop the alpha channel
    if resized.shape[2] == 1:
        resized = np.concatenate((resized, resized, resized), axis=2)

    resized = resized.astype(np.float32)

    # Inception-style preprocessing: map [0, 255] to [-1, 1]
    # (training_img_to_display undoes this with (img + 1) / 2).
    img_preproc = resized / 127.5 - 1.0
    return img_preproc

def training_img_to_display(img):
    """Map a preprocessed image from [-1, 1] back to [0, 1] for plotting."""
    shifted = img + 1
    return shifted / 2

In [None]:
# Human-readable class names; the position matches the one-hot label index
# used below ([1,0] -> German, [0,1] -> Italian).
text_label = ['German', 'Italian']

In [None]:
# Read all samples (5 books x 10 samples per language), preprocess them and
# attach one-hot labels: [1,0] = German, [0,1] = Italian.
images0, labels0 = [], []  # German
images1, labels1 = [], []  # Italian

book_sample_pairs = [(book, sample) for book in range(1, 6) for sample in range(1, 11)]
sources = (
    ('ML3/de/%d_%d.jpg', [1, 0], images0, labels0),
    ('ML3/it/%d_%d.jpg', [0, 1], images1, labels1),
)
for path_template, one_hot, image_list, label_list in sources:
    for book, sample in book_sample_pairs:
        img = plt.imread(path_template % (book, sample))
        # every sample must be at least 256x256 and carry a channel axis
        assert(img.shape[0]>=256 and img.shape[1]>=256 and len(img.shape)==3)
        image_list.append(prepare_training_img(img))
        label_list.append(list(one_hot))

# A single permutation is drawn and applied to both languages.
idx = np.random.permutation(len(labels0))
labels0, images0 = np.array(labels0)[idx], np.array(images0)[idx]
labels1, images1 = np.array(labels1)[idx], np.array(images1)[idx]

In [None]:
# Per language, the first 80% of the shuffled samples are used for training
# and the remaining 20% for validation.
n_half = images0.shape[0]
n_train_half = n_half*80//100
n_train = 2*n_train_half

train_part = slice(None, n_train_half)
valid_part = slice(n_train_half, None)

x_train = np.concatenate([images0[train_part], images1[train_part]])
y_train = np.concatenate([labels0[train_part], labels1[train_part]])

x_valid = np.concatenate([images0[valid_part], images1[valid_part]])
y_valid = np.concatenate([labels0[valid_part], labels1[valid_part]])

Let's see a sample:

In [None]:
# Show one preprocessed sample per language side by side (German left, Italian right).
_, axs = plt.subplots(1, 2, figsize=(10,10))
for panel, image_set in zip(axs, (images0, images1)):
    # bring the values from [-1, 1] back to [0, 1] so imshow can render them
    img_d = training_img_to_display(image_set[25])
    panel.imshow(img_d)
    panel.grid(False)
plt.show()

## 4. Training

The training is similar to what we saw previously.

Since the Inception model is big, this will take a while, even if we use GPUs. On your laptop CPU this would probably take ~15 times longer. And we are not training the whole Inception! We have just a small thing on top + a very small dataset!

We will use callback to save checkpoints on each iteration of training. They contain values of trainable variables.

In [None]:
# NOTE(review): this cell still uses the TensorFlow/Keras API, but the import
# cell of this notebook only imports PyTorch (`tf` is undefined) and no visible
# cell defines `model` - TODO port to PyTorch or restore the missing definitions.

# Checkpoint file name pattern; {epoch} is filled in with the epoch number.
save_path = 'save/text_{epoch}.ckpt'

batch_size=10
n_itr_per_epoch = len(x_train) // batch_size  # number of full batches in one epoch
save_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_path,
                                                   save_weights_only=True,
                                                   save_freq=1 * n_itr_per_epoch) # save every 1 epochs

# Train for 150 epochs, validating on the held-out 20% after each epoch.
hist = model.fit(x_train, y_train,
                 epochs=150, batch_size=batch_size, 
                 validation_data=(x_valid, y_valid),
                 callbacks=[save_callback])

In [None]:
# Learning curves: loss on the left panel, accuracy on the right panel.
fig, axs = plt.subplots(1, 2, figsize=(10,5))
panels = (
    (axs[0], 'loss', 'val_loss', ('training loss', 'validation loss')),
    (axs[1], 'accuracy', 'val_accuracy', ('training accuracy', 'validation accuracy')),
)
for panel, train_key, valid_key, legend_labels in panels:
    panel.plot(hist.epoch, hist.history[train_key])
    panel.plot(hist.epoch, hist.history[valid_key])
    panel.legend(legend_labels, loc='lower right')
plt.show()

We see that training accuracy hits 100% quickly. Why do you think it happens? Consider that loss keeps decreasing.
Also on such a small dataset our model overfits.

## 5. Load trained variables

If we have the model already created we can easily load the saved training variables values from a checkpoint:

In [None]:
# NOTE(review): `model` is a Keras model that no visible cell defines - TODO confirm.

# Early in training: restore the checkpoint named for epoch 5 and evaluate
# on a single Italian sample.
model.load_weights('save/text_5.ckpt')
model.evaluate(images1[:1],  labels1[:1], verbose=2)

# At the end of training: same evaluation with the checkpoint named for epoch 150.
model.load_weights('save/text_150.ckpt')
model.evaluate(images1[:1],  labels1[:1], verbose=2)

## 6. Saving for inference.

In tf2 it's easy to save a model for inference:

In [None]:
# Export the model in the TensorFlow SavedModel format into "inference_model/".
# NOTE(review): relies on `tf` and `model`, neither of which is defined in the visible cells.
tf.saved_model.save(model, "inference_model/")

## 7. Inference

In [None]:
# Load the exported model back and pick its default serving signature,
# which is the callable used for inference below.
mod = tf.saved_model.load('inference_model')
func = mod.signatures["serving_default"]

In [None]:
# The serving function returns a dict; this is the key of the model's only output.
output_name = model.output_names[0]  # single output
print(output_name)

In [None]:
# Run the serving function on one Italian sample and select the model output by name.
res = func(tf.constant(images1[:1]))[output_name]
print(res)

Or we can make a nice wrapper:

In [None]:
class Inferer:
    """Convenience wrapper that maps batches of images to language names.

    The saved model is loaded once; inputs longer than `max_len` are processed
    in chunks of at most `max_len` items and the results are concatenated.
    """

    def __init__(self, model_path, output_name):
        """Load the saved model from `model_path`; `output_name` is the key of its output."""
        self.mod = tf.saved_model.load(model_path)
        self.func = self.mod.signatures["serving_default"]
        self.output_name = output_name
        self.class_names = np.array(['german', 'italian'])
        self.max_len = 64  # largest batch passed to the model in one call

    def infere_class_batch(self, inputs):
        """Run the model on one batch; return (class indices, probability of that class)."""
        outputs = self.func(tf.constant(inputs))
        probabilities = outputs[self.output_name].numpy()
        classes = np.argmax(probabilities, axis=1)
        rows = np.arange(len(classes))
        # pick, for every row, the probability of the winning class
        return classes, probabilities[rows, classes]

    def infere_class(self, inputs):
        """Like `infere_class_batch`, but splits long inputs into chunks of `max_len`."""
        n = len(inputs)
        if n <= self.max_len:
            classes, probs = self.infere_class_batch(inputs)
            return classes, probs

        chunk_results = [
            self.infere_class_batch(inputs[start:start + self.max_len])
            for start in range(0, n, self.max_len)
        ]
        classes = np.concatenate([chunk_classes for chunk_classes, _ in chunk_results])
        probs = np.concatenate([chunk_probs for _, chunk_probs in chunk_results])
        return classes, probs

    def infere(self, inputs, prob=False):
        """Return the class names for `inputs`, plus the probabilities when `prob` is True."""
        classes, probs = self.infere_class(inputs)
        names = self.class_names[classes]
        if prob:
            return names, probs
        return names

In [None]:
# Wrap the exported model; `output_name` selects the model output to read.
inf = Inferer('inference_model', output_name)

In [None]:
# Predicted language names for the German samples.
inf.infere(images0)

In [None]:
# All samples in one array: German first, then Italian.
images_all = np.concatenate([images0, images1])

In [None]:
inf.infere(images_all, prob=True) # also output the probability of the predicted class

## 8. Improving the results

Often, as in this example, we don't have enough labeled data at hand. We need to use it as efficiently as possible.
One way to do this is to apply training data augmentation: we can slightly distort the data, e.g. rescale it, to effectively multiply the dataset.

We will generate rescaled images, minimum - to have smaller dimension equal 256, maximum - 130%. Let's define a function which will do this job:

In [None]:
def get_random_scaled_img(file, minsize = 256, scalemax=1.3):
    """Load an image, rescale it by a random factor and center-crop it.

    Args:
        file: path of the image file (anything `PIL.Image.open` accepts).
        minsize: side length of the returned square crop; also the smallest
            size either image side may have after rescaling.
        scalemax: upper end of the range the scale factor is drawn from. The
            lower end is the factor that brings the shorter side to `minsize`.

    Returns:
        np.ndarray: RGB array of shape (minsize, minsize, 3) cut from the
        center of the rescaled image.
    """
    # Image.open is lazy and keeps the file open; the context manager releases
    # the handle once the pixels have been resampled into a new image.
    with Image.open(file) as im:
        w, h = im.size
        # get minimal possible size
        scalemin = float(minsize) / min(w, h)
        # get a rescale factor from a uniform distribution.
        scale = scalemin + np.random.rand() * (scalemax - scalemin)
        w1 = int(max(minsize, scale*w))
        h1 = int(max(minsize, scale*h))

        # rescale with smoothing; Image.ANTIALIAS was removed in Pillow 10 and
        # LANCZOS is the same filter under its current name
        im1 = im.resize((w1, h1), Image.LANCZOS)

    # get numpy array from the PIL Image
    img_arr = np.array(im1.convert('RGB'))

    # crop to minsize x minsize, preventing further resize by prepare_training_img
    r = (img_arr.shape[0] - minsize) // 2
    c = (img_arr.shape[1] - minsize) // 2
    img_arr = img_arr[r:r+minsize, c:c+minsize]

    return img_arr

Let's check the rescaled images.

In [None]:
# Draw an n_smpl x n_smpl grid of independently rescaled versions of one German sample.
n_smpl=2
scaled_imgs=[get_random_scaled_img('ML3/de/%d_%d.jpg'%(1, 1)) for i in range(n_smpl**2)]
fig, ax = plt.subplots(n_smpl, n_smpl, figsize=(n_smpl*4, n_smpl*4))
for k, scaled in enumerate(scaled_imgs):
    row, col = divmod(k, n_smpl)
    panel = ax[col, row]
    panel.imshow(scaled)
    panel.grid(False)

Read again images, now generating 5 rescaled from each one.

In [None]:
# Rebuild the dataset with augmentation: every source image contributes
# `mult` randomly rescaled crops. Labels: [1,0] = German, [0,1] = Italian.
images0, labels0 = [], []  # German
images1, labels1 = [], []  # Italian

mult = 5
book_sample_pairs = [(book, sample) for book in range(1, 6) for sample in range(1, 11)]
sources = (
    ('ML3/de/%d_%d.jpg', [1, 0], images0, labels0),
    ('ML3/it/%d_%d.jpg', [0, 1], images1, labels1),
)
for path_template, one_hot, image_list, label_list in sources:
    for book, sample in book_sample_pairs:
        for itr in range(mult):
            img = get_random_scaled_img(path_template % (book, sample))
            # every crop must be at least 256x256 and carry a channel axis
            assert(img.shape[0]>=256 and img.shape[1]>=256 and len(img.shape)==3)
            image_list.append(prepare_training_img(img))
            label_list.append(list(one_hot))

# A single permutation is drawn and applied to both languages.
idx = np.random.permutation(len(labels0))
labels0, images0 = np.array(labels0)[idx], np.array(images0)[idx]
labels1, images1 = np.array(labels1)[idx], np.array(images1)[idx]

In [None]:
# Per language, the first 80% of the shuffled samples are used for training
# and the remaining 20% for validation.
n_half = images0.shape[0]
n_train_half = n_half*80//100
n_train = 2*n_train_half

train_part = slice(None, n_train_half)
valid_part = slice(n_train_half, None)

x_train = np.concatenate([images0[train_part], images1[train_part]])
y_train = np.concatenate([labels0[train_part], labels1[train_part]])

x_valid = np.concatenate([images0[valid_part], images1[valid_part]])
y_valid = np.concatenate([labels0[valid_part], labels1[valid_part]])

And finally do training again, same way. Just now we change the number of epochs: before we had 150, but now that we have 5 times more training data we'll do 60. While 60 > 150/5, it looks like it takes a bit more time to converge.
We use the same graph as before, `g2`, the one we can train.

In [None]:
# NOTE(review): this cell repeats the 80/20 split of the preceding code cell
# verbatim; running it again yields the same arrays.
# Per language: first 80% for training, remaining 20% for validation.
n_half = images0.shape[0]
n_train_half = n_half*80//100
n_train = 2*n_train_half

train_part = slice(None, n_train_half)
valid_part = slice(n_train_half, None)

x_train = np.concatenate([images0[train_part], images1[train_part]])
y_train = np.concatenate([labels0[train_part], labels1[train_part]])

x_valid = np.concatenate([images0[valid_part], images1[valid_part]])
y_valid = np.concatenate([labels0[valid_part], labels1[valid_part]])

In [None]:
# NOTE(review): this cell builds a Keras model, but `tf` is not imported in this
# notebook and `base_model` was loaded above as a PyTorch module - TODO port
# this cell to PyTorch or restore the Keras backbone.

# Input: 256 x 256 RGB images, channels last
x = tf.keras.layers.Input(shape=(256,256,3), dtype=tf.float32)

# Frozen backbone
base_model.trainable = False
base_out = base_model(x)

# Average the backbone output over the spatial axes
base_out_f = tf.keras.layers.GlobalAveragePooling2D()(base_out)

# Classification head: 64 sigmoid units followed by a 2-way softmax
h1 = tf.keras.layers.Dense(64, activation='sigmoid')(base_out_f)
h2 = tf.keras.layers.Dense(2, activation='softmax')(h1)

model_aug = tf.keras.Model(x, h2)

# Adam with learning rate 0.0005; one-hot labels call for categorical cross-entropy
model_aug.compile(optimizer=tf.keras.optimizers.Adam(0.0005,) ,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# NOTE(review): Keras training cell; `tf` is not imported in this notebook - TODO port.

# Checkpoint file name pattern; {epoch} is filled in with the epoch number.
save_path = 'save/text_augmented_{epoch}.ckpt'

batch_size=10
n_itr_per_epoch = len(x_train) // batch_size  # number of full batches in one epoch
save_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_path,
                                                   save_weights_only=True,
                                                   save_freq=1 * n_itr_per_epoch) # save every 1 epochs

# Train on the augmented data for 60 epochs, validating after each epoch.
hist = model_aug.fit(x_train, y_train,
                 epochs=60, batch_size=batch_size, 
                 validation_data=(x_valid, y_valid),
                 callbacks=[save_callback])

In [None]:
# Learning curves of the augmented run: loss on the left, accuracy on the right.
fig, axs = plt.subplots(1, 2, figsize=(10,5))
panels = (
    (axs[0], 'loss', 'val_loss', ('training loss', 'validation loss')),
    (axs[1], 'accuracy', 'val_accuracy', ('training accuracy', 'validation accuracy')),
)
for panel, train_key, valid_key, legend_labels in panels:
    panel.plot(hist.epoch, hist.history[train_key])
    panel.plot(hist.epoch, hist.history[valid_key])
    panel.legend(legend_labels, loc='lower right')
plt.show()

In [None]:
# Optionally restore an earlier checkpoint before exporting (left disabled):
# model_aug.load_weights('save/text_augmented_23.ckpt')
# Export the model trained on augmented data for inference.
tf.saved_model.save(model_aug, "inference_model_aug/")

We had a REEEALLY small dataset for such a complicated task. Does it really generalize? Maybe it just memorizes all the images we fed into it? Let's perform a test. `w1.jpg` and `w2.jpg` are text screenshots from Wikipedia in [Italian](https://it.wikipedia.org/wiki/Apprendimento_automatico) and [German](https://de.wikipedia.org/wiki/Maschinelles_Lernen).

In [None]:
# load images
im_wiki_1 = plt.imread('ML3/w1.jpg')
im_wiki_2 = plt.imread('ML3/w2.jpg')

# crop/covert for proper color range
im_wiki_1_p = prepare_training_img(im_wiki_1)[np.newaxis]
im_wiki_2_p = prepare_training_img(im_wiki_2)[np.newaxis]

In [None]:
# Wrap the exported augmented model; its single output is selected by name.
output_name = model_aug.output_names[0]
inf = Inferer('inference_model_aug', output_name)

In [None]:
# Classify both screenshots in a single batch
wiki_batch = np.concatenate([im_wiki_1_p, im_wiki_2_p])
class_name, prob = inf.infere(wiki_batch, prob=True)

print('probabilities for w1:', prob[0], 'detected language:', class_name[0])
print('probabilities for w2:', prob[1], 'detected language:', class_name[1])

# Show the image crops that were actually fed to the model
for crop_batch in (im_wiki_1_p, im_wiki_2_p):
    plt.imshow( training_img_to_display(crop_batch[0]))
    plt.show()



## 9. Exercises

Do in 4 groups (35 min), in the end present results (2 min/group)

### Option 1.

There is a serious problem in the example above: the training and validation datasets are not independent. We generated 5 randomly scaled images from each initial image. With high probability, of the 5 images generated from the same initial one, some will end up in the training dataset and some in the validation dataset. Since they are generated from the same initial image, they are not fully independent. This compromises the evaluation of the model, leading to an overestimate of its performance.

1. Modify the generation of the training and validation datasets to fulfil the requirement of independence.
2. Check how the validation accuracy and loss change.

Do not look at the solution to Option 1 ^_^

### Option 2.

In the situation when the model is likely to overfit, final performance would especially depend on the learning rate.

* Plot the best validation accuracy vs learning rate.
* Plot the number of epochs until the best validation accuracy vs learning rate.

Test learning rates within +/- 1.5 orders of magnitude, i.e. from 30 times smaller to 30 times larger learning rates, than the current one.

Get 3 replicates.

Use test/validation split from solution to Exercise 1.

### Option 3.

In the situation when the model is likely to overfit, final performance would especially depend on the model's architecture.

* Plot the best validation accuracy vs width of the first dense layer.
* Plot the number of epochs until the best validation accuracy vs width.

Test number of features in the first dense layer between 8 and 4096.

Get 3 replicates.

Use test/validation split from solution to Exercise 1.

### Option 4.

We tested one model as the backbone for transfer learning. Try other ones (at least 3): https://www.tensorflow.org/api_docs/python/tf/keras/applications.


* Plot the best validation accuracy vs model.
* Plot the number of epochs until the best validation accuracy vs model.
* Does any model generalize to wiki data?

Run 3 replicates for each model. Input size might vary for different models, adjust the crop size accordingly in parameters to `prepare_training_img`.

Use test/validation split from solution to Exercise 1.


## 10. Homework


For more information on transfer learning, see https://www.tensorflow.org/guide/keras/transfer_learning

So far we scaled images as a whole.
Which other augmentations would make sense for the text data?
Check https://www.tensorflow.org/tutorials/images/data_augmentation

## 11. Solutions

### Option 1

To prevent same rescaled versions of the same image ending up in both training and validation sets, we could split the dataset first

In [None]:
np.random.seed(42)  # fixed seed: all groups and runs get the same training/validation split

# list all (book, sample) pairs; both languages use the same 5 x 10 layout
book_sample_pairs = [(book, sample) for book in range(1,6) for sample in range(1,11)]
de_book_sample = np.array(book_sample_pairs)
it_book_sample = np.array(book_sample_pairs)

# draw one permutation of the sample indexes per language
n_half = len(de_book_sample)  # size of both datasets is identical
de_idx = np.random.permutation(n_half)
it_idx = np.random.permutation(n_half)

# shuffle the lists of samples
de_book_sample = de_book_sample[de_idx]
it_book_sample = it_book_sample[it_idx]

# split the SOURCE images (not the augmented crops): the first 80% of each
# language go to training, the remaining 20% to validation
n_train_half = n_half*80//100
n_train = n_train_half*2

de_book_sample_train, de_book_sample_valid = de_book_sample[:n_train_half], de_book_sample[n_train_half:]
it_book_sample_train, it_book_sample_valid = it_book_sample[:n_train_half], it_book_sample[n_train_half:]

x_train, y_train = [], []
x_valid, y_valid = [], []

mult = 10
# (file name pattern, one-hot label, source pairs, image list, label list);
# each pair [book,sample] feeds either the training or the validation set, never both
generation_plan = (
    ('ML3/de/%d_%d.jpg', [1,0], de_book_sample_train, x_train, y_train),  # German training
    ('ML3/it/%d_%d.jpg', [0,1], it_book_sample_train, x_train, y_train),  # Italian training
    ('ML3/de/%d_%d.jpg', [1,0], de_book_sample_valid, x_valid, y_valid),  # German validation
    ('ML3/it/%d_%d.jpg', [0,1], it_book_sample_valid, x_valid, y_valid),  # Italian validation
)
for itr in range(mult):
    for path_template, one_hot, pairs, x_out, y_out in generation_plan:
        for book, sample in pairs:
            img = get_random_scaled_img(path_template % (book, sample), scalemax=1.5)
            assert(img.shape[0]>=256 and img.shape[1]>=256 and len(img.shape)==3)
            x_out.append(prepare_training_img(img))
            y_out.append(list(one_hot))

# turn the collected lists into arrays
x_train, y_train = np.array(x_train), np.array(y_train)
x_valid, y_valid = np.array(x_valid), np.array(y_valid)