# Transfer learning guide (with examples for text and images in Keras and PyTorch)



Click the image below to read the post online.

<a target="_blank" href="https://www.machinelearningnuggets.com/transfer-learning-guide"><img src="https://digitalpress.fra1.cdn.digitaloceanspaces.com/mhujhsj/2022/07/logho-1.png" alt="Open in ML Nuggets"></a>

#### Keras

In [None]:
import tensorflow as tf
IMAGE_SIZE = 224  # height and width (in pixels) of the square input images

# Load MobileNetV3-Small with its ImageNet weights and the original
# 1000-class softmax classification head.
pretrained_model = tf.keras.applications.MobileNetV3Small(
    weights="imagenet",
    include_top=True,
    classes=1000,
    classifier_activation="softmax",
    input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
    input_tensor=None,
    alpha=1.0,
    pooling=None,
)

# Mark the whole model as non-trainable (frozen).
pretrained_model.trainable = False

# Uncomment to print a summary of the architecture.
# pretrained_model.summary()

#### TensorFlow Hub

In [None]:
import tensorflow as tf
import tensorflow_hub as hub
# Link to the pre-trained model on TensorFlow Hub. The handle points at
# MobileNetV3-Small, so the variable is named after that architecture.
mobilenet_v3_small = "https://tfhub.dev/google/imagenet/mobilenet_v3_small_100_224/classification/5"
# Define the model you want to acquire.
classifier_model = mobilenet_v3_small

IMAGE_SHAPE = 224  # height and width (in pixels) of the square input images

# Wrap the hub module in a Keras layer so it can be used like any other model.
classifier = tf.keras.Sequential([
    hub.KerasLayer(classifier_model, input_shape=(IMAGE_SHAPE, IMAGE_SHAPE, 3))
])
# The architecture summary is printed by the following cell.

In [None]:
# Print the layer-by-layer summary of the TensorFlow Hub classifier.
classifier.summary()

#### Word Embeddings

##### Glove

In [None]:
# download glove and unzip it in Notebook.
!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip glove*.zip

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
  
# Toy vocabulary. A list (not a set) keeps the iteration order fixed, so the
# tokenizer assigns the same index to each word on every run.
x = ['the', 'match', 'score', 'prime',
     'player', 'manager', 'league']

# Create the dict (word -> integer index).
tokenizer = Tokenizer()
tokenizer.fit_on_texts(x)

# Number of unique words in dict.
print("Number of unique words in dictionary=",
      len(tokenizer.word_index))
print("Dictionary is = ", tokenizer.word_index)
def embedding_for_vocab(filepath, word_index,
                        embedding_dim):
    """Build an embedding matrix for a vocabulary from a GloVe-style text file.

    Args:
        filepath: Path to a UTF-8 text file with one entry per line, written
            as ``word v1 v2 ... vN`` (whitespace separated).
        word_index: Mapping from word to its integer row index.
        embedding_dim: Number of leading vector components kept per word.

    Returns:
        A NumPy array of shape ``(len(word_index) + 1, embedding_dim)``.
        Row ``i`` holds the vector of the word mapped to ``i``; rows of words
        that do not occur in the file stay all-zero.
    """
    # Adding 1 because index 0 is reserved, so rows run 0..len(word_index).
    vocab_size = len(word_index) + 1
    embedding_matrix_vocab = np.zeros((vocab_size, embedding_dim))

    with open(filepath, encoding="utf8") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank line (e.g. trailing newline): nothing to parse.
                continue
            word, vector = fields[0], fields[1:]
            if word in word_index:
                idx = word_index[word]
                embedding_matrix_vocab[idx] = np.array(
                    vector, dtype=np.float32)[:embedding_dim]

    return embedding_matrix_vocab
  
  
# Build the embedding matrix for the tokenizer's vocabulary (word_index),
# keeping 50 components per word.
embedding_dim = 50
embedding_matrix_vocab = embedding_for_vocab(
    filepath='glove.6B.50d.txt',
    word_index=tokenizer.word_index,
    embedding_dim=embedding_dim,
)

# Row 1 belongs to the word that the tokenizer mapped to index 1.
print("Dense vector for first entry is => ", embedding_matrix_vocab[1])

##### Word2Vec

In [None]:
# Download the GoogleNews word2vec archive; -c continues a partial download.
# NOTE(review): the loading cell further down reads 'model.bin.gz', not this
# archive - confirm this download is still needed.
!wget -c "https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz"

In [None]:
!pip install wget

In [None]:
!wget http://vectors.nlpl.eu/repository/20/51.zip


In [None]:
#unzip
!unzip 51.zip

In [None]:
#download the model
!wget http://vectors.nlpl.eu/repository/20/51.zip
#unzip
!unzip 51.zip
#gzip the model for loading
!gzip model.bin

In [None]:
import gensim
from gensim.models import word2vec
from gensim.models import KeyedVectors
from sklearn.metrics.pairwise import cosine_similarity
EMBEDDING_FILE = 'model.bin.gz'
# Load the pre-trained vectors stored in the binary word2vec format.
word_vectors = KeyedVectors.load_word2vec_format(EMBEDDING_FILE, binary=True)

# Get the most similar words for "player" + "league" - "man".
result = word_vectors.most_similar(
    positive=['player', 'league'],
    negative=['man'],
)
# Each entry is a (key, similarity) pair; look at the first match.
most_similar_key, similarity = result[0]
print(f"{most_similar_key}: {similarity:.4f}")

In [None]:
# Query the loaded vectors again: words most similar to
# "player" + "league" - "man" (same query as in the loading cell).
result = word_vectors.most_similar(positive=['player', 'league'], negative=['man'])
most_similar_key, similarity = result[0]  # look at the first match
print(f"{most_similar_key}: {similarity:.4f}")

##### FastText

In [None]:
!pip install fasttext


In [None]:
!pip install gluonnlp
#mxnet
!pip install mxnet

In [None]:
import gluonnlp as nlp
# Create a word embedding instance by calling nlp.embedding.create with the
# 'fasttext' embedding name and the 'wiki.simple' source.
fasttext_model = nlp.embedding.create('fasttext', source='wiki.simple')

In [None]:
def tokenizer(source_str, token_delim=' ', seq_delim='\n'):
    """Utility function for tokenizing.

    Args:
        source_str: Text to split into tokens.
        token_delim: Regular-expression pattern that separates tokens
            (default: a single space).
        seq_delim: Regular-expression pattern that separates sequences
            (default: a newline).

    Returns:
        A lazy ``filter`` iterator over the non-empty tokens, in input order.
    """
    import re

    # Both delimiters are joined into one alternation pattern; filter(None, ...)
    # drops the empty strings produced by consecutive delimiters.
    tokens = filter(None, re.split(token_delim + '|' + seq_delim, source_str))
    return tokens
# Example sentence whose tokens form the vocabulary.
sentence = "The player scored twice during the match"
# Count how often each token produced by tokenizer() occurs.
counter = nlp.data.count_tokens(tokenizer(sentence))
#create vocabulary
vocab = nlp.Vocab(counter)
#attach embedding
vocab.set_embedding(fasttext_model)
#check the embedding vector

# Display the first five components of the vector stored for 'player'.
vocab.embedding['player'][:5]

#### Hugging Face

In [None]:
!pip install transformers sentencepiece


In [None]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline

# Dedicated names keep the gluonnlp module alias `nlp` and the `tokenizer()`
# helper from the FastText section usable after this cell has run.
ner_tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
ner_model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")

# Named-entity-recognition pipeline built from the pre-trained model.
ner_pipeline = pipeline("ner", model=ner_model, tokenizer=ner_tokenizer)
sentence = "The player scored twice during the match in Moscow and helped Brendan Rodgers manager win the league"

ner_results = ner_pipeline(sentence)
print(ner_results)


#### PyTorch

In [None]:
import torchvision

# Load ResNet-18 with pre-trained weights.
# NOTE(review): newer torchvision releases reportedly replace `pretrained=`
# with a `weights=` argument - confirm against the installed version.
model_conv = torchvision.models.resnet18(pretrained=True) 

#### Prediction

In [None]:
from keras.applications.vgg16 import VGG16
# Instantiate VGG16 with its default arguments.
model = VGG16()
# summary() prints the architecture itself and returns None, so wrapping it
# in print() would add a stray "None" line to the output.
model.summary()

In [None]:
import requests
image_url = "https://unsplash.com/photos/u_kMWN-BWyU/download?ixid=MnwxMjA3fDB8MXxhbGx8fHx8fHx8fHwxNjYyODMwNjAy&force=true"
# Fail fast on a hung connection or an HTTP error, so that an error page is
# never written to disk under the image's file name.
response = requests.get(image_url, timeout=30)
response.raise_for_status()
img_data = response.content
# Save the raw bytes of the downloaded picture.
with open('satyabratasm-u_kMWN-BWyU-unsplash.jpg', 'wb') as handler:
    handler.write(img_data)

In [None]:
from keras.applications.vgg16 import decode_predictions, preprocess_input
from keras.preprocessing.image import img_to_array, load_img

# Load the image from path, resized to 224x224.
path = 'satyabratasm-u_kMWN-BWyU-unsplash.jpg'
img = load_img(path, target_size=(224, 224))
# Convert the pixels to a numpy array and add a leading axis of size 1,
# reshaping the data for the pre-trained VGG model.
img = img_to_array(img)
img = img.reshape((1,) + img.shape)
# Transform the img for the pre-trained VGG model.
img = preprocess_input(img)

# Predict the probability for the output classes used in ImageNet.
yhat = model.predict(img)
# Convert the probabilities to class labels (top five) and keep the most
# likely output, i.e. the one with the highest probability.
label = decode_predictions(yhat, top=5)[0][0]
# Show the predicted class.
print('%s (%.2f%%)' % (label[1], label[2]*100))

In [None]:
import tensorflow as tf
from keras.applications.vgg16 import VGG16, preprocess_input
import numpy as np

from keras.preprocessing.image import img_to_array

# VGG16 with ImageNet weights and without its top (classification) layers.
model = VGG16(weights='imagenet', include_top=False)

image_path = 'satyabratasm-u_kMWN-BWyU-unsplash.jpg'
image = tf.keras.utils.load_img(image_path, target_size=(224, 224))

# Pixels -> array -> leading axis of size 1 -> VGG16 preprocessing.
image_data = preprocess_input(
    np.expand_dims(img_to_array(image), axis=0)
)

# Run the image through the network and report the shape of its output.
extracted_features = model.predict(image_data)

print(extracted_features.shape)

#### Fine-Tuning

In [None]:
# Begin by unfreezing all layers of the base model, then freeze everything
# again apart from the 5 last layers.
model.trainable = True
for frozen_layer in model.layers[:-5]:
    frozen_layer.trainable = False

# Retrain with a very low learning rate: 100 times below the base rate.
learning_rate = 1e-4
low_learning_rate = learning_rate / 100

# Recompile the model with the new learning rate.
model.compile(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=low_learning_rate),
    loss='binary_crossentropy',
    metrics=['acc'],
)


#### Transfer learning with image data

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

##### Getting the Dataset

In [None]:
import tensorflow_datasets as tfds

#tfds.disable_progress_bar()

# Load three disjoint slices of the "cats_vs_dogs" train split.
train_data, validation_data, test_data = tfds.load(
    "cats_vs_dogs",
    # Use 40% of the data for training, 10% for validation and 10% for test
    split=["train[:40%]", "train[40%:50%]", "train[50%:60%]"],
    as_supervised=True,  # Include labels
)

# Report how many samples ended up in each slice.
print("There are %d training samples" % tf.data.experimental.cardinality(train_data))
print(
    "There are %d validation samples" % tf.data.experimental.cardinality(validation_data)
)
print("There are %d test samples" % tf.data.experimental.cardinality(test_data))

In [None]:
#if the link below is broken, go to https://www.microsoft.com/en-us/download/confirmation.aspx?id=54765
#to obtain a new download link
!wget --no-check-certificate \
    "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip"
#remove previous files
!rm -rf PetImages
#unzip the dataset
!unzip -qq kagglecatsanddogs_5340.zip

##### Load dataset

In [None]:
from tensorflow.keras.preprocessing import image_dataset_from_directory
# Named `data_dir` rather than `dir` so the built-in dir() is not shadowed
# for the rest of the notebook.
data_dir = "PetImages/"
# Build a shuffled dataset of 150x150 images in batches of 32 from the
# directory tree.
data = image_dataset_from_directory(data_dir,
                                    shuffle=True,
                                    batch_size=32,
                                    image_size=(150, 150))

In [None]:
# Show four (image, label) pairs from the training data in a 2x2 grid.
fig, axes = plt.subplots(2, 2, figsize=(10, 10))
for ax, (img, label) in zip(axes.flat, train_data.take(4)):
    ax.imshow(img)
    ax.set_title(int(label))
    ax.axis("off")
fig.suptitle("Sample images (Cat :0, Dog:1)")
plt.show()

##### Data Preprocessing

In [None]:
size = (150, 150)


def _resize_images(dataset):
    """Return `dataset` with every image resized to `size`; labels pass through."""
    return dataset.map(lambda x, y: (tf.image.resize(x, size), y))


train_data = _resize_images(train_data)
validation_data = _resize_images(validation_data)
test_data = _resize_images(test_data)

Batch the data and cache it so that the dataset is not reloaded every time it is needed, which speeds up loading.

In [None]:
batch_size = 64

# Apply the same pipeline to every split: cache the samples, group them into
# batches of `batch_size`, and prefetch with a buffer of 10.
train_data, validation_data, test_data = (
    split.cache().batch(batch_size).prefetch(buffer_size=10)
    for split in (train_data, validation_data, test_data)
)

We have a small dataset; therefore, it is advisable to introduce sample diversity by applying random but realistic transformations to the training data. Some of the transformations for image data include:
1. Random horizontal flipping or small random rotations.
2. Gray-scaling
3. Shifts
4. Flips
5. Brightness
6. Zoom

Data augmentation helps to expose the model to different aspects of the training data which helps to prevent overfitting.

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

# Random transformations applied, in this order, to the images.
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"),  # flips images
    layers.RandomRotation(0.1),  # randomly rotates images
    # layers.RandomZoom(.5, .2),  # randomly zooms images
    layers.RandomFlip(mode="horizontal_and_vertical", seed=None),  # randomly flips images
])

In [None]:
import numpy as np

# Take one batch of training data and draw four augmented versions of the
# image at index 7 in a 2x2 grid.
for images, labels in train_data.take(1):
    plt.figure(figsize=(10, 10))
    first_image = images[7]
    # The augmentation model is called on a batch, so add a batch axis.
    single_image_batch = tf.expand_dims(first_image, 0)
    for i in range(4):
        ax = plt.subplot(2, 2, i + 1)
        augmented_image = data_augmentation(single_image_batch, training=True)
        plt.imshow(augmented_image[0].numpy().astype("int32"))
        plt.axis("off")
plt.suptitle("Sample preprocessed image")
plt.show();

##### Create a base model from the pre-trained Inception model

In [None]:
# InceptionV3 base for 150x150 RGB inputs.
base_model = keras.applications.InceptionV3(
    include_top=False,  # exclude the ImageNet classifier at the top
    input_shape=(150, 150, 3),
    weights="imagenet",  # load weights pre-trained on ImageNet
)

# Freeze the base_model.
base_model.trainable = False

##### Create the final dense layer

In [None]:

# Create new model on top of the frozen base model.
# The input is a 150x150 image with 3 channels.
inputs = keras.Input(shape=(150, 150, 3))
x = data_augmentation(inputs)  # Apply random data augmentation

# The pre-trained Inception weights require the input to be scaled
# from (0, 255) to a range of (-1., +1.); the Rescaling layer below
# performs that rescaling.
scale_layer = keras.layers.Rescaling(scale=1 / 127.5, offset=-1)
x = scale_layer(x)
# The base model is called with training=False.
x = base_model(x, training=False)
x = keras.layers.GlobalAveragePooling2D()(x)
x = keras.layers.Dropout(0.2)(x)  # Regularize with dropout
# Single output unit with no activation function specified.
outputs = keras.layers.Dense(1)(x)
model = keras.Model(inputs, outputs)

model.summary()

##### Train the model

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
!rm -rf image_logs
%load_ext tensorboard
log_folder = 'image_logs'
callbacks = [
            EarlyStopping(patience = 3),
            TensorBoard(log_dir=log_folder)
            ]

#compile the model to 
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=keras.metrics.BinaryAccuracy())
hist = model.fit(train_data,
                 epochs=5, 
                 validation_data = validation_data, callbacks = callbacks)

In [None]:
# Evaluate performance on test data.
loss, accuracy = model.evaluate(test_data)
# Scale to a percentage before rounding; rounding first and multiplying
# afterwards prints float artefacts such as 98.72999999999999.
print("Model accuracy:", round(accuracy * 100, 2))
print("Model loss:", round(loss, 4))

In [None]:
%reload_ext tensorboard
%tensorboard --logdir {'image_logs/'}

#### Fine-tuning the model

In [None]:
# Unfreeze the base model.
base_model.trainable = True
# Apart from the 10 last layers, freeze all the other layers. The loop runs
# over the layers of the pre-trained base model, which is what gets
# fine-tuned, not over the few layers of the outer model.
for layer in base_model.layers[:-10]:
    layer.trainable = False
model.summary()
# Define the learning rate.
learning_rate = 1e-5
# Recompile the model after changing the trainable flags.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate),  # Low learning rate
    loss=keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=[keras.metrics.BinaryAccuracy()],
)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
!rm -rf fine_tune_logs
%load_ext tensorboard
log_folder = 'fine_tune_logs'
callbacks = [
            EarlyStopping(patience = 5),
            TensorBoard(log_dir=log_folder)
            ]
epochs = 5
hist1 = model.fit(train_data,
          epochs=epochs,
          validation_data=validation_data,callbacks=callbacks)

In [None]:
# Evaluate performance on test data.
loss, accuracy = model.evaluate(test_data)
# Scale to a percentage before rounding; rounding first and multiplying
# afterwards prints float artefacts such as 98.72999999999999.
print("Fine-tuned model accuracy:", round(accuracy * 100, 2))
print("Fine-tuned model loss:", round(loss, 4))

In [None]:
%reload_ext tensorboard
%tensorboard --logdir {'fine_tune_logs/'}

#### Example of transfer learning with natural language processing

#### Pretrained word embeddings

In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, Dropout, SpatialDropout1D, GlobalAveragePooling1D
from tensorflow.keras.models import Sequential
import numpy as np
from sklearn.model_selection import train_test_split
import re
from tensorflow.keras.utils import to_categorical

In [None]:
!wget !wget https://archive.ics.uci.edu/ml/machine-learning-databases/00462/drugsCom_raw.zip
!unzip drugsCom_raw.zip

In [None]:
# Read the tab-separated data.
df = pd.read_csv('drugsComTrain_raw.tsv', sep='\t')
# Create the sentiment column: 1 when the rating, converted to an integer,
# is above 5, and 0 otherwise.
df['category'] = (df['rating'].astype(int) > 5).astype('int64')
# Keep the relevant variables only.
df = df[['review', 'category']].copy()
df.head()

##### Data Pre-processing

###### Tokenizing the words

In [None]:
import tensorflow as tf
max_features = 10000  # Maximum vocabulary size.
max_len = 100  # Sequence length to pad the outputs to.

# Text-to-integer-sequence layer.
vectorize_layer = tf.keras.layers.TextVectorization(
    standardize='lower_and_strip_punctuation',
    max_tokens=max_features,
    output_mode='int',
    output_sequence_length=max_len,
)
# Adapt the layer to the reviews.
vectorize_layer.adapt(df['review'].tolist(), batch_size=None)

In [None]:
#split the data into train and test sets
from sklearn.model_selection import train_test_split
X_t = list((df['review'].values))
y = to_categorical(df['category'])
# A fixed random_state makes the split (and everything trained on it)
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_t, y, test_size=0.30, random_state=42)
# Apply the vectorization layer to train and test.
X_train = vectorize_layer(X_train)
X_test = vectorize_layer(X_test)

In [None]:
# Number of whitespace-separated words in each review.
df['words in sentence'] = df['review'].map(lambda review: len(review.split()))
df.head()

##### Using GloVe Embeddings

In [None]:
#download glove embeddings
# download glove and unzip it in Notebook.
!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip glove*.zip

In [None]:
# Load your embeddings into a {word: vector} lookup.
embeddings_index = {}
# The encoding is given explicitly so that reading does not depend on the
# platform default, and the with-block closes the file even when parsing fails.
with open('glove.6B.100d.txt', encoding='utf8') as emb:
    for line in emb:
        values = line.split()
        # First field is the word, the remaining fields are its coefficients.
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs

print('There are %s word vectors.' % len(embeddings_index))

In [None]:
# Get the vocabulary of the vectorization layer.
voc = vectorize_layer.get_vocabulary()
# Create a word index: each token maps to its position in the vocabulary.
word_index = {token: position for position, token in enumerate(voc)}

In [None]:
# Preview the first ten (token, index) pairs instead of dumping the whole
# vocabulary into the cell output.
list(word_index.items())[:10]

##### Create embedding matrix

In [None]:
num_tokens = len(voc) + 2
embedding_dim = 100
hits = 0
misses = 0

# Prepare embedding matrix: one row per token index, initialised to zeros.
embedding_matrix = np.zeros((num_tokens, embedding_dim))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector
        hits += 1
    else:
        # Words not found in the embedding index keep their all-zeros row.
        misses += 1
# Report the coverage of the pre-trained vectors over the vocabulary.
print("Converted %d words (%d misses)" % (hits, misses))

In [None]:
# Display the embedding matrix: row i holds the vector found for the word
# with index i, or zeros when no vector was found.
embedding_matrix

##### Create the embedding layer

In [None]:
from tensorflow.keras.layers import Embedding
from tensorflow import keras

# Embedding layer initialised with the prepared embedding matrix and kept
# frozen (trainable=False).
pretrained_initializer = keras.initializers.Constant(embedding_matrix)
embedding_layer = Embedding(
    input_dim=num_tokens,
    output_dim=embedding_dim,
    trainable=False,
    embeddings_initializer=pretrained_initializer,
)

#### Create the model

In [None]:
# define model
from tensorflow.keras.layers import Flatten
model = Sequential()
# Integer token ids; sequences are padded to max_len by the vectorization layer.
model.add(keras.Input(shape=(max_len,), dtype="int64"))
# Use the embedding_matrix through the frozen `embedding_layer` created in
# the previous section. Its sizes come from num_tokens and embedding_dim
# rather than hard-coded copies (10002, 100) that can drift out of sync.
model.add(embedding_layer)
model.add(Bidirectional(LSTM(10, return_sequences=True, dropout=0.1, recurrent_dropout=0.1)))
model.add(Flatten())
# Two sigmoid outputs, matching the two-column one-hot labels.
model.add(Dense(2, activation='sigmoid'))
# Compile the model.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Summarize the model. summary() prints by itself and returns None, so it is
# not wrapped in print().
model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard

In [None]:
%load_ext tensorboard
!rm -rf embed_logs
log_folder = 'embed_logs'
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
#apply callbacks
callbacks = [
            EarlyStopping(patience = 3),
            TensorBoard(log_dir=log_folder)
            ]
#compile
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
num_epochs = 10
history = model.fit(X_train, y_train, epochs=num_epochs, validation_data=(X_test, y_test),callbacks=callbacks, batch_size = 2560)

10/45 [=====>........................] - ETA: 31s - loss: 0.5131 - accuracy: 0.7562

In [None]:
# Evaluate the trained model on the test split and report both values
# rounded to four decimals.
loss, accuracy = model.evaluate(X_test,y_test)
print('Test accuracy :', round(accuracy, 4))
print("Test Loss:", round(loss, 4))

In [None]:
%reload_ext tensorboard
%tensorboard --logdir {'embed_logs/'}

## Where to go from here
Follow us on [LinkedIn](https://www.linkedin.com/company/mlnuggets), [Twitter](https://twitter.com/ml_nuggets), [GitHub](https://github.com/mlnuggets) and subscribe to our [blog](https://www.machinelearningnuggets.com/#/portal) so that you don't miss a new issue.