# TensorFlow baseline Implementation

In this notebook I present a baseline approach to this problem using TensorFlow. But that's not all.
I have written the solution following an **object-oriented approach** and a few **design patterns** as well. I have tried to avoid procedural code wherever possible.

I am sure almost everyone is aware of the benefits of writing code using the object-oriented paradigm instead of the procedural paradigm. I highly recommend everyone go through this notebook, as I am sure it will be a *great learning experience* (I say that because it surely was for me while writing this notebook), even more so for beginners in Data Science and object-oriented design patterns.

**Please remember to upvote the notebook if you like the content. It is always a great motivator to write such notebooks in the future ;) !!**

I have taken a lot of help from Notebook [**here**](https://www.kaggle.com/ammarali32/molecular-translation-simple-training-starter/notebook).

I would very much appreciate if anyone has any **suggestions** on how I can **improve** this a bit further.

Thanks in Advance!!

**Note**: The class names used in the code are intended to be intuitive and easy to understand; just go through them once or twice. If you still don't understand something, do let me know in the comments.

In [None]:
import numpy as np
import pandas as pd

import cv2

import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import collections
import random
import re
import numpy as np
import os
import time
import json
from glob import glob
from PIL import Image
import pickle

In [None]:
class CFG:
    """Central configuration: hyper-parameters, file locations and the shared tokenizer."""

    # --- training schedule / input pipeline ---
    EPOCHS = 8
    BATCH_SIZE = 64
    BUFFER_SIZE = 1000  # shuffle buffer size used when building the dataset
    num_steps = 75      # divisor applied to the summed loss when an epoch is reported

    # --- vocabulary ---
    top_k = 5000
    vocab_size = top_k + 1

    # --- model sizes ---
    embedding_dim = 256
    units = 512
    features_shape = 2048
    attention_features_shape = 64

    # --- locations ---
    TRAIN_LABELS_PATH = "../input/bms-molecular-translation/train_labels.csv"
    # Only the *length* of this prefix is used when feature file names are derived.
    Data_path = '../input/bms-molecular-translation/train/0/0/0/'
    checkpoint_path = "./checkpoints/train"

    # One tokenizer object shared by everything that reads CFG.tokenizer.
    tokenizer = tf.keras.preprocessing.text.Tokenizer(
        num_words=top_k,
        oov_token="<unk>",
        filters='!"#$%&*+.-;?@[]^`{}~ ',
    )

In [None]:
class Attention(tf.keras.Model):
    """Additive attention that weights feature positions by the decoder state."""

    def __init__(self, units):
        """Create the two projection layers and the single-unit scoring layer."""
        super().__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, features, hidden):
        """Return ``(context_vector, attention_weights)`` for one decoding step.

        assumes features is (batch, positions, dim) and hidden is
        (batch, units) -- TODO confirm against the caller.
        """
        # Insert an axis at position 1 so the state broadcasts against features.
        expanded_hidden = tf.expand_dims(hidden, 1)

        projected = self.W1(features) + self.W2(expanded_hidden)
        score = self.V(tf.nn.tanh(projected))

        # Normalise the scores along axis 1, then sum the weighted features
        # over that same axis.
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = tf.reduce_sum(attention_weights * features, axis=1)
        return context_vector, attention_weights

In [None]:
class CNN_Encoder(tf.keras.Model):
    """Projects its input through one dense layer followed by ReLU."""

    def __init__(self, embedding_dim):
        """Create the dense projection with ``embedding_dim`` output units."""
        super().__init__()
        self.fc = tf.keras.layers.Dense(embedding_dim)

    def call(self, x):
        """Return ``relu(fc(x))``."""
        return tf.nn.relu(self.fc(x))

In [None]:
class RNN_Decoder(tf.keras.Model):
    """GRU decoder that attends over encoder features at every step."""

    def __init__(self, embedding_dim, units, vocab_size):
        """Create the embedding, GRU, two dense layers and the attention module."""
        super().__init__()
        self.units = units

        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(
            self.units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer='glorot_uniform',
        )
        self.fc1 = tf.keras.layers.Dense(self.units)
        self.fc2 = tf.keras.layers.Dense(vocab_size)

        self.attention = Attention(self.units)

    def call(self, x, features, hidden):
        """Run one decoding step.

        Returns ``(logits, state, attention_weights)`` where ``state`` is the
        GRU's returned state. ``hidden`` is consumed by the attention module
        only; the GRU itself is called without an explicit initial state.
        """
        context_vector, attention_weights = self.attention(features, hidden)

        embedded = self.embedding(x)
        # Prepend an axis at position 1 to the context vector so it can be
        # concatenated with the embedded token along the last axis.
        context_step = tf.expand_dims(context_vector, 1)
        gru_input = tf.concat([context_step, embedded], axis=-1)

        output, state = self.gru(gru_input)

        projected = self.fc1(output)
        # Collapse the leading axes so fc2 receives a 2-D tensor.
        flattened = tf.reshape(projected, (-1, projected.shape[2]))
        logits = self.fc2(flattened)
        return logits, state, attention_weights

    def reset_state(self, batch_size):
        """Return an all-zero tensor of shape ``(batch_size, units)``."""
        return tf.zeros((batch_size, self.units))

In [None]:
class getDataframe:
    """Singleton-style holder for the training-labels dataframe.

    The labels CSV (``CFG.TRAIN_LABELS_PATH``) is parsed at most once. Both
    access styles work and share the same dataframe object:

    * ``getDataframe.getInstance()``
    * ``getDataframe().getInstance()``
    """

    __instance = None

    @staticmethod
    def getInstance():
        """Return the shared instance, creating it on first use."""
        if getDataframe.__instance is None:
            # The constructor registers itself as the shared instance.
            getDataframe()
        return getDataframe.__instance

    def __init__(self):
        """Load the labels, or reuse the ones already held by the shared instance."""
        cached = getDataframe.__instance
        if cached is not None:
            # Constructing the holder again must not re-read the CSV; share
            # the dataframe that is already in memory instead.
            self.df_train_labels = cached.df_train_labels
            return

        self.df_train_labels = pd.read_csv(CFG.TRAIN_LABELS_PATH, index_col=0)
        getDataframe.__instance = self

In [None]:
# Fetch the labels dataframe held by the getDataframe holder; the bare name on
# the last line is what the notebook cell renders as its output.
df_train_labels_new = getDataframe().getInstance().df_train_labels
df_train_labels_new

In [None]:
class Visualize:
    """Base class for the image visualizers; subclasses override ``visualize_images``."""

    def __init__(self):
        """Nothing to set up in the base class."""
        pass

    def visualize_images(self):
        """Placeholder hook; does nothing and returns ``None``.

        Takes ``self`` like the subclass overrides do, so calling it on an
        instance no longer raises ``TypeError``.
        """
        pass

In [None]:
class Visualize_batch_images(Visualize):
    """Plot several training images in one figure, titled with truncated labels."""

    def __init__(self, path, image_ids, labels):
        """Store the image ids and their labels.

        ``path`` is stored but not used when resolving image locations;
        those are derived from the image id.
        """
        super(Visualize_batch_images, self).__init__()
        self.path = path
        self.image_ids = image_ids
        self.labels = labels

    def convert_image_id_2_path(self, image_id: str) -> str:
        """Build the training-image path from the first three characters of the id."""
        return "../input/bms-molecular-translation/train/{}/{}/{}/{}.png".format(
            image_id[0], image_id[1], image_id[2], image_id
        )

    def visualize_images(self):
        """Draw every (image, label) pair on a 3-column grid and show the figure.

        Raises:
            FileNotFoundError: if an image file cannot be read.
        """
        pairs = list(zip(self.image_ids, self.labels))

        # Keep the 3x3 layout for up to nine images and add rows beyond that,
        # so larger batches do not overflow the subplot grid.
        n_cols = 3
        n_rows = max(3, -(-len(pairs) // n_cols))  # ceiling division

        plt.figure(figsize=(16, 12))

        for ind, (image_id, label) in enumerate(pairs):
            plt.subplot(n_rows, n_cols, ind + 1)

            image_path = self.convert_image_id_2_path(image_id)
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise FileNotFoundError(f"Could not read image: {image_path}")
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            plt.imshow(image)
            plt.title(f"{label[:30]}...", fontsize=10)
            plt.axis("off")

        plt.show()

In [None]:
class Visualize_single_image(Visualize):
    """Plot one training image with its full label as the title."""

    def __init__(self, path, image_id, label):
        """Store the image id and its label.

        ``path`` is stored but not used when resolving the image location;
        that is derived from the image id.
        """
        super(Visualize_single_image, self).__init__()
        self.path = path
        self.image_id = image_id
        self.label = label

    def convert_image_id_2_path(self, image_id: str) -> str:
        """Build the training-image path for ``image_id``.

        The argument is now honoured; previously it was ignored in favour
        of ``self.image_id``.
        """
        return "../input/bms-molecular-translation/train/{}/{}/{}/{}.png".format(
            image_id[0], image_id[1], image_id[2], image_id
        )

    def visualize_images(self):
        """Read the stored image, convert BGR to RGB and show it.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        plt.figure(figsize=(10, 8))

        image_path = self.convert_image_id_2_path(self.image_id)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        plt.imshow(image)
        plt.title(f"{self.label}", fontsize=14)
        plt.axis("off")

        plt.show()

In [None]:
# Illustrating the working of Visualizing class
sample_row = getDataframe().getInstance().df_train_labels.sample(5)

# Walk the (image_id, InChI) pairs directly. Indexing the Series with an
# integer (`sample_row["InChI"][i]`) relies on the positional fallback for a
# string-labelled index, which pandas has deprecated.
for image_id, inchi in sample_row["InChI"].items():

    viz_object = Visualize_single_image('', image_id, inchi)
    viz_object.visualize_images()

In [None]:
# Illustrating the working of Batch Visualizing class
sample_row = getDataframe().getInstance().df_train_labels.sample(5)

img_ids = list(sample_row.index)
labels = list(sample_row["InChI"])

# One call draws the whole batch in a single figure; looping here only
# repeated the identical figure.
viz_object = Visualize_batch_images('', img_ids, labels)
viz_object.visualize_images()

In [None]:
class Helper:
    """Stateless helpers for image loading, path building and caption splits.

    Every helper is a ``staticmethod`` so it can be called either as
    ``Helper.name(...)`` (the style used throughout the notebook) or on an
    instance.
    """

    @staticmethod
    def load_image(image_path):
        """Read one image file and preprocess it for MobileNetV2.

        Returns:
            ``(img, image_path)`` where ``img`` has been resized to 224x224
            and passed through the MobileNetV2 ``preprocess_input``.
        """
        img = tf.io.read_file(image_path)
        # NOTE(review): paths built by convert_image_id_2_path end in ".png"
        # while this calls decode_jpeg -- confirm the op accepts PNG input in
        # the TensorFlow version in use.
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.image.resize(img, (224, 224))
        img = tf.keras.applications.mobilenet_v2.preprocess_input(img)
        return img, image_path

    @staticmethod
    def map_func(img_name, cap):
        """Load the cached ``.npy`` features for ``img_name`` and pair them with ``cap``.

        ``img_name`` is a bytes path; ``len(CFG.Data_path)`` leading characters
        and the 4-character extension are stripped to obtain the file stem.
        """
        img_tensor = np.load('features/' + img_name.decode('utf-8')[len(CFG.Data_path):-4] + '.npy')
        return img_tensor, cap

    @staticmethod
    def convert_image_id_2_path(image_id: str) -> str:
        """Build the training-image path from the first three characters of the id."""
        return "../input/bms-molecular-translation/train/{}/{}/{}/{}.png".format(
            image_id[0], image_id[1], image_id[2], image_id
        )

    @staticmethod
    def Train_image_paths(populateImageCaption_Path, limit=5500):
        """Return the first ``limit`` image paths (keys) of the path-to-caption mapping.

        Args:
            populateImageCaption_Path: mapping of image path to caption list.
            limit: how many paths to keep; defaults to the 5500 used so far.
        """
        image_paths = list(populateImageCaption_Path.keys())
        # Only a prefix of the data is used for now, in the mapping's own
        # order (no shuffling).
        return image_paths[:limit]

    @staticmethod
    def Image_name_vector(train_image_paths, image_path_to_caption=None):
        """Flatten captions and repeat each image path once per caption.

        Args:
            train_image_paths: image paths to include, in order.
            image_path_to_caption: optional prebuilt mapping of path to
                caption list. When omitted it is rebuilt from the shared
                labels dataframe, as before.

        Returns:
            ``(train_captions, img_name_vector)``, two parallel lists.
        """
        if image_path_to_caption is None:
            image_path_to_caption = populateImageCaptionPath(
                getDataframe().getInstance().df_train_labels
            ).populate()

        train_captions = []
        img_name_vector = []
        for image_path in train_image_paths:
            caption_list = image_path_to_caption[image_path]
            train_captions.extend(caption_list)
            img_name_vector.extend([image_path] * len(caption_list))

        return train_captions, img_name_vector

    @staticmethod
    def calc_max_length(tensor):
        """Return the length of the longest element of ``tensor``."""
        return max(len(t) for t in tensor)

    @staticmethod
    def _group_captions_by_image(df_train_labels):
        """Return ``{image_path: [caption vectors]}`` for the training subset."""
        image_path_to_caption = populateImageCaptionPath(df_train_labels).populate()
        train_image_paths = Helper.Train_image_paths(image_path_to_caption)
        # Reuse the mapping built above instead of rebuilding it from the labels.
        train_captions, img_name_vector = Helper.Image_name_vector(
            train_image_paths, image_path_to_caption
        )

        cap_vector = populateCapVector(
            df_train_labels, image_path_to_caption, train_captions, train_image_paths
        ).populate()

        img_to_cap_vector = collections.defaultdict(list)
        for img, cap in zip(img_name_vector, cap_vector):
            img_to_cap_vector[img].append(cap)
        return img_to_cap_vector

    @staticmethod
    def _flatten_split(img_to_cap_vector, img_keys):
        """Expand the selected images into parallel ``(captions, image_names)`` lists."""
        img_names = []
        caps = []
        for img in img_keys:
            vectors = img_to_cap_vector[img]
            img_names.extend([img] * len(vectors))
            caps.extend(vectors)
        return caps, img_names

    @staticmethod
    def create_cap_val(df_train_labels):
        """Return ``(cap_val, img_name_val)``: the last 20% of the images.

        Uses the same unshuffled key order as ``create_cap_train`` so the two
        splits are complementary and never overlap.
        """
        img_to_cap_vector = Helper._group_captions_by_image(df_train_labels)
        img_keys = list(img_to_cap_vector.keys())
        slice_index = int(len(img_keys) * 0.8)
        return Helper._flatten_split(img_to_cap_vector, img_keys[slice_index:])

    @staticmethod
    def create_cap_train(df_train_labels):
        """Return ``(cap_train, img_name_train)``: the first 80% of the images."""
        img_to_cap_vector = Helper._group_captions_by_image(df_train_labels)
        img_keys = list(img_to_cap_vector.keys())
        slice_index = int(len(img_keys) * 0.8)
        return Helper._flatten_split(img_to_cap_vector, img_keys[:slice_index])


In [None]:
class Load:
    """Common base for loader objects; subclasses supply the real ``load``."""

    def __init__(self):
        """Nothing to set up in the base class."""

    def load(self):
        """Placeholder that does nothing and returns ``None``."""
        return None

In [None]:
class LoadImage(Load):
    """Loader that reads one image file and preprocesses it for MobileNetV2."""

    def __init__(self, path):
        """Remember the default image path used by ``load``."""
        super(LoadImage, self).__init__()
        self.path = path

    def load(self, path=None):
        """Read, decode, resize (224x224) and preprocess an image.

        Args:
            path: image file to read. When omitted, the path given to the
                constructor is used, so ``load()`` matches the base-class
                signature.

        Returns:
            ``(img, path)``.
        """
        if path is None:
            path = self.path

        img = tf.io.read_file(path)
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.image.resize(img, (224, 224))
        img = tf.keras.applications.mobilenet_v2.preprocess_input(img)

        return img, path

In [None]:
class LoadModel(Load):
    """Loader for the pretrained image model."""

    def load(self):
        """Return MobileNetV2 built with ``include_top=False`` and ImageNet weights."""
        return tf.keras.applications.MobileNetV2(
            weights='imagenet',
            include_top=False,
        )

In [None]:
class populateData:
    """Base class for the ``populate*`` builders; subclasses override ``populate``."""

    def __init__(self):
        """Nothing to set up in the base class."""
        pass

    def populate(self):
        """Placeholder hook; does nothing and returns ``None``.

        Takes ``self`` like the subclass overrides do, so calling it on an
        instance no longer raises ``TypeError``.
        """
        pass

In [None]:
class populateImageCaptionPath(populateData):
    """Builds the mapping from image path to its list of InChI captions."""

    def __init__(self, df_train_labels):
        """Store the labels dataframe (index = image id, column ``InChI``)."""
        super().__init__()
        self.df_train_labels = df_train_labels

    def populate(self):
        """Return a ``defaultdict(list)`` of ``{image_path: [caption, ...]}``.

        Rows are visited in dataframe order, so the mapping's key order
        follows the dataframe.
        """
        image_path_to_caption = collections.defaultdict(list)

        # Walk ids and captions in lockstep; this avoids a positional `.iloc`
        # lookup per row, which is slow on a large labels frame.
        labels = self.df_train_labels
        for image_id, caption in zip(labels.index, labels['InChI']):
            image_path = Helper.convert_image_id_2_path(image_id)
            image_path_to_caption[image_path].append(caption)

        return image_path_to_caption

In [None]:
class populateCapVector(populateData):
    """Tokenizes the training captions and pads them into one matrix."""

    def __init__(self, df_train_labels, image_path_to_caption, train_captions, train_image_paths):
        """Store the inputs; only ``train_captions`` is read by ``populate``."""
        super().__init__()
        self.tokenizer = CFG.tokenizer
        self.df_train_labels = df_train_labels
        self.image_path_to_caption = image_path_to_caption
        self.train_image_paths = train_image_paths
        self.train_captions = train_captions

    def populate(self):
        """Fit the shared tokenizer and return the post-padded caption matrix.

        Side effect: ``CFG.tokenizer`` is fitted on ``train_captions`` and gets
        a ``<pad>`` entry at index 0.
        """
        self.tokenizer.fit_on_texts(self.train_captions)
        self.tokenizer.word_index['<pad>'] = 0
        self.tokenizer.index_word[0] = '<pad>'

        # Create the tokenized vectors
        train_seqs = self.tokenizer.texts_to_sequences(self.train_captions)

        # With no maxlen given, pad_sequences pads every row to the longest
        # sequence, so no separate max-length pass is needed.
        cap_vector = tf.keras.preprocessing.sequence.pad_sequences(train_seqs, padding='post')

        return cap_vector

In [None]:
!mkdir -p features/

In [None]:
class extractFeatures:
    """Base class for feature extractors; subclasses supply the real ``extract``."""

    def __init__(self, df_train_labels):
        """Accept the labels dataframe; the base class does not store it."""

    def extract(self):
        """Placeholder that does nothing and returns ``None``."""
        return None

In [None]:
class extractFeatures_images(extractFeatures):
    """Runs the pretrained image model over the training images and caches the features."""

    def __init__(self, df_train_labels):
        """Store the labels dataframe used to pick the images."""
        self.df_train_labels = df_train_labels

    def extract(self):
        """Write one ``features/<stem>.npy`` file per training image.

        The output directory is created here if missing, so this method does
        not depend on a separate shell cell having been run first.
        """
        image_model = LoadModel().load()
        # Feature extractor: the model's input mapped to its last layer's output.
        image_features_extract_model = tf.keras.Model(
            image_model.input, image_model.layers[-1].output
        )

        # Unique image paths of the training subset, in sorted order.
        image_path_to_caption = populateImageCaptionPath(self.df_train_labels).populate()
        train_image_paths = Helper.Train_image_paths(image_path_to_caption)
        encode_train = sorted(set(Helper.Image_name_vector(train_image_paths)[1]))

        os.makedirs('features', exist_ok=True)

        image_dataset = tf.data.Dataset.from_tensor_slices(encode_train)
        image_dataset = image_dataset.map(
            Helper.load_image, num_parallel_calls=tf.data.AUTOTUNE
        ).batch(16)

        # Loop-invariant: number of leading characters stripped from each path.
        prefix_len = len(CFG.Data_path)

        for img, path in image_dataset:
            batch_features = image_features_extract_model(img)
            # Collapse the middle axes into one, keeping axis 0 and axis 3.
            batch_features = tf.reshape(
                batch_features,
                (batch_features.shape[0], -1, batch_features.shape[3]),
            )

            for bf, p in zip(batch_features, path):
                # Drop the directory prefix and the 4-character extension.
                path_of_feature = 'features/' + p.numpy().decode("utf-8")[prefix_len:-4]
                np.save(path_of_feature, bf.numpy())

In [None]:
# Run the feature-extraction pass over the labels dataframe; extract() saves
# its results under features/ and returns nothing.
extract_obj = extractFeatures_images(getDataframe().getInstance().df_train_labels)
extract_obj.extract()

In [None]:
class populateDataset(populateData):
    """Builds the batched ``tf.data`` pipeline of (cached features, caption) pairs."""

    def __init__(self, df_train_labels):
        """Compute the training captions and their image names up front."""
        self.df_train_labels = df_train_labels
        captions, image_names = Helper.create_cap_train(self.df_train_labels)
        self.cap_train = captions
        self.img_name_train = image_names

    def map_funct(self, img_name, cap):
        """Load the cached ``.npy`` features for ``img_name`` and return them with ``cap``."""
        stem = img_name.decode('utf-8')[len(CFG.Data_path):-4]
        feature_file = 'features/' + stem + '.npy'
        return np.load(feature_file), cap

    def populate(self):
        """Return the mapped, shuffled, batched and prefetched dataset."""

        def _load_pair(item1, item2):
            # np.load is plain Python, so it is wrapped in tf.numpy_function
            # to run inside the dataset pipeline.
            return tf.numpy_function(
                self.map_funct, [item1, item2], [tf.float32, tf.int32])

        pairs = (self.img_name_train, self.cap_train)
        dataset = tf.data.Dataset.from_tensor_slices(pairs)
        dataset = dataset.map(_load_pair, num_parallel_calls=tf.data.AUTOTUNE)
        dataset = dataset.shuffle(CFG.BUFFER_SIZE)
        dataset = dataset.batch(CFG.BATCH_SIZE)
        return dataset.prefetch(buffer_size=tf.data.AUTOTUNE)

In [None]:
# Count the entries currently present in features/.
!ls features/ | wc -l

In [None]:
class checkpoint:
    """Bundles an encoder, decoder and optimizer with a ``CheckpointManager``."""

    def __init__(self, encoder=None, decoder=None, optimizer=None):
        """Track the given objects, or create fresh ones when omitted.

        Args:
            encoder: encoder to checkpoint; defaults to a new ``CNN_Encoder``.
            decoder: decoder to checkpoint; defaults to a new ``RNN_Decoder``.
            optimizer: optimizer to checkpoint; defaults to a new Adam.

        Pass the objects that are actually being trained. A manager built
        from the defaults only ever saves the freshly created ones.
        """
        self.encoder = encoder if encoder is not None else CNN_Encoder(CFG.embedding_dim)
        self.decoder = (
            decoder if decoder is not None
            else RNN_Decoder(CFG.embedding_dim, CFG.units, CFG.vocab_size)
        )
        self.optimizer = optimizer if optimizer is not None else tf.keras.optimizers.Adam()

    def return_ckpt_manager(self):
        """Return a ``CheckpointManager`` at ``CFG.checkpoint_path`` keeping 5 checkpoints."""
        ckpt = tf.train.Checkpoint(encoder=self.encoder,
                                   decoder=self.decoder,
                                   optimizer=self.optimizer)

        return tf.train.CheckpointManager(ckpt, CFG.checkpoint_path, max_to_keep=5)

In [None]:
class Training:
    """Owns the encoder, decoder and optimizer and runs the training loop.

    Attributes set by ``__init__``:
        encoder, decoder, optimizer: the objects being trained.
        ckpt, ckpt_manager: checkpoint and manager tracking those same objects.
        start_epoch: first epoch index used by ``train`` (0 unless restored).
        loss_plot: per-epoch mean losses appended by ``train``.
    """

    def __init__(self, df_train_labels):
        """Build the models, the optimizer and a checkpoint manager tracking them."""
        self.encoder = CNN_Encoder(CFG.embedding_dim)
        self.decoder = RNN_Decoder(CFG.embedding_dim, CFG.units, CFG.vocab_size)
        self.optimizer = tf.keras.optimizers.Adam()
        self.start_epoch = 0
        self.df_train_labels = df_train_labels
        self.loss_plot = []

        # The checkpoint has to track the very objects trained here; a manager
        # built around separately created models would only save untrained
        # weights.
        self.ckpt = tf.train.Checkpoint(encoder=self.encoder,
                                        decoder=self.decoder,
                                        optimizer=self.optimizer)
        self.ckpt_manager = tf.train.CheckpointManager(
            self.ckpt, CFG.checkpoint_path, max_to_keep=5)

        # Created once instead of on every loss evaluation.
        self.loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True, reduction='none'
        )

    def loss_function(self, real, pred):
        """Return the mean cross-entropy with positions where ``real == 0`` zeroed.

        The mean is taken over all positions, including the zeroed ones.
        """
        mask = tf.math.logical_not(tf.math.equal(real, 0))
        loss_ = self.loss_object(real, pred)

        mask = tf.cast(mask, dtype=loss_.dtype)
        loss_ *= mask

        return tf.reduce_mean(loss_)

    def checkpoint(self):
        """Restore the latest checkpoint, if any, and set ``start_epoch`` from its suffix."""
        latest = self.ckpt_manager.latest_checkpoint
        if latest:
            # The number after the last '-' in the checkpoint name is used as
            # the epoch to resume from.
            self.start_epoch = int(latest.split('-')[-1])
            self.ckpt.restore(latest)

    @tf.function
    def train_step(self, img_tensor, target):
        """Run one optimisation step on a batch.

        Returns:
            ``(loss, total_loss)``: the loss summed over time steps and that
            sum divided by ``target.shape[1]``.
        """
        loss = 0
        hidden = self.decoder.reset_state(batch_size=target.shape[0])
        # The '<unk>' index is used as the first decoder input for every row.
        dec_input = tf.expand_dims([CFG.tokenizer.word_index['<unk>']] * target.shape[0], 1)

        with tf.GradientTape() as tape:
            features = self.encoder(img_tensor)
            for i in range(1, target.shape[1]):
                predictions, hidden, _ = self.decoder(dec_input, features, hidden)
                loss += self.loss_function(target[:, i], predictions)
                # Teacher forcing: the next input is the ground-truth token.
                dec_input = tf.expand_dims(target[:, i], 1)

        total_loss = (loss / int(target.shape[1]))
        trainable_variables = self.encoder.trainable_variables + self.decoder.trainable_variables
        gradients = tape.gradient(loss, trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))

        return loss, total_loss

    def train(self):
        """Train from ``start_epoch`` to ``CFG.EPOCHS``, logging and checkpointing.

        The dataset is built once from the dataframe given to the constructor
        and iterated again every epoch. Each epoch's mean loss is appended to
        ``self.loss_plot``.
        """
        # Build once: constructing the dataset re-tokenizes every caption, so
        # doing it per epoch is pure overhead. It also has to happen before
        # the first train_step so the tokenizer is already fitted.
        dataset = populateDataset(self.df_train_labels).populate()

        for epoch in range(self.start_epoch, CFG.EPOCHS):
            start = time.time()
            total_loss = 0
            num_batches = 0

            for (batch, (img_tensor, target)) in enumerate(dataset):

                batch_loss, t_loss = self.train_step(img_tensor, target)
                total_loss += t_loss
                num_batches += 1
                if batch % 100 == 0:
                    print ('Epoch {} Batch {} Loss {:.4f}'.format(
                      epoch + 1, batch, batch_loss.numpy() / int(target.shape[1])))

            # Average over the batches actually processed rather than a
            # hard-coded step count, so the figure is a true per-batch mean.
            epoch_loss = total_loss / max(num_batches, 1)

            # Let us save the loss info to visualize
            self.loss_plot.append(epoch_loss)
            if epoch % 5 == 0:
                self.ckpt_manager.save()
            print ('Epoch {} Loss {:.6f}'.format(epoch + 1, epoch_loss))
            print ('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

In [None]:
# Build the trainer from the labels dataframe and run the training loop.
train_obj = Training(getDataframe().getInstance().df_train_labels)
train_obj.train()

**This is still A Work in Progress and I will keep making changes to it.** 

**So please do let me know in the comments if there's any way I can improve the code.**