<a href="https://colab.research.google.com/github/osjayaprakash/deeplearning/blob/main/CS_230_Full_TF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Local setup (macOS):

```
python3 -m venv ~/cs230
source ~/cs230/bin/activate
pip3 install kagglehub kaggle tensorflow tensorflow-macos tensorflow-metal
brew install hdf5
```


## AMAZON AWS


```
sudo apt update
sudo apt install nvidia-driver-535
reboot  # Restart the system after installation

nvidia-smi

python3
import tensorflow as tf
tf.sysconfig.get_build_info()

python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices());"
```




In [None]:
import kagglehub

# Download the latest version of the im2latex-100k dataset via kagglehub.
# `root_dir` is the local path reported for the downloaded files.
# Alternative dataset handle: "gregoryeritsyan/im2latex-230k"
DATASET_HANDLE = "shahrukhkhan/im2latex100k"
root_dir = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", root_dir)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import (Input, Conv2D, MaxPooling2D, Flatten,
                                     Dense, GRU, Embedding, Bidirectional,
                                     TimeDistributed, Concatenate, RepeatVector, LSTM)
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import matplotlib.pyplot as plt
import platform
import sys
import pandas as pd
import sklearn as sk
import scipy as sp

# Query the GPU list up front; the return value is not used here, the complete
# device list is printed at the end of this cell.
tf.config.experimental.list_physical_devices('GPU')

# Report the runtime environment, one line per component.
environment_report = [
    f"Python Platform: {platform.platform()}",
    f"Tensor Flow Version: {tf.__version__}",
    f"Keras Version: {tf.keras.__version__}",
    "",
    f"Python {sys.version}",
    f"Pandas {pd.__version__}",
    f"Scikit-Learn {sk.__version__}",
    f"SciPy {sp.__version__}",
]
for report_line in environment_report:
    print(report_line)

print(tf.config.list_physical_devices())

In [None]:
# Initialize Tokenizer (Configure it with LaTeX vocabulary)
# Both values start as None placeholders; the data-loading cell later rebinds
# them to the values returned by fit_tokenizer().
vocab_size = None  # Placeholder: vocabulary size, computed by fit_tokenizer()
max_seq_length = None  # Placeholder: longest token sequence, computed by fit_tokenizer()

# vocab_size is still None here, so num_words=None is passed. filters='' and
# lower=False are passed so that (per the Keras Tokenizer API) no characters
# are stripped and case is kept -- LaTeX tokens such as "\Delta" vs "\delta"
# and the "<START>"/"<END>" markers must survive tokenization unchanged.
tokenizer = Tokenizer(num_words=vocab_size, filters='', lower=False)
def fit_tokenizer(texts):
    """Fit the module-level ``tokenizer`` on ``texts`` and report corpus statistics.

    Args:
        texts: iterable of LaTeX strings (tokens separated by whitespace).

    Returns:
        A ``(vocab_size, max_seq_length)`` tuple, where ``vocab_size`` is the
        number of entries in ``tokenizer.word_index`` plus one, and
        ``max_seq_length`` is the length of the longest encoded text.

    Note:
        The values are returned, not stored: the module-level ``vocab_size``
        and ``max_seq_length`` are only updated if the caller assigns them.
    """
    tokenizer.fit_on_texts(texts)

    n_tokens = len(tokenizer.word_index) + 1
    encoded_texts = tokenizer.texts_to_sequences(texts)
    longest = max(map(len, encoded_texts))

    print(f"Vocabulary size: {n_tokens}, Max sequence length: {longest}")
    return n_tokens, longest

def text_to_sequence(text):
    """Encode a single LaTeX string as a list of token ids.

    Uses the module-level ``tokenizer``; the string is wrapped in a one-element
    batch and the single resulting sequence is returned.
    """
    batch = tokenizer.texts_to_sequences([text])
    token_ids = batch[0]
    return token_ids

def sequence_to_text(sequence):
    """Decode a single list of token ids back into a LaTeX string.

    Inverse of ``text_to_sequence``: the sequence is wrapped in a one-element
    batch for the module-level ``tokenizer`` and the single decoded text is
    returned.
    """
    decoded_batch = tokenizer.sequences_to_texts([sequence])
    latex_text = decoded_batch[0]
    return latex_text

# Preprocess

In [None]:
def preprocess_image(image):
    """Resize an image to 50x200 (height x width) and divide its values by 255.

    Args:
        image: image tensor accepted by ``tf.image.resize``.

    Returns:
        The resized image with every value divided by 255.0 (i.e. in [0, 1]
        when the input holds 8-bit pixel values).
    """
    target_size = (50, 200)  # (height, width)
    resized = tf.image.resize(image, target_size)
    return resized / 255.0

def load_and_preprocess_images(image_paths):
    """Read every image file, decode it as grayscale, preprocess it, and stack.

    Args:
        image_paths: iterable of image file paths.

    Returns:
        A single tensor stacking the preprocessed images along a new leading
        (batch) axis.
    """
    processed = []
    for image_path in image_paths:
        raw_bytes = tf.io.read_file(image_path)
        # channels=1 requests a single-channel (grayscale) decode.
        grayscale = tf.io.decode_image(raw_bytes, channels=1)
        processed.append(preprocess_image(grayscale))
    return tf.stack(processed)

def prepare_sequences(latex_texts, max_seq_length):
    """Tokenize LaTeX strings and pad them to a common length.

    Args:
        latex_texts: iterable of LaTeX strings.
        max_seq_length: length every sequence is padded to (``maxlen``).

    Returns:
        The result of ``pad_sequences`` with padding appended after the tokens
        (``padding='post'``).
    """
    token_id_lists = list(map(text_to_sequence, latex_texts))
    padded = pad_sequences(token_id_lists, maxlen=max_seq_length, padding='post')
    return padded


In [None]:
import pandas as pd

# Read at most the first 100k rows of the training split.
df = pd.read_csv(f"{root_dir}/im2latex_train.csv", nrows=100000)

# A row with a missing image name or formula cannot form a training pair
# (string concatenation with NaN raises TypeError), so drop such rows up front.
train_df = df.dropna(subset=["image", "formula"])

# Build both lists in one pass each instead of growing them row by row with
# DataFrame.iterrows(), which is very slow on ~100k rows.
image_dir = f"{root_dir}//formula_images_processed/formula_images_processed"
train_image_paths = [f"{image_dir}/{image_name}" for image_name in train_df["image"]]
# Wrap every formula in explicit start/end markers for the decoder.
train_latex_texts = [f"<START> {formula} <END>" for formula in train_df["formula"]]

# Enable Numpy behaviour of TF
tf.experimental.numpy.experimental_enable_numpy_behavior()

# Rebind the module-level placeholders with the statistics of this corpus.
vocab_size, max_seq_length = fit_tokenizer(train_latex_texts)

train_images = load_and_preprocess_images(train_image_paths)
train_sequences = prepare_sequences(train_latex_texts, max_seq_length)
# Add a trailing axis of size 1 to the padded token-id array.
train_sequences = np.expand_dims(train_sequences, -1)
print("train_images:", train_images.shape)
print("train_sequences:", train_sequences.shape)

In [None]:
# Quick look at the prepared tensors and a round-trip of the first sequence.
print(train_sequences.shape, train_images.shape)
print(type(train_sequences))

# Flatten the first example into a plain Python list of token ids.
first_token_ids = train_sequences[0].reshape(1, -1).tolist()[0]
print(first_token_ids)
print(sequence_to_text(first_token_ids))

In [None]:
# Sanity check on the first training example: show its image path, confirm the
# file exists on disk, and show its LaTeX text (including the <START>/<END>
# markers added when the list was built).
print(train_image_paths[0])
# IPython shell escape; {...} interpolates the Python value into the command.
!ls -lart "{train_image_paths[0]}"
print(train_latex_texts[0])

## Model

In [None]:
# Image size expected by the CNN encoder's input layer; it matches the
# (50, 200) height x width that preprocess_image() resizes to.
IMG_WIDTH, IMG_HEIGHT = 200, 50
# Size of the CNN output vector and of the token embedding vectors.
EMBEDDING_DIM = 256

from tensorflow import keras
from tensorflow.keras.layers import Layer

def create_cnn_encoder():
    """Build the CNN that maps a grayscale image to a feature vector.

    Architecture: two (3x3 Conv2D + 2x2 MaxPooling2D) stages with 32 and 64
    filters, followed by Flatten and a ReLU Dense layer of ``EMBEDDING_DIM``
    units.

    Returns:
        A Keras ``Model`` from an ``(IMG_HEIGHT, IMG_WIDTH, 1)`` input to the
        ``EMBEDDING_DIM``-sized dense output.
    """
    image_in = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 1))  # single channel: grayscale

    features = image_in
    for n_filters in (32, 64):
        features = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(features)
        features = MaxPooling2D((2, 2))(features)

    flattened = Flatten()(features)
    image_embedding = Dense(EMBEDDING_DIM, activation='relu')(flattened)
    return Model(image_in, image_embedding)

class MyLayer(Layer):
    """Layer that maps every non-zero input element to 1.0 and every zero to 0.0."""

    def call(self, x):
        """Return a float32 tensor that is 1.0 where ``x != 0`` and 0.0 elsewhere."""
        return tf.cast(tf.math.not_equal(x, 0), dtype=tf.float32)


def create_model():
    """Build and compile the image-to-LaTeX encoder-decoder model.

    The CNN feature vector is repeated ``max_seq_length`` times and
    concatenated, step by step, with the embedded decoder tokens; an LSTM
    followed by a per-step softmax Dense layer produces a distribution over
    ``vocab_size`` tokens at every position.

    Reads the module-level ``max_seq_length`` and ``vocab_size``, so it must be
    called after they have been set from the corpus.

    Returns:
        A compiled Keras ``Model`` taking ``[image, token_ids]`` and returning
        per-step token probabilities (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    encoder = create_cnn_encoder()

    # Image branch: tile the image feature vector once per decoder step.
    image_input, image_features = encoder.input, encoder.output
    tiled_features = RepeatVector(max_seq_length, name='enc_output')(image_features)

    # Text branch: embed the decoder token ids.
    # NOTE: padding masking is currently disabled (mask_zero=False, and the
    # MyLayer-based mask is not applied).
    text_input = Input(shape=(max_seq_length,), name="text_input")
    embedding_layer = Embedding(input_dim=vocab_size,
                                output_dim=EMBEDDING_DIM,
                                input_length=max_seq_length,
                                mask_zero=False)
    text_embedding = embedding_layer(text_input)

    # Decoder: fuse both branches per time step, then LSTM + softmax.
    decoder_features = Concatenate()([tiled_features, text_embedding])
    hidden_states = LSTM(vocab_size, return_sequences=True)(decoder_features)
    token_probabilities = TimeDistributed(Dense(vocab_size, activation='softmax'))(hidden_states)

    full_model = Model([image_input, text_input], token_probabilities)
    full_model.compile(optimizer='adam',
                       loss='sparse_categorical_crossentropy',
                       metrics=['accuracy'])
    return full_model

# Build the compiled encoder-decoder model (uses the vocab_size and
# max_seq_length computed from the training corpus) and print its layer summary.
model = create_model()
model.summary()

In [None]:
# Render a diagram of the model graph with shapes, dtypes, layer names,
# nested sub-models and layer activations all shown.
import keras

plot_options = dict(
    show_shapes=True,
    show_dtype=True,
    show_layer_names=True,
    expand_nested=True,
    show_layer_activations=True,
)
keras.utils.plot_model(model, **plot_options)

In [None]:
# Teacher forcing: at step t the decoder is fed the ground-truth tokens up to
# t and must predict token t+1. Feeding the same sequence as both decoder
# input and target would only teach the model to copy its input, which does
# not match predict_latex(), where the output at position i-1 is read as
# token i.
decoder_inputs = np.asarray(train_sequences).reshape(len(train_sequences), -1)

# Targets are the inputs shifted left by one step; the final position has no
# successor and is left as 0 (the padding id).
decoder_targets = np.zeros_like(decoder_inputs)
decoder_targets[:, :-1] = decoder_inputs[:, 1:]

model.fit([train_images, decoder_inputs], decoder_targets[..., np.newaxis],
          epochs=5, batch_size=32, validation_split=0.2)

# Predict

In [None]:
def predict_latex(image, model, max_steps=None):
    """Greedily decode the LaTeX text for a single input image.

    Args:
        image: raw (un-normalized) image tensor; it is resized and scaled by
            ``preprocess_image`` inside this function, so do not pass an image
            that has already been preprocessed.
        model: the encoder-decoder model taking ``[image, token_ids]`` and
            returning per-step token probabilities.
        max_steps: optional cap on the number of generated tokens. Defaults to
            (and is clamped to) ``max_seq_length - 1``, the most the fixed-size
            decoder input can hold after the ``<START>`` token.

    Returns:
        The decoded LaTeX string (without the ``<START>``/``<END>`` markers).
    """
    image = preprocess_image(image)
    image = tf.expand_dims(image, axis=0)  # Add batch dimension

    start_id = tokenizer.word_index['<START>']
    end_id = tokenizer.word_index['<END>']

    # Decoder input: all padding (0) except the <START> token at position 0.
    decoder_input = tf.zeros((1, max_seq_length), dtype=tf.int32)
    decoder_input = tf.tensor_scatter_nd_update(decoder_input, [[0, 0]], [start_id])

    # The decoder input has exactly max_seq_length slots, so position indices
    # must stay below that bound (a fixed range(1, 100) would index out of
    # range for shorter sequences and truncate longer ones).
    step_limit = max_seq_length - 1
    if max_steps is not None:
        step_limit = min(step_limit, max_steps)

    predicted_sequence = []
    for step in range(1, step_limit + 1):
        predictions = model([image, decoder_input], training=False)

        # The distribution at position step-1 predicts the token for position
        # `step`. Work on a writable NumPy copy so <START> can be excluded
        # from the candidates, then take the arg-max in one vectorized call.
        step_scores = np.array(predictions[0, step - 1])
        step_scores[start_id] = -np.inf
        next_token = int(np.argmax(step_scores))

        if next_token == end_id:
            break

        predicted_sequence.append(next_token)
        decoder_input = tf.tensor_scatter_nd_update(decoder_input, [[0, step]], [next_token])

    print("Predicted Seq:", predicted_sequence)
    return sequence_to_text(predicted_sequence)

# Example usage: Predict LaTeX from an image
EXAMPLE_INDEX = 1  # one index for the prediction AND the ground truth shown below

print(type(train_images))
#test_image = tf.random.uniform((50, 200, 1))  # Replace with your test image

# predict_latex() resizes and normalizes its input itself. The entries of
# train_images are already scaled to [0, 1], so passing one of them would
# divide by 255 a second time; load the raw image from disk instead.
raw_image = tf.io.decode_image(tf.io.read_file(train_image_paths[EXAMPLE_INDEX]), channels=1)
predicted_latex = predict_latex(raw_image, model)
print("Predicted LaTeX:", predicted_latex)
print("Original Seq:", train_sequences[EXAMPLE_INDEX])
print("Original Seq:", train_latex_texts[EXAMPLE_INDEX])
