In [2]:
import numpy as np
import tensorflow as tf
from transformers import DistilBertTokenizer
from sklearn.model_selection import train_test_split
import pandas as pd

In [3]:
# Step 1: Load and Preprocess Data
# Both inputs live in the same project folder under the Google Drive mount.
_project_dir = '/content/drive/MyDrive/Major_Project_Final-Yr'
data_path = _project_dir + '/trainLatex.txt'          # label file
image_folder = _project_dir + '/off_image_train'      # folder holding the .bmp images

In [4]:
from google.colab import drive

# Mount Google Drive at /content/drive; `data_path` and `image_folder` both
# point to locations below this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# List the current working directory (e.g. to check that `drive` is present).
!ls

drive  sample_data


In [5]:
import os

In [6]:

# Read the tab-separated label file into a two-column frame:
# 'Image' holds the image name, 'Label' the LaTeX string.
df = pd.read_csv(data_path, delimiter='\t', header=None, names=['Image', 'Label'])


In [7]:
# Append the '_0' suffix to every image name. The '.bmp' extension is NOT added
# here; it is appended when the file path is built.
# NOTE: this cell is not idempotent — running it twice appends '_0' twice.
df['Image'] += '_0'

# Show the resulting frame
print(df)

                       Image  \
0                   2_em_3_0   
1                   2_em_7_0   
2                  3_em_11_0   
3                  3_em_18_0   
4                  4_em_22_0   
...                      ...   
8830  TrainData2_26_sub_71_0   
8831  TrainData2_26_sub_73_0   
8832  TrainData2_26_sub_88_0   
8833  TrainData2_26_sub_95_0   
8834  TrainData2_26_sub_98_0   

                                                  Label  
0                                               \{ T \}  
1                                               \{ u \}  
2                                       \{ T _ { N } \}  
3                                       \{ I _ { k } \}  
4                                      \{ I , \sigma \}  
...                                                 ...  
8830      \frac { \sin B + \sin C } { \cos B + \cos C }  
8831  \alpha _ { n + 1 } - 3 \beta = \frac { 2 } { 3...  
8832                         3 0 \times 2 9 x ^ { 2 8 }  
8833  \sqrt { 1 + \sqrt { 2 + \sqrt

In [9]:
df.head(20)

Unnamed: 0,Image,Label
0,2_em_3_0,\{ T \}
1,2_em_7_0,\{ u \}
2,3_em_11_0,\{ T _ { N } \}
3,3_em_18_0,\{ I _ { k } \}
4,4_em_22_0,"\{ I , \sigma \}"
5,4_em_26_0,G \neq \{ e \}
6,4_em_27_0,"\{ \sigma , F \}"
7,5_em_30_0,\{ c _ { s } \}
8,7_em_59_0,\forall g \in G
9,8_em_62_0,\sigma \in G


No charts were generated by quickchart


In [10]:
# Show the first sample: image name (index label 0) and its LaTeX string.
print(f"Image Title: {df.loc[0, 'Image']}.bmp and Latex Code is: {df.loc[0, 'Label']}")

Image Title: 2_em_3_0.bmp and Latex Code is: \{ T \}


In [11]:
image_title = '4_em_22_0'

# Select the rows whose image name matches and take the first matching label.
# Raises IndexError when the name is not present in the frame.
label = df.loc[df['Image'].eq(image_title), 'Label'].iloc[0]

print(f"Image Title: {image_title} and Latex Code is: {label}")


Image Title: 4_em_22_0 and Latex Code is: \{ I , \sigma \}


In [12]:
# Build an image-name -> LaTeX lookup table from the 'Image' and 'Label' columns.
image_label_dict = {name: latex for name, latex in zip(df['Image'], df['Label'])}

# Image name to look up
image_title = '4_em_22_0'

# Fetch the label, reporting a missing name instead of raising.
try:
    label = image_label_dict[image_title]
except KeyError:
    print(f"Image Title '{image_title}' not found in the DataFrame.")
else:
    print(f"Image Title: {image_title} and Latex Code is: {label}")


Image Title: 4_em_22_0 and Latex Code is: \{ I , \sigma \}


In [None]:
# Initialize lists to store preprocessed data
images = []
labels = []  # placeholder only; the tokenizer cell rebinds `labels`

# Load every image listed in the label frame, scale it to [0, 1] and resize it.
for image_filename in df['Image']:
    image_path = os.path.join(image_folder, f'{image_filename}.bmp')
    image = tf.image.decode_bmp(tf.io.read_file(image_path))
    # Convert BEFORE resizing: convert_image_dtype only rescales integer inputs.
    # tf.image.resize returns float32, so converting afterwards is a no-op and
    # would leave the pixel values in the 0-255 range.
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, (224, 224))  # Adjust size as needed
    images.append(image)


In [None]:
# Turn every LaTeX label into DistilBERT token ids, padded/truncated to a common
# length and returned as TensorFlow tensors.
# NOTE(review): an *uncased* checkpoint presumably lower-cases the text, which
# would merge LaTeX symbols that differ only by case — confirm this is intended.
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
labels = tokenizer(
    df['Label'].tolist(),
    padding=True,
    truncation=True,
    return_tensors='tf',
)

In [None]:
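# # Step 2: Create TensorFlow Dataset -- disabled, this attempt raised an error (presumably because y_train / y_valid are already the 'input_ids' split, so indexing them with ['input_ids'] again fails)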
# X_train, X_valid, y_train, y_valid = train_test_split(images, labels['input_ids'], test_size=0.2)
# # Step 2: Create TensorFlow Dataset
# train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train['input_ids']))
# valid_dataset = tf.data.Dataset.from_tensor_slices((X_valid, y_valid['input_ids']))


In [None]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split

# 'images' is the list of image tensors and 'labels' the tokenizer output whose
# 'input_ids' entry holds the encoded LaTeX strings.
# Split the data into training and validation sets. The token ids are converted
# to a NumPy array first because scikit-learn selects rows with integer index
# arrays, which TensorFlow tensors do not support. A fixed random_state makes
# the split reproducible across runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    images, np.asarray(labels['input_ids']), test_size=0.2, random_state=42
)

# Step 2: Create TensorFlow Dataset
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
valid_dataset = tf.data.Dataset.from_tensor_slices((X_valid, y_valid))

# Shuffle the training samples (buffer covers the whole set) and batch both sets.
batch_size = 32
train_dataset = train_dataset.shuffle(buffer_size=len(X_train)).batch(batch_size)
valid_dataset = valid_dataset.batch(batch_size)

def dataset_to_dataframe(dataset):
    """Flatten a batched dataset into a DataFrame with 'images' and 'labels' columns.

    Args:
        dataset: iterable of ``(images, labels)`` batches; each element must
            expose ``.numpy()`` returning an array indexed by sample first.

    Returns:
        pandas.DataFrame with one row per sample, in iteration order.
    """
    image_rows, label_rows = [], []
    for batch_images, batch_labels in dataset:
        # Iterating an array walks its first axis, i.e. one entry per sample.
        image_rows += list(batch_images.numpy())
        label_rows += list(batch_labels.numpy())

    return pd.DataFrame({'images': image_rows, 'labels': label_rows})

# Materialise both datasets as DataFrames (training first, then validation).
train_dataframe, valid_dataframe = (
    dataset_to_dataframe(ds) for ds in (train_dataset, valid_dataset)
)

# Preview the first rows of each frame.
print("Train DataFrame:", train_dataframe.head(), sep="\n")
print("\nValid DataFrame:", valid_dataframe.head(), sep="\n")


In [None]:
# Pair every image with its token ids in a single tf.data pipeline.
dataset = tf.data.Dataset.from_tensor_slices((images, labels['input_ids']))

# Ordered 80/20 split: the first `train_size` samples train, the rest validate.
# Both halves are batched in groups of 32. This rebinds train_dataset and
# valid_dataset.
train_size = int(0.8 * len(images))
train_dataset, valid_dataset = (
    dataset.take(train_size).batch(32),
    dataset.skip(train_size).batch(32),
)

In [None]:
## How To Access the Images and Their Corresponding Labels

In [None]:
# Summarise the loaded images instead of printing every tensor in full, which
# would flood the cell output.
print(f"{len(images)} images loaded")
if images:
    print(f"First image: shape={images[0].shape}, dtype={images[0].dtype}")

In [None]:
from matplotlib import pyplot as plt

In [None]:
# Inspect the first 2 samples: shapes, the image itself and its token ids.
for image, label in dataset.take(2):
    print("Image Tensor Shape:", image.shape)
    print("Label Tensor Shape:", label.shape)
    plt.imshow(image)
    # Render now so each sample gets its own figure; without this every imshow
    # call draws on the same axes and only the last image is visible.
    plt.show()
    print(label)
    # print("Label Tensor Values:", label.numpy())
    print("------")

In [None]:
from transformers import TFAutoModel, AutoTokenizer


In [None]:
# Build a simple transformer-based model
# NOTE(review): this stacks a pretrained BERT directly on flattened pixels.
# BERT models normally take integer token ids, so this Sequential presumably
# fails when it is built or called — confirm, and consider a dedicated image
# encoder instead.
# NOTE: the name `transformer_model` is rebound further down to a PyTorch class.
transformer_model = TFAutoModel.from_pretrained('bert-base-uncased')
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(224, 224, 3)),  # Image input
    tf.keras.layers.Flatten(),
    transformer_model,
    tf.keras.layers.Dense(units=512, activation='relu'),
    # One output unit per tokenizer vocabulary entry.
    tf.keras.layers.Dense(units=len(tokenizer.get_vocab()), activation='softmax')  # Adjust output units based on your task
])


In [None]:

# Build a simple transformer-based model
# NOTE: the name `transformer_model` is rebound further down to a PyTorch class.
transformer_model = TFAutoModel.from_pretrained('bert-base-uncased')

# Define model inputs
image_input = tf.keras.layers.Input(shape=(224, 224, 3), name="image_input")
label_input = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="label_input")

# Process image through the transformer model
# NOTE(review): the image tensor is passed to BERT as-is; BERT normally expects
# integer token ids, so this call presumably fails — confirm.
# [:, 0, :] keeps the hidden state at sequence position 0 for every sample.
image_embedding = transformer_model(image_input)["last_hidden_state"][:, 0, :]

# Concatenate image embedding with label input
# (the label ids go through the same BERT instance; again position 0 is kept)
combined_embedding = tf.keras.layers.Concatenate(axis=1)([image_embedding, transformer_model(label_input)["last_hidden_state"][:, 0, :]])

# Fully connected layers for prediction
# The output layer has one unit per tokenizer vocabulary entry.
# NOTE: no tf.keras.Model is assembled from these tensors in this cell.
dense_layer = tf.keras.layers.Dense(units=512, activation='relu')(combined_embedding)
output_layer = tf.keras.layers.Dense(units=len(tokenizer.get_vocab()), activation='softmax')(dense_layer)


## **Transformer**

In [None]:
import torch
import math
from torch import nn
import torch.nn.functional as F

### Encoder Architecture

In [None]:
def scaled_dot_product(q, k, v, mask=None):
    """Compute scaled dot-product attention.

    Args:
        q, k, v: tensors whose last two dimensions are (sequence, head_dim).
        mask: optional additive mask, broadcast-added to the attention scores
            before the softmax.

    Returns:
        Tuple ``(values, attention)``: the attention-weighted sum of ``v`` and
        the softmax-normalised attention weights.
    """
    d_k = q.size()[-1]
    # Scores are divided by sqrt(head_dim) before the softmax.
    scaled = torch.matmul(q, k.transpose(-1, -2)) / math.sqrt(d_k)
    print(f"scaled.size() : {scaled.size()}")
    if mask is not None:
        print(f"-- ADDING MASK of shape {mask.size()} --")
        # Broadcasting add. So just the last N dimensions need to match
        scaled += mask
    attention = F.softmax(scaled, dim=-1)
    values = torch.matmul(attention, v)
    return values, attention


class MultiHeadAttention(nn.Module):
    """Multi-head self-attention with a fused query/key/value projection.

    Args:
        d_model: size of the last dimension of the input and of the output.
        num_heads: number of attention heads; must divide ``d_model``.

    Raises:
        ValueError: if ``d_model`` is not divisible by ``num_heads``.
    """

    def __init__(self, d_model, num_heads):
        super().__init__()
        if d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by num_heads ({num_heads})"
            )
        self.d_model = d_model
        self.num_heads = num_heads
        self.head_dim = d_model // num_heads
        self.qkv_layer = nn.Linear(d_model , 3 * d_model)
        self.linear_layer = nn.Linear(d_model, d_model)

    def forward(self, x, mask=None):
        """Apply self-attention to ``x`` of shape (batch, sequence, d_model).

        Args:
            x: input tensor; unpacked as (batch, sequence, d_model).
            mask: optional additive mask forwarded to ``scaled_dot_product``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        batch_size, max_sequence_length, d_model = x.size()
        print(f"x.size(): {x.size()}")
        qkv = self.qkv_layer(x)
        print(f"qkv.size(): {qkv.size()}")
        # Split the fused projection per head: (batch, seq, heads, 3 * head_dim)
        qkv = qkv.reshape(batch_size, max_sequence_length, self.num_heads, 3 * self.head_dim)
        print(f"qkv.size(): {qkv.size()}")
        # Move heads in front of the sequence axis: (batch, heads, seq, 3 * head_dim)
        qkv = qkv.permute(0, 2, 1, 3)
        print(f"qkv.size(): {qkv.size()}")
        q, k, v = qkv.chunk(3, dim=-1)
        print(f"q size: {q.size()}, k size: {k.size()}, v size: {v.size()}, ")
        values, attention = scaled_dot_product(q, k, v, mask)
        print(f"values.size(): {values.size()}, attention.size:{ attention.size()} ")
        # Bring the sequence axis back in front of the heads BEFORE merging them.
        # Reshaping (batch, heads, seq, head_dim) directly would mix values from
        # different sequence positions into the same output row.
        values = values.permute(0, 2, 1, 3).reshape(
            batch_size, max_sequence_length, self.num_heads * self.head_dim
        )
        print(f"values.size(): {values.size()}")
        out = self.linear_layer(values)
        print(f"out.size(): {out.size()}")
        return out


class LayerNormalization(nn.Module):
    """Layer normalisation over the trailing ``parameters_shape`` dimensions.

    The input is standardised with its mean and (biased) variance over those
    dimensions, then scaled by the learnable ``gamma`` and shifted by ``beta``.

    Args:
        parameters_shape: sizes of the trailing dimensions to normalise over.
        eps: constant added to the variance before taking the square root.
    """

    def __init__(self, parameters_shape, eps=1e-5):
        super().__init__()
        self.parameters_shape = parameters_shape
        self.eps = eps
        self.gamma = nn.Parameter(torch.ones(parameters_shape))
        self.beta = nn.Parameter(torch.zeros(parameters_shape))

    def forward(self, inputs):
        """Return ``inputs`` normalised over its trailing dimensions."""
        # One negative axis index per normalised dimension: -1, -2, ...
        trailing_axes = list(range(-1, -len(self.parameters_shape) - 1, -1))
        mean = torch.mean(inputs, dim=trailing_axes, keepdim=True)
        print(f"Mean ({mean.size()})")
        centered = inputs - mean
        var = torch.mean(centered.pow(2), dim=trailing_axes, keepdim=True)
        std = torch.sqrt(var + self.eps)
        print(f"Standard Deviation  ({std.size()})")
        y = centered / std
        print(f"y: {y.size()}")
        out = self.gamma * y + self.beta
        print(f"self.gamma: {self.gamma.size()}, self.beta: {self.beta.size()}")
        print(f"out: {out.size()}")
        return out


class PositionwiseFeedForward(nn.Module):
    """Two-layer feed-forward network: Linear -> ReLU -> Dropout -> Linear.

    Args:
        d_model: size of the input and output features.
        hidden: size of the intermediate layer.
        drop_prob: dropout probability applied after the activation.
    """

    def __init__(self, d_model, hidden, drop_prob=0.1):
        super().__init__()
        self.linear1 = nn.Linear(d_model, hidden)
        self.linear2 = nn.Linear(hidden, d_model)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=drop_prob)

    def forward(self, x):
        """Run ``x`` through the four stages, printing the size after each."""
        stages = (
            (self.linear1, "x after first linear layer"),
            (self.relu, "x after activation"),
            (self.dropout, "x after dropout"),
            (self.linear2, "x after 2nd linear layer"),
        )
        for stage, label in stages:
            x = stage(x)
            print(f"{label}: {x.size()}")
        return x


class EncoderLayer(nn.Module):
    """One encoder block: self-attention followed by a feed-forward network.

    Each sub-layer is followed by dropout, a residual addition and layer
    normalisation.

    Args:
        d_model: feature size of the input and output.
        ffn_hidden: hidden size of the feed-forward network.
        num_heads: number of attention heads.
        drop_prob: dropout probability used throughout the block.
    """

    def __init__(self, d_model, ffn_hidden, num_heads, drop_prob):
        super(EncoderLayer, self).__init__()
        self.attention = MultiHeadAttention(d_model=d_model, num_heads=num_heads)
        self.norm1 = LayerNormalization(parameters_shape=[d_model])
        self.dropout1 = nn.Dropout(p=drop_prob)
        self.ffn = PositionwiseFeedForward(d_model=d_model, hidden=ffn_hidden, drop_prob=drop_prob)
        self.norm2 = LayerNormalization(parameters_shape=[d_model])
        self.dropout2 = nn.Dropout(p=drop_prob)

    def forward(self, x):
        """Return ``x`` after the attention and feed-forward sub-layers (no mask is applied)."""
        residual_x = x
        print("------- ATTENTION 1 ------")
        x = self.attention(x, mask=None)
        print("------- DROPOUT 1 ------")
        x = self.dropout1(x)
        print("------- ADD AND LAYER NORMALIZATION 1 ------")
        x = self.norm1(x + residual_x)
        residual_x = x
        # This stage is the feed-forward network, so the trace label says so
        # (it was previously announced as a second attention step).
        print("------- FEED FORWARD ------")
        x = self.ffn(x)
        print("------- DROPOUT 2 ------")
        x = self.dropout2(x)
        print("------- ADD AND LAYER NORMALIZATION 2 ------")
        x = self.norm2(x + residual_x)
        return x

class Encoder(nn.Module):
    """Stack of ``num_layers`` identically configured ``EncoderLayer`` blocks applied in sequence."""

    def __init__(self, d_model, ffn_hidden, num_heads, drop_prob, num_layers):
        super().__init__()
        blocks = [
            EncoderLayer(d_model, ffn_hidden, num_heads, drop_prob)
            for _ in range(num_layers)
        ]
        self.layers = nn.Sequential(*blocks)

    def forward(self, x):
        """Pass ``x`` through every encoder layer and return the result."""
        return self.layers(x)

### Decoder Architecture

In [None]:

class MultiHeadCrossAttention(nn.Module):
    """Multi-head attention where keys/values come from ``x`` and queries from ``y``.

    Args:
        d_model: feature size of both inputs and of the output.
        num_heads: number of attention heads.
    """

    def __init__(self, d_model, num_heads):
        super().__init__()
        self.d_model = d_model
        self.num_heads = num_heads
        self.head_dim = d_model // num_heads
        self.kv_layer = nn.Linear(d_model , 2 * d_model)
        self.q_layer = nn.Linear(d_model , d_model)
        self.linear_layer = nn.Linear(d_model, d_model)

    def forward(self, x, y, mask):
        """Attend from ``y`` (queries) over ``x`` (keys and values).

        Args:
            x: tensor unpacked as (batch, source_length, d_model).
            y: tensor of shape (batch, target_length, d_model); its length may
                differ from that of ``x``.
            mask: additive mask forwarded to ``scaled_dot_product`` (or None).

        Returns:
            Tensor of shape (batch, target_length, d_model).
        """
        batch_size, source_length, d_model = x.size()
        # Queries follow y's own length; reusing x's length would break the
        # reshape whenever the two sequences differ in size.
        target_length = y.size(1)
        kv = self.kv_layer(x)
        q = self.q_layer(y)
        kv = kv.reshape(batch_size, source_length, self.num_heads, 2 * self.head_dim)
        q = q.reshape(batch_size, target_length, self.num_heads, self.head_dim)
        # Heads in front of the sequence axis: (batch, heads, seq, ...)
        kv = kv.permute(0, 2, 1, 3)
        q = q.permute(0, 2, 1, 3)
        k, v = kv.chunk(2, dim=-1)
        values, attention = scaled_dot_product(q, k, v, mask)
        # Back to (batch, target_length, heads, head_dim) before merging heads.
        values = values.permute(0, 2, 1, 3).reshape(batch_size, target_length, d_model)
        out = self.linear_layer(values)
        return out
class DecoderLayer(nn.Module):
    """One decoder block: masked self-attention, cross-attention, feed-forward.

    Every sub-layer is followed by dropout, a residual addition and layer
    normalisation.

    Args:
        d_model: feature size of the inputs and output.
        ffn_hidden: hidden size of the feed-forward network.
        num_heads: number of attention heads.
        drop_prob: dropout probability used throughout the block.
    """

    def __init__(self, d_model, ffn_hidden, num_heads, drop_prob):
        super().__init__()
        self.self_attention = MultiHeadAttention(d_model=d_model, num_heads=num_heads)
        self.norm1 = LayerNormalization(parameters_shape=[d_model])
        self.dropout1 = nn.Dropout(p=drop_prob)
        self.encoder_decoder_attention = MultiHeadCrossAttention(d_model=d_model, num_heads=num_heads)
        self.norm2 = LayerNormalization(parameters_shape=[d_model])
        self.dropout2 = nn.Dropout(p=drop_prob)
        self.ffn = PositionwiseFeedForward(d_model=d_model, hidden=ffn_hidden, drop_prob=drop_prob)
        self.norm3 = LayerNormalization(parameters_shape=[d_model])
        self.dropout3 = nn.Dropout(p=drop_prob)

    def forward(self, x, y, decoder_mask):
        """Update the decoder stream ``y`` using ``x`` as cross-attention source.

        Args:
            x: tensor supplying keys/values to the cross-attention.
            y: decoder-side tensor; the result has the same shape.
            decoder_mask: mask handed to the self-attention only; the
                cross-attention is called with ``mask=None``.
        """
        # Sub-layer 1: masked self-attention over y
        print("MASKED SELF ATTENTION")
        attended = self.self_attention(y, mask=decoder_mask)
        print("DROP OUT 1")
        attended = self.dropout1(attended)
        print("ADD + LAYER NORMALIZATION 1")
        y = self.norm1(attended + y)

        # Sub-layer 2: cross-attention, queries from y and keys/values from x
        print("CROSS ATTENTION")
        crossed = self.encoder_decoder_attention(x, y, mask=None)
        print("DROP OUT 2")
        crossed = self.dropout2(crossed)
        print("ADD + LAYER NORMALIZATION 2")
        y = self.norm2(crossed + y)

        # Sub-layer 3: position-wise feed-forward network
        print("FEED FORWARD 1")
        transformed = self.ffn(y)
        print("DROP OUT 3")
        transformed = self.dropout3(transformed)
        print("ADD + LAYER NORMALIZATION 3")
        return self.norm3(transformed + y)
class SequentialDecoder(nn.Sequential):
    """``nn.Sequential`` variant whose layers take ``(x, y, mask)``.

    Only ``y`` is threaded from one layer to the next; ``x`` and ``mask`` are
    handed unchanged to every layer.
    """

    def forward(self, *inputs):
        """Expect exactly three inputs ``(x, y, mask)`` and return the final ``y``."""
        source, target, mask = inputs
        for layer in self:
            target = layer(source, target, mask)
        return target

class Decoder(nn.Module):
    """Stack of ``num_layers`` ``DecoderLayer`` blocks sharing the same ``x`` and ``mask``."""

    def __init__(self, d_model, ffn_hidden, num_heads, drop_prob, num_layers=1):
        super().__init__()
        blocks = [
            DecoderLayer(d_model, ffn_hidden, num_heads, drop_prob)
            for _ in range(num_layers)
        ]
        self.layers = SequentialDecoder(*blocks)

    def forward(self, x, y, mask):
        """Run ``y`` through every decoder layer.

        Args:
            x: tensor handed unchanged to each layer.
            y: decoder-side tensor that is updated layer by layer.
            mask: mask handed unchanged to each layer.

        Returns:
            The output of the last decoder layer.
        """
        return self.layers(x, y, mask)

In [None]:
class transformer_model(nn.Module):
    """Encoder-decoder model: ``Encoder`` output feeds the ``Decoder``.

    Args:
        d_model, ffn_hidden, num_heads, drop_prob: shared by both stacks.
        num_encoder_layers: number of layers in the encoder.
        num_decoder_layers: number of layers in the decoder.
    """

    def __init__(self, d_model, ffn_hidden, num_heads, drop_prob, num_encoder_layers, num_decoder_layers):
        super().__init__()
        self.encoder = Encoder(d_model, ffn_hidden, num_heads, drop_prob, num_layers=num_encoder_layers)
        self.decoder = Decoder(d_model, ffn_hidden, num_heads, drop_prob, num_layers=num_decoder_layers)

    def forward(self, x, y, mask):
        """Encode ``x`` and decode ``y`` against it.

        Args:
            x: encoder input (image data).
            y: decoder input (label data).
            mask: mask for the decoder self-attention.

        Returns:
            The decoder output.
        """
        return self.decoder(self.encoder(x), y, mask)

In [None]:
# Model hyper-parameters — adjust all of these according to your model configuration.
d_model = 512       # feature size
ffn_hidden = 2048   # feed-forward hidden size
num_heads = 8       # attention heads
drop_prob = 0.1     # dropout probability
num_encoder_layers = num_decoder_layers = 6

# Build the encoder-decoder model from the settings above.
transformer_model_instance = transformer_model(
    d_model=d_model,
    ffn_hidden=ffn_hidden,
    num_heads=num_heads,
    drop_prob=drop_prob,
    num_encoder_layers=num_encoder_layers,
    num_decoder_layers=num_decoder_layers,
)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

# A loss function named 'custom_loss' is expected to exist for the training loop.
# Images and labels are expected to be tensor-like.
# Adjust batch size according to your memory capacity
batch_size = 32

class CustomDataset(Dataset):
    """Map-style dataset pairing ``images[i]`` with ``labels[i]``.

    Each item is converted on access: the image to a float32 tensor and the
    label to a long (int64) tensor.
    """

    def __init__(self, images, labels):
        self.images, self.labels = images, labels

    def __len__(self):
        # The number of samples is taken from the image collection.
        return len(self.images)

    def __getitem__(self, index):
        return (
            torch.tensor(self.images[index], dtype=torch.float32),
            torch.tensor(self.labels[index], dtype=torch.long),
        )

# Wrap the first `train_size` samples for training and the remainder for validation.
train_dataset = CustomDataset(images[:train_size], labels['input_ids'][:train_size])
valid_dataset = CustomDataset(images[train_size:], labels['input_ids'][train_size:])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

# `transformer_model` names the class, not a model object, so every call below
# goes through the instantiated model `transformer_model_instance`.
# NOTE(review): `custom_loss` is not defined in the visible cells — define it
# before running this cell.
# NOTE(review): the loader yields raw images and integer token ids, while the
# encoder/decoder layers unpack (batch, sequence, d_model) inputs — confirm
# that an embedding / patching step is applied first.
optimizer = optim.Adam(transformer_model_instance.parameters(), lr=0.001)
num_epochs = 10

# Training loop
for epoch in range(num_epochs):
    transformer_model_instance.train()
    total_loss = 0.0

    for batch_images, batch_labels in train_loader:
        # Forward pass
        outputs = transformer_model_instance(batch_images, batch_labels, mask=None)

        # Calculate the loss
        loss = custom_loss(outputs, batch_labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # max(..., 1) avoids a ZeroDivisionError when the loader is empty.
    average_loss = total_loss / max(len(train_loader), 1)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {average_loss:.4f}')

    # Validation loop (no gradients, dropout disabled via eval mode)
    transformer_model_instance.eval()
    with torch.no_grad():
        total_val_loss = 0.0

        for batch_images, batch_labels in valid_loader:
            # Forward pass
            outputs = transformer_model_instance(batch_images, batch_labels, mask=None)

            # Calculate the loss
            val_loss = custom_loss(outputs, batch_labels)

            total_val_loss += val_loss.item()

        average_val_loss = total_val_loss / max(len(valid_loader), 1)
        print(f'Validation Loss: {average_val_loss:.4f}')

# After training, save the learned weights of the model instance.
torch.save(transformer_model_instance.state_dict(), 'transformer_model.pth')


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# Train the instantiated model `transformer_model_instance`. The name
# `TransformerModel` is not defined anywhere in the visible cells, so it cannot
# be used here.
# NOTE(review): `custom_loss` is not defined in the visible cells — define it
# before running this cell.
optimizer = optim.Adam(transformer_model_instance.parameters(), lr=0.001)
num_epochs = 10

# Rebuild the loaders from the existing datasets.
# Adjust batch size according to your memory capacity
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)

# Training loop
for epoch in range(num_epochs):
    transformer_model_instance.train()
    total_loss = 0.0

    for batch_images, batch_labels in train_loader:
        # batch_images is the image data and batch_labels the corresponding label data.
        # Perform any necessary pre-processing on your input data

        # Forward pass; the model's forward requires a mask argument, None
        # means that no decoder mask is applied.
        outputs = transformer_model_instance(batch_images, batch_labels, mask=None)

        # Calculate the loss
        loss = custom_loss(outputs, batch_labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # max(..., 1) avoids a ZeroDivisionError when the loader is empty.
    average_loss = total_loss / max(len(train_loader), 1)

    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {average_loss:.4f}')

    # Validation loop (no gradients, dropout disabled via eval mode)
    transformer_model_instance.eval()
    with torch.no_grad():
        total_val_loss = 0.0

        for batch_images, batch_labels in valid_loader:
            # Forward pass
            outputs = transformer_model_instance(batch_images, batch_labels, mask=None)

            # Calculate the loss
            val_loss = custom_loss(outputs, batch_labels)

            total_val_loss += val_loss.item()

        average_val_loss = total_val_loss / max(len(valid_loader), 1)
        print(f'Validation Loss: {average_val_loss:.4f}')

# After training, save the learned weights of the model instance.
torch.save(transformer_model_instance.state_dict(), 'TransformerModel.pth')
