### Import the necessary libraries

In [92]:
import pandas as pd
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import to_categorical
from keras import layers, models

### Loading the data

In [93]:
# load the data from the pickle file in the data/ directory
df = pd.read_pickle('data/words_df.pkl')

In [94]:
df.head()

Unnamed: 0,WordID,GrayLevel,BoundingBox,GrammaticalTag,Transcription,ImageData
0,a01-000u-03-00,156,"(430, 1290, 177, 59)",NPTS,M Ps,"[[[0.84313726], [0.79607844], [0.9490196], [0...."
1,a01-000u-03-01,156,"(705, 1296, 431, 54)",NR,tomorrow,"[[[0.9843137], [0.9843137], [0.9843137], [0.98..."
2,a01-000u-03-02,156,"(1154, 1346, 9, 10)",.,.,"[[[0.9843137], [0.9843137], [0.9843137], [0.98..."
3,a01-000u-03-03,156,"(1269, 1292, 320, 68)",NPT,Mr.,"[[[0.9843137], [0.9843137], [0.9843137], [0.98..."
4,a01-000u-03-04,156,"(1570, 1297, 252, 56)",NP,Michael,"[[[0.90588236], [0.7372549], [0.30588236], [0...."


In [95]:
print(df.shape)

(18863, 6)


### Splitting train, validation, and test data

In [96]:
count=0
c2=0
for i, img in enumerate(df['ImageData']):
    try:
        if img.shape == (32, 128, 1):  # Replace with your expected dimensions
            count+=1
    except AttributeError:
        print(f"{df['WordID'][i]} will be deleted")
        df.drop([i], inplace=True)
        c2+=1
print(count)
print(c2)

18863
0


2 images data weren't loaded into the dataframe. Delete those rows.

In [97]:
print(len(df['Transcription'].unique()))

4582


In [98]:
# calculate the longest word
max_len = 0
for word in df['Transcription']:
    if len(word) > max_len:
        max_len = len(word)
print(max_len)

18


In [110]:
# Split data into training and validation sets
X = np.stack(df['ImageData'].values)  # Convert the list of images to a numpy array
y = df['Transcription'].values

In [111]:
# print the top 40 value counts of the array y
print(pd.Series(y).value_counts()[:10])

,      1194
.       890
the     840
"       538
of      451
to      396
and     365
a       314
in      285
'       253
Name: count, dtype: int64


In [112]:
def encode_to_labels(y):
    # create a list of the chartcaters vocabulary ordered alphabetically
    vocabulary = sorted(set(''.join(y)))

    # create a dictionary mapping each character to the vocabulary list index. do not use 0.
    char_to_num = {char:idx+1 for idx, char in enumerate(vocabulary)}

    # transform y values to numerical using the char_to_num dictionary, using 0 for padding. the length of each word is the maximum length of the words in the dataset
    y_encoded = []
    for word in y:
        word_encoded = [char_to_num[char] for char in word]
        y_encoded.append(word_encoded)

    # pad the encoded values (fill with 0s to the right)
    y_encoded = tf.keras.preprocessing.sequence.pad_sequences(y_encoded, padding='post')

    return y_encoded

y = encode_to_labels(y)

In [114]:
print(X.shape)
print(y.shape)

(18863, 32, 128, 1)
(18863, 18)


In [116]:
TRAIN_SPLIT = 0.8
VAL_SPLIT = 0.1
TEST_SPLIT = 0.1

# split the data into training, validation and test sets
X_train = X[:int(X.shape[0]*TRAIN_SPLIT)]
y_train = y[:int(y.shape[0]*TRAIN_SPLIT)]

X_val = X[int(X.shape[0]*TRAIN_SPLIT):int(X.shape[0]*(TRAIN_SPLIT+VAL_SPLIT))]
y_val = y[int(y.shape[0]*TRAIN_SPLIT):int(y.shape[0]*(TRAIN_SPLIT+VAL_SPLIT))]

X_test = X[int(X.shape[0]*(TRAIN_SPLIT+VAL_SPLIT)):]
y_test = y[int(y.shape[0]*(TRAIN_SPLIT+VAL_SPLIT)):]

print(X_train.shape)
print(y_train.shape)
print(X_val.shape)
print(y_val.shape)
print(X_test.shape)
print(y_test.shape)

(15090, 32, 128, 1)
(15090, 18)
(1886, 32, 128, 1)
(1886, 18)
(1887, 32, 128, 1)
(1887, 18)


### Neural Networks

#### Starting simple

In [118]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Define a simple CNN model
def create_basic_cnn(input_shape, output_length):
    model = models.Sequential()
    
    # Convolutional layer that extracts features from the input images
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(layers.MaxPooling2D((2, 2)))
    
    # Additional convolutional layer for more complex features
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    
    # Flatten the 3D output to 1D
    model.add(layers.Flatten())
    
    # Dense layer for prediction
    model.add(layers.Dense(128, activation='relu'))
    
    # Output layer with 'output_length' units, one for each time step in the output sequence
    model.add(layers.Dense(output_length, activation='softmax'))
    
    return model

# Create the model
input_shape = X_train.shape[1:]  # (32, 128, 1)
output_length = y_train.shape[1]  # 18
model = create_basic_cnn(input_shape, output_length)

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',  # Change if your y is one-hot encoded
              metrics=['accuracy'])

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 30, 126, 32)       320       
                                                                 
 max_pooling2d (MaxPooling2  (None, 15, 63, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 13, 61, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 6, 30, 64)         0         
 g2D)                                                            
                                                                 
 flatten_1 (Flatten)         (None, 11520)             0         
                                                                 
 dense_2 (Dense)             (None, 128)              

In [119]:
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))


Epoch 1/10


InvalidArgumentError: Graph execution error:

Detected at node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/Users/jorgemilhomem/Library/Python/3.11/lib/python/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/Users/jorgemilhomem/Library/Python/3.11/lib/python/site-packages/traitlets/config/application.py", line 1043, in launch_instance

  File "/Users/jorgemilhomem/Library/Python/3.11/lib/python/site-packages/ipykernel/kernelapp.py", line 725, in start

  File "/Users/jorgemilhomem/Library/Python/3.11/lib/python/site-packages/tornado/platform/asyncio.py", line 195, in start

  File "/opt/homebrew/Cellar/python@3.11/3.11.6_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/base_events.py", line 607, in run_forever

  File "/opt/homebrew/Cellar/python@3.11/3.11.6_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/base_events.py", line 1922, in _run_once

  File "/opt/homebrew/Cellar/python@3.11/3.11.6_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/events.py", line 80, in _run

  File "/Users/jorgemilhomem/Library/Python/3.11/lib/python/site-packages/ipykernel/kernelbase.py", line 513, in dispatch_queue

  File "/Users/jorgemilhomem/Library/Python/3.11/lib/python/site-packages/ipykernel/kernelbase.py", line 502, in process_one

  File "/Users/jorgemilhomem/Library/Python/3.11/lib/python/site-packages/ipykernel/kernelbase.py", line 409, in dispatch_shell

  File "/Users/jorgemilhomem/Library/Python/3.11/lib/python/site-packages/ipykernel/kernelbase.py", line 729, in execute_request

  File "/Users/jorgemilhomem/Library/Python/3.11/lib/python/site-packages/ipykernel/ipkernel.py", line 422, in do_execute

  File "/Users/jorgemilhomem/Library/Python/3.11/lib/python/site-packages/ipykernel/zmqshell.py", line 540, in run_cell

  File "/Users/jorgemilhomem/Library/Python/3.11/lib/python/site-packages/IPython/core/interactiveshell.py", line 3009, in run_cell

  File "/Users/jorgemilhomem/Library/Python/3.11/lib/python/site-packages/IPython/core/interactiveshell.py", line 3064, in _run_cell

  File "/Users/jorgemilhomem/Library/Python/3.11/lib/python/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/Users/jorgemilhomem/Library/Python/3.11/lib/python/site-packages/IPython/core/interactiveshell.py", line 3269, in run_cell_async

  File "/Users/jorgemilhomem/Library/Python/3.11/lib/python/site-packages/IPython/core/interactiveshell.py", line 3448, in run_ast_nodes

  File "/Users/jorgemilhomem/Library/Python/3.11/lib/python/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code

  File "/var/folders/6q/0yfg3pwd67d3xrqsp_16lm680000gn/T/ipykernel_84529/2512682409.py", line 1, in <module>

  File "/opt/homebrew/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/opt/homebrew/lib/python3.11/site-packages/keras/src/engine/training.py", line 1807, in fit

  File "/opt/homebrew/lib/python3.11/site-packages/keras/src/engine/training.py", line 1401, in train_function

  File "/opt/homebrew/lib/python3.11/site-packages/keras/src/engine/training.py", line 1384, in step_function

  File "/opt/homebrew/lib/python3.11/site-packages/keras/src/engine/training.py", line 1373, in run_step

  File "/opt/homebrew/lib/python3.11/site-packages/keras/src/engine/training.py", line 1151, in train_step

  File "/opt/homebrew/lib/python3.11/site-packages/keras/src/engine/training.py", line 1209, in compute_loss

  File "/opt/homebrew/lib/python3.11/site-packages/keras/src/engine/compile_utils.py", line 277, in __call__

  File "/opt/homebrew/lib/python3.11/site-packages/keras/src/losses.py", line 143, in __call__

  File "/opt/homebrew/lib/python3.11/site-packages/keras/src/losses.py", line 270, in call

  File "/opt/homebrew/lib/python3.11/site-packages/keras/src/losses.py", line 2454, in sparse_categorical_crossentropy

  File "/opt/homebrew/lib/python3.11/site-packages/keras/src/backend.py", line 5775, in sparse_categorical_crossentropy

logits and labels must have the same first dimension, got logits shape [32,18] and labels shape [576]
	 [[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_train_function_1351]

In [None]:
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc*100:.2f}%")
