# Load Env Variables

In [2]:
from decouple import config

aws_bucket_name = config('AWS_BUCKET_NAME')

# Helper Functions

In [3]:
import requests
import cv2
import numpy as np
import json

def preprocess_img(url, target_size=(224, 224)):
    try:
        res = requests.get(url)
        img = cv2.imdecode(np.frombuffer(res.content, np.uint8), -1)

        img = cv2.resize(img, target_size)
        img = img.astype(np.float32) / 255.0 # normalising in range (0, 1)

        if res.status_code == 200:
            return img    
    except Exception as e:
        print(f'Error while fetching img:{url}: {e}')

def append_to_json(json_file, nparray, word):
    try:
        with open(json_file, 'r') as file:
            data = json.load(file)
    except FileNotFoundError:
        data = {}

    data_list = data.get('data', [])

    data_list.append({
        'word': word,
        'nprray': nparray
    })

    with open(json_file, 'w') as file:
        json.dump(data, file, indent=4)

# Load Dataset

In [4]:
import json

json_file_path = '../data.json'

dataset = []

with open(json_file_path, 'r') as file:
    data = json.load(file)

for entry in data['data']:
    img_urls = []

    for url in entry['filenames']:
        s3_url = f'https://{aws_bucket_name}.s3.amazonaws.com/{url}'

        img_urls.append(s3_url)
    
    dataset.append({
        'word': entry['word'],
        'urls': img_urls
    })

In [11]:
dataset[0]

{'word': 'match',
 'urls': ['https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_0.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_1.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_2.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_3.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_4.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_5.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_6.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_7.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_8.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_9.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_10.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictur

In [12]:
# Range of pixel values

import cv2
import urllib
import numpy as np

img_url = dataset[3]['urls'][3].replace(' ', '+')

req = urllib.request.urlopen(img_url)
arr = np.asarray(bytearray(req.read()), dtype=np.uint8)
img = cv2.imdecode(arr, -1)

min_val = img.min()
max_val = img.max()

print("Minimum pixel value:", min_val)
print("Maximum pixel value:", max_val)

Minimum pixel value: 0
Maximum pixel value: 226


In [28]:
# for entry in dataset[0]:

images = []

for entry in dataset[0:2]:
    for url in entry['urls']:
        img = preprocess_img(url)

        images.append({
            'word': dataset[0]['word'],
            'nparray': img
        })

print(images)

[{'word': 'match', 'nparray': array([[[0.05882353, 0.04705882, 0.03137255],
        [0.0627451 , 0.05098039, 0.03529412],
        [0.06666667, 0.05490196, 0.03921569],
        ...,
        [0.9490196 , 0.9607843 , 0.9254902 ],
        [0.9529412 , 0.9647059 , 0.9254902 ],
        [0.9529412 , 0.96862745, 0.9254902 ]],

       [[0.05882353, 0.04705882, 0.03137255],
        [0.0627451 , 0.05098039, 0.03137255],
        [0.06666667, 0.05490196, 0.03921569],
        ...,
        [0.9490196 , 0.9607843 , 0.9254902 ],
        [0.9490196 , 0.9647059 , 0.9254902 ],
        [0.9529412 , 0.96862745, 0.9254902 ]],

       [[0.05882353, 0.04705882, 0.03137255],
        [0.0627451 , 0.05098039, 0.03529412],
        [0.06666667, 0.05490196, 0.03921569],
        ...,
        [0.9490196 , 0.9607843 , 0.9254902 ],
        [0.9490196 , 0.9647059 , 0.9254902 ],
        [0.9529412 , 0.96862745, 0.9254902 ]],

       ...,

       [[0.05490196, 0.03921569, 0.03529412],
        [0.05490196, 0.03921569, 0.035

In [76]:
import numpy as np

images = []

for entry in dataset:
    urls = entry['urls']
    word = entry['word']

    for url in urls:
        img = preprocess_img(url)

    images.append({
        'word': word,
        'nparray': img
    })

    np.save(f'../images/{word}.npy', img)

print(images)

Error while fetching img:https://sign-language-recognition.s3.amazonaws.com/pictures/phone/phone_16.jpg: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


KeyboardInterrupt: 

In [5]:
import numpy as np

words = [entry['word'] for entry in dataset]
vocab = set(words)

word_to_index = {word: index for index, word in enumerate(vocab)}

for word in vocab:
    labels_array = np.array([word_to_index[word] for _ in range(len(words))])

    np.save(f'../data/labels/{word}.npy', labels_array)

In [29]:
print(labels_list[0])
print(f'Length of labels: {len(labels_list)}') # should be 791

{'word': 'artist', 'nparray': array([584, 232, 698, ..., 522,  54, 413], dtype=int32)}
Length of labels: 791


In [6]:
dataset[0]

{'word': 'match',
 'urls': ['https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_0.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_1.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_2.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_3.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_4.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_5.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_6.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_7.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_8.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_9.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictures/match/match_10.jpg',
  'https://sign-language-recognition.s3.amazonaws.com/pictur

In [5]:
import numpy as np
import os

labels_folder = '../data/labels'
images_folder = '../data/images'

data_lists = []

for image_file in os.listdir(images_folder):
    word = os.path.splitext(image_file)[0]

    label_file_path = os.path.join(labels_folder, f"{word}.npy")
    images_file_path = os.path.join(images_folder, image_file)

    same_word = label_file_path.split('/')[3] == images_file_path.split('/')[3]
    

    if os.path.exists(label_file_path) and os.path.exists(images_file_path) and same_word:
        word_label = np.load(label_file_path, allow_pickle=True)

        word_images = np.load(images_file_path, allow_pickle=True)

        data_tuple = (word_images, word_label)

        data_lists.append(data_tuple)

In [6]:
from sklearn.model_selection import train_test_split

train_data, test_data = train_test_split(data_lists, test_size=0.2, random_state=64)
train_data, val_data = train_test_split(train_data, test_size=0.25, random_state=42)

# Unpack train, validation, and test sets
train_images, train_labels = zip(*train_data)
val_images, val_labels = zip(*val_data)
test_images, test_labels = zip(*test_data)

# Convert to numpy arrays
train_images = np.array(train_images)
val_images = np.array(val_images)
test_images = np.array(test_images)

train_labels = np.array(train_labels)
val_labels = np.array(val_labels)
test_labels = np.array(test_labels)

In [7]:
print('train label: \n')
print(train_labels.shape)

print('test label: \n')
print(test_labels.shape)

print('validation label: \n')
print(val_labels.shape)

print('train images: \n')
print(train_images.shape)

print('test images: \n')
print(test_images.shape)

print('validation images: \n')
print(val_images.shape)

train label: 

(330, 1800)
test label: 

(111, 1800)
validation label: 

(111, 1800)
train images: 

(330, 224, 224, 3)
test images: 

(111, 224, 224, 3)
validation images: 

(111, 224, 224, 3)


In [8]:
import tensorflow as tf
from tensorflow.keras import layers, models

model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1800, activation='sigmoid')  # Sigmoid activation for multi-label classification
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',  # Binary cross-entropy for multi-label classification
              metrics=['accuracy'])

history = model.fit(train_images, train_labels,
                    epochs=10,
                    batch_size=32,
                    validation_data=(val_images, val_labels))

2024-03-28 11:26:57.687254: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-28 11:26:57.929304: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  super().__init__(


Epoch 1/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 834ms/step - accuracy: 0.0000e+00 - loss: -32588.6328 - val_accuracy: 0.0000e+00 - val_loss: -797955.3750
Epoch 2/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 744ms/step - accuracy: 0.0000e+00 - loss: -3768675.2500 - val_accuracy: 0.0000e+00 - val_loss: -31623392.0000
Epoch 3/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 759ms/step - accuracy: 0.0000e+00 - loss: -77929200.0000 - val_accuracy: 0.0000e+00 - val_loss: -401510304.0000
Epoch 4/10
[1m 2/11[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m7s[0m 834ms/step - accuracy: 0.0000e+00 - loss: -459011648.0000

KeyboardInterrupt: 