In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import os
from tensorflow import keras
import random
import cv2
import math
from PIL import Image
import seaborn as sns
import csv
import json
from tqdm import tqdm

from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dense, Flatten
from tensorflow.python.keras.saving.hdf5_format import save_attributes_to_hdf5_group

import joblib

In [2]:
model = Sequential()
model.add(Conv2D(32, (4, 4), padding="same", activation='relu', input_shape=(16, 16, 1)))
model.add(MaxPooling2D((4, 4)))
model.add(Conv2D(64, (2, 2), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(16, activation='softmax'))

model.compile(optimizer='Adam',loss='categorical_crossentropy',metrics=['accuracy'])

In [3]:
label_dict = {'fivefingerR': 0, 'fourfingerR': 1, 'threefingerR': 2, 'twofingerR': 3, 'onefingerR': 4,
              'fistR': 5, 'cowabungaR': 6, 'spidermanR': 7, 'okR': 8, 'cR': 9, 'thumbR': 10,
              'lR': 11, 'oR': 12, 'rockonR': 13, 'palmR': 14, 'sidehandR': 15}

index_dict = {value : key for key, value in label_dict.items()}

with open('data_short.json', 'r') as file:
    raw_data = json.load(file)

labels = []
data = []

for label, arrays in raw_data.items():
    for array in arrays:
        def_label = np.zeros((16))
        def_label[label_dict[label]] = 1
        labels.append(def_label)
        normalizedData = np.array((array-np.min(array))/(np.max(array)-np.min(array)))
        data.append(normalizedData.reshape(16, 16, 1))

labels = np.array(labels)
data = np.array(data)

(xtrain,xtest,ytrain,ytest)=train_test_split(data,labels,test_size=0.4)
# print(xtrain.shape, xtest.shape)


In [4]:
model_t = model.fit(xtrain, ytrain, epochs=20, batch_size = 64, verbose=2, validation_data=(xtest, ytest))

Epoch 1/20
30/30 - 0s - loss: 2.7643 - accuracy: 0.0891 - val_loss: 2.7515 - val_accuracy: 0.0578 - 461ms/epoch - 15ms/step
Epoch 2/20
30/30 - 0s - loss: 2.7195 - accuracy: 0.1724 - val_loss: 2.6790 - val_accuracy: 0.2414 - 194ms/epoch - 6ms/step
Epoch 3/20
30/30 - 0s - loss: 2.5615 - accuracy: 0.2766 - val_loss: 2.4017 - val_accuracy: 0.2711 - 149ms/epoch - 5ms/step
Epoch 4/20
30/30 - 0s - loss: 2.1570 - accuracy: 0.4161 - val_loss: 1.9453 - val_accuracy: 0.4164 - 182ms/epoch - 6ms/step
Epoch 5/20
30/30 - 0s - loss: 1.6847 - accuracy: 0.5693 - val_loss: 1.4817 - val_accuracy: 0.6438 - 159ms/epoch - 5ms/step
Epoch 6/20
30/30 - 0s - loss: 1.3350 - accuracy: 0.6599 - val_loss: 1.2014 - val_accuracy: 0.6984 - 141ms/epoch - 5ms/step
Epoch 7/20
30/30 - 0s - loss: 1.0861 - accuracy: 0.7479 - val_loss: 0.9542 - val_accuracy: 0.8039 - 139ms/epoch - 5ms/step
Epoch 8/20
30/30 - 0s - loss: 0.8887 - accuracy: 0.7948 - val_loss: 0.7770 - val_accuracy: 0.8211 - 139ms/epoch - 5ms/step
Epoch 9/20
30/3

In [6]:
# loaded_rf = joblib.load("model.joblib")

def getTotalAccuracy(model):
    with open('./data/data.json', 'r') as file:
        data = json.load(file)

    correct = np.zeros((16))
    incorrect = np.zeros((16))
    total = np.zeros((16))

    for label, arrays in (data.items()):
        print(label, len(arrays))
        for image in tqdm(arrays):
            input_data = np.reshape(image, (-1, 16, 16, 1))
            predictions = np.squeeze(model.predict(input_data, verbose=0))
            max_idx = predictions.tolist().index(max(predictions))
            label_out = index_dict[max_idx]
            
            if (label == label_out):
                correct[index_dict[label]] += 1
            else:
                incorrect[index_dict[label]] += 1
            total[index_dict[label]] += 1

    print(f"Total samples: {np.sum(total)}\nTotal correct: {np.sum(correct)}\nTotal incorrect: {np.sum(incorrect)}")
    print(f"Accuracy: {np.sum(correct) / np.sum(total)}\n")
    print("Number Correct by Index:\n", correct)
    print("Number Incorrect by Index:\n", incorrect)
    print("Accuracy by Index:\n", correct / total)


getTotalAccuracy(model)



# # Load the image
# image_path = "data_pad.jpg"
# image = load_img(image_path, target_size=(32, 32))  # Resize the image to match the input size of the CNN
# image_array = img_to_array(image)  # Convert the image to a numpy array
# expanded_image_array = tf.expand_dims(image_array, axis=0)  # Expand the dimensions to match the batch size

# # Preprocess the image
# preprocessed_image = preprocess_input(expanded_image_array)

# # Load the pre-trained CNN model
# model = model_d

# # Make predictions on the image
# predictions = model.predict(preprocessed_image)

  0%|          | 0/16 [00:00<?, ?it/s]

fivefingerR 1386


100%|██████████| 1386/1386 [00:38<00:00, 35.74it/s]
  6%|▋         | 1/16 [00:38<09:41, 38.78s/it]

fourfingerR 1144


100%|██████████| 1144/1144 [00:30<00:00, 36.96it/s]
 12%|█▎        | 2/16 [01:09<07:58, 34.18s/it]

threefingerR 1234


100%|██████████| 1234/1234 [00:34<00:00, 35.32it/s]
 19%|█▉        | 3/16 [01:44<07:28, 34.53s/it]

twofingerR 1372


100%|██████████| 1372/1372 [00:36<00:00, 37.56it/s]
 25%|██▌       | 4/16 [02:21<07:03, 35.32s/it]

onefingerR 1359


100%|██████████| 1359/1359 [00:38<00:00, 35.69it/s]
 31%|███▏      | 5/16 [02:59<06:39, 36.32s/it]

fistR 1937


100%|██████████| 1937/1937 [00:56<00:00, 34.09it/s]
 38%|███▊      | 6/16 [03:56<07:12, 43.29s/it]

cowabungaR 1269


100%|██████████| 1269/1269 [00:37<00:00, 34.13it/s]
 44%|████▍     | 7/16 [04:33<06:11, 41.29s/it]

spidermanR 1652


100%|██████████| 1652/1652 [00:50<00:00, 32.73it/s]
 50%|█████     | 8/16 [05:23<05:53, 44.22s/it]

okR 1393


100%|██████████| 1393/1393 [00:44<00:00, 31.30it/s]
 56%|█████▋    | 9/16 [06:08<05:10, 44.30s/it]

cR 1364


100%|██████████| 1364/1364 [00:41<00:00, 32.96it/s]
 62%|██████▎   | 10/16 [06:49<04:20, 43.40s/it]

thumbR 1381


100%|██████████| 1381/1381 [00:43<00:00, 31.41it/s]
 69%|██████▉   | 11/16 [07:33<03:37, 43.58s/it]

lR 1332


100%|██████████| 1332/1332 [00:39<00:00, 34.07it/s]
 75%|███████▌  | 12/16 [08:12<02:48, 42.22s/it]

oR 1275


100%|██████████| 1275/1275 [00:38<00:00, 33.39it/s]
 81%|████████▏ | 13/16 [08:50<02:02, 40.99s/it]

rockonR 1277


100%|██████████| 1277/1277 [00:38<00:00, 33.21it/s]
 88%|████████▊ | 14/16 [09:29<01:20, 40.23s/it]

palmR 1221


100%|██████████| 1221/1221 [00:36<00:00, 33.92it/s]
 94%|█████████▍| 15/16 [10:05<00:38, 38.95s/it]

sidehandR 1184


100%|██████████| 1184/1184 [00:35<00:00, 32.95it/s]
100%|██████████| 16/16 [10:41<00:00, 40.08s/it]


Total samples: 21780
Total correct: 13447
Total incorrect: 8333
Accuracy: 0.6174012855831038



In [7]:
# frequency ranges from A3 to C5
freqs = [220, 233, 247, 262, 277, 294, 311, 330, 349, 370, 392, 415, 440, 466, 494, 523]
freqDict = {key : f for (key, f) in (index_dict.keys(), freqs)}
for key in index_dict.keys():
    freqDict

print(freqDict)

ValueError: too many values to unpack (expected 2)