In [33]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, optimizers
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from sklearn.neighbors import KNeighborsClassifier

import plotly.express as ex
import plotly.graph_objects as go

In [7]:
# VGG16 without its classifier head (include_top=False), initialised with the
# ImageNet weights. Keras keeps the downloaded weight file in ~/.keras/models.
model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
model.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [8]:
# Root folder of the captured images; it is created on first run.
dataset_path = "../data/raw/hand_dataset"
os.makedirs(name=dataset_path, exist_ok=True)
dataset_path

'../data/raw/hand_dataset'

In [11]:
# Class label to capture next; its images are stored in <dataset_path>/<cls>/.
cls = "2"
path = os.path.join(dataset_path, cls)
os.makedirs(path, exist_ok=True)

# Index of the next image to save. Continue after the highest "<n>.jpg" already
# present so that capturing more images for an existing class adds new files
# instead of overwriting 0.jpg, 1.jpg, ... (an empty folder starts at 0).
c = 1 + max(
    (
        int(stem)
        for stem, ext in map(os.path.splitext, os.listdir(path))
        if ext == ".jpg" and stem.isdecimal()
    ),
    default=-1,
)

In [12]:
# Interactive capture: show the webcam feed, press "s" to save the current
# 224x224 frame as <path>/<c>.jpg, press "q" to stop.
cap = cv2.VideoCapture(0)
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera missing, busy or unplugged);
            # cv2.resize would raise on the empty frame, so stop cleanly.
            print("Could not read a frame from the webcam")
            break
        frame = cv2.resize(frame, (224, 224))
        cv2.imshow('Webcam', frame)
        # Keep only the low byte of the key code; the "no key" value -1
        # becomes 255 and matches neither shortcut.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
        elif key == ord('s'):
            img_path = os.path.join(path, f"{c}.jpg")
            cv2.imwrite(img_path, frame)
            c += 1
            print(f"Saved {img_path}")
finally:
    # Always free the camera and close the preview window, even when the
    # loop is interrupted or raises.
    cap.release()
    cv2.destroyAllWindows()

Saved ../data/raw/hand_dataset/2/0.jpg
Saved ../data/raw/hand_dataset/2/1.jpg
Saved ../data/raw/hand_dataset/2/2.jpg
Saved ../data/raw/hand_dataset/2/3.jpg
Saved ../data/raw/hand_dataset/2/4.jpg
Saved ../data/raw/hand_dataset/2/5.jpg
Saved ../data/raw/hand_dataset/2/6.jpg
Saved ../data/raw/hand_dataset/2/7.jpg
Saved ../data/raw/hand_dataset/2/8.jpg
Saved ../data/raw/hand_dataset/2/9.jpg
Saved ../data/raw/hand_dataset/2/10.jpg
Saved ../data/raw/hand_dataset/2/11.jpg
Saved ../data/raw/hand_dataset/2/12.jpg
Saved ../data/raw/hand_dataset/2/13.jpg
Saved ../data/raw/hand_dataset/2/14.jpg
Saved ../data/raw/hand_dataset/2/15.jpg
Saved ../data/raw/hand_dataset/2/16.jpg


In [13]:
# Count the images in every class folder below dataset_path.
# - Plain files in the dataset root are skipped (os.listdir on them would raise).
# - Only the extensions that image_dataset_from_directory documents as supported
#   are counted, so `total` matches the number of images the loader will find.
# - The loop uses its own variable names so the `cls` / `path` globals read by
#   the capture cell are not overwritten.
IMAGE_EXTENSIONS = (".bmp", ".gif", ".jpeg", ".jpg", ".png")

total = 0
for class_dir in os.listdir(dataset_path):
    class_path = os.path.join(dataset_path, class_dir)
    if not os.path.isdir(class_path):
        continue
    total += sum(
        file_name.lower().endswith(IMAGE_EXTENSIONS)
        for file_name in os.listdir(class_path)
    )
print(f"Total images: {total}")

Total images: 30


In [15]:
# IMG_SIZE:   images are resized to IMG_SIZE x IMG_SIZE pixels.
# BATCH_SIZE: number of images per batch when the dataset is read from disk.
IMG_SIZE, BATCH_SIZE = 224, 16

In [16]:
# Augmentation pipeline built from Keras preprocessing layers, see
# https://www.tensorflow.org/guide/keras/preprocessing_layers
# Images are resized to IMG_SIZE x IMG_SIZE, multiplied by 1/255, randomly
# rotated / stretched / zoomed / mirrored, and finally resized again because
# RandomWidth and RandomHeight change the image dimensions.
# NOTE(review): none of the cells shown here apply this pipeline.
data_augmentation = tf.keras.Sequential(
    [
        layers.Resizing(height=IMG_SIZE, width=IMG_SIZE),
        layers.Rescaling(scale=1. / 255),
        layers.RandomRotation(factor=.4),
        layers.RandomWidth(factor=.2),
        layers.RandomHeight(factor=.2),
        layers.RandomZoom(height_factor=.2),
        layers.RandomFlip(mode="horizontal"),
        layers.Resizing(height=IMG_SIZE, width=IMG_SIZE),
    ]
)

In [30]:
def extract_features(directory, sample_count):
    """Compute feature maps of the global ``model`` for the images in ``directory``.

    The images are loaded with ``image_dataset_from_directory`` (one sub-folder
    per class), resized to ``IMG_SIZE`` x ``IMG_SIZE``, passed through the VGG16
    ``preprocess_input`` and then through ``model``.

    Args:
        directory: Root folder that contains one sub-folder per class.
        sample_count: Number of rows to allocate in the returned arrays. At
            most this many images are processed.

    Returns:
        A tuple ``(features, labels, class_names)``:
        ``features`` has shape ``(sample_count, *model.output_shape[1:])``,
        ``labels`` has shape ``(sample_count,)`` and holds the class index of
        each row, and ``class_names`` maps a class index to its folder name.
        If fewer than ``sample_count`` images are found, the remaining rows
        stay zero-filled.
    """
    # Take the per-image feature shape from the model instead of hard-coding it.
    feature_shape = tuple(model.output_shape[1:])
    features = np.zeros(shape=(sample_count, *feature_shape))
    labels = np.zeros(shape=(sample_count,))
    dataset = tf.keras.utils.image_dataset_from_directory(
        directory,
        image_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        seed=42,
    )
    filled = 0
    for inputs_batch, labels_batch in dataset:
        if filled >= sample_count:
            break
        # The last batch can be smaller than BATCH_SIZE, and sample_count can be
        # smaller than the dataset: write by running offset and clip the batch.
        take = min(int(labels_batch.shape[0]), sample_count - filled)
        # VGG16 expects its own input preprocessing, not raw 0-255 RGB pixels.
        batch = preprocess_input(inputs_batch[:take])
        features[filled:filled + take] = model.predict(batch, verbose=0)
        labels[filled:filled + take] = labels_batch[:take].numpy()
        filled += take
    return features, labels, dataset.class_names

In [31]:
# Extract the features of all captured images; the cell output shows the
# class index assigned to each one.
train_features, train_labels, class_names = extract_features(
    directory=dataset_path,
    sample_count=total,
)
train_labels

Found 30 files belonging to 2 classes.


array([1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 1., 1., 0., 0., 0., 1., 0.,
       1., 0., 1., 1., 1., 1., 0., 1., 0., 0., 0., 1., 0.])

In [32]:
# Flatten each 7x7x512 feature map into a single row vector per image.
train_features = train_features.reshape((total, 7 * 7 * 512))

In [34]:
# k-nearest-neighbours classifier on the flattened VGG16 features.
# With an even k and uniform votes, a 1-1 split between two classes is a tie
# that is resolved towards the lower class index. An odd k combined with
# distance weighting lets the closer neighbours decide instead.
neigh = KNeighborsClassifier(n_neighbors=3, weights="distance")
neigh.fit(train_features, train_labels)

In [36]:
# Live demo: classify every webcam frame with VGG16 features + k-NN and draw
# the predicted class name on the preview. Press "q" to stop.
cap = cv2.VideoCapture(0)
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera missing, busy or unplugged);
            # cv2.resize would raise on the empty frame, so stop cleanly.
            print("Could not read a frame from the webcam")
            break
        frame = cv2.resize(frame, (224, 224))
        # OpenCV delivers frames in BGR channel order, while preprocess_input
        # expects RGB input, so convert before preprocessing.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        x = preprocess_input(np.expand_dims(rgb, axis=0).astype("float32"))
        # verbose=0: no progress bar for every single frame.
        feature = model.predict(x, verbose=0)
        feature = np.reshape(feature, (1, 7 * 7 * 512))
        preds = neigh.predict(feature)
        txt = class_names[int(preds[0])]
        cv2.putText(frame, txt, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.imshow('Webcam', frame)
        # Keep only the low byte of the key code; the "no key" value -1
        # becomes 255 and does not match "q".
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even when the
    # loop is interrupted or raises.
    cap.release()
    cv2.destroyAllWindows()

