In [128]:
# imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import time
import os
from PIL import Image
from tqdm import tqdm
from pathlib import Path
from PIL import Image
import cv2
from IPython.display import display, Image, clear_output
import ipywidgets as widgets
import threading
import warnings
from collections import Counter, deque

# enable cuDNN's benchmark mode (only relevant when convolutions run on a CUDA device)
torch.backends.cudnn.benchmark = True

In [93]:
# relative path of the data directory
data_dir = "./data"
# class labels, one character per class: the uppercase letters "A" through "Z" in order
class_names = "".join(chr(code) for code in range(ord("A"), ord("Z") + 1))

In [16]:
#https://abauville.medium.com/display-your-live-webcam-feed-in-a-jupyter-notebook-using-opencv-d01eb75921d1

# silence all warnings while demoing; otherwise restore the default warning filter
demoing = True
if demoing:
    warnings.filterwarnings("ignore")
else:
    warnings.filterwarnings("default")

# establish which model to use
chosen_model = "inception"
assert chosen_model in ['resnet', 'inception'], f"unsupported model: {chosen_model!r}"

# spatial input size fed to the network: 299x299 for inception, 224x224 for resnet
# (the resnet size was previously mistyped as (224, 244))
input_size = (299, 299) if chosen_model == "inception" else (224, 224)

# establish data transforms for incoming webcam frames:
# HWC array -> CHW tensor, resize to the network input size, normalize per channel
data_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(input_size),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

# instantiate the appropriate model
if chosen_model == 'inception':
    model = models.inception_v3(pretrained=True)
    # only the main classifier output is used
    model.aux_logits = False
else:
    model = models.resnet18(pretrained=True)

# replace the classification head with one output per class, then load the saved weights
model.fc = nn.Linear(model.fc.in_features, len(class_names))
# map_location="cpu" lets a checkpoint saved on a GPU load on a CPU-only machine;
# the model itself is never moved off the CPU in this cell.
# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
model.load_state_dict(
    torch.load(os.path.abspath(f"./models/{chosen_model}"), map_location="cpu")
)
model.eval()

# recent per-frame predictions, shared with view()
buffer = deque()

# letters committed so far, shared with view()
message = []

# stop button
stopButton = widgets.ToggleButton(
    value=False,
    description='Stop',
    disabled=False,
    button_style='danger', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Description',
    icon='square' # (FontAwesome names without the `fa-` prefix)
)


# display function
def view(button):
    """Run the webcam loop: classify every frame and accumulate a spelled-out message.

    Each frame read from camera 0 is mirrored, centre-cropped to a square,
    classified with the module-level ``model`` and shown in the notebook with
    the current majority vote drawn on top. Per-frame predictions are stored in
    the module-level ``buffer``; every ``window`` frames the most common entry
    is appended to the module-level ``message`` unless it is "N/A". The loop
    ends when ``button.value`` becomes true or a frame cannot be read; the
    camera is then released and the message is printed.

    Args:
        button: object exposing a boolean ``value`` attribute (the stop
            ToggleButton); a true value ends the loop.
    """
    window = 15           # frames per vote, and the most predictions kept in the buffer
    min_confidence = 0.5  # below this softmax probability a frame counts as "N/A"
    crop_size = 800       # side of the centred square crop, in pixels
    font = cv2.FONT_HERSHEY_SIMPLEX

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        cap.release()
        print("Could not open the webcam (device 0).")
        return

    display_handle = display(None, display_id=True)
    i = 0
    try:
        while not button.value:
            # read frame from webcam; a failed read yields no usable image
            ok, frame = cap.read()
            if not ok or frame is None:
                print("Could not read a frame from the webcam; stopping.")
                break
            frame = cv2.flip(frame, 1)  # if your camera reverses your image

            # centre-crop a square, clamped to the frame so that small frames
            # never produce negative slice starts
            height, width = frame.shape[:2]
            side = min(crop_size, height, width)
            start_x = (width - side) // 2
            start_y = (height - side) // 2
            # copy: the slice is a non-contiguous view and is drawn on below
            frame = frame[start_y:start_y + side, start_x:start_x + side].copy()

            # OpenCV delivers BGR; the normalization constants in data_transforms
            # are the RGB ImageNet statistics, so convert before inference.
            # NOTE(review): assumes the model was trained on RGB images — confirm.
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # ToTensor does not rescale float arrays, hence the explicit / 255
            new_frame = data_transforms(rgb.astype(np.float32) / 255).unsqueeze(0)

            # make prediction with a single forward pass
            with torch.no_grad():
                probs = torch.softmax(model(new_frame), dim=1)
            confidence, index = torch.max(probs, dim=1)
            if confidence.item() >= min_confidence:
                pred = class_names[index.item()]
            else:
                pred = "N/A"

            # record each prediction exactly once and keep only the last `window`
            buffer.append(pred)
            while len(buffer) > window:
                buffer.popleft()
            most_common = Counter(buffer).most_common(1)[0][0]

            # set up color of text to be overlayed on frame
            color = (0, 255, 255)

            # at the end of every `window` frames, read a letter to memory
            if (i + 1) % window == 0 and most_common != "N/A":
                message.append(most_common)
                print("Read: ", most_common)
                color = (0, 0, 0)
                buffer.clear()

            # display frame with letter overlayed (frame is still uint8 BGR,
            # which is what the JPEG encoder expects)
            cv2.putText(frame, most_common, (50, 50), font, 1, color, 2, cv2.LINE_4)
            ok, encoded = cv2.imencode('.jpeg', frame)
            if ok:
                # Image here is IPython.display.Image
                display_handle.update(Image(data=encoded.tobytes()))
            i += 1
    finally:
        # always free the camera and clear the display, even on an error
        cap.release()
        display_handle.update(None)

    print("Translated message: ", "".join(message))

            
# run
# show the stop control, then start the capture loop on its own thread
display(stopButton)
thread = threading.Thread(target=view, kwargs={"button": stopButton})
thread.start()

ToggleButton(value=False, button_style='danger', description='Stop', icon='square', tooltip='Description')



None

Read:  A
Translated message:  A
