In [None]:
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image
def txtToArray(path):
    """Parse a box annotation text file into an int64 NumPy array.

    The file content is split on whitespace. Tokens that are exactly '0' or
    'hand' are discarded; every remaining token is split on ',' and becomes
    one row of the result.

    Args:
        path: Location of the annotation text file.

    Returns:
        np.ndarray of dtype int64 with one row per kept token.
    """
    # A context manager closes the handle even when reading fails; the
    # previous version never closed the file.
    with open(path) as f:
        tokens = f.read().split()
    rows = [tok.split(',') for tok in tokens if tok != '0' and tok != 'hand']
    return np.array(rows, dtype=np.int64)
class HandsDataset(torch.utils.data.Dataset):
    """Hand instance-segmentation dataset: one RGB frame plus per-hand targets.

    Files are looked up under ``root`` as:
        DATA_IMAGES/Image{i}_{j}.jpg     frame j of video i (both 1-based)
        DATA_BOXES/Box{i}_{j}.txt        box rows, parsed with ``txtToArray``
        DATA_MASKS/Mask{i}_{j}_{k}.jpg   mask belonging to box row k (0-based)

    Sample ``idx`` maps to video ``1 + idx // 100`` and frame ``1 + idx % 100``.
    Each box row is read as (xmin, ymax, width, height); a row whose width is
    0 means "no hand in this slot".
    """

    def __init__(self, root, transforms=None):
        """Store the dataset root and optional ``transforms(img, target)`` callable."""
        self.root = root
        self.transforms = transforms
        self.imgs = list(os.listdir(os.path.join(root, "DATA_IMAGES")))
        self.masks = list(os.listdir(os.path.join(root, "DATA_MASKS")))
        self.boxes = list(os.listdir(os.path.join(root, "DATA_BOXES")))

    def _read_annotations(self, idx):
        """Return (image path, list of [xmin, ymin, xmax, ymax], list of binary masks) for ``idx``."""
        stem = "{}_{}".format(1 + idx // 100, 1 + idx % 100)
        img_path = os.path.join(self.root, "DATA_IMAGES", "Image" + stem + ".jpg")
        box_path = os.path.join(self.root, "DATA_BOXES", "Box" + stem + ".txt")
        box_array = txtToArray(box_path)

        boxes = []
        masks = []
        for boxid in range(4):  # each frame has four box slots
            if box_array[boxid, 2] == 0:  # zero width: slot is empty
                continue
            xmin = int(box_array[boxid, 0])
            ymax = int(box_array[boxid, 1])
            xmax = xmin + int(box_array[boxid, 2])
            ymin = ymax - int(box_array[boxid, 3])
            boxes.append([xmin, ymin, xmax, ymax])

            mask_path = os.path.join(
                self.root, "DATA_MASKS", "Mask{}_{}.jpg".format(stem, boxid))
            with Image.open(mask_path) as mask_img:
                mask = np.array(mask_img)
            # Vectorised binarisation: every non-zero pixel becomes 1.
            masks.append((mask != 0).astype(mask.dtype))
        return img_path, boxes, masks

    def __getitem__(self, idx):
        """Return ``(img, target)`` for ``idx``.

        Frames without any hand are skipped: the next index (wrapping at the
        dataset length) is used instead, and ``target["image_id"]`` reports
        the index that was actually loaded.

        Raises:
            IndexError: If the dataset contains no images.
            RuntimeError: If no frame in the dataset has a labelled hand.
        """
        total = len(self)
        if total == 0:
            raise IndexError("HandsDataset is empty")

        # Bounded scan instead of unbounded recursion, wrapping at the real
        # dataset size rather than a hard-coded 4800.
        for _ in range(total):
            img_path, boxes, masks = self._read_annotations(idx)
            if boxes:
                break
            idx = (idx + 1) % total
        else:
            raise RuntimeError("no frame in the dataset contains a labelled hand")

        with Image.open(img_path) as raw:
            img = raw.convert("RGB")

        num_objs = len(boxes)
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        labels = torch.ones((num_objs,), dtype=torch.int64)  # single class: hand
        # Stack first so torch converts one contiguous array.
        masks = torch.as_tensor(np.stack(masks), dtype=torch.uint8)
        image_id = torch.tensor([idx])
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        """Number of files found in DATA_IMAGES."""
        return len(self.imgs)

In [None]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
def get_model_instance_segmentation(num_classes):
    """Return a COCO-pretrained Mask R-CNN whose heads predict ``num_classes`` classes."""
    # Instance-segmentation model with pre-trained weights.
    net = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    heads = net.roi_heads

    # Box head: keep the input width of the existing classifier, swap the predictor.
    box_in = heads.box_predictor.cls_score.in_features
    heads.box_predictor = FastRCNNPredictor(box_in, num_classes)

    # Mask head: same idea, with a 256-channel hidden layer.
    mask_in = heads.mask_predictor.conv5_mask.in_channels
    heads.mask_predictor = MaskRCNNPredictor(mask_in, 256, num_classes)
    return net

In [None]:
import torchvision.transforms as T
from vision.references.detection import engine
from vision.references.detection.engine import train_one_epoch, evaluate
import utils
def get_transform(train):
    """Compose the preprocessing pipeline: tensor conversion, plus a random flip when ``train`` is truthy."""
    # NOTE(review): torchvision.transforms.Compose passes a single argument to
    # each transform, while HandsDataset calls self.transforms(img, target) —
    # confirm which transforms module is intended here.
    steps = [T.ToTensor()]
    if train:
        steps += [T.RandomHorizontalFlip(0.5)]
    return T.Compose(steps)

ModuleNotFoundError: ignored

In [None]:
# NOTE(review): pyttsx3 is not imported by any cell visible in this notebook — confirm it is still needed.
!pip install pyttsx3

Collecting pyttsx3
  Downloading pyttsx3-2.90-py3-none-any.whl (39 kB)
Installing collected packages: pyttsx3
Successfully installed pyttsx3-2.90


In [None]:
# Clone the torchvision repository so that `vision.references.detection` can be imported.
# NOTE(review): the cell importing it sits above this one and recorded a ModuleNotFoundError;
# presumably it ran before the clone — confirm the intended cell order.
!git clone https://github.com/pytorch/vision

Cloning into 'vision'...
remote: Enumerating objects: 119020, done.[K
remote: Counting objects: 100% (11726/11726), done.[K
remote: Compressing objects: 100% (1002/1002), done.[K
remote: Total 119020 (delta 10796), reused 11489 (delta 10666), pack-reused 107294
Receiving objects: 100% (119020/119020), 230.67 MiB | 31.18 MiB/s, done.
Resolving deltas: 100% (103450/103450), done.


In [None]:
!pip install py7zr
import keras
import numpy as np
import pandas as pd
import os
from PIL import Image
from shutil import rmtree
from re import findall
import py7zr
from pathlib import Path



In [None]:
import zipfile

# Unpack the dataset archive from Drive into a fresh local folder.
make_dir = Path('/content/egohands')
path_to_source_folder = Path('/content/drive/MyDrive/handmask/egohands_kitti_formatted.7z')

# Start from an empty target so a re-run never mixes old and new files.
if make_dir.exists():
    rmtree(make_dir)
make_dir.mkdir(parents=True)

# Dispatch on the real file suffix; the previous regex ('.7z' with an
# unescaped dot) matched anywhere in the path.
archive_suffix = path_to_source_folder.suffix.lower()
if archive_suffix == '.7z':
    with py7zr.SevenZipFile(path_to_source_folder, mode='r') as z:
        z.extractall(str(make_dir))
    path_to_source_folder = make_dir / 'egohands_kitti_formatted'
elif archive_suffix == '.zip':
    # Extract into the same folder the path below points at (the previous
    # version unzipped into /content/ASLdataset and then looked elsewhere).
    with zipfile.ZipFile(path_to_source_folder) as z:
        z.extractall(str(make_dir))
    path_to_source_folder = make_dir / 'egohands_kitti_formatted'
# Any other suffix: path_to_source_folder is used as-is.

In [None]:
# File names (not full paths) found in the images and labels folders.
imgs = os.listdir(path_to_source_folder / "images")
boxes = os.listdir(path_to_source_folder / "labels")

In [None]:
from google.colab.patches import cv2_imshow
import cv2
# Display one frame, then again with a green rectangle drawn on it in place.
image = cv2.imread(
    '/content/egohands/egohands_kitti_formatted/images/CARDS_COURTYARD_B_T_frame_0011.jpg'
)
cv2_imshow(image)
cv2.rectangle(image, pt1=(647, 453), pt2=(825, 552), color=(0, 255, 0), thickness=2)
cv2_imshow(image)


In [None]:
def txtToArray(path):
    """Read a whitespace-separated label file into an (N, 1) int64 column array.

    Exploration helper: besides returning the array it prints the raw tokens,
    reports whether a 'hand' token is present, prints tokens 5-8 (when the
    file has at least eight tokens) and prints the tokens after each one has
    been split on ','.

    Args:
        path: Location of the label text file.

    Returns:
        np.ndarray of dtype int64 with one row per token.
    """
    # Close the handle deterministically (the previous version leaked it).
    with open(path) as f:
        tokens = f.read().split()

    print(tokens)
    # Test the raw strings: after the split below every entry is a list,
    # so `'hand' in tokens` could never be true.
    if 'hand' in tokens:
        print('found')

    tokens = [tok.split(',') for tok in tokens]
    # Guarded so that short files no longer raise IndexError.
    if len(tokens) >= 8:
        x_min, y_min, x_max, y_max = tokens[4:8]
        print(x_min, y_min, x_max, y_max)

    print(tokens)
    return np.array(tokens, dtype=np.int64)
# Parse the label file that shares its file stem with the frame displayed in the previous cell.
rect = txtToArray('/content/egohands/egohands_kitti_formatted/labels/CARDS_COURTYARD_B_T_frame_0011.txt')

['647', '453', '825', '552', '515', '431', '623', '544']
['515'] ['431'] ['623'] ['544']
[['647'], ['453'], ['825'], ['552'], ['515'], ['431'], ['623'], ['544']]


In [None]:
# Sorted so that list positions are reproducible across runs: glob() yields
# entries in arbitrary order, and later cells derive video numbers from the index.
all_img_path = sorted(Path('/content/egohands/egohands_kitti_formatted/images').glob("*"))

In [None]:
# Show how an image path maps to its label path: same file stem, '.txt'
# suffix, sibling 'labels' folder. Path operations replace the fixed slice
# offsets (43, 49, -3), which only worked for one exact directory prefix.
image = str(all_img_path[0])
image_file = Path(image)
print(image_file.with_suffix('.txt'))
print(image_file.parent.parent / 'labels' / (image_file.stem + '.txt'))

/content/egohands/egohands_kitti_formatted/images/PUZZLE_LIVINGROOM_B_T_frame_1379.txt
/content/egohands/egohands_kitti_formatted/labels/PUZZLE_LIVINGROOM_B_T_frame_1379.txt


In [None]:
def txtToArray(path):
    """Read the bounding boxes from a KITTI-style label file.

    Every object record holds a class name followed by numeric fields; fields
    5-8 are the box corners. Assumes one record per line (the usual KITTI
    layout) — TODO confirm against the label files.

    The previous version dropped every '0' token (so a box touching the image
    border lost a coordinate) and returned an (N, 1) column that callers could
    not index as ``[k, 2]``.

    Args:
        path: Location of the label text file.

    Returns:
        int64 array of shape (N, 4) with rows [xmin, ymin, xmax, ymax];
        shape (0, 4) when the file holds no records.
    """
    with open(path) as f:
        records = [line.split() for line in f]
    rows = [
        [int(float(value)) for value in fields[4:8]]
        for fields in records
        if len(fields) >= 8  # skip blank or truncated lines
    ]
    return np.array(rows, dtype=np.int64).reshape(-1, 4)
def __getitem__(idx, all_img_path):
    """Return a 32x32 RGB crop enclosing every hand in a frame, plus its activity label.

    The label file is looked up next to the image folder:
    ``<root>/images/<stem>.<ext>`` -> ``<root>/labels/<stem>.txt``. Box rows
    are read as corner coordinates [xmin, ymin, xmax, ymax]. Frames without a
    valid box are skipped by moving to the next index, wrapping at the end of
    ``all_img_path``.

    Args:
        idx: Position of the frame in ``all_img_path``.
        all_img_path: Sequence of image paths.

    Returns:
        (roi, label): ``roi`` is the resized crop as a NumPy array, ``label``
        is 0 cards, 1 chess, 2 jenga, 3 puzzle.

    Raises:
        IndexError: If ``all_img_path`` is empty.
        ValueError: If no listed image has a labelled hand.
    """
    # Activity comes from the file name prefix, which stays correct whatever
    # order the paths are listed in.
    # NOTE(review): the CHESS / JENGA prefixes are assumed from the label comments — confirm.
    activity_ids = {'CARDS': 0, 'CHESS': 1, 'JENGA': 2, 'PUZZLE': 3}

    total = len(all_img_path)
    if total == 0:
        raise IndexError("all_img_path is empty")

    boxes = []
    for _ in range(total):
        img_path = str(all_img_path[idx])
        images_dir, file_name = os.path.split(img_path)
        box_path = os.path.join(os.path.dirname(images_dir), 'labels',
                                os.path.splitext(file_name)[0] + '.txt')
        # reshape(-1, 4) also accepts a flat or column-shaped array of coordinates.
        box_array = np.asarray(txtToArray(box_path)).reshape(-1, 4)
        boxes = [[int(v) for v in row] for row in box_array
                 if row[2] > row[0] and row[3] > row[1]]  # drop empty/degenerate rows
        if boxes:
            break
        # The previous recursive call dropped all_img_path and wrapped at 4800.
        idx = (idx + 1) % total
    else:
        raise ValueError("no image in all_img_path has a labelled hand")

    # Region of interest: the smallest rectangle covering all hand boxes.
    left = max(0, min(b[0] for b in boxes))
    top = max(0, min(b[1] for b in boxes))
    right = max(b[2] for b in boxes)
    bottom = max(b[3] for b in boxes)

    with Image.open(img_path) as raw:
        im_array = np.asarray(raw.convert("RGB"))
    ROI = im_array[top:bottom, left:right, :]
    ROI_Resize = Image.fromarray(ROI, 'RGB').resize((32, 32))
    ROI_npArr = np.asarray(ROI_Resize)

    prefix = file_name.split('_')[0].upper()
    label = activity_ids.get(prefix)
    if label is None:
        # Fallback for unknown prefixes: 12 videos of 100 frames per activity.
        label = min(idx // 1200, 3)
    return ROI_npArr, label

In [None]:
X_train = []
Y_train = []
X_test = []
Y_test = []
# Sort the paths: glob() order is arbitrary, and the split below relies on
# frames of one video being contiguous.
# Assumes file names look like <VIDEO>_frame_<NNNN>.jpg with 100 frames per video — TODO confirm.
all_img_path = sorted(Path('/content/egohands/egohands_kitti_formatted/images').glob("*"))
print(len(all_img_path))
for idx in range(len(all_img_path)):
    video = idx // 100  # 0-based video index
    ROI, activity = __getitem__(idx, all_img_path)
    # Within every group of 12 videos the first 11 train and the 12th tests.
    if video % 12 <= 10:
        X_train.append(ROI)
        Y_train.append(activity)
    else:
        X_test.append(ROI)
        Y_test.append(activity)

4800
/content/egohands/egohands_kitti_formatted/images/PUZZLE_LIVINGROOM_B_T_frame_1379.jpg
/content/egohands/egohands_kitti_formatted/labels/PUZZLE_LIVINGROOM_B_T_frame_1379.txt
/content/egohands/egohands_kitti_formatted/labels/PUZZLE_LIVINGROOM_B_T_frame_1379.txt
['hand', '0', '0', '0', '1', '515', '479', '717', '0', '0', '0', '0', '0', '0', '0', '0', 'hand', '0', '0', '0', '540', '535', '1167', '716', '0', '0', '0', '0', '0', '0', '0', '0', 'hand', '0', '0', '0', '511', '181', '736', '308', '0', '0', '0', '0', '0', '0', '0', '0', 'hand', '0', '0', '0', '288', '146', '495', '297', '0', '0', '0', '0', '0', '0', '0', '0']
['1', '515', '479', '717', '540', '535', '1167', '716', '511', '181', '736', '308', '288', '146', '495', '297']
[[   1]
 [ 515]
 [ 479]
 [ 717]
 [ 540]
 [ 535]
 [1167]
 [ 716]
 [ 511]
 [ 181]
 [ 736]
 [ 308]
 [ 288]
 [ 146]
 [ 495]
 [ 297]]


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if __name__ == '__main__':


IndexError: ignored

In [None]:
# Turn the accumulated Python lists into NumPy arrays.
X_tr, Y_tr = np.array(X_train), np.array(Y_train)
X_te, Y_te = np.array(X_test), np.array(Y_test)

In [None]:
from keras.utils import to_categorical
# Pin the one-hot width to the 4 activity classes (matching the 4-unit softmax
# output). Without num_classes the width is inferred from the largest label
# present, so a split that lacks class 3 would yield fewer columns.
Y_tr = to_categorical(Y_tr, num_classes=4)
Y_te = to_categorical(Y_te, num_classes=4)

In [None]:
import matplotlib.pyplot as plt
# Visual check of one training crop; index 1501 requires X_tr to hold at least 1502 samples.
plt.imshow(X_tr[1501])
plt.show()

In [None]:
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D,Dropout,Activation
model = Sequential()

In [None]:
# Build the whole network in one expression so that re-running this cell
# replaces the model instead of appending a second copy of every layer.
model = Sequential([
    Conv2D(128, kernel_size=3, activation='relu', input_shape=(32, 32, 3)),
    MaxPooling2D(pool_size=2),
    Conv2D(64, kernel_size=3, activation='relu'),
    MaxPooling2D(pool_size=2),
    Flatten(),
    Dense(32, activation='relu'),
    Dense(4, activation='softmax'),  # one unit per activity class
])

In [None]:
# `learning_rate` is the supported name of the step-size argument; `lr` is a deprecated alias.
# NOTE(review): `decay` is rejected by newer Keras optimizers — confirm against the installed version.
sgd = optimizers.SGD(learning_rate=0.00001, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
model.summary()

In [None]:
# Train for 30 epochs with shuffling; the test arrays are also passed as the validation set.
model.fit(X_tr,Y_tr,validation_data=(X_te,Y_te),epochs=30,shuffle=True)

In [None]:
# Convert each softmax row to a one-hot vector at its arg-max class.
# Rounding the probabilities instead produced an all-zero row whenever the
# top probability was <= 0.5, so those frames counted as wrong even when the
# most likely class was the right one.
probabilities = model.predict(X_te)
preds = np.zeros_like(probabilities)
preds[np.arange(len(probabilities)), probabilities.argmax(axis=1)] = 1

# Number of test frames whose predicted class equals the true class.
counter = sum(
    1 for truth, guess in zip(Y_te, preds) if np.array_equal(truth, guess)
)

In [None]:
# Correctly classified frame count per test video; every run of 100
# consecutive predictions is attributed to one video.
video_scores = [0] * 4
for frame_no, (truth, guess) in enumerate(zip(Y_te, preds)):
    if np.array_equal(truth, guess):
        video_scores[frame_no // 100] += 1
print(video_scores)    #this gives amount of frames correctly classified in each of the 4 videos (cards,chess,jenga,puzzle)

In [None]:
# Sample label file used to try out the parser defined in the next cell.
box_path = '/content/CARDS_COURTYARD_T_B_frame_1432.txt'

In [None]:
def txtToArray(path):
    """Read hand bounding boxes from a KITTI-style label file.

    Each record is a class name followed by numeric fields, of which fields
    5-8 hold the box corners. Assumes one record per line — TODO confirm.

    Replaces the token-scanning draft, which called an undefined ``splitlist``
    helper, used the removed ``np.int`` alias and never closed the file.

    Args:
        path: Location of the label text file.

    Returns:
        int64 array of shape (N, 4) with rows [xmin, ymin, xmax, ymax];
        shape (0, 4) when the file holds no records.
    """
    boxes = []
    with open(path) as f:
        for line in f:
            fields = line.split()
            if len(fields) < 8:  # blank or truncated line
                continue
            boxes.append([int(float(value)) for value in fields[4:8]])
    if not boxes:
        return np.empty((0, 4), dtype=np.int64)
    return np.array(boxes, dtype=np.int64)

In [None]:
# Run the parser on the sample label file chosen above.
box_array = txtToArray(box_path)


/content/CARDS_COURTYARD_T_B_frame_1432.txt
['hand', '0', '0', '0', '200', '594', '424', '717', '0', '0', '0', '0', '0', '0', '0', '0', 'hand', '0', '0', '0', '567', '619', '770', '718', '0', '0', '0', '0', '0', '0', '0', '0', 'hand', '0', '0', '0', '568', '308', '714', '399', '0', '0', '0', '0', '0', '0', '0', '0', 'hand', '0', '0', '0', '342', '325', '487', '478', '0', '0', '0', '0', '0', '0', '0', '0']
64
1
[]
2
[]
3
[]
4
[]
8
['200', '594', '424', '717']
9
['200', '594', '424', '717']
10
['200', '594', '424', '717']
11
['200', '594', '424', '717']
12
['200', '594', '424', '717']
13
['200', '594', '424', '717']
14
['200', '594', '424', '717']
15
['200', '594', '424', '717']
16
['200', '594', '424', '717']
17
['200', '594', '424', '717']
18
['200', '594', '424', '717']
19
['200', '594', '424', '717']
20
['200', '594', '424', '717']
24
['200', '594', '424', '717', '567', '619', '770', '718']
25
['200', '594', '424', '717', '567', '619', '770', '718']
26
['200', '594', '424', '717', '5

NameError: ignored

In [None]:
# Python form of the Java/C-style snippet `char c = 'A'; long l = c`, which is
# a SyntaxError in a Python cell. ord() gives the integer code that the
# implicit char -> long conversion would produce.
c = 'A'
l = ord(c)

SyntaxError: ignored

In [None]:
# The PyPI distribution is named `torch` (not `pytorch`), and `cudatoolkit`
# is not available from PyPI — the recorded output shows pip finding no
# versions for it. %pip installs into the running kernel's environment.
%pip install torch torchvision


Collecting pytorch
  Downloading pytorch-1.0.2.tar.gz (689 bytes)
[31mERROR: Could not find a version that satisfies the requirement cudatoolkit==10.0 (from versions: none)[0m
[31mERROR: No matching distribution found for cudatoolkit==10.0[0m


In [None]:
def Preprocessimage(new_image_path):
    """Load an image file and return it as a 32x32 RGB array.

    Args:
        new_image_path: Path of the image to load.

    Returns:
        NumPy array view of the image after RGB conversion and a resize to
        32x32 pixels.
    """
    # The `def` line was missing its colon. The array -> Image round trip of
    # the previous version changed nothing and is dropped; the context
    # manager closes the file handle.
    with Image.open(new_image_path) as im:
        ROI_Resize = im.convert("RGB").resize((32, 32))
    return np.asarray(ROI_Resize)

In [None]:
# Convert the accumulated test crops to an array (same conversion as in an earlier cell).
X_te = np.array(X_test)