In [1]:
import cv2
import matplotlib.pyplot as plt
import os
from PIL import Image
from random import shuffle

In [3]:
import torch 
import torchvision.transforms.functional as F
import torch.nn as nn
from torch.optim import SGD

In [28]:
# Run on the GPU when one is available; otherwise fall back to the CPU so the
# notebook still executes on machines without CUDA.
device='cuda' if torch.cuda.is_available() else 'cpu'

In [17]:
# Dataset folders: `positivePath` holds the images labelled 1 by getData(),
# `negativePath` the images labelled 0. Both live under `parentPath`.
parentPath='/content/drive/MyDrive/me/'
positivePath=f'{parentPath}parsedme/'
negativePath=f'{parentPath}parsednotme/'

In [53]:
def resizeImg(imgPath):
    """Load an image file and return it as a normalized 3x128x128 float tensor.

    The image is converted to RGB, resized to 128x128, cast to float32 and
    standardized with its own scalar mean and standard deviation. The result
    is moved to the module-level `device`.

    Args:
        imgPath: path of the image file to load.

    Returns:
        A float32 tensor of shape (3, 128, 128) on `device`.

    Raises:
        ValueError: from `F.normalize` when the image has zero standard
            deviation (every pixel identical).
    """
    # Close the file handle deterministically; convert()/resize() return new,
    # fully loaded images, so nothing depends on the file after the block.
    with Image.open(imgPath) as img:
        # Force 3 channels: grayscale, RGBA or palette files would otherwise
        # produce tensors the network's 3-channel first convolution rejects.
        rgb=img.convert('RGB').resize((128,128))
    temp=F.pil_to_tensor(rgb).to(dtype=torch.float32)
    mean=torch.mean(temp)
    std=torch.std(temp)
    return F.normalize(temp,mean=mean,std=std).to(device=device)

def getData(nTrain=150,nTest=25):
    """Build shuffled (image, label) training and test lists.

    Files from `positivePath` are labelled 1 and files from `negativePath`
    are labelled 0. Each folder is shuffled; its first `nTrain` files go to
    the training set and the next `nTest` files to the test set. Every image
    is preprocessed with `resizeImg`.

    Args:
        nTrain: number of files per class used for training (default 150).
        nTest: number of files per class used for testing (default 25).

    Returns:
        (trainData, testData): two shuffled lists of (image, label) pairs,
        where each label is a float tensor of shape (1,) on `device`.
        If a folder holds fewer than nTrain+nTest files, the corresponding
        split is simply shorter.
    """
    def listShuffled(folder):
        # Keep regular files only: directory entries such as notebook
        # checkpoint folders cannot be opened as images.
        files=[os.path.join(folder,name) for name in os.listdir(folder)]
        files=[f for f in files if os.path.isfile(f)]
        shuffle(files)
        return files

    def labelled(files,makeLabels):
        # Load eagerly and size the labels from the images actually loaded,
        # so images and labels can never get out of step.
        images=[resizeImg(f) for f in files]
        labels=makeLabels((len(images),1)).to(device)
        return list(zip(images,labels))

    positiveFiles=listShuffled(positivePath)
    negativeFiles=listShuffled(negativePath)

    trainData=(labelled(positiveFiles[:nTrain],torch.ones)
               +labelled(negativeFiles[:nTrain],torch.zeros))
    testData=(labelled(positiveFiles[nTrain:nTrain+nTest],torch.ones)
              +labelled(negativeFiles[nTrain:nTrain+nTest],torch.zeros))
    shuffle(trainData)
    shuffle(testData)

    return trainData,testData

In [54]:
# Load and preprocess the dataset once; both results are lists of (image, label) pairs.
trainData,testData=getData()

In [4]:
class myassistant(nn.Module):
  """Small CNN for binary classification of 3x128x128 images.

  Two unpadded 3x3 convolutions, each followed by ReLU and 2x2 max pooling,
  reduce a 3x128x128 input to 16x30x30 features (128 -> 126 -> 63 -> 61 -> 30).
  Two linear layers then map these features to a single sigmoid probability.
  """
  def __init__(self):
    super(myassistant,self).__init__()
    self.conv1=nn.Conv2d(3,16,3)
    self.pool1=nn.MaxPool2d(2)
    self.conv2=nn.Conv2d(16,16,3)
    self.pool2=nn.MaxPool2d(2)
    self.lin1=nn.Linear(16*30*30,128)
    self.lin2=nn.Linear(128,1)
  def forward(self,x):
    """Return the sigmoid probability for one image or a batch of images.

    Args:
      x: float tensor of shape (3, 128, 128) or (N, 3, 128, 128).

    Returns:
      Tensor of shape (1,) for a single image, or (N, 1) for a batch.
    """
    x=nn.functional.relu(self.conv1(x))
    x=self.pool1(x)
    x=nn.functional.relu(self.conv2(x))
    x=self.pool2(x)
    # Flatten only the channel/height/width dims so that an optional leading
    # batch dimension is preserved instead of being merged into the features.
    x=x.flatten(start_dim=-3)
    x=nn.functional.relu(self.lin1(x))
    x=torch.sigmoid(self.lin2(x))
    return x

In [50]:
# Instantiate the network with fresh weights and move it to the selected device.
model=myassistant().to(device)

In [51]:
# Binary cross-entropy on the model's sigmoid output, optimized with plain SGD.
lossFunc=nn.BCELoss()
optimizer=SGD(model.parameters(),lr=0.0002)

In [56]:
# Train for 30 epochs, one sample per optimizer step, and report the summed
# loss of each epoch.
for i in range(30):
  totalLoss=0.0
  for img,label in trainData:
      optimizer.zero_grad()
      pred=model(img)
      loss=lossFunc(pred,label)
      loss.backward()
      optimizer.step()
      # Accumulate a plain Python float: adding the loss tensor itself would
      # keep autograd history alive for every sample in the epoch.
      totalLoss+=loss.item()
  print(f'epoch {i} - {totalLoss}')
    

epoch 0 - 60.0764274597168
epoch 1 - 43.35356521606445
epoch 2 - 32.1224365234375
epoch 3 - 24.453786849975586
epoch 4 - 19.11665916442871
epoch 5 - 15.31724739074707
epoch 6 - 12.551822662353516
epoch 7 - 10.488265991210938
epoch 8 - 8.911184310913086
epoch 9 - 7.68127965927124
epoch 10 - 6.701298236846924
epoch 11 - 5.906935691833496
epoch 12 - 5.255220413208008
epoch 13 - 4.711631774902344
epoch 14 - 4.252938747406006
epoch 15 - 3.8625636100769043
epoch 16 - 3.528038501739502
epoch 17 - 3.237978458404541
epoch 18 - 2.9847402572631836
epoch 19 - 2.762519598007202
epoch 20 - 2.5673673152923584
epoch 21 - 2.39363956451416
epoch 22 - 2.2383508682250977
epoch 23 - 2.0995194911956787
epoch 24 - 1.9739031791687012
epoch 25 - 1.861520767211914
epoch 26 - 1.7587531805038452
epoch 27 - 1.6654003858566284
epoch 28 - 1.5797561407089233
epoch 29 - 1.502052903175354


In [70]:
# Count correct predictions on the test set: a prediction is correct when the
# rounded sigmoid output equals the label. Gradients are not needed here.
with torch.no_grad():
  total=sum(round(model(img).item())==label.item() for img,label in testData)
print(total)# number correct, out of 50 test samples

47


In [5]:
# Rebuild the network on the CPU and restore trained weights from 'model.pt'
# for inference; eval() switches the module to evaluation mode.
# NOTE(review): no visible cell writes 'model.pt' (e.g. via torch.save) — confirm where it is produced.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
tmodel=myassistant()
tmodel.load_state_dict(torch.load('model.pt',map_location=torch.device('cpu')))
tmodel.eval()

myassistant(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (lin1): Linear(in_features=14400, out_features=128, bias=True)
  (lin2): Linear(in_features=128, out_features=1, bias=True)
)

In [6]:
# Haar-cascade frontal-face detector loaded from OpenCV's bundled data directory;
# used below to locate faces in webcam frames.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades+'haarcascade_frontalface_default.xml')

In [None]:
# Live demo: read webcam frames, detect faces, classify each face crop with
# `tmodel`, and draw a box around faces scored above 0.9. Press Esc to stop.
# The last captured frame is shown with matplotlib once the loop ends.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.release()
    # Raise rather than calling exit() inside a notebook.
    raise RuntimeError("Cannot open camera")

lastFrame = None  # most recent frame that was read successfully
try:
    while True:
        ret, frame = cap.read()
        # Check the status flag before touching `frame`: it is not a valid
        # image when the read fails.
        if not ret:
            print("Can't receive frame (stream end?). Exiting ...")
            break
        lastFrame = frame
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # OpenCV frames are BGR, while the training images were loaded through
        # PIL; convert once so crops use the same channel order as training.
        # Cropping from this copy also keeps drawn boxes out of later crops.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        faces = face_cascade.detectMultiScale(gray, 1.05, 5)
        for x, y, w, h in faces:
            parsed = rgb[y:y+h, x:x+w, :]
            pilImage = Image.fromarray(parsed).resize((128, 128))
            tensor = F.pil_to_tensor(pilImage).to(dtype=torch.float32)
            # Same per-image standardization as resizeImg().
            tensor = F.normalize(tensor, mean=tensor.mean(), std=tensor.std())
            with torch.no_grad():
                pred = tmodel(tensor)
            if pred > 0.9:
                cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 255), 2)
        cv2.imshow('cam', frame)
        k = cv2.waitKey(30) & 0xff
        if k == 27:  # Esc
            break
except ValueError as err:
    # A ValueError still ends the capture, but now reports why.
    print(f"Stopping capture: {err}")
finally:
    # Always free the camera and close the preview window, even on errors
    # or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

if lastFrame is not None:
    plt.imshow(cv2.cvtColor(lastFrame, cv2.COLOR_BGR2RGB))
    plt.show()