<a href="https://colab.research.google.com/github/jsparihar/DeepLearningNotes/blob/master/EmotionsCode_JP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install mtcnn
!pip install facenet-pytorch

Collecting mtcnn
  Downloading mtcnn-0.1.1-py3-none-any.whl (2.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: mtcnn
Successfully installed mtcnn-0.1.1
Collecting facenet-pytorch
  Downloading facenet_pytorch-2.5.3-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m22.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: facenet-pytorch
Successfully installed facenet-pytorch-2.5.3


In [None]:
import os
from google.colab import files
from PIL import Image
import pickle
import cv2
import glob
import numpy as np
import torch
import shutil

In [None]:
# Mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# Drive root folder; passed to downloadDataset() as its myDriveDir argument.
myDriveDir="/content/drive/MyDrive"

Mounted at /content/drive


In [None]:
def setupKaggle():
  """Install the Kaggle CLI and copy the kaggle.json API token into ~/.kaggle.

  Opens the Colab upload dialog only when kaggle.json is not already present in
  the current working directory.
  """
  #if you've already uploaded kaggle.json before, don't do it again
  if not(os.path.exists("kaggle.json")):
    files.upload() #upload kaggle.json
  !pip install --upgrade --force-reinstall --no-deps kaggle
  !mkdir -p ~/.kaggle
  !cp kaggle.json ~/.kaggle/
  !ls ~/.kaggle
  #restrict the token to owner read/write only
  #NOTE(review): this line targets /root/.kaggle while the lines above use ~ ; they are the same folder only when the home directory is /root
  !chmod 600 /root/.kaggle/kaggle.json

def downloadDataset(dataset_url, myDriveDir):
  dataset_name=os.path.join(myDriveDir,"draft")
  zip_name=dataset_url.split("/")[1]
  !kaggle datasets download -d {dataset_url}
  if not(os.path.exists(dataset_name)):
    !mkdir {dataset_name}
  #!unzip -j -q {zip_name}.zip 'emoooooo/*' -d {dataset_name}
  !unzip -q {zip_name}.zip -d {dataset_name}
  !rm -f {zip_name}.zip

  old_name=os.path.join(dataset_name, os.listdir(dataset_name)[0])
  new_name=os.path.join(dataset_name,'raw_data')
  os.rename(old_name, new_name)
  return new_name #Folder path the data is saved in

In [None]:
from mtcnn import MTCNN
# Single detector instance; the cropping loop passes it to findFace() for every image.
detector = MTCNN()
def findFace(img,detector):
    """Locate a face in img and return its bounding box and detection confidence.

    img: image array whose first two dimensions are (height, width).
    detector: object whose detect_faces(img) returns a list of dicts carrying a
        'box' entry ([x, y, width, height]) and a 'confidence' entry.
    Returns (crop_coords, p): crop_coords is [x, y, width, height] clipped to the
    image bounds and p is the confidence of the first detection. When nothing
    usable is detected the whole image is returned as the box, with p=0.
    """
    img_h, img_w = img.shape[:2]
    whole_image = [0, 0, img_w, img_h]

    d=detector.detect_faces(img) #figure out how to supress output
    if len(d) == 0:
        return whole_image, 0

    # read by key instead of unpacking .values(), so the result does not depend
    # on the order in which the detector fills its result dict
    x, y, width, height = d[0]['box']
    p = d[0]['confidence']

    # a reported box can extend past the image border (e.g. negative x or y);
    # clip it so that slicing with these coordinates stays inside the image
    x1, y1 = max(0, x), max(0, y)
    x2, y2 = min(img_w, x + width), min(img_h, y + height)
    if x2 <= x1 or y2 <= y1:
        # nothing of the box lies inside the image: treat it as "no face found"
        return whole_image, 0

    return [x1, y1, x2 - x1, y2 - y1], p

In [None]:
def cropNsave(img,crop_coords,label,dir, faceProb, ext="jpeg"):
  """Crop the face region out of img, resize it to 160x160 and save it under dir/label.

  img: RGB image array (height, width, 3); the cropping loop converts to RGB before calling.
  crop_coords: [x, y, width, height] crop coordinates gotten from mtcnn.
  label: the emotion of the subject in the image (the name of the folder it was found in).
  dir: directory to save the cropped data in; different from the dir of the uncropped images.
  faceProb: detection confidence. Crops below 0.95 are saved in dir/label/poorlyCropped.
  ext: the file type to save the image as.
  """
  x,y,width,height=crop_coords

  #clip the box start to the image: a negative start index would be read by numpy
  #as "count from the end" and produce an empty or wrong crop
  left, top = max(0,x), max(0,y)
  cropImage=img[top:max(top,y+height), left:max(left,x+width), :]
  if cropImage.size==0:
    #degenerate box: keep the whole image and file it as a poor crop instead of crashing in cv2.resize
    cropImage=img
    faceProb=0
  cropImage = cv2.resize(cropImage,(160,160))
  #cv2.imwrite interprets arrays as BGR; convert back so the saved file keeps its true colors
  cropImage = cv2.cvtColor(cropImage, cv2.COLOR_RGB2BGR)

  emotFold=os.path.join(dir,label)
  badCrop=os.path.join(emotFold,"poorlyCropped")
  #creates dir, the label folder and its poorlyCropped subfolder in one go, whichever are missing
  os.makedirs(badCrop, exist_ok=True)

  if faceProb<0.95: #if mtcnn wasn't confident in identifying the face, the corresponding image is saved in a subfolder of the labeled folder called poorlyCropped
    number=str(len(os.listdir(badCrop))+1)+"_"+str(faceProb) #length gives the number of images already saved. 'number' is +1 greater than that.
    save_dir=badCrop
  else:
    number=str(len(os.listdir(emotFold))) # no +1 because the poorlyCropped folder is included in the length and already accounts for it
    save_dir=emotFold

  filename= label + number +"."+ ext #name of the image is its emotion label with a unique ID number; ID numbers are assigned sequentially.
  filepath=os.path.join(save_dir,filename)
  cv2.imwrite(filepath, cropImage)
  #image.save(filepath)

In [None]:
def getUncroppedList(datasetPath):
  """Build the per-label list of images that still have to be cropped.

  datasetPath: path of the raw_data folder, which contains all of the label folders.
  Returns (uncroppedList, parent): uncroppedList is a dictionary with label names
  as keys and, for each, the list of image file names (not full paths) in that
  label folder; parent is the absolute pathlib.Path of the folder that contains
  datasetPath.
  If parent holds an uncroppedList.pickle file, the dictionary stored in it is
  returned instead of listing the folders again.
  """
  from pathlib import Path #local import: the notebook only imports Path in a later cell

  parent=Path(datasetPath).parent.absolute()

  savedPath=os.path.join(parent,"uncroppedList.pickle")
  if os.path.exists(savedPath):
    #NOTE: unpickling can execute arbitrary code; only resume from a checkpoint written by this notebook
    with open(savedPath, "rb") as file:
      return pickle.load(file), parent

  #folders created by the train/test split are not labels; filtering (instead of
  #list.remove) also copes with only some of them being present
  splitFolders={"train","test","validate"}

  uncroppedList={}
  for label in os.listdir(datasetPath):
    labelPath=os.path.join(datasetPath,label)
    #stray files next to the label folders cannot be listed, so skip them
    if label in splitFolders or not os.path.isdir(labelPath):
      continue
    uncroppedList[label]=os.listdir(labelPath)

  return uncroppedList, parent


In [None]:
#from google.colab.patches import cv2_imshow

#cv2_imshow(img)

In [None]:
#Kaggle dataset slug ("owner/dataset-name"); the commented-out slug is an alternative dataset
dataset_url= "sanidhyak/human-face-emotions" #"abdulwasay551/facial-emotion-100100-pictures"

setupKaggle()
datasetPath=downloadDataset(dataset_url,myDriveDir) # path of the raw_data folder returned by downloadDataset

In [None]:
from pathlib import Path
import copy
datasetPath="/content/drive/MyDrive/draft/raw_data"
uncroppedBefore, parent=getUncroppedList(datasetPath)
#uncroppedAfter is the resume checkpoint: it always holds the images that have NOT been cropped yet
uncroppedAfter=copy.deepcopy(uncroppedBefore)

saveDir=os.path.join(parent,"cropped") #save directory for the cropped dataset, which will be in the parent folder of raw_data
#Get bounding box for face, crop image and save the result in a new directory
for label in uncroppedBefore:
  labelPath=os.path.join(datasetPath,label)
  for image_name in uncroppedBefore[label]:
    fileName=os.path.join(labelPath,image_name)
    bgr=cv2.imread(fileName)
    if bgr is None:
      #cv2.imread returns None for files it cannot decode; skip them instead of crashing in cvtColor.
      #They stay in uncroppedAfter because they were never cropped.
      print("skipping unreadable image:", fileName)
      continue
    img=cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    crop_coords, faceProb=findFace(img,detector)
    cropNsave(img, crop_coords, label, saveDir, faceProb, ext="jpeg")
    #only mark the image as done once its crop has been written, so an interruption
    #in the middle of an image does not drop it from the checkpoint
    uncroppedAfter[label].remove(image_name)



In [None]:
  #RUN THIS IF THE ABOVE FOR LOOP FINISHES EARLY
  #save the list of images that still need to be cropped
  #getUncroppedList() looks for this file name in the same parent folder and returns its contents when it is present
  savePath=os.path.join(parent,"uncroppedList.pickle")
  with open(savePath, "wb") as file:
    pickle.dump(uncroppedAfter, file, pickle.HIGHEST_PROTOCOL)

In [None]:
# OPTIONAL don't run if not necessary
#delete the cropped data from a folder
def delContents(dir,ext):
    """Delete every entry directly inside dir whose name ends with ext.

    dir: folder to clean.
    ext: name suffix to match, e.g. 'jpeg'.
    Matching files are removed; matching directories are removed with os.rmdir,
    which only succeeds when the directory is empty.
    """
    for entry in os.listdir(dir):
        if not entry.endswith(ext):
            continue
        target=os.path.join(dir, entry)
        if os.path.isdir(target):
            os.rmdir(target)
        else:
            os.remove(target)

In [None]:
#label=#"Surprise" #name of the folder you want to clear
#folderpath=os.path.join(myDriveDir,'emotions','cropped',label)
#For each of raw_data/test, raw_data/train and raw_data/validate: go through every entry
#inside it and delete the contents of that entry whose names end in 'jpeg'.
setnames=["test","train","validate"]
folder="/content/drive/MyDrive/draft/raw_data"
setpaths=[os.path.join(folder,f)  for f in setnames]
for setp in setpaths:
  testcontents=glob.glob(os.path.join(setp,'*')) #every entry directly inside this split folder
  for labpath in testcontents:
    delContents(labpath,'jpeg')

In [None]:
#folders created by the split itself; they must never be treated as labels
_SPLIT_FOLDERS = ("train", "test", "validate")

def _labelNames(datasetPath):
  #sorted names of the label folders directly inside datasetPath (split folders, hidden folders and plain files excluded)
  return sorted(
      name for name in os.listdir(datasetPath)
      if name not in _SPLIT_FOLDERS
      and not name.startswith(".")
      and os.path.isdir(os.path.join(datasetPath, name))
  )

def makeSplitDir(datasetPath, foldername):
  """Make the directory for a train, test or validate set, with one sub-folder per label.

  datasetPath: folder that contains the label folders.
  foldername: name of the set folder to create inside datasetPath (e.g. 'train').
  Returns (labelPaths, setdir): the per-label folders inside the set folder, and
  the set folder itself. Folders that already exist are left untouched and any
  missing ones are created, so the call is safe to repeat.
  """
  setdir=os.path.join(datasetPath, foldername)
  labelPaths=[os.path.join(setdir,lab) for lab in _labelNames(datasetPath)]

  for p in labelPaths:
    os.makedirs(p, exist_ok=True) #also creates setdir when needed
  return labelPaths, setdir

def splitSamples(datasetPath, split):
  """Copy the images of every label folder into train/<label> and test/<label>.

  datasetPath: folder that contains the label folders; train/ and test/ are created inside it.
  split: sequence whose first element is the fraction of each label's files that
      goes to train; all remaining files go to test.
  Returns [trainDir, testDir].
  The original files are copied, not moved. The shuffle is not seeded, so running
  this again on an already split folder copies a different selection on top of
  the old one; clear train/ and test/ before re-splitting.
  """
  labels=_labelNames(datasetPath)

  _, testDir = makeSplitDir(datasetPath,'test') #Function defined in this cell
  _, trainDir = makeSplitDir(datasetPath, 'train')
  setDirs=[trainDir,testDir]

  for lab in labels:
    #only regular files: shutil.copy cannot copy a directory that happens to match '*.*'
    filepaths=sorted(f for f in glob.glob(os.path.join(datasetPath,lab,'*.*')) if os.path.isfile(f))
    filecount=len(filepaths)
    shuffled_indices = torch.randperm(filecount).tolist()  #plain ints, usable as list indices
    train_size=int(round(split[0]*filecount)) #slice bounds must be integers

    #destination is this label's own folder inside each set
    trainDest=os.path.join(trainDir,lab)
    testDest=os.path.join(testDir,lab)
    for i in shuffled_indices[:train_size]:
      shutil.copy(filepaths[i], trainDest)
    for i in shuffled_indices[train_size:]:
      shutil.copy(filepaths[i], testDest)

  return setDirs

In [None]:
#reference 1: https://pytorch.org/vision/stable/generated/torchvision.datasets.ImageFolder.html
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader



In [None]:
#split the data up into train and test
#NOTE(review): these paths point at MyDrive/emotion, while the download and cropping cells work under MyDrive/draft - confirm which folder is intended
#1) with background (raw data)
rawPath="/content/drive/MyDrive/emotion/raw_data"
setDirsRaw = splitSamples(rawPath, split=[0.8,0.2]) #makes a train and test folder in raw_data
#2) without background (cropped data)
cropPath="/content/drive/MyDrive/emotion/cropped"
setDirsCrop = splitSamples(cropPath, split=[0.8,0.2]) #makes a train and test folder in cropped



NameError: ignored

In [None]:
#Get data loaders for raw_data
from torchvision import transforms

trainDirRaw,testDirRaw=setDirsRaw #[trainDir, testDir] as returned by splitSamples for raw_data

#ImageFolder yields PIL images unless a transform is given, and the DataLoader can only
#batch tensors of equal shape. Raw images are not guaranteed to share a size, so they are
#resized to 160x160 (the size cropNsave uses for the cropped faces) and converted to tensors.
rawTransform=transforms.Compose([transforms.Resize((160,160)), transforms.ToTensor()])

dataset = ImageFolder(root=trainDirRaw, transform=rawTransform)
TrainloaderRaw=DataLoader(dataset, batch_size=8, shuffle=True)

dataset = ImageFolder(root=testDirRaw, transform=rawTransform)
TestloaderRaw=DataLoader(dataset, batch_size=8, shuffle=True)

#prints for checking only.
#print("Train dataset length:", len(trainLoader.dataset))
#print("Validation dataset length:", len(testLoader.dataset))
#print("Test dataset length:", len(valLoader.dataset))

#SIDE GOAL
#Artificially unbalance an emotion group.

NameError: ignored

In [None]:
#Get data loaders for the cropped data
from torchvision import transforms

trainDirCrop,testDirCrop=setDirsCrop #[trainDir, testDir] as returned by splitSamples for cropped

#ImageFolder yields PIL images unless a transform is given, and the DataLoader can only
#batch tensors. cropNsave writes every crop at 160x160, so converting to tensors is enough.
dataset = ImageFolder(root=trainDirCrop, transform=transforms.ToTensor())
TrainloaderCrop=DataLoader(dataset, batch_size=8, shuffle=True)

dataset = ImageFolder(root=testDirCrop, transform=transforms.ToTensor())
TestloaderCrop=DataLoader(dataset, batch_size=8, shuffle=True)

#prints for checking only.
#print("Train dataset length:", len(trainLoader.dataset))
#print("Validation dataset length:", len(testLoader.dataset))
#print("Test dataset length:", len(valLoader.dataset))

In [None]:
#Split the cropped images stored under MyDrive/draft/cropped
saveDir="/content/drive/MyDrive/draft/cropped"
setDirs = splitSamples(saveDir, split=[0.8,0.2]) #makes a train and test folder inside saveDir; setDirs is [trainDir, testDir]


In [None]:
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import transforms
#NOTE(review): splitSamples only creates train/ and test/; confirm that this validate folder exists and how it is produced
valDir="/content/drive/MyDrive/draft/cropped/validate"
#ImageFolder yields PIL images unless a transform is given, and the DataLoader can only
#batch tensors. cropNsave writes every crop at 160x160, so converting to tensors is enough.
dataset = ImageFolder(root=valDir, transform=transforms.ToTensor())
loader=DataLoader(dataset, batch_size=8, shuffle=True)


In [None]:
#IF .ipynb_checkpoints causing errors run this
#Deletes every directory named .ipynb_checkpoints that find reports. find is given no start path,
#so the search starts at the current working directory.
#NOTE(review): checkpoint folders inside the dataset on Drive are only reached if the working directory is above them - confirm
!rm -rf `find -type d -name .ipynb_checkpoints`

In [None]:
#Sanity check: print the label tensor of every batch produced by loader
for batch in loader:
  input, labels = batch #NOTE: this rebinds the builtin name `input` for the rest of the session
  print(labels)

In [None]:
def evaluate(loader,model):
    """Return the classification accuracy of model over loader, in percent.

    loader: iterable of (inputs, labels) batches.
    model: module that maps inputs to one score per class along dimension 1.
    Returns 100 * correct / total, or 0.0 when loader yields no samples.
    The model is switched to eval mode and left in it.
    """
    model.eval()
    correct = 0
    total = 0
    # no gradients are needed for scoring; skipping them saves memory and time
    with torch.no_grad():
        for inputs, labels in loader:
            #inputs = inputs.to(device).float()
            #labels = labels.to(device).long()
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if total == 0:
        # an empty loader would otherwise raise ZeroDivisionError
        return 0.0
    return 100*correct/total

In [None]:
class facenet()

SyntaxError: ignored

In [None]:
#import facenet model, modify it and retrain it on labeled data (Saxon & Zhenyuyou)
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, TensorDataset
from facenet_pytorch import InceptionResnetV1
import matplotlib.pyplot as plt
import numpy as np

# Parameters.
# Dataset sizes are taken from the cropped-face loaders built earlier in this notebook
# (the names trainLoader/testLoader used here before are never defined).
# NOTE(review): confirm the cropped set, not the raw set, is the one intended here.
num_train = len(TrainloaderCrop.dataset)
num_test = len(TestloaderCrop.dataset)
num_channels = 3
pic_size = 96
num_classes = 7

# Training parameters.
batch_size = 64
learning_rate = 0.001
momentum = 0.9
num_epochs = 10

# NOTE: X_* and y_* are random placeholders with the same sample counts as the real sets.
# The loop below exercises the training pipeline but learns nothing meaningful until
# these are replaced by the real image batches.
X_train = torch.randn(num_train, num_channels, pic_size, pic_size)
X_test = torch.randn(num_test, num_channels, pic_size, pic_size)
y_train = torch.randint(0, num_classes, (num_train,))
y_test = torch.randint(0, num_classes, (num_test,))

# Create DataLoader for training and testing data
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Define the FaceNet model
model = InceptionResnetV1(classify=True, num_classes=num_classes)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

# Training loop
train_accuracy = []
test_accuracy = []
traning_losses = []
for epoch in range(num_epochs):
    print("Epoch: ", epoch)
    model.train()  # evaluate() leaves the model in eval mode, so switch back every epoch
    for inputs, labels in train_loader:
      optimizer.zero_grad()
      outputs = model(inputs)
      # CrossEntropyLoss takes integer class indices directly; no one-hot encoding needed
      loss = criterion(outputs, labels)
      loss.backward()
      optimizer.step()

      traning_losses += [loss.item()]

    # evaluate(loader, model) takes no other arguments; train accuracy is measured on the training loader
    train_accuracy.append(evaluate(train_loader, model))
    test_accuracy.append(evaluate(test_loader, model))

    print(f'Epoch {epoch+1}/{num_epochs}: Train Accuracy = {train_accuracy[-1]:.4f}, Test Accuracy = {test_accuracy[-1]:.4f}')

# Plot the training and test accuracy
plt.plot(range(1, num_epochs+1), train_accuracy, label='Train Accuracy')
plt.plot(range(1, num_epochs+1), test_accuracy, label='Test Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()  # without this the line labels above are never shown
plt.savefig("train_test_accuracy.png")


NameError: ignored

In [None]:
#import resnet model, modify it and retrain it on labeled data (Nancy)

In [None]:
#facenet feature creation (Saxon)

In [None]:
#unsupervised clustering on train data (Saxon & Nancy)

In [None]:
#save clustering results in new folders: 1 folder per cluster (Saxon & Nancy)


In [None]:
#retrain the modified Facenet model on the unsupervised labels
#Test on original labels