In [None]:
'''
/*
** Data_organiser_augmentor.ipynb
** This file has been used to filter appropriate data from the original CK+ dataset based on
** the availability of a corresponding label for the respective class images.
** The filtered data is split into three different sets (Training (80%), Testing (10%) and Validation (10%))
** and each set is put in its own folder.
** Afterwards only the Training dataset is augmented (only flip augmentation has been used).
**
** @author: Abhijeet Parida, Jyotirmay Senapati
** @Date: 27th January, 2018
** @Copyright: Facial Expression Prediction, DL4CV project, Winter Sem, 2018
*/
'''

# Importing modules

import glob
import os
import random
from random import shuffle
from shutil import copyfile, rmtree, copytree

import cv2 
import numpy as np

import utils.img_allign_expnet as iae

# Emotion names in label order: the position of a name in this list is its numeric label.
emo_list = [
    "neutral",
    "anger",
    "contempt",
    "disgust",
    "fear",
    "happy",
    "sadness",
    "surprise",
]
# Reverse lookup (emotion name -> numeric label), derived from the list above so both stay in sync.
emo_dict = {emotion_name: emotion_id for emotion_id, emotion_name in enumerate(emo_list)}

## Creating required folder structure.

In [None]:
## Uncomment if the respective folders are not present and need to be created.
#root = "data/alligned"
#os.makedirs('%s'%root)

#for emotion in emo_list:
#    os.makedirs('%s/%s/'%(f"{root}",emotion)) 

## Copying filtered images.

In [None]:
# original_data + label (filtering)==> filtered_data (allignment)==> alligned (splitting)==> set (augmentation)==> set 

def get_filtered_data(data_folder="data/original_data",
                      label_folder="data/labels",
                      destination_folder="data/filtered_data"):
    """Copy the labelled frames of every session into one folder per emotion.

    The data is expected to be laid out as
    ``<data_folder>/<participant>/<session>/<frame>`` with an optional label at
    ``<label_folder>/<participant>/<session>/<frame stem>_emotion.txt``.
    For every frame that has such a label file, the first frame of the session
    is copied to ``<destination_folder>/neutral`` and the last (up to) three
    frames of the session are copied to ``<destination_folder>/<emotion>``.

    Args:
        data_folder: Root folder of the original image sequences.
        label_folder: Root folder of the emotion label files.
        destination_folder: Root folder receiving the per-emotion copies.

    Raises:
        ValueError: If a label file does not start with a number.
        IndexError: If a label is outside the range of ``emo_list``.
    """
    for participant_dir in sorted(glob.glob(f"{data_folder}/*")):
        part = os.path.basename(participant_dir)  # participant number, e.g. "S014"
        for session_dir in sorted(glob.glob(f"{participant_dir}/*")):
            current_session = os.path.basename(session_dir)
            # Sorted once per session: frame names sort in sequence order.
            frames = sorted(glob.glob(f"{session_dir}/*"))
            for frame in frames:
                frame_stem = os.path.splitext(os.path.basename(frame))[0]
                label_ = f"{label_folder}/{part}/{current_session}/{frame_stem}_emotion.txt"

                if not os.path.isfile(label_):
                    print("no label file for corresponding image file, skipping it.")
                    continue

                print(f"file exist:{label_}")
                # Emotions are encoded as a float; read as float, then convert to integer.
                with open(label_, 'r') as label_handle:
                    emotion = int(float(label_handle.readline()))

                # The first frame of the sequence is the neutral face, the last
                # three frames contain the peak expression. Slicing (instead of
                # indexing -1/-2/-3) tolerates sessions with fewer than three frames.
                sourcefile_neutral = frames[0]
                peak_frames = frames[-3:]

                neutral_dir = f"{destination_folder}/neutral"
                emotion_dir = f"{destination_folder}/{emo_list[emotion]}"
                # copyfile does not create missing parent folders.
                os.makedirs(neutral_dir, exist_ok=True)
                os.makedirs(emotion_dir, exist_ok=True)

                copyfile(sourcefile_neutral,
                         f"{neutral_dir}/{os.path.basename(sourcefile_neutral)}")
                for peak_frame in peak_frames:
                    copyfile(peak_frame, f"{emotion_dir}/{os.path.basename(peak_frame)}")
                    
# Run the filtering step: copies the labelled frames from data/original_data into data/filtered_data.
get_filtered_data()

# Alignment.

In [None]:
def get_data(root, imgs=None, labels=None):
    """Collect image paths and numeric labels from per-emotion sub-folders.

    For every emotion name in ``emo_dict`` the folder ``<root>/<emotion>`` is
    listed and each regular file found there is recorded together with the
    numeric label of that emotion.

    Args:
        root: Folder containing one sub-folder per emotion name.
        imgs: Optional list the image paths are appended to; a new list is
            created when omitted.
        labels: Optional list the numeric labels are appended to; a new list
            is created when omitted.

    Returns:
        The tuple ``(imgs, labels)``; both lists have the same length and
        entry ``i`` of ``labels`` belongs to entry ``i`` of ``imgs``.

    Raises:
        FileNotFoundError: If one of the emotion sub-folders does not exist.
    """
    if imgs is None:
        imgs = []
    if labels is None:
        labels = []

    for emotion, emotion_id in emo_dict.items():
        emotion_dir = f"{root}/{emotion}"
        # os.listdir returns entries in arbitrary order; sorting makes the
        # result (and anything shuffled from it with a fixed seed) reproducible.
        for file_name in sorted(os.listdir(emotion_dir)):
            img_path = os.path.join(emotion_dir, file_name)
            if os.path.isfile(img_path):
                imgs.append(img_path)
                labels.append(emotion_id)
    return imgs, labels

# Collect the path and numeric label of every image below data/filtered_data.
img_paths, labels = get_data("data/filtered_data", [], [])

# Hand every image, together with its label, to the alignment helper.
# NOTE(review): presumably img_align writes its result below data/alligned, which the
# splitting step reads next -- confirm in utils.img_allign_expnet.
for img_path, label in zip(img_paths, labels):
        iae.img_align(img_path, label)


# Splitting the aligned data into training, validation and testing sets.

In [None]:

# Getting all images with their labels in two parallel lists.
imgs, labels = get_data("data/alligned", [], [])

# Fixed seed: re-running this cell must reproduce the same split, otherwise images
# written by an earlier run end up in a different set than the one they are assigned to now.
SPLIT_SEED = 42

# Shuffling of data to get random emotions into every set.
dataset = list(zip(imgs, labels))
random.Random(SPLIT_SEED).shuffle(dataset)
# Converting to a numpy array for later separation; the (path, label) pairs become
# rows of strings, which is why the label is cast back with int() when it is used.
dataset = np.array(dataset)

## Separation of image paths and labels into train, validation and test sets.
def split(dataset, dest_root="data/set"):
    """Split the dataset 80/10/10 and write each part to its own folder.

    The first 80% of the rows go to ``training``, half of the remainder to
    ``validation`` and everything that is left to ``testing`` (so no row is
    dropped). Every image is read and written again to
    ``<dest_root>/<set name>/<emotion>/<file name>``; after each set a
    per-emotion count of the written images is printed.

    Args:
        dataset: Sequence of ``(image_path, label)`` rows, already shuffled.
            ``label`` must be convertible with ``int()`` and index ``emo_list``.
        dest_root: Folder below which the three set folders are created.
    """
    count = len(dataset)

    num_training = int(count * 0.8)                 # 80% into training
    num_validation = (count - num_training) // 2    # half of the rest into validation
    validation_end = num_training + num_validation  # everything after this goes into testing

    # Slices work for empty inputs too and do not require integer index arrays.
    all_set = {
        "training": dataset[:num_training],
        "validation": dataset[num_training:validation_end],
        "testing": dataset[validation_end:],
    }

    for key, value in all_set.items():
        counts = {emotion: 0 for emotion in emo_list}

        for img_path, label in value:
            emotion = emo_list[int(label)]
            dest_dir = f"{dest_root}/{key}/{emotion}"
            # cv2.imwrite does not create missing folders, it just reports failure.
            os.makedirs(dest_dir, exist_ok=True)
            dest = f"{dest_dir}/{os.path.basename(img_path)}"

            img = cv2.imread(img_path)
            if img is None:
                # imread signals an unreadable file by returning None.
                print(f"could not read image {img_path}, skipping it.")
                continue
            if not cv2.imwrite(dest, img):
                print(f"could not write image {dest}, skipping it.")
                continue
            counts[emotion] += 1

        summary = []
        for emotion in emo_list:
            summary += [f"{emotion}:", counts[emotion]]
        print(*summary, "total:", sum(counts.values()))

# Write the shuffled dataset into the training, validation and testing folders below data/set.
split(dataset)

# Augmenting training data set only.

In [None]:
def augment(train_dir="data/set/training", staging_dir="data/train_augmented"):
    """Augment the training set with horizontally flipped copies.

    Every readable image in ``<train_dir>/<emotion>`` is written to
    ``<staging_dir>/<emotion>`` together with a mirrored copy named
    ``flipped_<file name>``. Afterwards ``train_dir`` is replaced by the
    content of ``staging_dir``. Files that cannot be read as an image are not
    carried over.

    Args:
        train_dir: Folder holding one sub-folder per emotion with the training images.
        staging_dir: Folder in which the augmented set is assembled. It is
            emptied first so that images from an earlier run cannot leak into
            the new training set.
    """
    flipped_prefix = "flipped_"

    if os.path.isdir(staging_dir):
        rmtree(staging_dir)

    staged = 0
    for emotion in emo_list:
        source_dir = f"{train_dir}/{emotion}"
        target_dir = f"{staging_dir}/{emotion}"
        # cv2.imwrite does not create missing folders.
        os.makedirs(target_dir, exist_ok=True)
        if not os.path.isdir(source_dir):
            print(f"no training folder for {emotion}, skipping it.")
            continue

        for the_file in sorted(os.listdir(source_dir)):
            img = cv2.imread(os.path.join(source_dir, the_file))
            if img is None:
                continue
            if cv2.imwrite(f"{target_dir}/{the_file}", img):
                staged += 1
            # Do not mirror images that are already mirrored copies, so that
            # running the augmentation twice does not keep growing the set.
            if not the_file.startswith(flipped_prefix):
                cv2.imwrite(f"{target_dir}/{flipped_prefix}{the_file}", cv2.flip(img, 1))

    if staged == 0:
        # Replacing the training set with an empty staging folder would only destroy data.
        print("no training image could be augmented, leaving the training set untouched.")
        return

    rmtree(train_dir)
    copytree(staging_dir, train_dir)
        
# Replace data/set/training by its flip-augmented version.
augment()

# Reset directory content recursively, if needed.

In [None]:

def reset(folder):
    """Delete every file below ``folder`` while keeping the folder structure.

    Regular files (and symbolic links pointing to files) directly inside
    ``folder`` are removed, sub-folders are processed recursively and are
    themselves kept. Symbolic links to folders are never followed, so nothing
    outside of ``folder`` is deleted. Errors on a single entry are printed and
    do not stop the clean-up.

    Args:
        folder: Path of the folder to empty.

    Raises:
        OSError: If ``folder`` itself cannot be listed.
    """
    for the_file in os.listdir(folder):
        file_path = os.path.join(folder, the_file)
        try:
            if os.path.isfile(file_path):
                print(f"Deleteing {the_file}")
                # Deleting a file inside current folder (for a link only the link is removed).
                os.unlink(file_path)
            elif os.path.islink(file_path):
                # A link to a folder (or a dangling link): descending into it
                # would delete files that live outside of the tree being reset.
                print(f"Skipping symbolic link {file_path}")
            elif os.path.isdir(file_path):
                print(f"Moving into {file_path}")
                # Calling the function recursively.
                reset(file_path)
        except OSError as e:
            print(e)
            
# WARNING: this deletes every file below data/ -- including the original images and labels
# in data/original_data and data/labels -- and keeps only the empty folders.
# Run this cell only when the whole pipeline is meant to be started from scratch.
reset("data")