In [1]:
import os
import re
import shutil

import pandas as pd
from tqdm import tqdm

In [2]:
# Notebook configuration: which trained-model run to use, where the datasets
# live, and a few image constants.

# Name of the trained-model run; MODELS_DIR (below) points at its folder,
# from which the t.csv / v.csv listings are read later in the notebook.
SAMPLE_NAME= "TEmotionDetection_HNAPS-Conv_10-SGD_0.001-relu_0.5-2-1629538337.model"
# Name of the folder (under DATA_DIR) that copyFiles() writes into.
DEST_DIR_NAME = "fer2013-skimmed-3903ted"
# NOTE(review): absolute, machine-specific path -- edit before running elsewhere.
ROOT_DIR = "/Users/garage/Cohort/"
USECASE_NAME = "TF_Usecase"
# Not referenced by the cells visible in this notebook.
USECASE_DIR = ROOT_DIR + USECASE_NAME + "/"
MODELS_DIR = os.path.join(ROOT_DIR, "TrainedModels", USECASE_NAME, SAMPLE_NAME)

DATA_DIR = ROOT_DIR + "Data/"
# Source dataset and the destination dataset built from the CSV listings.
SRC_PATH = os.path.join(DATA_DIR, "fer2013-skimmed")
DEST_PATH = os.path.join(DATA_DIR, DEST_DIR_NAME)
# Both end with "/" and are handed to count_exp() as-is.
SRC_TRAIN_DIR = SRC_PATH + "/train/"
SRC_TEST_DIR = SRC_PATH + "/test/"

# PREFIX and SUPPORTED_IMAGES are not used by the cells visible here.
PREFIX = ""
SUPPORTED_IMAGES = ("jpg", "png", "jpeg")

# Presumably the images are 48x48 with a single channel -- TODO confirm;
# neither constant is used by the cells visible here.
IMAGE_SIZE = 48
CHANNEL = 1

### Insight

In [3]:
def count_exp(path, label):
    """Count the entries inside each expression folder directly under ``path``.

    Parameters
    ----------
    path : str
        Directory whose immediate sub-folders are named after expressions.
        A trailing separator is optional.
    label : str
        Index label of the single row in the result (e.g. ``'train'``).

    Returns
    -------
    pandas.DataFrame
        One row labelled ``label`` with one column per accepted folder, each
        holding ``len(os.listdir(folder))`` for that folder.
    """
    # re.match with a one-character class only tests the FIRST character of
    # the name, which is enough to skip dot-files such as ".DS_Store".
    # NOTE(review): the '∧' (U+2227) inside the class looks like a typo for
    # '^'; it is kept so the set of accepted names is unchanged -- confirm.
    e_only_folder = re.compile(r"[∧a-zA-Z\-]")

    counts = {}
    for expression in os.listdir(path):

        # omit other folders
        if not e_only_folder.match(expression):
            continue

        # os.path.join works whether or not `path` ends with a separator.
        expression_dir = os.path.join(path, expression)

        # A plain file whose name passes the filter would make os.listdir raise.
        if not os.path.isdir(expression_dir):
            continue

        counts[expression] = len(os.listdir(expression_dir))

    return pd.DataFrame(counts, index=[label])

In [4]:
# Images per expression folder in the source train and test splits.
train_count = count_exp(path=SRC_TRAIN_DIR, label="train")
test_count = count_exp(path=SRC_TEST_DIR, label="test")
print(train_count, test_count, sep="\n")

       perplex  happy   sad  neutral  angry
train     3173   6919  4136     4220   3509
      perplex  happy   sad  neutral  angry
test      902   1669  1024     1047    816


### Train

In [5]:
# Read the training listing stored with the model and give its three
# columns readable names.
csv_path = os.path.join(MODELS_DIR, "t.csv")
tdf = pd.read_csv(csv_path).set_axis(["No", "Path", "Emotion"], axis=1)
tdf.head()

Unnamed: 0,No,Path,Emotion
0,0,/Users/garage/Cohort/Data/fer2013-skimmed/trai...,happy
1,1,/Users/garage/Cohort/Data/fer2013-skimmed/trai...,happy
2,2,/Users/garage/Cohort/Data/fer2013-skimmed/trai...,perplex
3,3,/Users/garage/Cohort/Data/fer2013-skimmed/trai...,sad
4,4,/Users/garage/Cohort/Data/fer2013-skimmed/trai...,angry


In [6]:
# Number of rows per emotion label in the training listing.
tdf.groupby("Emotion").size()

Emotion
angry      3171
happy      3171
neutral    3171
perplex    3171
sad        3171
dtype: int64

In [7]:
def makePath(path, mode):
    """Create directory ``path`` (and any missing parents) with ``mode``.

    The process umask is cleared while the directory is created so that
    ``mode`` is not masked, and it is restored afterwards even when creation
    fails. Nothing is created when ``path`` already exists.

    Parameters
    ----------
    path : str
        Directory to create.
    mode : int
        Permission bits handed to ``os.makedirs`` (e.g. ``0o775``).
    """
    # Read the umask before entering ``try`` so the ``finally`` clause can
    # never run with ``original_umask`` unbound.
    original_umask = os.umask(0)
    try:
        if not os.path.exists(path):
            # exist_ok covers the directory appearing between the check
            # above and this call.
            os.makedirs(path, mode=mode, exist_ok=True)
    finally:
        os.umask(original_umask)

In [8]:
def copyFiles(mode, df):
    """Copy every file listed in ``df`` into ``DEST_PATH/<mode>/<emotion>/``.

    Parameters
    ----------
    mode : str
        Sub-folder of ``DEST_PATH`` to populate (the notebook calls this with
        ``"train"`` and ``"test"``).
    df : pandas.DataFrame
        Must provide a ``"Path"`` column (source file) and an ``"Emotion"``
        column (name of the destination sub-folder).

    Notes
    -----
    Files keep their base name, so two rows sharing a base name and an
    emotion end up as a single destination file. Copying stops at the first
    failure; the error is printed instead of being raised.
    """
    try:
        rows = zip(df["Path"], df["Emotion"])
        # total= lets tqdm draw a real progress bar instead of a bare counter.
        for src_path, emotion in tqdm(rows, total=len(df)):
            emotion_path = os.path.join(DEST_PATH, mode, emotion)
            # exist_ok replaces the separate exists() check and its race.
            os.makedirs(emotion_path, 0o775, exist_ok=True)
            dest_file_path = os.path.join(emotion_path, os.path.basename(src_path))
            shutil.copy(src_path, dest_file_path)
    except Exception as e:
        # The previous "except e:" evaluated the undefined name `e` and
        # raised NameError instead of reporting the real problem.
        print(e)

In [9]:
# Populate DEST_PATH/train/<emotion>/ from the training listing.
copyFiles(mode="train", df=tdf)

15855it [00:42, 375.09it/s]


### Test

In [10]:
# Read the test listing stored with the model and give its three columns
# readable names.
csv_path = os.path.join(MODELS_DIR, "v.csv")
vdf = pd.read_csv(csv_path).set_axis(["No", "Path", "Emotion"], axis=1)
vdf.head()

Unnamed: 0,No,Path,Emotion
0,0,/Users/garage/Cohort/Data/fer2013-skimmed/test...,happy
1,1,/Users/garage/Cohort/Data/fer2013-skimmed/test...,angry
2,2,/Users/garage/Cohort/Data/fer2013-skimmed/test...,perplex
3,3,/Users/garage/Cohort/Data/fer2013-skimmed/test...,sad
4,4,/Users/garage/Cohort/Data/fer2013-skimmed/test...,happy


In [11]:
# Number of rows per emotion label in the test listing.
vdf.groupby("Emotion").size()

Emotion
angry      815
happy      816
neutral    816
perplex    816
sad        816
dtype: int64

In [12]:
# Populate DEST_PATH/test/<emotion>/ from the test listing.
copyFiles(mode="test", df=vdf)

4079it [00:10, 389.54it/s]


### Validation

In [13]:
# Re-count the copied dataset to check what landed in each expression folder.
train_count = count_exp(path=os.path.join(DEST_PATH, "train/"), label="train")
test_count = count_exp(path=os.path.join(DEST_PATH, "test/"), label="test")
print(train_count, test_count, sep="\n")

       perplex  happy   sad  neutral  angry
train     3171   3171  3171     3171   3171
      perplex  happy  sad  neutral  angry
test      816    816  816      816    815
