In [2]:
from utils.tools_lib import *

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
# flipping every image in a folder for data augmentation
def flip_png(images_folder):
    """Save a flipped copy of every image listed in each metadata CSV.

    For every sub-directory of ``images_folder`` and every ``.csv`` file found
    in it, each row's ``filename`` is opened, transposed with
    ``Image.FLIP_TOP_BOTTOM`` and saved next to the original as
    ``<name>_flipped.png``. A copy of the row pointing at the new file is
    appended to the metadata, and the CSV is rewritten in place with ``age``
    and ``gender`` columns derived from the JSON in ``file_attributes``.

    The function can be re-run safely:

    * ``file_attributes`` is written back unchanged (still valid JSON), so the
      next run can parse it again;
    * ``age`` / ``gender`` are only derived when the column is missing;
    * rows that already are flipped copies, or whose flipped copy is already
      listed in the metadata, are skipped, so images and rows are not
      duplicated.

    Parameters
    ----------
    images_folder : str
        Directory containing one sub-directory per emotion. Entries that are
        not directories (e.g. ``.DS_Store``) are ignored.

    Returns
    -------
    None
        The work is done through side effects on disk (new PNG files and
        rewritten CSV files).
    """
    flipped_suffix = '_flipped.png'

    for emotion_folder in sorted(os.listdir(images_folder)):
        # create path for each emotion folder
        emotion_folder_path = os.path.join(images_folder, emotion_folder)

        # skip .DS_Store and any other entry that is not a directory
        if not os.path.isdir(emotion_folder_path):
            continue

        for filename in sorted(os.listdir(emotion_folder_path)):
            # only metadata files drive the augmentation
            if not filename.endswith('.csv'):
                continue

            metadata_path = os.path.join(emotion_folder_path, filename)
            metadata_df = pd.read_csv(metadata_path)

            # Derive age / gender from file_attributes only when needed. The parsed
            # dictionaries are kept in a temporary Series so that the original JSON
            # text is what gets written back to the CSV.
            missing_columns = [column for column in ('age', 'gender')
                               if column not in metadata_df.columns]
            if missing_columns:
                attributes = metadata_df['file_attributes'].apply(json.loads)
                for column in missing_columns:
                    metadata_df[column] = attributes.apply(
                        lambda attrs, key=column: attrs.get(key, np.nan))

            known_filenames = set(metadata_df['filename'])
            new_rows = []
            for _, row in metadata_df.iterrows():
                image_filename = row['filename']

                # never flip a flipped copy again
                if image_filename.endswith(flipped_suffix):
                    continue

                image_path       = os.path.join(emotion_folder_path, image_filename)
                flipped_path     = os.path.splitext(image_path)[0] + flipped_suffix
                flipped_filename = os.path.basename(flipped_path)

                # already augmented by a previous run
                if flipped_filename in known_filenames:
                    continue

                # NOTE(review): FLIP_TOP_BOTTOM turns the image upside down; if a
                # left/right mirror was intended, Image.FLIP_LEFT_RIGHT is the
                # constant to use - confirm before changing.
                with Image.open(image_path) as img:
                    img.transpose(Image.FLIP_TOP_BOTTOM).save(flipped_path)

                # create a new row for the metadata dataframe
                new_row = row.copy()
                new_row['filename'] = flipped_filename
                new_rows.append(new_row)
                known_filenames.add(flipped_filename)

            if new_rows:
                metadata_df = pd.concat([metadata_df, pd.DataFrame(new_rows)],
                                        ignore_index=True)

            # convert metadata dataframe to csv
            metadata_df.to_csv(metadata_path, index=False)

In [11]:
# images to dictionary conversion
def png_to_dict(images_folder):
    """Load every image listed in the metadata CSVs into a dictionary.

    Each sub-directory of ``images_folder`` is treated as one emotion class.
    For every ``.csv`` file inside it, the rows' ``filename`` values are opened
    as images and flattened to 1-D numpy arrays.

    Parameters
    ----------
    images_folder : str
        Directory containing one sub-directory per emotion. Entries that are
        not directories (e.g. ``.DS_Store``) are ignored.

    Returns
    -------
    dict
        Maps ``'<emotion_folder>/<image_filename>'`` to a dictionary with the
        keys ``'np_array'`` (flattened pixel array), ``'path'`` (image path),
        ``'age'`` and ``'gender'`` (pandas Series holding the metadata values
        of the rows with that filename) and ``'emotion'`` (the folder name).
    """
    # result dictionary, filled in sorted folder / file order
    images = {}

    for emotion_folder in sorted(os.listdir(images_folder)):
        # create path for each emotion folder
        emotion_folder_path = os.path.join(images_folder, emotion_folder)

        # skip .DS_Store and any other entry that is not a directory
        if not os.path.isdir(emotion_folder_path):
            continue

        # loop through all the files in the folder and find metadata csv file
        for filename in sorted(os.listdir(emotion_folder_path)):
            if not filename.endswith('.csv'):
                continue

            # read the csv file
            metadata_df = pd.read_csv(os.path.join(emotion_folder_path, filename))

            # Derive whichever of age / gender is missing from file_attributes.
            # Each column is checked on its own so a CSV that has only one of
            # them no longer fails with a KeyError further down.
            missing_columns = [column for column in ('age', 'gender')
                               if column not in metadata_df.columns]
            if missing_columns:
                attributes = metadata_df['file_attributes'].apply(json.loads)
                for column in missing_columns:
                    metadata_df[column] = attributes.apply(
                        lambda attrs, key=column: attrs.get(key, np.nan))

            for _, row in metadata_df.iterrows():
                # get the filename
                image_filename = row['filename']
                image_path     = os.path.join(emotion_folder_path, image_filename)

                # open the image, flatten its pixels, and release the file handle
                # (presumably 48x48 -> 2304 values; the size is not checked here)
                with Image.open(image_path) as img:
                    img_array_flat = np.array(img).flatten()

                # rows describing this file; computed once and reused for both columns
                # NOTE(review): age / gender are stored as pandas Series (as before),
                # not scalars - row['age'] / row['gender'] would give scalars, but
                # confirm no caller relies on the Series form before switching.
                same_file_rows = metadata_df.loc[metadata_df['filename'] == image_filename]

                # add to dictionary (key: emotion/filename, value: np array, path, age, gender, emotion)
                images[f'{emotion_folder}/{image_filename}'] = {'np_array': img_array_flat,
                                                                'path': image_path,
                                                                'age': same_file_rows['age'],
                                                                'gender': same_file_rows['gender'],
                                                                'emotion': emotion_folder}

    return images

In [13]:

# Augment the training set on disk: flip_png saves a '_flipped.png' copy next to
# every image listed in the metadata and rewrites each metadata CSV in place.
flip_png('data_w_metadata/train')

In [14]:
# Load both splits into dictionaries keyed by '<emotion folder>/<image filename>';
# each value holds the flattened pixel array, path, age, gender and emotion.
train_images = png_to_dict('data_w_metadata/train')
test_images = png_to_dict('data_w_metadata/test')

In [None]:
# Turn each image dictionary into a feature matrix (one flattened pixel array
# per image) and a label vector (the emotion folder name), following the
# dictionaries' insertion order.
X_train = np.array([entry['np_array'] for entry in train_images.values()])
y_train = np.array([entry['emotion'] for entry in train_images.values()])

X_test = np.array([entry['np_array'] for entry in test_images.values()])
y_test = np.array([entry['emotion'] for entry in test_images.values()])

In [None]:
# Standardize the features: the scaler's statistics are learned from the
# training set only and then applied to both splits.
scaler = sklearn.preprocessing.StandardScaler().fit(X_train)

# replace each split with its scaled counterpart
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)