In [1]:
import os, sys
import numpy as np
import pandas as pd
from tqdm import tqdm
import csv, re

In [2]:
def read_txt_files(path="datasets/training"):
    """Collect the annotations of every ``.txt`` file under ``path`` into one CSV.

    The directory tree rooted at ``path`` is walked recursively. Every
    non-empty line of every ``.txt`` file is split on single whitespace
    characters, and each line that yields more than 16 fields becomes one row
    of ``<path>/imfdb_meta.csv``.

    Columns written (index of the source field in parentheses):
    ``path`` (2, prefixed with the annotation file's directory plus
    ``images``), ``gender`` (10), ``emotion`` (11), ``obstruction`` (12),
    ``illumination`` (13) and ``orientation`` (15).

    The extraction is purely positional and nothing is validated: a line whose
    layout differs (for example consecutive whitespace characters, which
    produce empty fields) ends up with shifted values in the CSV.

    Args:
        path: Root directory to scan. The CSV is created directly inside it
            and overwritten if it already exists.

    Returns:
        The path of the CSV file that was written.
    """
    fieldnames = ['path', 'gender', 'emotion', 'obstruction', 'illumination', 'orientation']
    # Position of the source field feeding each label column (all but 'path').
    label_positions = (10, 11, 12, 13, 15)

    annotation_files = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(path)
        for name in names
        if name.endswith(".txt")
    ]

    meta_file_path = os.path.join(path, 'imfdb_meta.csv')
    # newline='' is what the csv module expects; without it some platforms
    # write an extra blank line after every row.
    with open(meta_file_path, mode='w', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for f in tqdm(annotation_files):
            # Images live in an "images" folder next to the annotation file.
            images_dir = os.path.join(os.path.dirname(f), "images")
            with open(f) as txt:
                for raw_line in txt:
                    text = raw_line.rstrip('\n')
                    if not text:
                        continue
                    fields = re.split(r"\t|\s", text)
                    if len(fields) > 16:
                        row = {fieldnames[0]: images_dir + os.sep + fields[2]}
                        for column, position in zip(fieldnames[1:], label_positions):
                            row[column] = fields[position]
                        writer.writerow(row)

    return meta_file_path

# Build the metadata CSV for the IMFDB dataset and keep its path for loading.
meta_data_path = read_txt_files(path = "datasets/IMFDB_final")

100%|██████████| 399/399 [00:00<00:00, 605.92it/s]


In [31]:
# Load the metadata CSV written by read_txt_files into a DataFrame.
df = pd.read_csv(meta_data_path)

In [32]:
# Per-column summary (count / unique / top / freq) of the raw parsed metadata.
df.describe()

Unnamed: 0,path,gender,emotion,obstruction,illumination,orientation
count,31292,30995,31268,31289,31290,31292
unique,30215,9,11,17,14,10
top,datasets/IMFDB_final/KatrinaKaif/ZindagiNaMile...,MALE,NEUTRAL,NONE,MEDIUM,FRONTAL
freq,235,20986,9588,21111,19836,11971


In [33]:
# Show the distinct values of every label column on one line, to spot parsing artefacts.
label_columns = ("gender", "emotion", "obstruction", "illumination", "orientation")
print(*(df[name].unique() for name in label_columns))

['FEMALE' 'MALE' 'SalmanKhan' nan 'Kavya_Madhavan' '174' 'Bharathi'
 'Prakash' 'KareenaKapoor' '2001'] ['ANGER' 'NEUTRAL' 'SURPRISE' 'HAPPINESS' 'SADNESS' 'FEAR' 'DISGUST'
 'MALE' nan 'FEMALE' 'Cheli' 'KareenaKapoor'] ['NONE' 'GLASSES' 'HAND' 'HAIR' 'OTHERS' 'DISGUST' 'MALE' 'HAPPINESS'
 'NEUTRAL' 'ANGER' 'BEARD' 'ORNAMENTS' 'SADNESS' 'SURPRISE' nan '2001'
 'FEAR' 'FEMALE'] ['MEDIUM' 'BAD' 'HIGH' 'NONE' 'NEUTRAL' 'HAND' 'OTHERS' 'GLASSES' 'ANGER'
 'DISGUST' 'HAPPINESS' 'ORNAMENTS' nan 'Madhavan' 'HAIR'] ['FRONTAL' 'UP' 'RIGHT' 'LEFT' 'DOWN' 'YOUNG' 'MEDIUM' 'ANGER' 'OLD'
 'MIDDLE']


In [34]:
# Number of distinct image paths recorded under each gender value.
df.groupby('gender')['path'].nunique()

gender
174                   1
2001                  1
Bharathi            225
FEMALE             9114
KareenaKapoor       191
Kavya_Madhavan       63
MALE              20383
Prakash              17
SalmanKhan           25
Name: path, dtype: int64

In [35]:
# Number of distinct image paths recorded under each emotion value.
df.groupby('emotion')['path'].nunique()

emotion
ANGER            2697
Cheli               1
DISGUST          3736
FEAR              610
FEMALE            480
HAPPINESS        7846
KareenaKapoor       1
MALE              215
NEUTRAL          9417
SADNESS          3583
SURPRISE         1690
Name: path, dtype: int64

In [77]:
# Null out label values that occur too rarely to be genuine categories (the
# unique-value listing shows stray tokens such as names and years in these
# columns). `mask(isin(...))` always writes NaN. The earlier
# `df[col].replace(to_remove, None, inplace=True)` was a chained in-place call
# whose result depends on the pandas version: value=None has meant "pad-fill
# from the previous row" in some releases, and with copy-on-write the change
# never reaches `df`.
threshold = 500  # a label must occur more often than this to be kept
for col in ['emotion','gender','obstruction']:
    value_counts = df[col].value_counts()
    to_remove = value_counts[value_counts <= threshold].index
    df[col] = df[col].mask(df[col].isin(to_remove))

# A path referenced by more than `max_path_repeats` rows is treated as
# unreliable and set to NaN, so a later dropna() discards those rows.
max_path_repeats = 10
value_counts = df['path'].value_counts()
to_remove = value_counts[value_counts > max_path_repeats].index
df['path'] = df['path'].mask(df['path'].isin(to_remove))

In [78]:
# Distinct values of every label column after the rare-label filtering.
label_columns = ("gender", "emotion", "obstruction", "illumination", "orientation")
print(*(df[name].unique() for name in label_columns))

['FEMALE' 'MALE' nan] ['ANGER' 'NEUTRAL' 'SURPRISE' 'HAPPINESS' 'SADNESS' 'FEAR' 'DISGUST' nan] ['NONE' 'GLASSES' 'HAND' 'HAIR' 'OTHERS' 'BEARD' nan] ['MEDIUM' 'BAD' 'HIGH' 'NONE' 'NEUTRAL' 'HAND' 'OTHERS' 'GLASSES' 'ANGER'
 'DISGUST' 'HAPPINESS' 'ORNAMENTS' nan 'Madhavan' 'HAIR'] ['FRONTAL' 'UP' 'RIGHT' 'LEFT' 'DOWN' 'YOUNG' 'MEDIUM' 'ANGER' 'OLD'
 'MIDDLE']


In [86]:
# Keep only the rows that have a value in every column.
clean_df = df.dropna()

In [87]:
# Distinct values of every label column once incomplete rows are dropped.
label_columns = ("gender", "emotion", "obstruction", "illumination", "orientation")
print(*(clean_df[name].unique() for name in label_columns))

['FEMALE' 'MALE'] ['ANGER' 'NEUTRAL' 'SURPRISE' 'HAPPINESS' 'SADNESS' 'FEAR' 'DISGUST'] ['NONE' 'GLASSES' 'HAND' 'HAIR' 'OTHERS' 'BEARD'] ['MEDIUM' 'BAD' 'HIGH'] ['FRONTAL' 'UP' 'RIGHT' 'LEFT' 'DOWN']


In [88]:
# Per-column summary (count / unique / top / freq) of the cleaned metadata.
clean_df.describe()

Unnamed: 0,path,gender,emotion,obstruction,illumination,orientation
count,29921,29921,29921,29921,29921,29921
unique,29467,2,7,6,3,5
top,datasets/IMFDB_final/Balakrishna/odahuttidavar...,MALE,NEUTRAL,NONE,MEDIUM,FRONTAL
freq,10,20721,9528,21104,19400,11711


In [89]:
# Count the rows whose path contains the literal text ".jpg"; compare with
# len(clean_df) to see whether every row points at a JPEG.
# (len() of the boolean Series was always the total row count, and without
# regex=False the "." matched any character.)
int(clean_df.path.str.contains('.jpg', regex=False).sum())

29921

In [90]:
# Encode every label column (all columns except the first, `path`) as integer
# codes numbered in order of first appearance, and record the (code, label)
# pairs per column in `convert_dict` so the codes can be decoded later.
# pd.factorize yields the same numbering as enumerate(series.unique()), but
# avoids the chained `clean_df[col].replace(..., inplace=True)`, which acts on
# a column selection of a derived frame (SettingWithCopy; no effect under
# copy-on-write) and relies on replace() downcasting object columns to int.
# NOTE: this cell rebinds `clean_df`; running it a second time would encode
# the already-numeric columns and overwrite `convert_dict` with code->code pairs.
convert_dict = {}
encoded_columns = {}
for col in clean_df.columns[1:]:
    print(col)
    codes, labels = pd.factorize(clean_df[col])
    convert_dict[col] = list(enumerate(labels))
    encoded_columns[col] = codes
clean_df = clean_df.assign(**encoded_columns)
print(convert_dict)

gender
emotion
obstruction
illumination
orientation
{'gender': [(0, 'FEMALE'), (1, 'MALE')], 'emotion': [(0, 'ANGER'), (1, 'NEUTRAL'), (2, 'SURPRISE'), (3, 'HAPPINESS'), (4, 'SADNESS'), (5, 'FEAR'), (6, 'DISGUST')], 'obstruction': [(0, 'NONE'), (1, 'GLASSES'), (2, 'HAND'), (3, 'HAIR'), (4, 'OTHERS'), (5, 'BEARD')], 'illumination': [(0, 'MEDIUM'), (1, 'BAD'), (2, 'HIGH')], 'orientation': [(0, 'FRONTAL'), (1, 'UP'), (2, 'RIGHT'), (3, 'LEFT'), (4, 'DOWN')]}


In [93]:
# Numeric summary of the integer-encoded label columns.
clean_df.describe()

Unnamed: 0,gender,emotion,obstruction,illumination,orientation
count,29921.0,29921.0,29921.0,29921.0,29921.0
mean,0.692524,2.572307,0.652451,0.404064,1.578156
std,0.461456,1.818648,1.252495,0.587949,1.398475
min,0.0,0.0,0.0,0.0,0.0
25%,0.0,1.0,0.0,0.0,0.0
50%,1.0,3.0,0.0,0.0,2.0
75%,1.0,4.0,1.0,1.0,3.0
max,1.0,6.0,5.0,2.0,4.0


In [None]:
import os, sys, time
import tensorflow as tf
import numpy as np
from tqdm import tqdm

# TODO: unfinished cell. The original line was the incomplete expression
# `tf.keras.`, which is a SyntaxError and stops "Run All"; add the Keras
# model code here.






























