In [13]:
import pandas as pd
import os
import re

In [14]:
def parse_filename(filename, directory):
    """Split an image filename into its underscore-separated numeric fields.

    The name must start with three digit groups followed by a 17-digit
    timestamp and ``.jpg`` (for example ``100_1_0_20170110183726390.jpg``).
    The match is anchored at the start of the name only.

    Args:
        filename: Bare file name (no directory component).
        directory: Directory joined with ``filename`` to build the full path.

    Returns:
        A 5-tuple of strings ``(group1, group2, group3, timestamp, full_path)``,
        or ``None`` when the name does not follow the pattern.
    """
    found = re.match(r"(\d+)_(\d+)_(\d+)_(\d{8}\d{9})\.jpg", filename)
    if found is None:
        return None
    full_path = os.path.join(directory, filename)
    return (*found.groups(), full_path)

In [15]:
# Folder that is scanned for .jpg files.
directory = r'C:\Users\shint\Documents\Multitask Learning Research\part1'
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and anything derived from it positionally) reproducible across runs and machines.
filenames = sorted(f for f in os.listdir(directory) if f.endswith('.jpg'))

In [16]:
# Peek at the first five collected filenames.
filenames[:5]

['100_1_0_20170110183726390.jpg',
 '100_1_2_20170105174847679.jpg',
 '100_1_2_20170110182836729.jpg',
 '101_1_2_20170105174739309.jpg',
 '10_0_0_20161220222308131.jpg']

In [17]:
# Parse every filename in one pass; parse_filename returns None for names that
# do not follow the expected pattern, and those entries are dropped.
data = [
    record
    for record in (parse_filename(name, directory) for name in filenames)
    if record is not None
]

df = pd.DataFrame(
    data,
    columns=['Age', 'Gender', 'Race', 'DateTime', 'FilePath'],
)

In [18]:
# Show the first five rows (head() defaults to n=5).
df.head()

Unnamed: 0,Age,Gender,Race,DateTime,FilePath
0,100,1,0,20170110183726390,C:\Users\shint\Documents\Multitask Learning Re...
1,100,1,2,20170105174847679,C:\Users\shint\Documents\Multitask Learning Re...
2,100,1,2,20170110182836729,C:\Users\shint\Documents\Multitask Learning Re...
3,101,1,2,20170105174739309,C:\Users\shint\Documents\Multitask Learning Re...
4,10,0,0,20161220222308131,C:\Users\shint\Documents\Multitask Learning Re...


In [19]:
# The regex captures are strings: cast the three label columns to integers.
df[['Age', 'Gender', 'Race']] = df[['Age', 'Gender', 'Race']].astype(int)
# 17-digit stamp: date (8) + time (6) + fractional seconds (3).
df['DateTime'] = pd.to_datetime(
    df['DateTime'],
    format='%Y%m%d%H%M%S%f',
)

In [20]:
# Show the first five rows (head() defaults to n=5).
df.head()

Unnamed: 0,Age,Gender,Race,DateTime,FilePath
0,100,1,0,2017-01-10 18:37:26.390,C:\Users\shint\Documents\Multitask Learning Re...
1,100,1,2,2017-01-05 17:48:47.679,C:\Users\shint\Documents\Multitask Learning Re...
2,100,1,2,2017-01-10 18:28:36.729,C:\Users\shint\Documents\Multitask Learning Re...
3,101,1,2,2017-01-05 17:47:39.309,C:\Users\shint\Documents\Multitask Learning Re...
4,10,0,0,2016-12-20 22:23:08.131,C:\Users\shint\Documents\Multitask Learning Re...


In [21]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from sklearn.preprocessing import LabelEncoder

In [22]:
# Image-pipeline settings (this cell does not split the data into train/test sets).
IMG_SIZE = (128, 128)  # target_size handed to load_img; images become IMG_SIZE + (3,) tensors
BATCH_SIZE = 32  # number of samples per batch when the dataset is batched

In [23]:
# One LabelEncoder per categorical column, kept in a dict so the fitted
# encoders remain available after the columns are overwritten.
label_encoders = {column: LabelEncoder() for column in ('Gender', 'Race')}

# Replace each column with its encoded version.
for column, encoder in label_encoders.items():
    df[column] = encoder.fit_transform(df[column])

In [24]:
def preprocess_image(file_path):
    """Load one image, resize it to IMG_SIZE and scale its pixel values.

    Args:
        file_path: Path of the image file to load.

    Returns:
        The image as an array divided by 255.0, so 8-bit pixel values
        end up in the range [0, 1].
    """
    pixels = img_to_array(load_img(file_path, target_size=IMG_SIZE))
    return pixels / 255.0

# Create TensorFlow dataset
def load_data(df):
    """Build an unbatched ``tf.data.Dataset`` of ``(image, (age, gender, race))`` elements.

    Args:
        df: DataFrame with 'FilePath', 'Age', 'Gender' and 'Race' columns.

    Returns:
        A dataset whose elements pair a float32 image tensor of shape
        ``IMG_SIZE + (3,)`` with a tuple of three scalar int32 labels.
        Images are read through ``preprocess_image`` inside a Python
        generator, i.e. only when the dataset is iterated.
    """
    image_paths = df['FilePath'].values
    ages = df['Age'].values
    genders = df['Gender'].values
    races = df['Race'].values

    def generator():
        for img_path, age, gender, race in zip(image_paths, ages, genders, races):
            yield preprocess_image(img_path), (age, gender, race)

    # output_signature replaces the deprecated output_types/output_shapes pair
    # and describes the same element structure.
    label_spec = tf.TensorSpec(shape=(), dtype=tf.int32)
    dataset = tf.data.Dataset.from_generator(
        generator,
        output_signature=(
            tf.TensorSpec(shape=IMG_SIZE + (3,), dtype=tf.float32),
            (label_spec, label_spec, label_spec),
        ),
    )
    return dataset

In [25]:
# Input pipeline: per-sample dataset -> batches of BATCH_SIZE -> prefetch
# with an automatically tuned buffer.
dataset = (
    load_data(df)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)