In [7]:
# Import necessary libraries
import os
import numpy as np
import pandas as pd
import re
from PIL import Image
from tensorflow.keras.utils import Sequence, to_categorical
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import tensorflow as tf

# Define image dimensions
IMG_HEIGHT = 128  # You can reduce this to 64 if you still face memory issues
IMG_WIDTH = 128
batch_size = 32  # Adjust as needed based on your system's memory

# Define the dataset directory
dataset_dir = 'UTKFace'  # Replace with your actual dataset directory

# Get list of image files
image_files = [f for f in os.listdir(dataset_dir) if f.endswith('.jpg')]

# Extract labels from filenames and create DataFrame
data = []

# Regular expression pattern to extract labels
pattern = r'^(\d+)_(\d+)_(\d+)_\d+\.jpg\.chip\.jpg$'
# Or use the optional .chip part
# pattern = r'^(\d+)_(\d+)_(\d+)_\d+\.jpg(?:\.chip)?\.jpg$'


data = []

for img_name in image_files:
    match = re.match(pattern, img_name)
    if match:
        age = int(match.group(1))
        gender = int(match.group(2))
        ethnicity = int(match.group(3))
        data.append({'img_name': img_name, 'age': age, 'gender': gender, 'ethnicity': ethnicity})
    else:
        print(f'Filename {img_name} did not match the pattern.')


# Create DataFrame
df = pd.DataFrame(data)

# Verify the DataFrame
print(df.head())
print(df.info())

# Extract labels
age_labels = df['age'].values
gender_labels = df['gender'].values
ethnicity_labels = df['ethnicity'].values

# One-Hot Encode Gender and Ethnicity Labels
gender_labels_cat = to_categorical(gender_labels, num_classes=2)
ethnicity_labels_cat = to_categorical(ethnicity_labels, num_classes=5)

# Verify the shapes of the one-hot encoded labels
print('gender_labels_cat shape:', gender_labels_cat.shape)
print('ethnicity_labels_cat shape:', ethnicity_labels_cat.shape)

# Split the data into training+validation and test sets

# First, split into training+validation and test sets
filenames = df['img_name'].values

filenames_temp, filenames_test, age_temp, age_test, gender_temp, gender_test, ethnicity_temp, ethnicity_test = train_test_split(
    filenames, age_labels, gender_labels_cat, ethnicity_labels_cat, test_size=0.15, random_state=42
)

filenames_train, filenames_val, age_train, age_val, ethnicity_train, ethnicity_val, gender_train, gender_val = train_test_split(
    filenames_temp, age_temp, ethnicity_temp, gender_temp, test_size=0.1765, random_state=42
)


# Verify the shapes after splitting
print('Number of training samples:', len(filenames_train))
print('Number of validation samples:', len(filenames_val))
print('Number of test samples:', len(filenames_test))

# Prepare labels as dictionaries
train_labels = {
    'age_output': age_train,
    'gender_output': gender_train,
    'ethnicity_output': ethnicity_train
}

val_labels = {
    'age_output': age_val,
    'gender_output': gender_val,
    'ethnicity_output': ethnicity_val
}

test_labels = {
    'age_output': age_test,
    'gender_output': gender_test,
    'ethnicity_output': ethnicity_test
}

# Create custom data generator
class DataGenerator(Sequence):
    def __init__(self, image_filenames, labels, batch_size, img_dir, img_height, img_width, shuffle=True):
        self.image_filenames = image_filenames
        self.labels = labels
        self.batch_size = batch_size
        self.img_dir = img_dir
        self.img_height = img_height
        self.img_width = img_width
        self.shuffle = shuffle
        self.indexes = np.arange(len(self.image_filenames))
        self.on_epoch_end()
    
    def __len__(self):
        return int(np.ceil(len(self.image_filenames) / self.batch_size))
    
    def __getitem__(self, idx):
        # Generate indexes of the batch
        batch_indexes = self.indexes[idx * self.batch_size:(idx + 1) * self.batch_size]
        
        # Find list of IDs
        batch_filenames = [self.image_filenames[k] for k in batch_indexes]
        
        # Generate data
        X, y = self.__data_generation(batch_filenames, batch_indexes)
        
        return X, y  # Return labels as a dictionary

    
    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indexes)
    
    def __data_generation(self, batch_filenames, batch_indexes):
        X = []
        # Initialize label dictionaries for this batch
        y = {
            'age_output': self.labels['age_output'][batch_indexes],
            'gender_output': self.labels['gender_output'][batch_indexes],
            'ethnicity_output': self.labels['ethnicity_output'][batch_indexes]
        }
    
        
    
        # Rest of your code...

        
        for filename in batch_filenames:
            img_path = os.path.join(self.img_dir, filename)
            try:
                img = Image.open(img_path)
                img = img.convert('RGB')
                img = img.resize((self.img_width, self.img_height))
                img_array = img_to_array(img)
                img_array = img_array / 255.0
                X.append(img_array)
            except Exception as e:
                print(f'Error processing image {img_path}: {e}')
                # Append a zero array in case of error to keep batch size consistent
                X.append(np.zeros((self.img_height, self.img_width, 3)))
        
        X = np.array(X)
        return X, y

# Create generators
train_generator = DataGenerator(
    filenames_train,
    train_labels,
    batch_size=batch_size,
    img_dir=dataset_dir,
    img_height=IMG_HEIGHT,
    img_width=IMG_WIDTH
)

val_generator = DataGenerator(
    filenames_val,
    val_labels,
    batch_size=batch_size,
    img_dir=dataset_dir,
    img_height=IMG_HEIGHT,
    img_width=IMG_WIDTH
)

test_generator = DataGenerator(
    filenames_test,
    test_labels,
    batch_size=batch_size,
    img_dir=dataset_dir,
    img_height=IMG_HEIGHT,
    img_width=IMG_WIDTH,
    shuffle=False  # No need to shuffle test data
)

# Build the multi-task learning model
input_layer = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))

# Shared Convolutional Layers
x = Conv2D(32, (3, 3), activation='relu')(input_layer)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Conv2D(128, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)

# Output Layers with Correct Names
# Use tf.identity to assign names to the output tensors
from tensorflow.keras.layers import Lambda

# Output Layers with Correct Names using Lambda
age_output = Lambda(lambda z: z, name='age_output')(Dense(1)(x))
gender_output = Lambda(lambda z: z, name='gender_output')(Dense(2, activation='softmax')(x))
ethnicity_output = Lambda(lambda z: z, name='ethnicity_output')(Dense(5, activation='softmax')(x))


outputs = {
    'age_output': age_output,
    'gender_output': gender_output,
    'ethnicity_output': ethnicity_output
}

model = Model(inputs=input_layer, outputs=outputs)


# Verify the model's output names
print('Model output names:', model.output_names)

# Compile the model
model.compile(
    optimizer='adam',
    loss={
        'age_output': 'mse',
        'gender_output': 'categorical_crossentropy',
        'ethnicity_output': 'categorical_crossentropy'
    },
    metrics={
        'age_output': 'mae',
        'gender_output': 'accuracy',
        'ethnicity_output': 'accuracy'
    }
)

# Set up callbacks
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

model_checkpoint = ModelCheckpoint(
    'best_multitask_model.keras',
    save_best_only=True,
    monitor='val_loss'
)

epochs = 50

# Train the model using generators
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=epochs,
    callbacks=[early_stopping, model_checkpoint]
)

# Evaluate the model on the test set
test_loss = model.evaluate(test_generator, verbose=2)

# Print the evaluation results
print('Test loss and metrics:', test_loss)


Filename 24_0_1_20170116220224657 .jpg.chip.jpg did not match the pattern.
Filename 39_1_20170116174525125.jpg.chip.jpg did not match the pattern.
Filename 55_0_0_20170116232725357jpg.chip.jpg did not match the pattern.
Filename 61_1_20170109142408075.jpg.chip.jpg did not match the pattern.
Filename 61_1_20170109150557335.jpg.chip.jpg did not match the pattern.
                                 img_name  age  gender  ethnicity
0  100_0_0_20170112213500903.jpg.chip.jpg  100       0          0
1  100_0_0_20170112215240346.jpg.chip.jpg  100       0          0
2  100_1_0_20170110183726390.jpg.chip.jpg  100       1          0
3  100_1_0_20170112213001988.jpg.chip.jpg  100       1          0
4  100_1_0_20170112213303693.jpg.chip.jpg  100       1          0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23703 entries, 0 to 23702
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   img_name   23703 non-null  object
 1   age       

  self._warn_if_super_not_called()


Epoch 1/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m174s[0m 331ms/step - age_output_loss: 440.5686 - age_output_mae: 15.8793 - ethnicity_output_accuracy: 0.2774 - ethnicity_output_loss: 3.3347 - gender_output_accuracy: 0.5361 - gender_output_loss: 1.6596 - loss: 445.5641 - val_age_output_loss: 199.2537 - val_age_output_mae: 11.0607 - val_ethnicity_output_accuracy: 0.4272 - val_ethnicity_output_loss: 1.4254 - val_gender_output_accuracy: 0.6746 - val_gender_output_loss: 0.6126 - val_loss: 200.5428
Epoch 2/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 226ms/step - age_output_loss: 208.3466 - age_output_mae: 10.9048 - ethnicity_output_accuracy: 0.4028 - ethnicity_output_loss: 1.4812 - gender_output_accuracy: 0.6337 - gender_output_loss: 0.6627 - loss: 210.6163 - val_age_output_loss: 136.5079 - val_age_output_mae: 8.8649 - val_ethnicity_output_accuracy: 0.4663 - val_ethnicity_output_loss: 1.3184 - val_gender_output_accuracy: 0.6884 - val_gender_ou

In [8]:
gender_counts = df['gender'].value_counts()
print("Gender Distribution:")
print(gender_counts)


Gender Distribution:
gender
0    12389
1    11314
Name: count, dtype: int64


In [9]:
ethnicity_counts = df['ethnicity'].value_counts()
print("\nEthnicity Distribution:")
print(ethnicity_counts)



Ethnicity Distribution:
ethnicity
0    10077
1     4525
3     3975
2     3434
4     1692
Name: count, dtype: int64


In [10]:
from sklearn.metrics import classification_report

# Get true and predicted labels for gender
y_true_gender = []
y_pred_gender = []

for i in range(len(test_generator)):
    X_batch, y_batch = test_generator[i]
    y_true_gender.extend(np.argmax(y_batch['gender_output'], axis=1))
    predictions = model.predict(X_batch)
    y_pred_gender.extend(np.argmax(predictions[1], axis=1))  # Assuming gender_output is the second output

print(classification_report(y_true_gender, y_pred_gender, target_names=['Male', 'Female']))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step


KeyError: 1

In [15]:
# Import necessary libraries
import os
import numpy as np
import pandas as pd
import re
from PIL import Image
from tensorflow.keras.utils import Sequence, to_categorical
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.model_selection import train_test_split
from sklearn.utils import resample, class_weight
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import tensorflow as tf

# Define image dimensions
IMG_HEIGHT = 128  # Adjust as needed
IMG_WIDTH = 128
batch_size = 32  # Adjust as needed based on your system's memory

# Define the dataset directory
dataset_dir = 'UTKFace'  # Replace with your actual dataset directory

# Get list of image files
image_files = [f for f in os.listdir(dataset_dir) if f.endswith('.jpg')]

# Extract labels from filenames and create DataFrame
data = []

# Regular expression pattern to extract labels
pattern = r'^(\d+)_(\d+)_(\d+)_\d+\.jpg(?:\.chip)?\.jpg$'

for img_name in image_files:
    match = re.match(pattern, img_name)
    if match:
        age = int(match.group(1))
        gender = int(match.group(2))
        ethnicity = int(match.group(3))
        data.append({'img_name': img_name, 'age': age, 'gender': gender, 'ethnicity': ethnicity})
    else:
        print(f'Filename {img_name} did not match the pattern.')

# Create DataFrame
df = pd.DataFrame(data)

# Verify the DataFrame
print(df.head())
print(df.info())

# Analyze class distribution for ethnicity
print("\nEthnicity Distribution Before Balancing:")
print(df['ethnicity'].value_counts())

# Extract labels
age_labels = df['age'].values
gender_labels = df['gender'].values
ethnicity_labels = df['ethnicity'].values

# One-Hot Encode Gender and Ethnicity Labels
gender_labels_cat = to_categorical(gender_labels, num_classes=2)
ethnicity_labels_cat = to_categorical(ethnicity_labels, num_classes=5)

# Create a balanced dataset for ethnicity by oversampling minority classes
# Separate majority and minority classes
df_majority = df[df['ethnicity'] == 0]
df_minority_1 = df[df['ethnicity'] == 1]
df_minority_2 = df[df['ethnicity'] == 2]
df_minority_3 = df[df['ethnicity'] == 3]
df_minority_4 = df[df['ethnicity'] == 4]

# Find the maximum class count
max_count = df['ethnicity'].value_counts().max()

# Oversample minority classes
df_minority_1_upsampled = resample(
    df_minority_1,
    replace=True,
    n_samples=max_count,
    random_state=42
)

df_minority_2_upsampled = resample(
    df_minority_2,
    replace=True,
    n_samples=max_count,
    random_state=42
)

df_minority_3_upsampled = resample(
    df_minority_3,
    replace=True,
    n_samples=max_count,
    random_state=42
)

df_minority_4_upsampled = resample(
    df_minority_4,
    replace=True,
    n_samples=max_count,
    random_state=42
)

# Combine majority class with upsampled minority classes
df_balanced = pd.concat([
    df_majority,
    df_minority_1_upsampled,
    df_minority_2_upsampled,
    df_minority_3_upsampled,
    df_minority_4_upsampled
])

# Shuffle the dataset
df_balanced = df_balanced.sample(frac=1, random_state=42).reset_index(drop=True)

# Verify the new class distribution
print("\nEthnicity Distribution After Balancing:")
print(df_balanced['ethnicity'].value_counts())

# Update labels after balancing
age_labels_balanced = df_balanced['age'].values
gender_labels_balanced = df_balanced['gender'].values
ethnicity_labels_balanced = df_balanced['ethnicity'].values

# One-Hot Encode Gender and Ethnicity Labels
gender_labels_balanced_cat = to_categorical(gender_labels_balanced, num_classes=2)
ethnicity_labels_balanced_cat = to_categorical(ethnicity_labels_balanced, num_classes=5)

# Extract filenames
filenames_balanced = df_balanced['img_name'].values

# Split the data into training+validation and test sets

# First, split into training+validation and test sets
filenames_temp, filenames_test, age_temp, age_test, gender_temp, gender_test, ethnicity_temp, ethnicity_test = train_test_split(
    filenames_balanced, age_labels_balanced, gender_labels_balanced_cat, ethnicity_labels_balanced_cat, test_size=0.15, random_state=42
)

# Then, split training+validation set into training and validation sets
filenames_train, filenames_val, age_train, age_val, gender_train, gender_val, ethnicity_train, ethnicity_val = train_test_split(
    filenames_temp, age_temp, gender_temp, ethnicity_temp, test_size=0.1765, random_state=42
)

# Verify the shapes after splitting
print('\nNumber of training samples:', len(filenames_train))
print('Number of validation samples:', len(filenames_val))
print('Number of test samples:', len(filenames_test))

# Prepare labels as dictionaries
train_labels = {
    'age_output': age_train,
    'gender_output': gender_train,
    'ethnicity_output': ethnicity_train
}

val_labels = {
    'age_output': age_val,
    'gender_output': gender_val,
    'ethnicity_output': ethnicity_val
}

test_labels = {
    'age_output': age_test,
    'gender_output': gender_test,
    'ethnicity_output': ethnicity_test
}

# Create custom data generator
class DataGenerator(Sequence):
    def __init__(self, image_filenames, labels, batch_size, img_dir, img_height, img_width,
                 shuffle=True, augment=False, return_sample_weights=False, **kwargs):
        super().__init__(**kwargs)
        self.image_filenames = image_filenames
        self.labels = labels
        self.batch_size = batch_size
        self.img_dir = img_dir
        self.img_height = img_height
        self.img_width = img_width
        self.shuffle = shuffle
        self.augment = augment
        self.return_sample_weights = return_sample_weights
        self.indexes = np.arange(len(self.image_filenames))
        self.on_epoch_end()
        
        # Define augmentation parameters if augment is True
        if self.augment:
            self.datagen = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range=10,
                width_shift_range=0.1,
                height_shift_range=0.1,
                horizontal_flip=True
            )
        else:
            self.datagen = None

    
    def __len__(self):
        return int(np.ceil(len(self.image_filenames) / self.batch_size))
    
    def __getitem__(self, idx):
        # Generate indexes of the batch
        batch_indexes = self.indexes[idx * self.batch_size:(idx + 1) * self.batch_size]
        
        # Find list of IDs
        batch_filenames = [self.image_filenames[k] for k in batch_indexes]
        
        # Generate data
        X, y, sample_weights = self.__data_generation(batch_filenames, batch_indexes)
        
        if self.return_sample_weights:
            return X, y, sample_weights  # Return sample weights
        else:
            return X, y

    
    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indexes)
    
    def __data_generation(self, batch_filenames, batch_indexes):
        X = []
        y = {
            'age_output': self.labels['age_output'][batch_indexes],
            'gender_output': self.labels['gender_output'][batch_indexes],
            'ethnicity_output': self.labels['ethnicity_output'][batch_indexes]
        }
        
        # Initialize sample weights
        sample_weights = {
            'age_output': np.ones(len(batch_indexes)),
            'gender_output': np.ones(len(batch_indexes)),
            'ethnicity_output': np.ones(len(batch_indexes))
        }
        
        # Compute sample weights for ethnicity_output
        for i in range(len(batch_indexes)):
            # Get the class index for ethnicity (one-hot to class index)
            ethnicity_class_idx = np.argmax(y['ethnicity_output'][i])
            sample_weights['ethnicity_output'][i] = ethnicity_class_weights[ethnicity_class_idx]
        
        # Load and preprocess images
        for i, filename in enumerate(batch_filenames):
            img_path = os.path.join(self.img_dir, filename)
            try:
                img = Image.open(img_path)
                img = img.convert('RGB')
                img = img.resize((self.img_width, self.img_height))
                img_array = img_to_array(img)
                img_array = img_array / 255.0
                
                # Apply augmentation if enabled
                if self.augment and self.datagen is not None:
                    img_array = self.datagen.random_transform(img_array)
                
                X.append(img_array)
            except Exception as e:
                print(f'Error processing image {img_path}: {e}')
                X.append(np.zeros((self.img_height, self.img_width, 3)))
        
        X = np.array(X)
        return X, y, sample_weights


# Create generators
train_generator = DataGenerator(
    filenames_train,
    train_labels,
    batch_size=batch_size,
    img_dir=dataset_dir,
    img_height=IMG_HEIGHT,
    img_width=IMG_WIDTH,
    augment=True,  # Enable augmentation for training data
    return_sample_weights=True  # Return sample weights
)


val_generator = DataGenerator(
    filenames_val,
    val_labels,
    batch_size=batch_size,
    img_dir=dataset_dir,
    img_height=IMG_HEIGHT,
    img_width=IMG_WIDTH
    # return_sample_weights=False (default)
)

test_generator = DataGenerator(
    filenames_test,
    test_labels,
    batch_size=batch_size,
    img_dir=dataset_dir,
    img_height=IMG_HEIGHT,
    img_width=IMG_WIDTH,
    shuffle=False  # No need to shuffle test data
    # return_sample_weights=False (default)
)



# Build the multi-task learning model
input_layer = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))

# Shared Convolutional Layers
x = Conv2D(32, (3, 3), activation='relu')(input_layer)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Conv2D(128, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)

# Output Layers with Correct Names in Dense layers
age_output = Dense(1, name='age_output')(x)
gender_output = Dense(2, activation='softmax', name='gender_output')(x)
ethnicity_output = Dense(5, activation='softmax', name='ethnicity_output')(x)

# Create the Model
outputs = {
    'age_output': age_output,
    'gender_output': gender_output,
    'ethnicity_output': ethnicity_output
}

model = Model(inputs=input_layer, outputs=outputs)

# Verify the model's output names
print('\nModel output names:', model.output_names)

# Compute class weights for ethnicity
import numpy as np
ethnicity_classes = np.unique(df_balanced['ethnicity'])
ethnicity_class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=ethnicity_classes,
    y=df_balanced['ethnicity']
)
ethnicity_class_weights = dict(enumerate(ethnicity_class_weights))

print('\nEthnicity class weights:', ethnicity_class_weights)

# Compile the model
model.compile(
    optimizer='adam',
    loss={
        'age_output': 'mse',
        'gender_output': 'categorical_crossentropy',
        'ethnicity_output': 'categorical_crossentropy'
    },
    metrics={
        'age_output': 'mae',
        'gender_output': 'accuracy',
        'ethnicity_output': 'accuracy'
    }
)

# Set up callbacks
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

model_checkpoint = ModelCheckpoint(
    'best_multitask_model.keras',
    save_best_only=True,
    monitor='val_loss'
)

epochs = 50

# Train the model using generators
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=epochs,
    callbacks=[early_stopping, model_checkpoint]
    # Removed class_weight parameter
)


# Evaluate the model on the test set
test_loss = model.evaluate(test_generator, verbose=2)

# Print the evaluation results
print('\nTest loss and metrics:', test_loss)

# Save the model
model.save('multitask_model.h5')


Filename 24_0_1_20170116220224657 .jpg.chip.jpg did not match the pattern.
Filename 39_1_20170116174525125.jpg.chip.jpg did not match the pattern.
Filename 55_0_0_20170116232725357jpg.chip.jpg did not match the pattern.
Filename 61_1_20170109142408075.jpg.chip.jpg did not match the pattern.
Filename 61_1_20170109150557335.jpg.chip.jpg did not match the pattern.
                                 img_name  age  gender  ethnicity
0  100_0_0_20170112213500903.jpg.chip.jpg  100       0          0
1  100_0_0_20170112215240346.jpg.chip.jpg  100       0          0
2  100_1_0_20170110183726390.jpg.chip.jpg  100       1          0
3  100_1_0_20170112213001988.jpg.chip.jpg  100       1          0
4  100_1_0_20170112213303693.jpg.chip.jpg  100       1          0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23703 entries, 0 to 23702
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   img_name   23703 non-null  object
 1   age       

ValueError: I/O operation on closed file.