In [98]:
import os
import shutil
import warnings
import random as r
import numpy as np
import pandas as pd
import seaborn as sns

# Matplotlib Imports
import matplotlib  
import matplotlib.pyplot as plt
matplotlib.use(u'nbAgg')
%matplotlib inline

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.preprocessing import MultiLabelBinarizer



In [83]:
df_labels = pd.read_csv('class_information_english.csv')
df_labels

Unnamed: 0.1,Unnamed: 0,english_class
0,0,Scattered
1,1,One foot
2,2,Bucket
3,3,Right arm
4,4,bookcase
...,...,...
4370,4370,coin
4371,4371,study hard
4372,4372,stall
4373,4373,watch TV


In [84]:
class_labels = df_labels['english_class'].values.tolist()
class_labels
num_classes = len(class_labels)

In [85]:
num_classes

4375

In [86]:
df_images = pd.read_csv('train_label_converted.csv')
df_images

Unnamed: 0.1,Unnamed: 0,file_name,label
0,0,72dbecab-8471-454a-8b0d-ae54a8b9501d.jpg,"[808, 673, 45, 289]"
1,1,e13eaae3-c2f3-47a0-aebc-066d5a6737c9.jpg,"[1992, 284, 1423, 228, 3889, 798, 761, 2664]"
2,2,5b8be376-fbdc-41d4-87f4-eda8a1991529.jpg,"[3889, 1315, 2437, 1737, 761, 228, 289, 1923, ..."
3,3,cd624d25-7386-47a8-8b0d-388a877c03c7.jpg,"[2592, 3455, 4249, 284, 1591, 3923, 1549, 1626..."
4,4,89dd6787-420d-4c30-8d69-6031fb69e91a.jpg,"[4161, 2397, 289, 3565, 1186]"
...,...,...,...
14995,14995,9cc15d3a-4d10-478f-a932-a0cac124beb0.jpg,"[289, 3457, 106, 2651, 3923, 1217, 776, 1626, ..."
14996,14996,cee67e0a-50d6-4218-b386-08d9b5bd9efd.jpg,"[228, 669, 4057, 911, 2212, 3380, 939, 2689, 9..."
14997,14997,4e7ef64c-61d0-4c55-b304-39bf15247e93.jpg,"[3684, 228, 4249, 4189, 86, 1085, 1626, 284, 1..."
14998,14998,434534b4-26c9-4d7b-9797-143c9028d217.jpg,"[2625, 2424, 3365, 673, 4249, 562, 2049, 284, ..."


In [87]:
df_labels = df_labels.rename(columns={'Unnamed: 0': 'id'})
df_labels

Unnamed: 0,id,english_class
0,0,Scattered
1,1,One foot
2,2,Bucket
3,3,Right arm
4,4,bookcase
...,...,...
4370,4370,coin
4371,4371,study hard
4372,4372,stall
4373,4373,watch TV


In [88]:
rows_with_missing_bracket = df_images[~df_images['label'].str.endswith(']')]

print(rows_with_missing_bracket)

Empty DataFrame
Columns: [Unnamed: 0, file_name, label]
Index: []


In [89]:
import ast

df_images['label'] = df_images['label'].apply(lambda x: ast.literal_eval(x)) 

df_labels['id'] = df_labels['id'].astype(str)

df_images = df_images.explode('label')


df_images

Unnamed: 0.1,Unnamed: 0,file_name,label
0,0,72dbecab-8471-454a-8b0d-ae54a8b9501d.jpg,808
0,0,72dbecab-8471-454a-8b0d-ae54a8b9501d.jpg,673
0,0,72dbecab-8471-454a-8b0d-ae54a8b9501d.jpg,45
0,0,72dbecab-8471-454a-8b0d-ae54a8b9501d.jpg,289
1,1,e13eaae3-c2f3-47a0-aebc-066d5a6737c9.jpg,1992
...,...,...,...
14999,14999,b8c627f0-be11-45ea-8a01-abd89367472b.jpg,950
14999,14999,b8c627f0-be11-45ea-8a01-abd89367472b.jpg,673
14999,14999,b8c627f0-be11-45ea-8a01-abd89367472b.jpg,1315
14999,14999,b8c627f0-be11-45ea-8a01-abd89367472b.jpg,3538


In [90]:
print(df_images['label'].dtype)
print(df_labels['id'].dtype)

object
object


In [91]:
df_images['label'] = df_images['label'].astype(str).str.strip()
df_labels['id'] = df_labels['id'].str.strip()

merged_df = df_images.merge(df_labels, left_on='label', right_on='id')

string_labels = merged_df['english_class']

In [92]:
merged_df

Unnamed: 0.1,Unnamed: 0,file_name,label,id,english_class
0,0,72dbecab-8471-454a-8b0d-ae54a8b9501d.jpg,808,808,Station
1,23,8e53d520-26a6-484e-8e38-5c139536ad51.jpg,808,808,Station
2,112,67c30ae5-d178-483e-b880-bc936672c585.jpg,808,808,Station
3,182,e930f45e-32ed-4da0-989d-3b957fa8fd66.jpg,808,808,Station
4,201,87197506-6bd0-467c-b382-d75947c477c9.jpg,808,808,Station
...,...,...,...,...,...
167124,14978,7d43aa54-22f2-43cf-a019-d49419464e32.jpg,2676,2676,light pink
167125,14986,55f01360-aab2-4975-a37c-7bfd5f16c90a.jpg,4268,4268,butt
167126,14991,4f392d36-16d3-4264-abca-136ef33af859.jpg,3839,3839,Wrapped frame
167127,14997,4e7ef64c-61d0-4c55-b304-39bf15247e93.jpg,60,60,Upper shop


In [93]:
merged_df.set_index('file_name', inplace=True)

result_df = merged_df.groupby(merged_df.index)['english_class'].agg(list).reset_index()
result_df = result_df.sort_index().reset_index(drop=True)

result_df

Unnamed: 0,file_name,english_class
0,0000afa0-a150-497c-a381-b6611140cd51.jpg,"[woman, Wear, Holding hands, hand in hand, Sky..."
1,0003767f-f783-4c3a-840e-6e557f49b9a3.jpg,"[man, houses, the way, Men, ground, lift, One ..."
2,0003f764-7c70-4111-aa8c-d8b4825f1ccd.jpg,"[man, woman, Stand, hall, Lighter, Room, Ping ..."
3,00042a11-3d05-4867-94ad-b6a120a5ed67.jpg,"[stage, woman, Wear, skirt, walk, light, Hold,..."
4,0012fea3-6abc-4829-b6ae-29a6c7ab69d6.jpg,"[woman, Wear, child, In the house, Room, Chair..."
...,...,...
14995,fff1537e-f7a0-4447-b886-7f4b14197fb8.jpg,"[man, clothing, grassland, lawn, Green, Men, U..."
14996,fff578f3-dde9-419c-b9ce-53d29f9083ca.jpg,"[woman, Wear, Sit in, Put in, clothing, Left a..."
14997,fff8abae-a77f-45cf-9c0a-f8292c3a0fd1.jpg,"[woman, Wear, skirt, Hold, corridor, Room, Cro..."
14998,fffc0c1c-6ca8-4429-90bb-692a67863f05.jpg,"[stage, microphone, man, Performance, Sing, li..."


In [116]:
images_dir = "train"

result_df['english_class'] = result_df['english_class'].astype(str)
result_df['english_class'] = result_df['english_class'].apply(lambda x: x.strip())

mlb = MultiLabelBinarizer()
encoded_labels = mlb.fit_transform(result_df['english_class'].str.split(', '))

print(encoded_labels.shape)


train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2 
)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=result_df[:10500],
    directory=images_dir, 
    x_col="file_name",
    y_col="english_class",
    target_size=(224, 224),
    class_mode='raw',  
    subset='training'
)

validation_generator = train_datagen.flow_from_dataframe(
    dataframe=result_df[10500:12750],
    directory=images_dir, 
    x_col="file_name",
    y_col="english_class",
    target_size=(224, 224),
    batch_size=32,
    class_mode='raw',  
    subset='validation'
)

test_datagen = ImageDataGenerator(rescale=1.0 / 255)  

test_generator = test_datagen.flow_from_dataframe(
    dataframe=result_df[12750:],
    directory=images_dir,  
    x_col="file_name",
    target_size=(224, 224),
    batch_size=32,
    class_mode='raw',
    subset='training',
    shuffle=False
)

num_classes = encoded_labels.shape[1] 
print(num_classes)


base_model = keras.applications.InceptionV3(
    include_top=False, 
    weights='imagenet',  
    input_shape=(224, 224, 3),
)

base_model.trainable = False

model = keras.Sequential([
    base_model,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(num_classes, activation='sigmoid'),  
])

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy', 
    metrics=[
        'accuracy',
        tf.keras.metrics.TopKCategoricalAccuracy(k=5),  # Top-5 accuracy
    ],
)

model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10,  
    verbose=1,
)

test_results = model.evaluate(test_generator, verbose=1)


(15000, 5208)
Found 8400 validated image filenames.
Found 450 validated image filenames.


KeyError: 'class'