In [1]:
import pandas as pd
import numpy as np
import cv2
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split



In [2]:
# the values per image that belong to the given attributes for each class
attributes = np.load("feather-in-focus/attributes.npy")

# names of each bird class
labels = np.load("feather-in-focus/class_names.npy", allow_pickle=True).item()

# list of training data containing: image location, labels per image 
data = pd.read_csv("feather-in-focus/train_images.csv")


In [3]:
labels = np.load("feather-in-focus/class_names.npy", allow_pickle=True).item()
df = pd.read_csv("feather-in-focus/train_images.csv")
att_list = []
for i in range(len(df)):
    att_list.append(attributes[df['label'][i]-1])  
    
df['attributes'] = att_list
df

Unnamed: 0,image_path,label,attributes
0,/train_images/1.jpg,1,"[0.010638400403539122, 0.010638400403539122, 0..."
1,/train_images/2.jpg,1,"[0.010638400403539122, 0.010638400403539122, 0..."
2,/train_images/3.jpg,1,"[0.010638400403539122, 0.010638400403539122, 0..."
3,/train_images/4.jpg,1,"[0.010638400403539122, 0.010638400403539122, 0..."
4,/train_images/5.jpg,1,"[0.010638400403539122, 0.010638400403539122, 0..."
...,...,...,...
3921,/train_images/3922.jpg,200,"[0.04378018711713792, 0.02814440600394273, 0.0..."
3922,/train_images/3923.jpg,200,"[0.04378018711713792, 0.02814440600394273, 0.0..."
3923,/train_images/3924.jpg,200,"[0.04378018711713792, 0.02814440600394273, 0.0..."
3924,/train_images/3925.jpg,200,"[0.04378018711713792, 0.02814440600394273, 0.0..."


In [4]:
data['label'] = data['label'] - 1
df['label'] = df['label'] - 1

def load_and_preprocess_images(image_paths):
    images = []
    for img_path in image_paths:
        img = cv2.imread(f'feather-in-focus/train_images{img_path}' )
        img = cv2.resize(img, (224, 224))
        img = img / 255.0
        images.append(img)
    return images

# Load and preprocess images
train_images = load_and_preprocess_images(data['image_path'])

df['pre_image'] = train_images
df

Unnamed: 0,image_path,label,attributes,pre_image
0,/train_images/1.jpg,0,"[0.010638400403539122, 0.010638400403539122, 0...","[[[0.6078431372549019, 0.3843137254901961, 0.1..."
1,/train_images/2.jpg,0,"[0.010638400403539122, 0.010638400403539122, 0...","[[[0.6588235294117647, 0.6235294117647059, 0.5..."
2,/train_images/3.jpg,0,"[0.010638400403539122, 0.010638400403539122, 0...","[[[0.8274509803921568, 0.796078431372549, 0.79..."
3,/train_images/4.jpg,0,"[0.010638400403539122, 0.010638400403539122, 0...","[[[0.6352941176470588, 0.6196078431372549, 0.6..."
4,/train_images/5.jpg,0,"[0.010638400403539122, 0.010638400403539122, 0...","[[[0.7098039215686275, 0.7137254901960784, 0.6..."
...,...,...,...,...
3921,/train_images/3922.jpg,199,"[0.04378018711713792, 0.02814440600394273, 0.0...","[[[0.3568627450980392, 0.3568627450980392, 0.3..."
3922,/train_images/3923.jpg,199,"[0.04378018711713792, 0.02814440600394273, 0.0...","[[[0.3411764705882353, 0.36470588235294116, 0...."
3923,/train_images/3924.jpg,199,"[0.04378018711713792, 0.02814440600394273, 0.0...","[[[0.043137254901960784, 0.13725490196078433, ..."
3924,/train_images/3925.jpg,199,"[0.04378018711713792, 0.02814440600394273, 0.0...","[[[0.3137254901960784, 0.39215686274509803, 0...."


In [5]:
from keras.layers import Input, Conv2D, Flatten, Dense, Concatenate
from keras.models import Model

In [6]:
# Assuming you have loaded your data and attributes as train_df and attributes, respectively

# Load the InceptionV3 model with pre-trained weights
base_model = tf.keras.applications.InceptionV3(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
base_model.trainable = False

# Create a Sequential model
model = tf.keras.Sequential()
model.add(base_model)

# Flatten the output from InceptionV3
flattened_output = Flatten()(model.output)

# Assuming your attribute features have a shape of (312,)
attribute_input = Input(shape=(312,))
dense_attribute_output = Dense(128, activation='relu')(attribute_input)

# Concatenate InceptionV3 output with attribute features
concatenated_output = Concatenate()([flattened_output, dense_attribute_output])

# Output Dense layer
output_layer = Dense(200, activation="softmax", name="output-layer")(concatenated_output)

# Create the final model
model = Model(inputs=[model.input, attribute_input], outputs=output_layer)

# As we allow the model to be trainable in some layers,
# it will be better to decrease the learning rate in this region
# to avoid an exaggerated change in the model weights
base_learning_rate = 0.01 
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=base_learning_rate)

# Compile the model
model.compile(loss="categorical_crossentropy", optimizer=adam_optimizer, metrics=["accuracy"])

# Fit the model
epochs = 10  # You can adjust the number of epochs as needed


model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 inception_v3_input (InputLayer  [(None, 224, 224, 3  0          []                               
 )                              )]                                                                
                                                                                                  
 inception_v3 (Functional)      (None, 5, 5, 2048)   21802784    ['inception_v3_input[0][0]']     
                                                                                                  
 input_2 (InputLayer)           [(None, 312)]        0           []                               
                                                                                                  
 flatten (Flatten)              (None, 51200)        0           ['inception_v3[0][0]']       

In [7]:
# Split your data into training and validation sets
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

# Convert 'pre_image' column to numpy array
train_images = np.stack(train_df['pre_image'].to_numpy())
val_images = np.stack(val_df['pre_image'].to_numpy())

# Convert labels to categorical
train_labels = tf.keras.utils.to_categorical(train_df['label'])
val_labels = tf.keras.utils.to_categorical(val_df['label'])

# Convert 'attributes' column to numpy array
train_attributes = np.stack(train_df['attributes'].to_numpy())
val_attributes = np.stack(val_df['attributes'].to_numpy())

# Create datasets
batch_size = 64

train_dataset = tf.data.Dataset.from_tensor_slices(([train_images, train_attributes], train_labels))
train_dataset = train_dataset.shuffle(buffer_size=len(train_images)).batch(batch_size)

val_dataset = tf.data.Dataset.from_tensor_slices(([val_images, val_attributes], val_labels))
val_dataset = val_dataset.batch(batch_size)

ValueError: Can't convert non-rectangular Python sequence to Tensor.

In [None]:
model.fit(train_dataset, epochs=epochs, validation_data=val_dataset)

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 inception_v3_input (InputLayer  [(None, 224, 224, 3  0          []                               
 )                              )]                                                                
                                                                                                  
 inception_v3 (Functional)      (None, 5, 5, 2048)   21802784    ['inception_v3_input[0][0]']     
                                                                                                  
 input_6 (InputLayer)           [(None, 312)]        0           []                               
                                                                                                  
 flatten_2 (Flatten)            (None, 51200)        0           ['inception_v3[0][0]']     

In [None]:
from sklearn.model_selection import train_test_split

# Split your data into training and validation sets
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

# Convert 'image' column to numpy array
train_images = np.stack(train_df['pre_image'].to_numpy())
val_images = np.stack(val_df['pre_image'].to_numpy())

# Convert labels to categorical
train_labels = tf.keras.utils.to_categorical(train_df['label'])
val_labels = tf.keras.utils.to_categorical(val_df['label'])

# Create datasets
batch_size = 16

train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
train_dataset = train_dataset.shuffle(buffer_size=len(train_images)).batch(batch_size)

val_dataset = tf.data.Dataset.from_tensor_slices((val_images, val_labels))
val_dataset = val_dataset.batch(batch_size)

# Example: Check the shape of the datasets
for images, labels in train_dataset.take(1):
    print("Training batch shape:", images.shape, labels.shape)

for images, labels in val_dataset.take(1):
    print("Validation batch shape:", images.shape, labels.shape)

Training batch shape: (16, 224, 224, 3) (16, 198)
Validation batch shape: (16, 224, 224, 3) (16, 198)


In [None]:
#Setting the early_stop to avoid overfitting
early_stop = tf.keras.callbacks.EarlyStopping(
    patience=3,
    min_delta=0.001,
    restore_best_weights=True,)


history = model.fit(
    train_dataset,
    attributes,
    epochs=10,
    steps_per_epoch=len(train_dataset),
    validation_data=val_dataset,
    validation_steps=int(0.25 * len(val_dataset)),
    callbacks=[early_stop],
)

ValueError: `y` argument is not supported when using dataset as input.