In [35]:
import pandas as pd
import numpy as np
import os
import cv2
from  matplotlib import pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import InceptionV3
from pathlib import Path
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

In [2]:
## Relative paths
path_to_train_images = "/Users/stathis/Desktop/feather-in-focus/train_images/train_images"
path_to_resized_images = "/Users/stathis/Desktop/feather-in-focus/train_images_resized"
path_to_test_images = "/Users/stathis/Desktop/feather-in-focus/test_images/test_images"
path_to_resized_images_test = "/Users/stathis/Desktop/feather-in-focus/test_images_resized/test_images"

### 1. Load Data 

In [43]:
# Import data
class_names_dict = np.load("class_names.npy", allow_pickle=True).item()
attributes_npy = np.load("attributes.npy", allow_pickle=True)
attributes_df = pd.read_csv("attributes.txt", header=None, index_col = 0, sep=" |::", names=("attribute", "value"), engine='python')
training_df = pd.read_csv("train_images.csv", header = 0)
augmented_df = pd.read_csv("augmented_labels/augmented_labels.csv", header = 0)
test_images_paths = pd.read_csv("test_images_path.csv", header = 0)
test_images_sample = pd.read_csv("test_images_sample.csv", header = 0)


In [44]:
training_df['image_path'] = "train_images"+training_df['image_path']


In [55]:
training_df['label'] = training_df['label'].tolist()

In [56]:
training_df

Unnamed: 0,image_path,label
0,train_images/train_images/1.jpg,1
1,train_images/train_images/2.jpg,1
2,train_images/train_images/3.jpg,1
3,train_images/train_images/4.jpg,1
4,train_images/train_images/5.jpg,1
...,...,...
3921,train_images/train_images/3922.jpg,200
3922,train_images/train_images/3923.jpg,200
3923,train_images/train_images/3924.jpg,200
3924,train_images/train_images/3925.jpg,200


In [46]:

datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Define batch size
batch_size = 32

# Train data generator
train_generator = datagen.flow_from_dataframe(
    dataframe=training_df,
    x_col='image_path',
    y_col='label',
    subset='training',
    batch_size=batch_size,
    seed=42,
    shuffle=True,
    class_mode='categorical',  # Use 'binary' for binary classification
    target_size=(224, 224)  # Adjust the target size based on your model's input size
)

# Validation data generator
valid_generator = datagen.flow_from_dataframe(
    dataframe=training_df,
    x_col='image_path',
    y_col='label',
    subset='validation',
    batch_size=batch_size,
    seed=42,
    shuffle=False,
    class_mode='categorical',
    target_size=(224, 224)
)


TypeError: If class_mode="categorical", y_col="label" column values must be type string, list or tuple.

In [28]:
# Get a batch of data and labels
data_batch, labels_batch = next(train_generator)

# Print the shapes of the batch
print("Data batch shape:", data_batch.shape)
print("Labels batch shape:", labels_batch.shape)

# Optionally, print the actual data and labels
print("Data batch:", data_batch)
print("Labels batch:", labels_batch)

Data batch shape: (32, 224, 224, 3)
Labels batch shape: (32, 200)
Data batch: [[[[0.02745098 0.03137255 0.03921569]
   [0.02745098 0.03137255 0.03921569]
   [0.02745098 0.03137255 0.03921569]
   ...
   [0.02745098 0.03921569 0.05882353]
   [0.02745098 0.03921569 0.05882353]
   [0.02745098 0.03921569 0.05882353]]

  [[0.02745098 0.03137255 0.03921569]
   [0.02745098 0.03137255 0.03921569]
   [0.02745098 0.03137255 0.03921569]
   ...
   [0.02745098 0.03921569 0.05882353]
   [0.02745098 0.03921569 0.05882353]
   [0.02745098 0.03921569 0.05882353]]

  [[0.02745098 0.03137255 0.03921569]
   [0.02745098 0.03137255 0.03921569]
   [0.02745098 0.03137255 0.03921569]
   ...
   [0.02745098 0.03921569 0.05882353]
   [0.02745098 0.03921569 0.05882353]
   [0.02745098 0.03921569 0.05882353]]

  ...

  [[0.         0.01960784 0.        ]
   [0.         0.01960784 0.        ]
   [0.         0.01960784 0.        ]
   ...
   [0.00392157 0.00392157 0.01176471]
   [0.01176471 0.01176471 0.01176471]
   [0.0

In [24]:
#--------Base Model

model = models.Sequential()

model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))

# Flatten layer
model.add(layers.Flatten())

# Dense layers
model.add(layers.Dense(64, activation='relu'))

## Fix me -> the output layer should be 200 but the labels should be 0-199

# Output layer
model.add(layers.Dense(200, activation='softmax'))

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 111, 111, 32)     0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 109, 109, 64)      18496     
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 54, 54, 64)       0         
 2D)                                                             
                                                                 
 conv2d_6 (Conv2D)           (None, 52, 52, 64)        36928     
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 26, 26, 64)      

In [36]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
train_generator.classes = to_categorical(train_generator.classes, num_classes=200)
history = model.fit(train_generator, epochs=15, validation_data=valid_generator)

IndexError: arrays used as indices must be of integer (or boolean) type