# In [None]:
import os
import random
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Directory that holds the image files referenced by the CSV "key" column.
dataset_dir = '/content/main_data_croped'

# Location of the labels CSV. This is an absolute path, so joining it onto
# dataset_dir with os.path.join() would discard dataset_dir entirely
# (os.path.join restarts at any absolute component). Use the path directly.
labels_csv = '/content/drive/MyDrive/computer_vision/tfti2.csv'

# Read only the two columns that are used: the image key and its class label.
data = pd.read_csv(labels_csv, usecols=["key", "class"])

# Labels are handled as strings from here on (the filter below and the
# categorical generators both work with string class names).
data['class'] = data['class'].astype(str)

# Keep only the rows labelled 1, 2 or 3. The explicit copy makes `data` an
# independent frame rather than a view-like slice of the CSV contents.
data = data[data['class'].isin(['1', '2', '3'])].copy()

# Split 80% / 10% / 10% into train / validation / test: first hold out 20%,
# then cut that hold-out in half. random_state pins both splits.
# NOTE(review): the splits are not stratified by class; consider
# stratify=... if the classes are imbalanced.
train_data, test_data = train_test_split(data, test_size=0.2, random_state=0)
valid_data, test_data = train_test_split(test_data, test_size=0.5, random_state=0)

# Print the number of samples in each set
print('Number of train samples:', train_data.shape[0])
print('Number of valid samples:', valid_data.shape[0])
print('Number of test samples:', test_data.shape[0])

# Turn each key into a file name by appending the ".jpg" extension.
# assign() returns a new frame per split, so nothing is written into a
# slice of `data` (avoids pandas' SettingWithCopyWarning) and `data` itself
# keeps the bare keys.
train_data, valid_data, test_data = (
    split.assign(key=split["key"] + ".jpg")
    for split in (train_data, valid_data, test_data)
)

BATCH_SIZE = 16
HEIGHT = 224
WIDTH = 224
N_CLASSES = 3

# Explicit class list shared by every generator. Without it each generator
# infers its classes from its own dataframe, so a small split that happens
# to miss one class would produce a different label width / index mapping.
CLASS_NAMES = ['1', '2', '3']

# Keyword arguments common to all three flow_from_dataframe() calls.
flow_kwargs = dict(
    directory=dataset_dir,
    x_col="key",
    y_col="class",
    classes=CLASS_NAMES,
    class_mode="categorical",
    batch_size=BATCH_SIZE,
    target_size=(HEIGHT, WIDTH),
    seed=0,
)

# NOTE(review): images are only rescaled to [0, 1]. The ImageNet VGG16
# weights are normally paired with
# tensorflow.keras.applications.vgg16.preprocess_input -- confirm whether
# plain rescaling is intended before changing it.

# Training images: rescale plus random geometric augmentation.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    shear_range=0.2,
    horizontal_flip=True,
    vertical_flip=True
)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_data,
    **flow_kwargs
)

# Validation images: rescale only. Random augmentation here would make the
# validation loss fluctuate from epoch to epoch for reasons unrelated to the
# model, and it would no longer be comparable to the (un-augmented) test set.
valid_datagen = ImageDataGenerator(rescale=1./255)

valid_generator = valid_datagen.flow_from_dataframe(
    dataframe=valid_data,
    shuffle=False,  # fixed order; aggregate metrics do not need shuffling
    **flow_kwargs
)

# Test images: rescale only, fixed order so predictions line up with rows.
test_datagen = ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_data,
    shuffle=False,
    **flow_kwargs
)

# Shape of one input image: HEIGHT x WIDTH pixels, 3 colour channels.
IMAGE_SHAPE = (HEIGHT, WIDTH, 3)

# Convolutional base: VGG16 with ImageNet weights and without its dense top.
vgg_model = VGG16(include_top=False, weights='imagenet', input_shape=IMAGE_SHAPE)

# Classification head on top of the base:
#   VGG16 features -> global average pooling -> dropout(0.5) -> softmax.
# No layer is frozen in this section, so the base is trained as well.
input_tensor = Input(shape=IMAGE_SHAPE)
backbone_features = vgg_model(input_tensor)
pooled_features = GlobalAveragePooling2D()(backbone_features)
dropped_features = Dropout(0.5)(pooled_features)
output_layer = Dense(
    units=N_CLASSES,
    activation='softmax',
    kernel_regularizer=l2(0.01),  # L2 penalty on the output kernel
    name='output',
)
x = output_layer(dropped_features)
model_vgg = Model(inputs=input_tensor, outputs=x)

# Adam with a learning rate of 1e-4; categorical cross-entropy matches the
# softmax output, and accuracy is reported as the metric.
optimizer = Adam(learning_rate=1e-4)
model_vgg.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

# Multiply the learning rate by 0.1 whenever val_loss has not improved for
# 5 epochs, never going below 1e-7; verbose=1 logs each reduction.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    patience=5,
    factor=0.1,
    min_lr=1e-7,
    verbose=1,
)

# Train for a fixed number of epochs, validating after each one.
EPOCHS = 50
training_callbacks = [reduce_lr]
history_model1 = model_vgg.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=EPOCHS,
    callbacks=training_callbacks,
    verbose=2,  # one summary line per epoch
)

# Score the trained model on the held-out test generator. evaluate() returns
# the loss followed by the compiled metrics (accuracy only).
test_metrics = model_vgg.evaluate(test_generator, verbose=0)
test_loss, test_acc = test_metrics

print('Test loss:', test_loss)
print('Test accuracy:', test_acc)