In [2]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense,Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping,ReduceLROnPlateau

data_set_dir = 'D:/Tea Withering project/tea leaves'
batch_size = 64
img_size = (224, 224)
valid_image_extensions = ['.jpg', '.jpeg', '.png', '.bmp']


image_files = [f for f in os.listdir(data_set_dir) if f.lower().endswith(tuple(valid_image_extensions))]

if len(image_files) == 0:
    print("No valid image files found in the dataset directory.")
else:
    # Create DataFrame
    data = {
        'id': image_files,
        'label': np.random.randint(0, 2, len(image_files))  #Random labels for demonstration
    }
    df = pd.DataFrame(data)
    df['label'] = df['label'].astype(str)
    print(f"DataFrame created successfully with {len(df)} records!")

    train_df, temp_df = train_test_split(df, test_size=0.3, random_state=42)
    val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

    if len(train_df) == 0 or len(val_df) == 0 or len(test_df) == 0:
     raise ValueError("One of the train, validation, or test sets is empty. Check your dataset splitting.")
    
    val_test_datagen = ImageDataGenerator(rescale=1./255)

# Image Data Augmentation for Training
datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=50,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    brightness_range=[0.7,1.3]
)

train_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=data_set_dir,
    x_col='id',
    y_col='label',
    target_size=img_size,
    class_mode='binary',
    batch_size=batch_size
)

validation_generator = datagen.flow_from_dataframe(
    dataframe=val_df,
    directory=data_set_dir,
    x_col='id',
    y_col='label',
    target_size=img_size,
    class_mode='binary',
    batch_size=batch_size
)


test_generator = datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=data_set_dir,
    x_col='id',
    y_col='label',
    target_size=img_size,
    class_mode='binary',
    batch_size=batch_size,
    shuffle=False  # No need to shuffle test data
)

# Load VGG16 base model without the top layers
resnet50_base = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Add custom classification layers
x = GlobalAveragePooling2D()(resnet50_base.output)
x = Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01))(x)
x = Dropout(0.5)(x)
x = Dense(256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01))(x)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)


model = Model(inputs=resnet50_base.input, outputs=output)

# Freeze the layers of VGG16 to prevent them from being trained
for layer in resnet50_base.layers[-8:]:   #unfreeze the last 4 layers for the fine tuning       
    layer.trainable = True

model.compile(optimizer=Adam(learning_rate=1e-6), loss='binary_crossentropy', metrics=['accuracy'])

# Set up a checkpoint to save the best model during training and for regularozation for adjusting learinig rate dynamically
checkpoint = ModelCheckpoint('restnet50_best_model.keras', monitor='val_loss', save_best_only=True, verbose=1)
early_stopping = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-7)


history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    epochs=100,
    callbacks=[checkpoint,early_stopping]
)


test_loss, test_acc = model.evaluate(test_generator, steps=len(test_generator))
print(f'Test accuracy: {test_acc * 100:.2f}%')

























DataFrame created successfully with 384 records!
Found 268 validated image filenames belonging to 2 classes.
Found 58 validated image filenames belonging to 2 classes.
Found 58 validated image filenames belonging to 2 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 1us/step
Epoch 1/100


  self._warn_if_super_not_called()


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59s/step - accuracy: 0.5035 - loss: 12.3717  
Epoch 1: val_loss improved from inf to 12.33282, saving model to restnet50_best_model.keras
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m310s[0m 65s/step - accuracy: 0.5066 - loss: 12.3720 - val_accuracy: 0.4483 - val_loss: 12.3328
Epoch 2/100


  self.gen.throw(value)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 3/100


  self._save_model(epoch=epoch, batch=None, logs=logs)
  current = self.get_monitor_value(logs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31s/step - accuracy: 0.4647 - loss: 12.4805 
Epoch 3: val_loss did not improve from 12.33282
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 35s/step - accuracy: 0.4631 - loss: 12.4784 - val_accuracy: 0.4483 - val_loss: 12.3347
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 133ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35s/step - accuracy: 0.4682 - loss: 12.4345 
Epoch 5: val_loss did not improve from 12.33282
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 38s/step - accuracy: 0.4660 - loss: 12.4376 - val_accuracy: 0.4483 - val_loss: 12.3451
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 134ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39s/step - accuracy: 0.4749 - loss: 12.4436 
Epo