In [None]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Convolutional backbone: VGG16 with ImageNet weights and without its classifier head
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

# Mark every backbone layer as non-trainable so the pretrained weights stay fixed
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# Task-specific head stacked on the backbone output:
# flatten -> 512-unit ReLU -> 128-unit ReLU -> 4-way softmax
head_layers = [
    Flatten(),
    Dense(512, activation='relu'),
    Dense(128, activation='relu'),
    Dense(4, activation='softmax'),
]
x = base_model.output
for head_layer in head_layers:
    x = head_layer(x)

# Full network: backbone input through to the softmax output
model = Model(inputs=base_model.input, outputs=x)

# Adam (learning rate 1e-4), categorical cross-entropy loss, accuracy metric
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)

# Print the layer-by-layer architecture
model.summary()



Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [None]:
import pandas as pd
from google.colab import drive
# Mount Google Drive so the dataset folder is reachable from the Colab runtime
drive.mount('/content/drive')
path = '/content/drive/MyDrive/plant-pathology-2020-fgvc7/'

# Read the three CSV files from the dataset folder, in this order:
# training table, test table, sample submission
train, test, submission = (
    pd.read_csv(path + csv_name)
    for csv_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)

Mounted at /content/drive


In [None]:
# Inspect a single example: the row at integer position 33 of `train`
train.iloc[33]

Unnamed: 0,33
image_id,Train_33
healthy,1
multiple_diseases,0
rust,0
scab,0


In [None]:
# Display the `test` DataFrame that was read from test.csv
test

Unnamed: 0,image_id
0,Test_0
1,Test_1
2,Test_2
3,Test_3
4,Test_4
...,...
1816,Test_1816
1817,Test_1817
1818,Test_1818
1819,Test_1819


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation; stratify on the four label columns
# and fix the seed so the split is reproducible
label_columns = ['healthy', 'multiple_diseases', 'rust', 'scab']
train_df, val_df = train_test_split(
    train,
    test_size=0.2,
    stratify=train[label_columns],
    random_state=42,
)

In [None]:
import os
import shutil

# Root folders for the per-class training and validation images
base_train_dir = '/content/drive/MyDrive/plant-pathology-2020-fgvc7/train'
base_val_dir = '/content/drive/MyDrive/plant-pathology-2020-fgvc7/validation'

# Create one sub-folder per class under the validation root
# (exist_ok=True makes this safe to re-run)
for class_name in ('healthy', 'multiple_diseases', 'rust', 'scab'):
    class_dir = os.path.join(base_val_dir, class_name)
    os.makedirs(class_dir, exist_ok=True)


In [None]:
# Create one sub-folder per class under the training root
# (exist_ok=True makes this safe to re-run)
for class_name in ('healthy', 'multiple_diseases', 'rust', 'scab'):
    class_dir = os.path.join(base_train_dir, class_name)
    os.makedirs(class_dir, exist_ok=True)

In [None]:
img_dir = '/content/drive/MyDrive/plant-pathology-2020-fgvc7/images'


def copy_images_to_class_dirs(df, dest_root, src_dir=img_dir,
                              classes=('healthy', 'multiple_diseases', 'rust', 'scab')):
    """Copy every image listed in ``df`` into ``dest_root/<class>/``.

    The source images are copied, not moved, so ``src_dir`` is left intact.

    Args:
        df: DataFrame with an ``image_id`` column and one indicator column per
            entry of ``classes``.
        dest_root: Folder containing one sub-folder per class. The sub-folders
            are assumed to exist already (they are not created here).
        src_dir: Folder holding the source ``<image_id>.jpg`` files.
        classes: Label column names, which are also the sub-folder names.

    Raises:
        FileNotFoundError: If a source image does not exist.
    """
    for _, row in df.iterrows():
        img_id = row['image_id']
        img_path = f'{src_dir}/{img_id}.jpg'  # Adjust image format as necessary
        for cls in classes:
            # A truthy indicator means the image carries this label; an image
            # with several truthy columns is copied into each matching folder.
            if row[cls]:
                shutil.copy(img_path, os.path.join(dest_root, cls))


# Populate the per-class folders for the training and validation splits
copy_images_to_class_dirs(train_df, base_train_dir)
copy_images_to_class_dirs(val_df, base_val_dir)

In [None]:
# Batch size shared by the training and validation generators
batch_size = 16

# Training images are rescaled to [0, 1] and lightly augmented (horizontal
# flips, rotations up to 15 degrees); validation images are only rescaled.
# NOTE(review): the Keras docs describe `vgg16.preprocess_input` as the input
# preprocessing for VGG16; here pixels are only divided by 255. Left as-is
# because any inference code must use the same preprocessing as training --
# TODO evaluate switching both together.
train_datagen = ImageDataGenerator(rescale=1.0/255, horizontal_flip=True, rotation_range=15)
validation_datagen = ImageDataGenerator(rescale=1.0/255)

# Labels come from the sub-folder names; class_mode='categorical' yields
# one-hot label vectors. Both generators use `batch_size`, so changing the
# variable above affects training and validation alike.
train_generator = train_datagen.flow_from_directory(
    '/content/drive/MyDrive/plant-pathology-2020-fgvc7/train',
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical'
)
validation_generator = validation_datagen.flow_from_directory(
    '/content/drive/MyDrive/plant-pathology-2020-fgvc7/validation',
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical'
)


Found 1456 images belonging to 4 classes.
Found 365 images belonging to 4 classes.


In [None]:
# Show the class-name -> label-index mapping used by the validation generator
print(validation_generator.class_indices)


{'healthy': 0, 'multiple_diseases': 1, 'rust': 2, 'scab': 3}


In [None]:
# Train the model for 30 epochs, evaluating on the validation generator after
# each epoch; `history` keeps the per-epoch metrics
history = model.fit(
    train_generator,
    epochs=30,
    validation_data=validation_generator
)

# Save the trained model twice: legacy HDF5 and the native Keras format.
# The second path needs an explicit extension: Keras 3 raises a ValueError for
# a path with neither `.keras` nor `.h5`, which would fail the cell only after
# the whole training run has finished.
# NOTE(review): the captured training output looks like Keras 3 -- confirm the
# runtime version.
model.save("vgg16_transfer_learning_modelx.h5")
model.save("vgg16_transfer_learning_modelx.keras")

Epoch 1/10


  self._warn_if_super_not_called()


[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1260s[0m 14s/step - accuracy: 0.4607 - loss: 1.1999 - val_accuracy: 0.6301 - val_loss: 0.9228
Epoch 2/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1296s[0m 14s/step - accuracy: 0.6319 - loss: 0.9281 - val_accuracy: 0.6685 - val_loss: 0.8945
Epoch 3/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1257s[0m 14s/step - accuracy: 0.6997 - loss: 0.7607 - val_accuracy: 0.6438 - val_loss: 0.9634
Epoch 4/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1249s[0m 14s/step - accuracy: 0.7417 - loss: 0.6536 - val_accuracy: 0.6767 - val_loss: 0.8309
Epoch 5/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1247s[0m 14s/step - accuracy: 0.7932 - loss: 0.5740 - val_accuracy: 0.6822 - val_loss: 0.9060
Epoch 6/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1282s[0m 14s/step - accuracy: 0.8254 - loss: 0.5229 - val_accuracy: 0.6795 - val_loss: 0.8718
Epoch 7/10
[1m91/91[0m [32m━━━

