In [None]:
!pip install tensorflow
!pip install scikit-learn
!pip install torch torchvision
!pip install torch-geometric
!pip install optuna
!pip install scikit-learn
!pip install torch torchvision torchaudio
!pip install torch-geometric
!pip install timm
# # or for huggingface transformers if you'd like to use that:
!pip install transformers
!pip install scikit-learn
!pip install matplotlib opencv-python
!pip install tensorflow
!pip install keras-tuner

In [2]:
import kagglehub

# Download the latest version of the dataset from Kaggle and keep the
# location that kagglehub returns for the downloaded files.
path = kagglehub.dataset_download("sivm205/soybean-diseased-leaf-dataset")

# Show where the dataset files are located
print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/soybean-diseased-leaf-dataset


In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, Subset

from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# Import PyTorch Geometric modules
from torch_geometric.nn import GCNConv, global_mean_pool

import optuna


In [19]:
import tensorflow as tf
import tensorflow.keras.layers as layers
import numpy as np
import networkx as nx
import scipy.sparse as sp
import os
import cv2
from tensorflow.keras.applications import MobileNetV2
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import optuna
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Dataset directory: root folder that load_dataset() scans, one sub-folder per class
dataset_dir = "/kaggle/input/soybean-diseased-leaf-dataset"

# Load dataset images and labels
def load_dataset(dataset_dir, img_size=(224, 224)):
    """Load every readable image under ``dataset_dir`` with its class index.

    ``dataset_dir`` is expected to contain one sub-directory per class. Class
    indices follow the sorted order of the sub-directory names.

    Args:
        dataset_dir: Root directory holding one folder per class.
        img_size: Target size passed to ``cv2.resize`` for every image.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays. ``images`` holds the
        resized pixels divided by 255 and ``labels`` the matching integer class
        indices. Both arrays are empty when no readable image is found.
    """
    # Only directories are classes. A stray file in the root (README, .DS_Store,
    # ...) would otherwise be given a label and crash os.listdir() below.
    class_names = sorted(
        entry for entry in os.listdir(dataset_dir)
        if os.path.isdir(os.path.join(dataset_dir, entry))
    )

    images = []
    labels = []
    skipped = 0

    for class_idx, class_name in enumerate(class_names):
        class_path = os.path.join(dataset_dir, class_name)
        # os.listdir() order is arbitrary; sorting keeps the sample order, and
        # therefore any seeded train/test split, reproducible.
        for img_name in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, img_name)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals an unreadable/non-image file by returning
                # None instead of raising; skip it rather than crash in resize.
                skipped += 1
                continue
            # NOTE(review): OpenCV loads channels in BGR order -- confirm that
            # this is what the downstream model should receive.
            img = cv2.resize(img, img_size)
            images.append(img / 255.0)  # Normalize image
            labels.append(class_idx)

    if skipped:
        print(f"Skipped {skipped} unreadable file(s) in {dataset_dir}")

    return np.array(images), np.array(labels)

# Load the whole dataset into memory: the image array and the integer class labels
images, labels = load_dataset(dataset_dir)

# Data Preprocessing & Augmentation
def preprocess_data(X, y, batch_size=32, rescale=1./255):
    """Wrap ``X``/``y`` in an augmenting Keras batch iterator.

    Augmentation consists of random rotations (up to 20 degrees), horizontal and
    vertical shifts (up to 20% of the image size) and horizontal flips.

    Args:
        X: Image array to draw batches from.
        y: Labels aligned with ``X``.
        batch_size: Number of samples per generated batch.
        rescale: Factor every image is multiplied by. NOTE: ``load_dataset``
            in this notebook already divides pixels by 255, so pass
            ``rescale=None`` for its output to avoid scaling the data twice.

    Returns:
        The iterator produced by ``ImageDataGenerator.flow``.
    """
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        rescale=rescale,
    )
    return datagen.flow(X, y, batch_size=batch_size)

# MobileNetV2 backbone wrapped as a fixed-size feature extractor
def create_cnn(input_shape):
    """Build a Keras model mapping images to 128-dimensional feature vectors.

    The ImageNet-pretrained MobileNetV2 (without its classification top) is
    frozen; its output is global-average-pooled and fed through a ReLU dense
    layer with 128 units.

    Args:
        input_shape: Shape of a single input image, e.g. ``(224, 224, 3)``.

    Returns:
        A ``tf.keras.Model`` taking images and returning the 128-d features.
    """
    backbone = MobileNetV2(input_shape=input_shape, include_top=False, weights='imagenet')
    backbone.trainable = False  # keep the pretrained weights fixed

    image_input = tf.keras.Input(shape=input_shape)
    feature_map = backbone(image_input)
    pooled = layers.GlobalAveragePooling2D()(feature_map)
    embedding = layers.Dense(128, activation='relu')(pooled)
    return tf.keras.Model(image_input, embedding)

# Extract features from images using MobileNetV2
def extract_features(images, batch_size=16):
    """Run every image through a freshly built ``create_cnn`` model.

    Note that ``create_cnn`` is called on every invocation, so the dense
    projection on top of MobileNetV2 is newly initialized (untrained) each time
    and the returned features differ between calls.

    Args:
        images: Batch of images with the sample axis first.
        batch_size: Number of images per prediction step; kept small to limit
            memory use.

    Returns:
        The array returned by ``Model.predict``: one feature vector per image.
    """
    # Size the network from the data instead of assuming 224x224 RGB input
    input_shape = tuple(np.shape(images)[1:])
    cnn_model = create_cnn(input_shape)
    return cnn_model.predict(images, batch_size=batch_size)

# Build a similarity graph over the image feature vectors
def create_graph(image_features, threshold=0.8):
    """Connect every pair of samples whose cosine similarity exceeds ``threshold``.

    Args:
        image_features: 2-D array with one feature vector per row.
        threshold: Similarity a pair must exceed (strictly) to be linked.

    Returns:
        A ``scipy.sparse.coo_matrix`` of 0/1 integers with one row and one
        column per sample.
    """
    pairwise_similarity = cosine_similarity(image_features)
    edge_mask = pairwise_similarity > threshold
    return sp.coo_matrix(edge_mask.astype(int))

# Graph Convolutional Network (GCN) Layer
def create_gcn(adj_matrix, features, num_classes=10):
    """Build the classification head that consumes the CNN feature vectors.

    NOTE: despite the name, this model performs no graph convolution. It is a
    stack of dense layers (64 -> 32 -> ``num_classes``) and ``adj_matrix`` is
    accepted only for interface compatibility; it is never read.

    Args:
        adj_matrix: Unused.
        features: Any object whose ``shape[1]`` is the feature dimension; only
            that value is read, to size the input layer.
        num_classes: Number of softmax outputs.

    Returns:
        A ``tf.keras.Model`` mapping feature vectors to class probabilities.
    """
    input_features = tf.keras.Input(shape=(features.shape[1],))
    x = layers.Dense(64, activation='relu')(input_features)
    x = layers.Dense(32, activation='relu')(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)  # Output layer for classification
    return tf.keras.Model(inputs=input_features, outputs=outputs)

# Sequential CNN-GCN Model
def create_sequential_cnn_gnn(input_shape, adj_matrix):
    """Chain the CNN feature extractor and the ``create_gcn`` head into one model.

    Args:
        input_shape: Shape of a single input image, e.g. ``(224, 224, 3)``.
        adj_matrix: Forwarded unchanged to ``create_gcn``.

    Returns:
        A ``tf.keras.Model`` mapping images to the output of the head.
    """
    cnn_model = create_cnn(input_shape)
    cnn_output = cnn_model.output  # CNN extracts features first

    # Size the head from the CNN's own symbolic output. The previous code read
    # the notebook-level `node_features` array here, which silently tied this
    # function to a variable created elsewhere; `create_gcn` only needs
    # `shape[1]`, which the CNN output provides directly.
    gcn_model = create_gcn(adj_matrix, cnn_output)

    model = tf.keras.Model(inputs=cnn_model.input, outputs=gcn_model(cnn_output))
    return model

# Embed every image with the CNN, then link images with similar embeddings
node_features = extract_features(images)
adj_matrix = create_graph(node_features)

# Hold out 20% of the samples for testing (fixed seed)
train_data, test_data, train_labels, test_labels = train_test_split(
    images, labels, test_size=0.2, random_state=42
)

# Build the combined model and configure its training objective
sequential_model = create_sequential_cnn_gnn((224, 224, 3), adj_matrix)
sequential_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit for 20 epochs on one-hot labels; the held-out split doubles as validation data
sequential_model.fit(
    train_data,
    tf.keras.utils.to_categorical(train_labels, num_classes=10),
    epochs=20,
    batch_size=32,
    validation_data=(
        test_data,
        tf.keras.utils.to_categorical(test_labels, num_classes=10),
    ),
)

# Evaluate the model
def evaluate_model(model, test_data, test_labels):
    """Print and return accuracy, precision, recall and F1 for ``model``.

    Precision, recall and F1 are averaged over classes weighted by support.

    Args:
        model: Object whose ``predict(test_data)`` returns per-class scores,
            one row per sample.
        test_data: Inputs forwarded to ``model.predict``.
        test_labels: Ground truth, either one-hot encoded (2-D) or already
            given as integer class indices (1-D).

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``.
    """
    test_labels = np.asarray(test_labels)
    if test_labels.ndim > 1:
        # Convert one-hot encoding to categorical labels; 1-D input is assumed
        # to hold class indices already and is used as-is.
        test_labels = np.argmax(test_labels, axis=1)

    predictions = model.predict(test_data)
    predicted_labels = np.argmax(predictions, axis=1)

    acc = accuracy_score(test_labels, predicted_labels)
    precision = precision_score(test_labels, predicted_labels, average='weighted')
    recall = recall_score(test_labels, predicted_labels, average='weighted')
    f1 = f1_score(test_labels, predicted_labels, average='weighted')

    print(f"Accuracy: {acc:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")

    return acc, precision, recall, f1

# Report test-set metrics, then write the trained model to disk
evaluate_model(
    sequential_model,
    test_data,
    tf.keras.utils.to_categorical(test_labels, num_classes=10),
)
sequential_model.save('sequential_cnn_gnn.h5')


[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 64ms/step
Epoch 1/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 532ms/step - accuracy: 0.4212 - loss: 1.7785 - val_accuracy: 0.8794 - val_loss: 0.5699
Epoch 2/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - accuracy: 0.9106 - loss: 0.4581 - val_accuracy: 0.9149 - val_loss: 0.2917
Epoch 3/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - accuracy: 0.9436 - loss: 0.1829 - val_accuracy: 0.9362 - val_loss: 0.1943
Epoch 4/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - accuracy: 0.9755 - loss: 0.1116 - val_accuracy: 0.9504 - val_loss: 0.1683
Epoch 5/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - accuracy: 0.9726 - loss: 0.0849 - val_accuracy: 0.9433 - val_loss: 0.1612
Epoch 6/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - accuracy: 0.9878 - loss: 0.0540 