In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import kagglehub
path = kagglehub.dataset_download("nirmalsankalana/rice-leaf-disease-image")
print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/rice-leaf-disease-image


In [4]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os
import matplotlib.pyplot as plt
import torchvision.models as models
import tensorflow as tf
from tqdm import tqdm
import seaborn as sns
import cv2

In [5]:
data_dir = "/kaggle/input/rice-leaf-disease-image"
diseases = os.listdir(data_dir)
diseases

['Tungro', 'Bacterialblight', 'Blast', 'Brownspot']

In [6]:
def load_and_resize_image(file_path, target_shape=(128, 128)):
    image = cv2.imread(file_path)
    resized_image = cv2.resize(image, target_shape)
    return resized_image

In [7]:
def load_image_class_by_directory(image_dir):
    image_files = os.listdir(image_dir)
    images = []
    for file in image_files:
        if file.endswith('.jpg') or file.endswith('.JPG'):   
            image_path = os.path.join(image_dir, file)
            resized_image = load_and_resize_image(image_path)
            images.append(resized_image)

    print(f"Num of images: {len(images)}")        
    print(f"Single image shape before flattening: {images[0].shape}")
    return images

In [8]:
load_Bacterialblight = load_image_class_by_directory('/kaggle/input/rice-leaf-disease-image/Bacterialblight')
load_Blast = load_image_class_by_directory('/kaggle/input/rice-leaf-disease-image/Blast')
load_Brownspot = load_image_class_by_directory('/kaggle/input/rice-leaf-disease-image/Brownspot')
load_Tungro = load_image_class_by_directory('/kaggle/input/rice-leaf-disease-image/Tungro')

Num of images: 1584
Single image shape before flattening: (128, 128, 3)
Num of images: 1440
Single image shape before flattening: (128, 128, 3)
Num of images: 1600
Single image shape before flattening: (128, 128, 3)
Num of images: 1308
Single image shape before flattening: (128, 128, 3)


In [9]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import datasets, layers, models

In [10]:
import os
import shutil
import random
from pathlib import Path

def split_dataset(input_dir, output_dir, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15, seed=42):
    random.seed(seed)
    class_names = os.listdir(input_dir)

    for class_name in class_names:
        class_path = os.path.join(input_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        images = os.listdir(class_path)
        random.shuffle(images)

        total = len(images)
        train_end = int(train_ratio * total)
        val_end = train_end + int(val_ratio * total)

        splits = {
            'train': images[:train_end],
            'val': images[train_end:val_end],
            'test': images[val_end:]
        }

        for split, split_images in splits.items():
            split_class_dir = Path(output_dir) / split / class_name
            split_class_dir.mkdir(parents=True, exist_ok=True)
            for img in split_images:
                src = os.path.join(class_path, img)
                dst = os.path.join(split_class_dir, img)
                shutil.copy(src, dst)

split_dataset(
    input_dir= '/kaggle/input/rice-leaf-disease-image',      
    output_dir='/kaggle/working/data_split',  # nơi lưu train/test/val
    train_ratio=0.7,
    val_ratio=0.15,
    test_ratio=0.15
)


In [11]:
train_data = '/kaggle/working/data_split/train'
test_data = '/kaggle/working/data_split/test'
val_data = '/kaggle/working/data_split/val'

In [12]:
img_size = (128, 128)
batch_size = 32

train_ds = tf.keras.utils.image_dataset_from_directory(
    train_data,
    image_size=img_size,
    batch_size=batch_size
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    val_data,
    image_size=img_size,
    batch_size=batch_size
)

test_ds = tf.keras.utils.image_dataset_from_directory(
    test_data,
    image_size=img_size,
    batch_size=batch_size
)

Found 4150 files belonging to 4 classes.


I0000 00:00:1748741989.571624      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1748741989.572372      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


Found 889 files belonging to 4 classes.
Found 893 files belonging to 4 classes.


In [13]:
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

In [14]:
num_classes = len(diseases)

resnet = ResNet50(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
resnet.trainable = False

model = models.Sequential([
    resnet,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, activation='softmax') 
])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [15]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
              metrics=['accuracy'])

In [16]:
model.summary()

In [17]:
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=50
)

Epoch 1/50


  output, from_logits = _get_logits(
I0000 00:00:1748742052.067435     107 service.cc:148] XLA service 0x797e88015500 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1748742052.068219     107 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1748742052.068238     107 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1748742053.546982     107 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  1/130[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m36:04[0m 17s/step - accuracy: 0.3125 - loss: 1.7596

I0000 00:00:1748742057.072501     107 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 116ms/step - accuracy: 0.8359 - loss: 0.4519 - val_accuracy: 0.9944 - val_loss: 0.0356
Epoch 2/50
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 32ms/step - accuracy: 0.9991 - loss: 0.0152 - val_accuracy: 1.0000 - val_loss: 0.0089
Epoch 3/50
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 32ms/step - accuracy: 0.9984 - loss: 0.0089 - val_accuracy: 1.0000 - val_loss: 0.0035
Epoch 4/50
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 32ms/step - accuracy: 1.0000 - loss: 0.0014 - val_accuracy: 1.0000 - val_loss: 0.0022
Epoch 5/50
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 32ms/step - accuracy: 1.0000 - loss: 9.1475e-04 - val_accuracy: 1.0000 - val_loss: 0.0017
Epoch 6/50
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 32ms/step - accuracy: 1.0000 - loss: 6.3576e-04 - val_accuracy: 1.0000 - val_loss: 0.0015
Epoch 7/50
[1m130/130

In [18]:
loss, acc = model.evaluate(test_ds)
print(f'Test Accuracy: {acc:.4f}')


[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 160ms/step - accuracy: 0.9990 - loss: 0.0029
Test Accuracy: 0.9989
