In [None]:
# Install the Kaggle CLI. %pip (rather than !pip) installs into the environment
# of the running kernel, so the package is importable from this notebook.
%pip install -q kaggle

In [None]:
# Create the Kaggle config directory. -p makes the cell safe to re-run:
# it does not error if ~/.kaggle already exists.
!mkdir -p ~/.kaggle

In [None]:
# Copy kaggle.json (expected in the current working directory) into ~/.kaggle.
!cp kaggle.json ~/.kaggle

In [None]:
# Make the credentials file readable/writable by the owner only.
# Uses ~ (like the mkdir/cp cells) instead of hard-coding /root, so the cell
# also works when the notebook is not running as root.
!chmod 600 ~/.kaggle/kaggle.json


In [None]:
# Download the imbikramsaha/paddy-doctor dataset archive with the Kaggle CLI.
!kaggle datasets download -d imbikramsaha/paddy-doctor

Downloading paddy-doctor.zip to /content
 99% 1.01G/1.02G [00:12<00:00, 118MB/s] 
100% 1.02G/1.02G [00:13<00:00, 83.4MB/s]


In [None]:
# Extract the dataset archive.
# -q suppresses the per-file listing (thousands of "inflating:" lines);
# -o overwrites existing files without prompting, so re-running the cell does
# not stall waiting for interactive input.
!unzip -q -o /content/paddy-doctor.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: paddy-disease-classification/train_images/downy_mildew/100992.jpg  
  inflating: paddy-disease-classification/train_images/downy_mildew/100995.jpg  
  inflating: paddy-disease-classification/train_images/downy_mildew/101015.jpg  
  inflating: paddy-disease-classification/train_images/downy_mildew/101020.jpg  
  inflating: paddy-disease-classification/train_images/downy_mildew/101022.jpg  
  inflating: paddy-disease-classification/train_images/downy_mildew/101024.jpg  
  inflating: paddy-disease-classification/train_images/downy_mildew/101034.jpg  
  inflating: paddy-disease-classification/train_images/downy_mildew/101050.jpg  
  inflating: paddy-disease-classification/train_images/downy_mildew/101078.jpg  
  inflating: paddy-disease-classification/train_images/downy_mildew/101096.jpg  
  inflating: paddy-disease-classification/train_images/downy_mildew/101103.jpg  
  inflating: paddy-disease-classification/tr

In [None]:
# Install mpi4py into the running kernel's environment (%pip, not !pip);
# -q keeps the source-build log out of the notebook output.
%pip install -q mpi4py


Collecting mpi4py
  Downloading mpi4py-3.1.5.tar.gz (2.5 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.5/2.5 MB 10.9 MB/s eta 0:00:00
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Building wheels for collected packages: mpi4py
  Building wheel for mpi4py (pyproject.toml) ... done
  Created wheel for mpi4py: filename=mpi4py-3.1.5-cp310-cp310-linux_x86_64.whl size=2746503 sha256=00586c1e32fcd14891318bb9b431f80f2b3036de411071da47d342d70174bf00
  Stored in directory: /root/.cache/pip/wheels/18/2b/7f/c852523089e9182b45fca50ff56f49a51eeb6284fd25a66713
Successfully built mpi4py
Installing collected packages: mpi4py
Successfully installed mpi4py-3.1.5


In [None]:
import math

# Steps per epoch = number of batches needed to cover the training subset once.
# TRAIN_SAMPLES is the training-subset size reported by the data loader
# ("Using 8846 files for training"); BATCH_SIZE is the batch size the training
# pipeline is built with (70). The previous divisor (19) did not match that
# batch size, so fit() was asked for more steps than the dataset can supply.
TRAIN_SAMPLES = 8846
BATCH_SIZE = 70
steps_per_epoch = math.ceil(TRAIN_SAMPLES / BATCH_SIZE)


In [None]:
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, BatchNormalization
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from kaggle.api.kaggle_api_extended import KaggleApi
from mpi4py import MPI

# Grab the world communicator and this process's rank / the total process count
comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()

# Training and validation data are both read from the same image directory
validation_data_path = train_data_path = 'paddy-disease-classification/train_images/'

# Load and preprocess data
def load_and_preprocess_data(data_path, batch_size, image_size, subset=None, validation_split=None):
    """Create a batched image dataset from the images under ``data_path``.

    Thin wrapper around ``tf.keras.preprocessing.image_dataset_from_directory``
    that always shuffles with a fixed seed (123). Despite the name, no pixel
    preprocessing is applied here; the loader's output is returned unchanged.

    Args:
        data_path: Directory passed to the Keras loader.
        batch_size: Number of images per batch.
        image_size: Size the loader resizes images to, forwarded as-is.
        subset: Forwarded as-is; the script passes 'training' or 'validation'.
        validation_split: Forwarded as-is; the script passes the held-out fraction.

    Returns:
        The dataset object produced by ``image_dataset_from_directory``.
    """
    loader_options = dict(
        batch_size=batch_size,
        image_size=image_size,
        shuffle=True,
        seed=123,
        subset=subset,
        validation_split=validation_split,
    )
    return tf.keras.preprocessing.image_dataset_from_directory(data_path, **loader_options)

# Define model
def create_model(input_shape=(240, 240, 3), num_classes=10):
    """Build and compile a transfer-learning classifier on a frozen ResNet50.

    The ImageNet-pretrained ResNet50 backbone is frozen and used as a feature
    extractor; only the dense head stacked on top of it is trainable.

    Args:
        input_shape: Shape of one input image, passed to both the model input
            and the ResNet50 backbone. Defaults to (240, 240, 3).
        num_classes: Number of units in the final softmax layer. Defaults to 10.

    Returns:
        A compiled ``Sequential`` model (sparse categorical cross-entropy loss,
        Adam optimizer, accuracy metric).
    """
    pretrained_model = ResNet50(include_top=False,
                                input_shape=input_shape,
                                pooling='avg',
                                weights='imagenet')
    # Freeze the whole backbone so only the head is updated during training.
    pretrained_model.trainable = False

    resnet_model = Sequential()
    resnet_model.add(tf.keras.Input(shape=input_shape))
    # The ImageNet weights expect ResNet-specific input preprocessing. The data
    # pipeline feeds unprocessed pixel values, so apply preprocess_input inside
    # the model; this also keeps training and inference inputs consistent.
    resnet_model.add(tf.keras.layers.Lambda(tf.keras.applications.resnet50.preprocess_input))
    resnet_model.add(pretrained_model)
    # With pooling='avg' the backbone output is already flat, so this Flatten
    # is a no-op; it is kept so the head's layer stack is unchanged.
    resnet_model.add(Flatten())
    resnet_model.add(BatchNormalization())
    resnet_model.add(Dense(512, activation='relu'))
    resnet_model.add(BatchNormalization())
    resnet_model.add(Dense(num_classes, activation='softmax'))
    resnet_model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )
    return resnet_model

# Load the training and validation subsets. Both calls read the same directory
# with the same split fraction, and the loader uses a fixed seed for both.
global_batch_size = 70
training_data = load_and_preprocess_data(train_data_path, batch_size=global_batch_size, image_size=(240, 240), subset='training', validation_split=0.15)
validation_data = load_and_preprocess_data(validation_data_path, batch_size=global_batch_size, image_size=(240, 240), subset='validation', validation_split=0.15)

# Split data among processes.
# Re-batching alone leaves every rank iterating over the full training set, so
# the un-batched stream is sharded by rank first: rank r keeps every size-th
# example starting at r. With a single process, shard(1, 0) keeps everything.
# max(1, ...) keeps the batch size valid when there are more ranks than
# global_batch_size (the plain division would truncate to 0).
per_rank_batch_size = max(1, global_batch_size // size)
training_data = (
    training_data.unbatch()
    .shard(num_shards=size, index=rank)
    .batch(per_rank_batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
# Validation is only re-batched, not sharded, so every rank evaluates on the
# full validation subset.
validation_data = (
    validation_data.unbatch()
    .batch(per_rank_batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

# Define callbacks
checkpoint_path = "model_checkpoint.h5"
checkpoint = ModelCheckpoint(filepath=checkpoint_path, save_best_only=True, save_weights_only=True, verbose=1)
# The checkpoint callback was previously created but never handed to fit().
# Only rank 0 writes it, so concurrent ranks do not clobber the same file.
fit_callbacks = [checkpoint] if rank == 0 else []

# Create model
model = create_model()

# Compile the model (re-compiles with an explicit Adam optimizer instance)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy']
)

# Train the model. steps_per_epoch is defined in an earlier cell.
# The result is assigned so the cell does not echo the History object's repr.
history = model.fit(
    training_data,
    steps_per_epoch=steps_per_epoch,
    epochs=1,
    validation_data=validation_data,
    callbacks=fit_callbacks,
)

# No explicit MPI.Finalize(): mpi4py finalizes MPI automatically at interpreter
# exit, and finalizing here would make the MPI calls above fail if this cell
# is re-run in the same kernel.


Found 10407 files belonging to 10 classes.
Using 8846 files for training.
Found 10407 files belonging to 10 classes.
Using 1561 files for validation.



