<a href="https://colab.research.google.com/github/mahal7446/Kisan_Sathi/blob/main/Kisan_Sathi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Connecting Google Colab with Google Drive to access datasets and save outputs
# This allows us to work directly with files stored in Google Drive

In [None]:
# Mount Google Drive into the Colab filesystem so the dataset and any saved
# outputs can be reached through an ordinary file path.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
# Checking where the dataset is located inside Google Drive
# This helps confirm the correct folder path before starting preprocessing

In [None]:
import os

# List every top-level entry of the mounted Drive so the dataset folder can be
# located. The list is the cell's displayed value.
# NOTE: the listing includes every file in the Drive root, not just the
# dataset; consider clearing this cell's output before sharing the notebook.
drive_entries = os.listdir('/content/drive/MyDrive')
drive_entries

['Colab Notebooks',
 'Getting started.pdf',
 'Untitled spreadsheet (2).gsheet',
 'Untitled spreadsheet (1).gsheet',
 'Resume.gdoc',
 'Untitled spreadsheet.gsheet',
 'Google Earth',
 'student_marks.csv.gsheet',
 'Google AI Studio (1)',
 'Google AI Studio',
 'impquestions end term.gdoc',
 'DSA for PLACEMENTS.xlsx',
 'Quantitative analysis .gdoc',
 'MyXvBcppsW2FkNYCX_ifobHAoMjQs9s6bKS_68af14bacc98b1c3f8b892a4_1756320007401_completion_certificate_copy.pdf',
 'Internship 6.0 social media post template.pptx',
 'Copy of AgriDetect AI: Web-Based Plant Disease Identification System.docx',
 'Rice and Pulses',
 'Rice and Pulses.zip',
 'Corn_1k.zip',
 'BlackGram_1k.zip',
 'Mental Health Check-in Form.gform',
 'Kisan_Sathi']

In [None]:
# Listing the crop folders inside the dataset root
# This confirms the dataset is organized as one folder per crop before going further

In [None]:
# Root folder of the dataset on Drive; its immediate children are displayed
# as the cell's value.
DATASET_ROOT = '/content/drive/MyDrive/Kisan_Sathi'

crop_folders = os.listdir(DATASET_ROOT)
crop_folders

['Corn', 'BlackGram']

In [None]:
# Counting the images in each class and checking for corrupted or unreadable images
# This cell only reports what it finds; no files are deleted here

In [None]:
import os
from collections import Counter
from PIL import Image

DATASET_ROOT = '/content/drive/MyDrive/Kisan_Sathi'

# class_counts maps "<crop>/<class>" to the number of image files found there.
# corrupted collects the full path of every image that failed verification.
class_counts = Counter()
corrupted = []

# Walk the layout DATASET_ROOT/<crop>/<class>/<image files>; anything that is
# not a directory at the crop or class level is skipped.
for crop in os.listdir(DATASET_ROOT):
    crop_path = os.path.join(DATASET_ROOT, crop)
    if not os.path.isdir(crop_path):
        continue

    for cls in os.listdir(crop_path):
        cls_path = os.path.join(crop_path, cls)
        if not os.path.isdir(cls_path):
            continue

        for img_name in os.listdir(cls_path):
            # Only files with a recognised image extension are counted.
            if not img_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue

            img_path = os.path.join(cls_path, img_name)
            class_counts[f"{crop}/{cls}"] += 1
            try:
                # verify() checks the file for breakage; per the Pillow docs it
                # does not decode the pixel data, so this is a quick integrity
                # check rather than a full load.
                with Image.open(img_path) as img:
                    img.verify()
            except Exception:
                # `Exception` instead of a bare `except:` so that interrupting
                # the kernel (KeyboardInterrupt) stops the scan instead of
                # being recorded as a corrupted image.
                corrupted.append(img_path)

print("Image count per class:")
for k, v in class_counts.items():
    print(f"{k}: {v}")

print("\nCorrupted images found:", len(corrupted))
# Show which files failed so they can be inspected before anything is deleted.
for bad_path in corrupted:
    print("  ", bad_path)

Image count per class:
Corn/Gray_Leaf_Spot: 250
Corn/Healthy 250: 250
Corn/Blight: 250
Corn/Common_Rust: 250
BlackGram/Powdery Mildew: 180
BlackGram/Anthracnose: 230
BlackGram/Healthy: 221
BlackGram/Yellow Mosaic: 224
BlackGram/Leaf Crinckle: 152

Corrupted images found: 0


In [None]:
# Removing corrupted or unreadable images from the dataset
# This avoids errors when the images are loaded during model training

In [None]:
import os
from PIL import Image

DATASET_ROOT = '/content/drive/MyDrive/Kisan_Sathi'

# Number of image files deleted because they failed verification.
removed = 0

# Walk the layout DATASET_ROOT/<crop>/<disease>/<image files> and delete every
# image that Pillow cannot open or verify. This permanently modifies the
# dataset on Drive.
for crop in os.listdir(DATASET_ROOT):
    crop_path = os.path.join(DATASET_ROOT, crop)
    if not os.path.isdir(crop_path):
        continue

    for disease in os.listdir(crop_path):
        disease_path = os.path.join(crop_path, disease)
        if not os.path.isdir(disease_path):
            continue

        for img in os.listdir(disease_path):
            if not img.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue

            img_path = os.path.join(disease_path, img)
            try:
                # The context manager closes the file handle before the
                # handler below runs, so the file is never deleted while
                # still open.
                with Image.open(img_path) as im:
                    im.verify()
            except Exception:
                # `Exception` instead of a bare `except:`: a bare clause also
                # catches KeyboardInterrupt, so stopping the cell mid-scan
                # would delete a perfectly good image.
                os.remove(img_path)
                removed += 1
                print("Removed corrupted image:", img_path)

print("Total corrupted images removed:", removed)

Total corrupted images removed: 0


In [None]:
# Splitting the dataset into training (70%), validation (15%), and testing (15%) sets
# Keeping the three sets separate lets us evaluate the model properly and helps prevent data leakage

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

SOURCE_DIR = '/content/drive/MyDrive/Kisan_Sathi'
TARGET_DIR = '/content/drive/MyDrive/Kisan_Sathi_Split'

splits = ['train', 'val', 'test']

# 30% of each class is held out, then halved into validation and test,
# giving roughly a 70/15/15 split per class.
HOLDOUT_FRACTION = 0.30
TEST_SHARE_OF_HOLDOUT = 0.50
SPLIT_SEED = 42

for split in splits:
    os.makedirs(os.path.join(TARGET_DIR, split), exist_ok=True)

# Source layout: SOURCE_DIR/<crop>/<disease>/<image files>
# Target layout: TARGET_DIR/<split>/<crop>/<disease>/<image files>
# NOTE: TARGET_DIR is not cleared first, so files copied by an earlier run
# stay in place. If the source images changed since then, delete TARGET_DIR
# before re-running to avoid the same image ending up in two splits.
for crop in os.listdir(SOURCE_DIR):
    crop_path = os.path.join(SOURCE_DIR, crop)
    if not os.path.isdir(crop_path):
        continue

    for disease in os.listdir(crop_path):
        disease_path = os.path.join(crop_path, disease)
        if not os.path.isdir(disease_path):
            continue

        # os.listdir returns names in arbitrary order; sorting makes the
        # seeded split below reproducible across runs and machines.
        images = sorted(
            img for img in os.listdir(disease_path)
            if img.lower().endswith(('.jpg', '.jpeg', '.png'))
        )

        train_imgs, temp_imgs = train_test_split(
            images, test_size=HOLDOUT_FRACTION, random_state=SPLIT_SEED)

        val_imgs, test_imgs = train_test_split(
            temp_imgs, test_size=TEST_SHARE_OF_HOLDOUT, random_state=SPLIT_SEED)

        for split_name, img_list in zip(
            splits,
            [train_imgs, val_imgs, test_imgs]
        ):
            dest_dir = os.path.join(
                TARGET_DIR, split_name, crop, disease)
            os.makedirs(dest_dir, exist_ok=True)

            for img in img_list:
                shutil.copy(
                    os.path.join(disease_path, img),
                    os.path.join(dest_dir, img)
                )

print("Dataset split completed.")

Dataset split completed.


In [None]:
# Normalizing image pixel values and applying augmentation to the training data
# This helps the model learn better and reduces overfitting

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

BASE_DIR = '/content/drive/MyDrive/Kisan_Sathi_Split'

train_dir = f"{BASE_DIR}/train"
val_dir   = f"{BASE_DIR}/val"
test_dir  = f"{BASE_DIR}/test"

# Options shared by all three directory iterators: images are resized to
# 224x224, served in batches of 32, with one-hot ('categorical') labels.
flow_options = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

# Only the training generator augments (rotation, zoom, horizontal flip);
# all three rescale pixel values by 1/255.
train_gen = ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=20,
    zoom_range=0.2,
    horizontal_flip=True,
)
val_gen = ImageDataGenerator(rescale=1.0/255)
test_gen = ImageDataGenerator(rescale=1.0/255)

# NOTE(review): the captured output reports "2 classes" for each split, which
# matches the two crop folders (Corn, BlackGram) directly under each split
# directory rather than the per-disease folders below them. Confirm that
# crop-level classification is the intended target.
train_data = train_gen.flow_from_directory(train_dir, **flow_options)
val_data = val_gen.flow_from_directory(val_dir, **flow_options)

# Test batches keep a fixed order (shuffle=False).
test_data = test_gen.flow_from_directory(
    test_dir, shuffle=False, **flow_options)

Found 1403 images belonging to 2 classes.
Found 299 images belonging to 2 classes.
Found 305 images belonging to 2 classes.


In [None]:
# Creating a CNN model to learn patterns from leaf images
# Defining the layers that the model uses to classify the images

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Flatten, Dense, Dropout

# One output unit per class discovered by the training generator, so the
# output layer always matches the labels the generator yields instead of
# relying on a hard-coded count.
NUM_CLASSES = len(train_data.class_indices)

# Three Conv2D + MaxPooling2D stages (32, 64, 128 filters), then a dense
# classifier head with dropout and a softmax output.
model = Sequential([
    # Declaring the input with an explicit Input object instead of passing
    # `input_shape` to the first Conv2D avoids the Keras warning that the
    # original cell emitted.
    tf.keras.Input(shape=(224, 224, 3)),

    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(NUM_CLASSES, activation='softmax'),
])

# categorical_crossentropy pairs with the one-hot labels produced by
# class_mode='categorical' in the data generators.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Training the CNN model using the prepared training and validation data
# This step allows the model to learn features and improve accuracy

In [None]:
EPOCHS = 10

# Fit on the training batches and score on the validation batches after each
# epoch. The object returned by fit() is kept in `history`.
training_options = {
    'validation_data': val_data,
    'epochs': EPOCHS,
}
history = model.fit(train_data, **training_options)


  self._warn_if_super_not_called()


Epoch 1/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 3s/step - accuracy: 0.5936 - loss: 0.8226 - val_accuracy: 0.8562 - val_loss: 0.4462
Epoch 2/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 3s/step - accuracy: 0.7776 - loss: 0.4944 - val_accuracy: 0.8629 - val_loss: 0.4203
Epoch 3/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 3s/step - accuracy: 0.8479 - loss: 0.3767 - val_accuracy: 0.8629 - val_loss: 0.3971
Epoch 4/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 3s/step - accuracy: 0.8651 - loss: 0.3254 - val_accuracy: 0.8796 - val_loss: 0.3690
Epoch 5/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 3s/step - accuracy: 0.9041 - loss: 0.2415 - val_accuracy: 0.9331 - val_loss: 0.2506
Epoch 6/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 3s/step - accuracy: 0.9392 - loss: 0.1885 - val_accuracy: 0.9365 - val_loss: 0.2523
Epoch 7/10
[1m44/44[0m [32m━━━━

In [None]:
# Evaluating the trained model using unseen test data
# This helps measure the real-world performance of the model

In [None]:
# evaluate() returns the loss followed by the compiled metrics; the model is
# compiled with a single metric (accuracy), so the result unpacks into two values.
evaluation_result = model.evaluate(test_data)
test_loss, test_accuracy = evaluation_result

print("Test accuracy:", test_accuracy)
print("Test loss:", test_loss)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 628ms/step - accuracy: 0.9588 - loss: 0.1196
Test accuracy: 0.9278688430786133
Test loss: 0.1804901659488678


In [None]:
# Save the trained model to Drive; the .h5 extension selects the HDF5 format.
MODEL_PATH = '/content/drive/MyDrive/Kisan_Sathi_Model.h5'
model.save(MODEL_PATH)

print(f"Model saved at: {MODEL_PATH}")




Model saved at: /content/drive/MyDrive/Kisan_Sathi_Model.h5


In [None]:
# Save a second copy of the model to Drive using the .keras extension.
KERAS_MODEL_PATH = '/content/drive/MyDrive/Kisan_Sathi_Model.keras'
model.save(KERAS_MODEL_PATH)
print("Model saved in new Keras format")


Model saved in new Keras format
