In [39]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [40]:
!ls "/content/drive/My Drive/Colab Notebooks"

datasheet_weed_detection  plant_image_data.zip	travell_package.ipynb
plant_image_data	  projecttt.ipynb


In [41]:
!unzip "/content/drive/My Drive/Colab Notebooks/plant_image_data.zip" -d "/content/"

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/train/tobacco/tobacco190.jpg  
  inflating: /content/train/tobacco/tobacco191.jpg  
  inflating: /content/train/tobacco/tobacco192.jpg  
  inflating: /content/train/tobacco/tobacco193.jpg  
  inflating: /content/train/tobacco/tobacco194.jpg  
  inflating: /content/train/tobacco/tobacco195.jpg  
  inflating: /content/train/tobacco/tobacco196.jpg  
  inflating: /content/train/tobacco/tobacco197.jpg  
  inflating: /content/train/tobacco/tobacco198.jpg  
  inflating: /content/train/tobacco/tobacco199.jpg  
  inflating: /content/train/tobacco/tobacco2.jpg  
  inflating: /content/train/tobacco/tobacco20.jpg  
  inflating: /content/train/tobacco/tobacco200.jpg  
  inflating: /content/train/tobacco/tobacco201.jpg  
  inflating: /content/train/tobacco/tobacco202.jpg  
  inflating: /content/train/tobacco/tobacco203.jpg  
  inflating: /content/train/tobacco/tobacco204.jpg  
  inflating: /content/train/tobacco/t

In [42]:
!ls /content/train
!ls /content/val
!ls /content/testy

aloevera       coconut	 ginger     orange	shallot        watermelon
banana	       corn	 guava	    paddy	soybeans
bilimbi        cucumber  kale	    papaya	spinach
cantaloupe     curcuma	 longbeans  peperchili	sweetpotatoes
cassava        eggplant  mango	    pineapple	tobacco
classname.txt  galangal  melon	    pomelo	waterapple
aloevera       coconut	 ginger     orange	shallot        watermelon
banana	       corn	 guava	    paddy	soybeans
bilimbi        cucumber  kale	    papaya	spinach
cantaloupe     curcuma	 longbeans  peperchili	sweetpotatoes
cassava        eggplant  mango	    pineapple	tobacco
classname.txt  galangal  melon	    pomelo	waterapple
ls: cannot access '/content/testy': No such file or directory


In [43]:
!pip install opencv-python



In [44]:
import os
import numpy as np
import cv2
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import joblib

In [52]:
def load_images_and_labels(directory, img_size=(64, 64), max_samples=1000):
    features = []
    labels = []
    total_loaded = 0
    valid_extensions = ('.jpg', '.jpeg', '.png', '.bmp')

    for label in os.listdir(directory):
        class_path = os.path.join(directory, label)
        if not os.path.isdir(class_path):
            print(f"⚠️ Skipping non-directory: {class_path}")
            continue

        print(f"\nLoading class: {label}")
        for img_name in os.listdir(class_path):
            if not img_name.lower().endswith(valid_extensions):
                print(f" Skipping non-image: {img_name}")
                continue

            img_path = os.path.join(class_path, img_name)
            try:
                img = cv2.imread(img_path)
                if img is None:
                    print(f" Failed to load (corrupted?): {img_path}")
                    continue

                img = cv2.resize(img, img_size)
                features.append(img.flatten())
                labels.append(label)
                total_loaded += 1

                if total_loaded >= max_samples:
                    print(f"Reached max samples ({max_samples})")
                    return np.array(features), np.array(labels)

            except Exception as e:
                print(f" Error loading {img_path}: {str(e)}")

    print(f"\nFinished loading. Total: {total_loaded}")
    return np.array(features), np.array(labels)

In [53]:
train_dir = "/content/train"
val_dir = "/content/val"
test_dir = "/content/test"

In [54]:
base_data_dir = "/content/train"
X_all, y_all = load_images_and_labels(base_data_dir, max_samples=1000)

print(f"\nLoaded {len(X_all)} samples with {len(np.unique(y_all))} classes")


Loading class: orange

Loading class: kale
Reached max samples (1000)

Loaded 1000 samples with 2 classes


In [55]:
if len(X_all) == 0:
    raise ValueError("No images loaded! Check dataset path and structure.")

In [56]:
if len(np.unique(y_all)) > 1:
    X_train, X_temp, y_train, y_temp = train_test_split(
        X_all, y_all,
        test_size=0.3,
        random_state=42,
        stratify=y_all
    )
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp,
        test_size=0.5,
        random_state=42,
        stratify=y_temp
    )
else:
    X_train, X_temp, y_train, y_temp = train_test_split(
        X_all, y_all,
        test_size=0.3,
        random_state=42
    )
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp,
        test_size=0.5,
        random_state=42
    )

In [57]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

In [58]:
encoder = LabelEncoder()
y_train_enc = encoder.fit_transform(y_train)
y_val_enc = encoder.transform(y_val)
y_test_enc = encoder.transform(y_test)

In [59]:
def train_and_evaluate(models, X_train, y_train, X_val, y_val):
    results = {}
    for name, model in models.items():
        print(f"\nTraining {name}...")
        model.fit(X_train, y_train)

        y_pred = model.predict(X_val)
        acc = accuracy_score(y_val, y_pred)
        print(f"{name} Validation Accuracy: {acc:.4f}")
        print(classification_report(y_val, y_pred, target_names=encoder.classes_))

        results[name] = {
            'model': model,
            'accuracy': acc,
            'report': classification_report(y_val, y_pred, output_dict=True)
        }
    return results

In [60]:
models = {
    'SVM': SVC(kernel='linear', probability=True),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42)
}

In [61]:
results = train_and_evaluate(models, X_train_scaled, y_train_enc, X_val_scaled, y_val_enc)


Training SVM...
SVM Validation Accuracy: 0.9400
              precision    recall  f1-score   support

        kale       0.95      0.84      0.89        45
      orange       0.94      0.98      0.96       105

    accuracy                           0.94       150
   macro avg       0.94      0.91      0.93       150
weighted avg       0.94      0.94      0.94       150


Training Random Forest...
Random Forest Validation Accuracy: 0.9067
              precision    recall  f1-score   support

        kale       0.92      0.76      0.83        45
      orange       0.90      0.97      0.94       105

    accuracy                           0.91       150
   macro avg       0.91      0.86      0.88       150
weighted avg       0.91      0.91      0.90       150



In [62]:
best_model_name = max(results, key=lambda x: results[x]['accuracy'])
best_model = results[best_model_name]['model']
print(f"\nSelected best model: {best_model_name}")


Selected best model: SVM


In [63]:
print("\nFinal Test Evaluation:")
y_test_pred = best_model.predict(X_test_scaled)
test_acc = accuracy_score(y_test_enc, y_test_pred)
print(f"Test Accuracy: {test_acc:.4f}")
print(classification_report(y_test_enc, y_test_pred, target_names=encoder.classes_))


Final Test Evaluation:
Test Accuracy: 0.9067
              precision    recall  f1-score   support

        kale       0.84      0.84      0.84        45
      orange       0.93      0.93      0.93       105

    accuracy                           0.91       150
   macro avg       0.89      0.89      0.89       150
weighted avg       0.91      0.91      0.91       150



In [64]:
os.makedirs('saved_models', exist_ok=True)
joblib.dump(best_model, 'saved_models/plant_classifier.pkl')
joblib.dump(scaler, 'saved_models/scaler.pkl')
joblib.dump(encoder, 'saved_models/label_encoder.pkl')

print("\nPipeline completed successfully! Saved models to 'saved_models/' directory")


Pipeline completed successfully! Saved models to 'saved_models/' directory
