<a href="https://colab.research.google.com/github/shahd1995913/Multi-Language-LLM-for-Plant-Leaf-Diseases/blob/main/Multi_Language_LLM_for_disease_description.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Part 1 Use Deep learning For train and test in Image Dataset

In [1]:
# =========================================
#1️⃣ Install the necessary libraries
# =========================================
!pip install torch torchvision torchaudio
!pip install timm
!pip install transformers
!pip install scikit-learn
!pip install matplotlib
!pip install pandas
!pip install transformers accelerate sentencepiece
!pip install transformers accelerate sentencepiece



In [26]:
# =========================================
# 1️⃣ Imports
# =========================================
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torchvision import transforms, datasets, models
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report
import os
import shutil

# =========================================
# 2️⃣ Setup Dataset
# =========================================
# Copy dataset to local tmp for speed
src_dir = "/content/drive/MyDrive/PALM Reserch/Palm Leaves Dataset"
data_dir = "/tmp/palm_dataset"
if not os.path.exists(data_dir):
    shutil.copytree(src_dir, data_dir)

# Filter out empty folders
valid_folders = [f for f in os.listdir(data_dir)
                 if os.path.isdir(os.path.join(data_dir,f))
                 and len(os.listdir(os.path.join(data_dir,f))) > 0]

# Image transforms (smaller size = faster)
transform = transforms.Compose([
    transforms.Resize((96,96)),  # smaller than 128x128 for speed
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],
                         [0.229,0.224,0.225])
])

# Load dataset
dataset = datasets.ImageFolder(root=data_dir, transform=transform)
classes = dataset.classes
print("✅ Classes Detected:", classes)

# =========================================
# 3️⃣ Split dataset 70/20/10
# =========================================
total_len = len(dataset)
train_len = int(0.7 * total_len)
val_len = int(0.2 * total_len)
test_len = total_len - train_len - val_len

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_len, val_len, test_len])

# train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
# val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4)
# test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)


print(f"Total images: {total_len} | Train: {len(train_dataset)} | Val: {len(val_dataset)} | Test: {len(test_dataset)}")

# =========================================
# 4️⃣ Plot sample image from each class
# =========================================
# def imshow(img, title):
#     img = img.numpy().transpose((1,2,0))
#     img = img * np.array([0.229,0.224,0.225]) + np.array([0.485,0.456,0.406])
#     img = np.clip(img, 0,1)
#     plt.imshow(img)
#     plt.title(title)
#     plt.axis('off')
#     plt.show()

# for i, cls in enumerate(classes):
#     idx = next(idx for idx, (_, label) in enumerate(dataset.samples) if label == i)
#     img, _ = dataset[idx]
#     imshow(img, cls)

# =========================================
# 5️⃣ Setup MobileNetV3 small
# =========================================
device = "cuda" if torch.cuda.is_available() else "cpu"
model = models.mobilenet_v3_small(pretrained=True)
model.classifier[3] = nn.Linear(model.classifier[3].in_features, len(classes))
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# =========================================
# 6️⃣ Train function
# =========================================
def train_model(model, train_loader, val_loader, epochs=5):
    best_val_acc = 0
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        avg_train_loss = train_loss / len(train_loader)

        # Validation
        model.eval()
        val_preds, val_labels = [], []
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                preds = torch.argmax(outputs, dim=1)
                val_preds.extend(preds.cpu().numpy())
                val_labels.extend(labels.cpu().numpy())
        val_acc = accuracy_score(val_labels, val_preds)
        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {avg_train_loss:.4f} | Val Acc: {val_acc:.4f}")
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), "best_model.pth")
    print("Training complete. Best Val Acc:", best_val_acc)

# =========================================
# 7️⃣ Train
# =========================================
train_model(model, train_loader, val_loader, epochs=5)

# =========================================
# 8️⃣ Evaluate on Test Set
# =========================================
model.load_state_dict(torch.load("best_model.pth"))
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        preds = torch.argmax(outputs, dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

acc = accuracy_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds, average='weighted')
prec = precision_score(all_labels, all_preds, average='weighted')
rec = recall_score(all_labels, all_preds, average='weighted')
err_rate = 1 - acc
cm = confusion_matrix(all_labels, all_preds)

print("Accuracy:", acc)
print("F1 Score:", f1)
print("Precision:", prec)
print("Recall:", rec)
print("Error Rate:", err_rate)
print("Confusion Matrix:\n", cm)
print("\nClassification Report:\n", classification_report(all_labels, all_preds, target_names=classes))

✅ Classes Detected: ['Bacterial leaf blight', 'Bug', 'Dubas', 'Healthy', 'Honey', 'brown spots', 'white scale']
Total images: 3848 | Train: 2693 | Val: 769 | Test: 386




Epoch 1/5 | Train Loss: 0.6957 | Val Acc: 0.6632
Epoch 2/5 | Train Loss: 0.3124 | Val Acc: 0.8010
Epoch 3/5 | Train Loss: 0.2877 | Val Acc: 0.8166
Epoch 4/5 | Train Loss: 0.2134 | Val Acc: 0.8153
Epoch 5/5 | Train Loss: 0.1237 | Val Acc: 0.8349
Training complete. Best Val Acc: 0.834850455136541
Accuracy: 0.8238341968911918
F1 Score: 0.8164955426864442
Precision: 0.8225794419459602
Recall: 0.8238341968911918
Error Rate: 0.17616580310880825
Confusion Matrix:
 [[ 1  2  0  2  0  0  0]
 [ 0 50  5  4  2  1  1]
 [ 0  9 50  1 10  0  0]
 [ 0  0  1 82  0  0  0]
 [ 0  9 13  2 21  0  0]
 [ 0  0  0  0  0 22  0]
 [ 0  0  0  6  0  0 92]]

Classification Report:
                        precision    recall  f1-score   support

Bacterial leaf blight       1.00      0.20      0.33         5
                  Bug       0.71      0.79      0.75        63
                Dubas       0.72      0.71      0.72        70
              Healthy       0.85      0.99      0.91        83
                Honey       

In [28]:
# =========================================
# 1️⃣ Imports
# =========================================
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torchvision import transforms, datasets, models
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report
import os
import shutil

# =========================================
# 2️⃣ Dataset Setup
# =========================================
src_dir = "/content/drive/MyDrive/PALM Reserch/Palm Leaves Dataset"
data_dir = "/tmp/palm_dataset"
if not os.path.exists(data_dir):
    shutil.copytree(src_dir, data_dir)

# Filter out empty folders
valid_folders = [f for f in os.listdir(data_dir)
                 if os.path.isdir(os.path.join(data_dir,f))
                 and len(os.listdir(os.path.join(data_dir,f))) > 0]

transform = transforms.Compose([
    transforms.Resize((96,96)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])

dataset = datasets.ImageFolder(root=data_dir, transform=transform)
classes = dataset.classes
print("✅ Classes Detected:", classes)

# Split 70/20/10
total_len = len(dataset)
train_len = int(0.7 * total_len)
val_len = int(0.2 * total_len)
test_len = total_len - train_len - val_len
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_len, val_len, test_len])

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2, pin_memory=True)

# =========================================
# 3️⃣ Model Setup
# =========================================
device = "cuda" if torch.cuda.is_available() else "cpu"
# model = models.mobilenet_v3_small(pretrained=True)
from torchvision.models import MobileNet_V3_Small_Weights
weights = MobileNet_V3_Small_Weights.IMAGENET1K_V1  # or DEFAULT for latest weights
model = models.mobilenet_v3_small(weights=weights)

model.classifier[3] = nn.Linear(model.classifier[3].in_features, len(classes))
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# scaler = torch.cuda.amp.GradScaler()  # Mixed precision
scaler = torch.cuda.amp.GradScaler()

# =========================================
# 4️⃣ Training Function
# =========================================
def train_model(model, train_loader, val_loader, epochs=5):
    best_val_acc = 0
    for epoch in range(epochs):
        # ---- Training ----
        model.train()
        running_loss = 0
        running_corrects = 0
        total_samples = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()

            with torch.cuda.amp.autocast():
                outputs = model(images)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item() * images.size(0)
            preds = torch.argmax(outputs, dim=1)
            running_corrects += torch.sum(preds == labels).item()
            total_samples += images.size(0)

        train_loss = running_loss / total_samples
        train_acc = running_corrects / total_samples

        # ---- Validation ----
        model.eval()
        val_loss = 0
        val_corrects = 0
        val_samples = 0
        all_preds, all_labels = [], []

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)

                val_loss += loss.item() * images.size(0)
                preds = torch.argmax(outputs, dim=1)
                val_corrects += torch.sum(preds == labels).item()
                val_samples += images.size(0)

                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        val_loss /= val_samples
        val_acc = val_corrects / val_samples

        print(f"Epoch {epoch+1}/{epochs} | "
              f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

        # Save best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), "best_model.pth")
    print("Training complete. Best Val Acc:", best_val_acc)

# =========================================
# 5️⃣ Train
# =========================================
train_model(model, train_loader, val_loader, epochs=5)

# =========================================
# 6️⃣ Evaluate on Test Set
# =========================================
model.load_state_dict(torch.load("best_model.pth"))
model.eval()

all_preds, all_labels = [], []
test_loss = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        test_loss += loss.item() * images.size(0)
        preds = torch.argmax(outputs, dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

test_loss /= len(test_dataset)
test_acc = accuracy_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds, average='weighted')
prec = precision_score(all_labels, all_preds, average='weighted')
rec = recall_score(all_labels, all_preds, average='weighted')
err_rate = 1 - test_acc
cm = confusion_matrix(all_labels, all_preds)

print("\n✅ Test Results")
print("Loss:", test_loss)
print("Accuracy:", test_acc)
print("F1 Score:", f1)
print("Precision:", prec)
print("Recall:", rec)
print("Error Rate:", err_rate)
print("Confusion Matrix:\n", cm)
print("\nClassification Report:\n", classification_report(all_labels, all_preds, target_names=classes))


✅ Classes Detected: ['Bacterial leaf blight', 'Bug', 'Dubas', 'Healthy', 'Honey', 'brown spots', 'white scale']


  with torch.cuda.amp.autocast(device_type="cuda"):


TypeError: autocast.__init__() got an unexpected keyword argument 'device_type'

# Multi-LLM Setup

In [35]:
import requests
from bs4 import BeautifulSoup
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import torch


In [None]:
# =========================================
# 1️⃣ Imports
# =========================================
import torch
from torchvision import transforms, models
from PIL import Image
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from functools import lru_cache

# =========================================
# 2️⃣ Device
# =========================================
device = "cuda" if torch.cuda.is_available() else "cpu"

# =========================================
# 3️⃣ Load DL model (MobileNetV3) for classification
# =========================================
classes = ["Bacterial leaf blight", "Bug", "Dubas", "Healthy", "Honey", "brown spots", "white scale"]

weights = models.MobileNet_V3_Small_Weights.IMAGENET1K_V1
dl_model = models.mobilenet_v3_small(weights=weights)
dl_model.classifier[3] = torch.nn.Linear(dl_model.classifier[3].in_features, len(classes))

# تأكد من وضع مسار best_model.pth الصحيح
dl_model.load_state_dict(torch.load("best_model.pth", map_location=device))
dl_model = dl_model.to(device)
dl_model.eval()

# =========================================
# 4️⃣ Image transform
# =========================================
transform = transforms.Compose([
    transforms.Resize((96, 96)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])
])

# =========================================
# 5️⃣ Load LLM (small) on CPU
# =========================================
llm_model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
llm_model = AutoModelForSeq2SeqLM.from_pretrained(llm_model_name)
llm_gen = pipeline("text2text-generation", model=llm_model, tokenizer=tokenizer, device=-1)

# =========================================
# 6️⃣ Fallback templates
# =========================================
FALLBACK_SNIPPETS = {
    "Bacterial leaf blight": {"symptoms":"Watery spots on leaves turning brown.","causes":"Bacterial infection.","treatment":"Remove infected leaves, apply copper sprays."},
    "Dubas": {"symptoms":"Yellowing leaves with sticky honeydew.","causes":"Dubas bug feeding on sap.","treatment":"Sprays, remove weeds, sticky traps."},
    "Bug": {"symptoms":"Chewing or yellowing damage.","causes":"Insect pests.","treatment":"Field scouting, traps, pesticide if needed."},
    "white scale": {"symptoms":"White scales on fronds.","causes":"Scale insects sucking sap.","treatment":"Prune infested fronds, mineral oil sprays."},
    "brown spots": {"symptoms":"Brown spots on fronds.","causes":"Fungal infection.","treatment":"Prune infected fronds, apply fungicide."},
    "Honey": {"symptoms":"Sticky honeydew on fronds.","causes":"Insect secretions.","treatment":"Control insects, wash leaves."},
    "Healthy": {"symptoms":"Green, healthy fronds.","causes":"No disease.","treatment":"Maintain normal care."}
}

# =========================================
# 7️⃣ LLM Description Function
# =========================================
PROMPT_TEMPLATE = """You are an expert agronomist for date palm diseases.
Write a short description for "{disease}" with exactly three sections:

Symptoms: 1-2 sentences
Causes: 1-2 sentences
Treatment: 1-2 sentences

Keep it simple for farmers and avoid repeating phrases.
"""

def clean_text(text):
    # إزالة التكرار
    words = text.split()
    seen = set()
    result = []
    for w in words:
        if w.lower() not in seen:
            result.append(w)
            seen.add(w.lower())
    return " ".join(result)

@lru_cache(maxsize=128)
def get_llm_description(disease_name):
    try:
        prompt = PROMPT_TEMPLATE.format(disease=disease_name)
        out = llm_gen(prompt, max_new_tokens=150, do_sample=False)[0]['generated_text']
        out_clean = clean_text(out)
        # fallback if output empty
        if not out_clean.strip():
            snippet = FALLBACK_SNIPPETS.get(disease_name, FALLBACK_SNIPPETS["Bug"])
            out_clean = f"Symptoms: {snippet['symptoms']}\nCauses: {snippet['causes']}\nTreatment: {snippet['treatment']}"
        return out_clean
    except:
        snippet = FALLBACK_SNIPPETS.get(disease_name, FALLBACK_SNIPPETS["Bug"])
        return f"Symptoms: {snippet['symptoms']}\nCauses: {snippet['causes']}\nTreatment: {snippet['treatment']}"

# =========================================
# 8️⃣ Predict function
# =========================================
def predict_image(img_path):
    img = Image.open(img_path).convert("RGB")
    x = transform(img).unsqueeze(0).to(device)
    with torch.no_grad():
        outputs = dl_model(x)
        probs = torch.nn.functional.softmax(outputs, dim=1)
        conf, idx = torch.max(probs, dim=1)
        label = classes[idx.item()]
    description = get_llm_description(label)
    return {"label": label, "confidence": conf.item(), "description": description}

# =========================================
# 9️⃣ Example usage
# =========================================
if __name__ == "__main__":
    img_path = "/content/drive/MyDrive/PALM Reserch/Palm Leaves Dataset/Bacterial leaf blight/DSC_0365.JPG"
    result = predict_image(img_path)
    print(f"Diagnosis: {result['label']} | Confidence: {result['confidence']:.2f}")
    print("Description:\n", result["description"])
