In [1]:
import os
import json
import pandas as pd

# Root directory of the image dataset.
# Replace with the actual path to your image directory.
data_path = "C:\\Users\\laksh\\DL\\PlantVillage"

# Disease-stage label for each class folder, keyed by the exact folder name.
# NOTE(review): "Potato___healthy" is mapped to 0 while the other healthy
# classes are mapped to 1 -- confirm this difference is intentional.
disease_stage_mapping = {
    # Pepper
    "Pepper__bell___Bacterial_spot": 3,
    "Pepper__bell___healthy": 1,
    # Potato
    "Potato___Early_blight": 2,
    "Potato___healthy": 0,
    "Potato___Late_blight": 4,
    # Tomato
    "Tomato___Bacterial_spot": 3,
    "Tomato___Early_blight": 2,
    "Tomato___healthy": 1,
    "Tomato___Late_blight": 4,
    "Tomato___Leaf_Mold": 4,
    "Tomato___Septoria_leaf_spot": 4,
    "Tomato___Spider_mites_Two_spotted_spider_mite": 5,
    "Tomato___Target_Spot": 4,
    "Tomato___Tomato_mosaic_virus": 3,
    "Tomato___Tomato_YellowLeaf__Curl_Virus": 5,
}

# Lower-case file extensions that count as images.  Matching is performed on
# the lower-cased file name, so "leaf.JPG" and "leaf.jpg" are both accepted.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

# Column order of the generated CSV; passed explicitly so that the header is
# still written when no image is found.
RECORD_COLUMNS = ['image_url', 'is_healthy', 'virus_name', 'disease_intensity']


def describe_label(label, stage_mapping):
    """Derive the annotation values shared by every image of one class folder.

    Args:
        label: Folder name, e.g. ``"Tomato___Early_blight"``.  The text after
            the first ``"___"`` separator is taken as the disease name.
        stage_mapping: Dict mapping folder names to a disease-stage integer.

    Returns:
        A ``(is_healthy, virus_name, disease_intensity)`` tuple where
        ``is_healthy`` is 1 only when the disease name is ``"healthy"``,
        ``virus_name`` is ``"none"`` for healthy folders and for folder names
        without a ``"___"`` separator, and ``disease_intensity`` is the mapped
        stage (0 when the folder is not in ``stage_mapping``).
    """
    raw_name = label.split("___")[1] if "___" in label else "none"
    is_healthy = 1 if raw_name == "healthy" else 0
    virus_name = raw_name if is_healthy == 0 else "none"
    disease_intensity = stage_mapping.get(label, 0)
    return is_healthy, virus_name, disease_intensity


def collect_label_images(label_path, label, stage_mapping):
    """Build one record per image found under a class folder.

    The folder is walked recursively, so images in sub-directories are
    included.  Files whose extension (compared case-insensitively) is not in
    ``IMAGE_EXTENSIONS`` are skipped.

    Args:
        label_path: Path of the class folder to scan.
        label: Folder name used to derive the annotation values.
        stage_mapping: Dict mapping folder names to a disease-stage integer.

    Returns:
        A list of dicts with the keys ``image_url`` (absolute file path),
        ``is_healthy``, ``virus_name`` and ``disease_intensity``.
    """
    # These values depend only on the folder, so compute them once per folder
    # instead of once per file.
    is_healthy, virus_name, disease_intensity = describe_label(label, stage_mapping)

    records = []
    for root, _, files in os.walk(label_path):
        for image_file in files:
            if not image_file.lower().endswith(IMAGE_EXTENSIONS):
                continue
            records.append({
                'image_url': os.path.abspath(os.path.join(root, image_file)),
                'is_healthy': is_healthy,
                'virus_name': virus_name,
                'disease_intensity': disease_intensity
            })
    return records


# The guard is true both in a notebook cell and when run as a script; the
# statements stay at module level so `labels`, `image_data` and `df` remain
# available to later cells.
if __name__ == "__main__":
    # List all folders (assuming each folder represents a label)
    labels = [label for label in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, label))]

    # List to store image data
    image_data = []

    for label in labels:
        # Debugging: Print folder name and number of images
        print(f"Folder: {label}")

        if label not in disease_stage_mapping:
            # Make the silent fallback visible: unmapped folders get intensity 0.
            print(f"Warning: no disease stage mapped for '{label}', using 0")

        label_records = collect_label_images(
            os.path.join(data_path, label), label, disease_stage_mapping
        )
        image_data.extend(label_records)

        print(f"Number of images: {len(label_records)}")
        print("-----------------------")

    # Convert the image data to a DataFrame
    df = pd.DataFrame(image_data, columns=RECORD_COLUMNS)

    # Save the DataFrame to a CSV file
    df.to_csv("image_data.csv", index=False)


Folder: Pepper__bell___Bacterial_spot
Number of images: 997
-----------------------
Folder: Pepper__bell___healthy
Number of images: 1476
-----------------------
Folder: Potato___Early_blight
Number of images: 1000
-----------------------
Folder: Potato___healthy
Number of images: 152
-----------------------
Folder: Potato___Late_blight
Number of images: 1000
-----------------------
Folder: Tomato___Bacterial_spot
Number of images: 2127
-----------------------
Folder: Tomato___Early_blight
Number of images: 1000
-----------------------
Folder: Tomato___healthy
Number of images: 1590
-----------------------
Folder: Tomato___Late_blight
Number of images: 1756
-----------------------
Folder: Tomato___Leaf_Mold
Number of images: 952
-----------------------
Folder: Tomato___Septoria_leaf_spot
Number of images: 1771
-----------------------
Folder: Tomato___Spider_mites_Two_spotted_spider_mite
Number of images: 1676
-----------------------
Folder: Tomato___Target_Spot
Number of images: 1474
-