# Leaf Disease Classification

### Import all the Dependencies

In [5]:
import os
import shutil
import splitfolders

### Download Dataset named PlantVillage from:
Dataset credits: https://www.kaggle.com/arjuntejaswi/plant-village

### Segregate the class folders into their respective plant-category folders in the Leaf_Data directory

In [6]:
# Show the class folders contained in the downloaded PlantVillage directory
os.listdir(r"PlantVillage")

['Pepper__bell___Bacterial_spot',
 'Pepper__bell___healthy',
 'Potato___Early_blight',
 'Potato___healthy',
 'Potato___Late_blight',
 'Tomato_Bacterial_spot',
 'Tomato_Early_blight',
 'Tomato_healthy',
 'Tomato_Late_blight',
 'Tomato_Leaf_Mold',
 'Tomato_Septoria_leaf_spot',
 'Tomato_Spider_mites_Two_spotted_spider_mite',
 'Tomato__Target_Spot',
 'Tomato__Tomato_mosaic_virus',
 'Tomato__Tomato_YellowLeaf__Curl_Virus']

In [11]:
# Sort the downloaded PlantVillage class folders into one directory per plant
# (Leaf_Data/<Plant>/<class folder>).

# Names of the class folders in the downloaded data
folders = os.listdir('PlantVillage')

# Plant categories that get their own directory under Leaf_Data
leaf_categories = ['Pepper', 'Potato', 'Tomato']

# exist_ok=True makes creation a no-op when the directory is already there,
# replacing the separate exists() check before each makedirs() call.
os.makedirs('Leaf_Data', exist_ok=True)

for category in leaf_categories:
    category_folder_path = os.path.join('Leaf_Data', category)
    os.makedirs(category_folder_path, exist_ok=True)

# Move each class folder under the first category whose name occurs in its prefix
for folder in folders:
    plant_name = folder.split('_')[0]  # Text before the first underscore, e.g. 'Tomato'
    plant_key = plant_name.lower()     # Lowercase once instead of once per category
    for category in leaf_categories:
        if category.lower() in plant_key:
            source = os.path.join('PlantVillage', folder)
            destination = os.path.join('Leaf_Data', category, folder)
            shutil.move(source, destination)
            # The folder no longer exists at `source`; trying another category
            # would raise, so stop after the first match.
            break


In [7]:
# Show the plant-category folders that now exist under Leaf_Data
os.listdir(r"Leaf_Data")

['Pepper', 'Potato', 'Tomato']

In [9]:
# Show the split folders under DataSets/Pepper.
# NOTE(review): DataSets/Pepper is only written by the splitfolders cell further
# down; the execution counts indicate the cells were run out of document order.
os.listdir(r"DataSets/Pepper")

['test', 'train', 'val']

### Using splitfolders Function to Split Dataset into 

1. Training: Dataset to be used while training
2. Validation: Dataset to be tested against while training
3. Test: Dataset to be tested against after the model has been trained

In [12]:
# The class folders were moved out of PlantVillage into Leaf_Data by the earlier
# cell, so the leftover PlantVillage directory is reused under the name DataSets.
# DataSets is where the train / val / test splits for each plant are written.
os.rename(src='PlantVillage', dst='DataSets')

### Split each category folder from the Leaf_Data directory into three folders (train, val, test) in the DataSets directory

In [13]:
# Root of the per-plant folders (Pepper, Potato, Tomato) to be split
input_folder = 'Leaf_Data/'

# Root under which the split data for each plant is saved
output_folder = 'DataSets/'

# Split every plant folder into its own output sub-folder, one call per plant.
# Per the split-folders documentation the ratio tuple is (train, val, test);
# the fixed seed keeps the split reproducible across runs.
for plant in ('Pepper', 'Potato', 'Tomato'):
    splitfolders.ratio(
        input_folder + plant,
        output=output_folder + plant,
        seed=42,
        ratio=(.7, .15, .15),
    )

Copying files: 2475 files [00:03, 791.20 files/s]
Copying files: 2152 files [00:02, 767.81 files/s]
Copying files: 16012 files [00:17, 896.97 files/s] 
