In [5]:
import pandas as pd

# Load plant data from CSV
df = pd.read_csv("plant_123.csv")

# Preview data
print(df.head())

df['img_url'] = df['img_url'].str.strip()


   Unnamed: 0                   common_name     family categories  \
0           0                 Janet Craig    Liliaceae   Dracaena   
1           1                   Lady palm    Arecaceae       Palm   
2           2    Tailflower ,  Wax flower      Araceae  Anthurium   
3           3                Elephant ear      Araceae      Other   
4           4          Malaysian Dracaena    Liliaceae   Dracaena   

         origin         climate     zone  \
0    Cultivar          Tropical  [11,10]   
1       China       Subtropical  [11, 9]   
2    Cultivar          Tropical     [11]   
3      Hybrid    Tropical humid     [11]   
4    Cultivar          Tropical  [11,10]   

                                             img_url  
0  http://www.tropicopia.com/house-plant/thumbnai...  
1  http://www.tropicopia.com/house-plant/thumbnai...  
2  http://www.tropicopia.com/house-plant/thumbnai...  
3  http://www.tropicopia.com/house-plant/thumbnai...  
4  http://www.tropicopia.com/house-plant/thum

In [7]:
import requests

def check_url(url):
    try:
        response = requests.get(url, stream=True)
        return response.status_code
    except requests.RequestException:
        return "Error"

df['status_code'] = df['img_url'].apply(check_url)
print(df[['common_name', 'img_url', 'status_code']])


                         common_name  \
0                      Janet Craig     
1                        Lady palm     
2         Tailflower ,  Wax flower     
3                     Elephant ear     
4               Malaysian Dracaena     
..                               ...   
350              Chinese Evergreen     
351              Chinese Evergreen     
352                Silver Calathea     
353                   Rubber plant     
354    Blue Agave ,  Century plant     

                                               img_url  status_code  
0    http://www.tropicopia.com/house-plant/thumbnai...          200  
1    http://www.tropicopia.com/house-plant/thumbnai...          200  
2    http://www.tropicopia.com/house-plant/thumbnai...          200  
3    http://www.tropicopia.com/house-plant/thumbnai...          200  
4    http://www.tropicopia.com/house-plant/thumbnai...          200  
..                                                 ...          ...  
350  http://www.tropicopia.co

In [8]:
from ultralytics import YOLO

# Load the pre-trained YOLOv8 model (for object detection)
model = YOLO("yolov8n.pt")  # "n" is the nano version, you can also use "yolov8s.pt" or "yolov8m.pt"


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/Users/sheebamoghal/Library/Application Support/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 20.2MB/s]


In [9]:
# Create a unique ID for each plant category
unique_categories = df["categories"].unique()
category_to_id = {category: i for i, category in enumerate(unique_categories)}

# Map categories to IDs
df["class_id"] = df["categories"].map(category_to_id)

# Save the category mapping
df[["categories", "class_id"]].drop_duplicates().to_csv("class_mapping.csv", index=False)

print("Class ID Mapping Saved!")
print(df[["common_name", "categories", "class_id"]].head())

Class ID Mapping Saved!
                    common_name categories  class_id
0                 Janet Craig     Dracaena         0
1                   Lady palm         Palm         1
2    Tailflower ,  Wax flower    Anthurium         2
3                Elephant ear        Other         3
4          Malaysian Dracaena     Dracaena         0


In [12]:
import os
import pandas as pd
from PIL import Image

# Load dataset
df = pd.read_csv("plant_123.csv")

# Create labels directory
os.makedirs("dataset/labels/train", exist_ok=True)

def generate_bounding_box(image_path):
    """Generate a bounding box assuming the object is centered."""
    with Image.open(image_path) as img:
        width, height = img.size
        xmin, ymin = int(width * 0.1), int(height * 0.1)  # 10% margin
        xmax, ymax = int(width * 0.9), int(height * 0.9)  # 90% boundary
        return xmin, ymin, xmax, ymax, width, height

for _, row in df.iterrows():
    # Ensure common_name is a string and replace special characters
    common_name = str(row['common_name']).strip()  # Convert to string and remove spaces
    if common_name == "nan":  # Check if it's a NaN converted to string
        continue  # Skip this row

    common_name = common_name.replace(" ", "_")  # Replace spaces with underscores
    image_path = f"dataset/images/train/{common_name}.jpg"
    label_path = f"dataset/labels/train/{common_name}.txt"

    if os.path.exists(image_path):
        # Generate bounding box
        xmin, ymin, xmax, ymax, img_width, img_height = generate_bounding_box(image_path)
        
        # Normalize bounding box for YOLO
        x_center = (xmin + xmax) / (2 * img_width)
        y_center = (ymin + ymax) / (2 * img_height)
        box_width = (xmax - xmin) / img_width
        box_height = (ymax - ymin) / img_height
        
        # Write YOLO annotation
        with open(label_path, "w") as f:
            f.write(f"{row['class_id']} {x_center:.6f} {y_center:.6f} {box_width:.6f} {box_height:.6f}\n")

print("✅ Bounding Boxes Generated Successfully!")


✅ Bounding Boxes Generated Successfully!


In [14]:
import requests
import shutil
import pandas as pd

# Load dataset
df = pd.read_csv("plant_123.csv")

# Ensure the directories exist
os.makedirs("dataset/images/train", exist_ok=True)

def download_image(img_url, save_path):
    """Download image from URL and save to folder."""
    try:
        response = requests.get(img_url, stream=True)
        if response.status_code == 200:
            with open(save_path, "wb") as file:
                shutil.copyfileobj(response.raw, file)
            print(f"✅ Downloaded: {save_path}")
        else:
            print(f"⚠️ Failed: {img_url} (Status: {response.status_code})")
    except Exception as e:
        print(f"❌ Error downloading {img_url}: {e}")

# Download all images
for _, row in df.iterrows():
    common_name = str(row["common_name"]).strip().replace(" ", "_")  # Clean filename
    img_path = f"dataset/images/train/{common_name}.jpg"
    download_image(row["img_url"], img_path)

print("✅ Image download complete!")


✅ Downloaded: dataset/images/train/Janet_Craig.jpg
✅ Downloaded: dataset/images/train/Lady_palm.jpg
✅ Downloaded: dataset/images/train/Tailflower_,__Wax_flower.jpg
✅ Downloaded: dataset/images/train/Elephant_ear.jpg
✅ Downloaded: dataset/images/train/Malaysian_Dracaena.jpg
✅ Downloaded: dataset/images/train/Chinese_Evergreen.jpg
✅ Downloaded: dataset/images/train/Malaysian_Dracaena.jpg
✅ Downloaded: dataset/images/train/Herringbone_,__Pink_praying_plant.jpg
✅ Downloaded: dataset/images/train/Guzmania.jpg
✅ Downloaded: dataset/images/train/Chinese_Evergreen.jpg
✅ Downloaded: dataset/images/train/Peace_lily.jpg
✅ Downloaded: dataset/images/train/Chinese_Evergreen.jpg
✅ Downloaded: dataset/images/train/Oleander_,__Rose_bay.jpg
✅ Downloaded: dataset/images/train/African_evergreen_,__Arrowhead_vine_,__Goosefoot_plant.jpg
✅ Downloaded: dataset/images/train/Bird_nest_Anthurium_,__Cabbage_Anthurium.jpg
✅ Downloaded: dataset/images/train/Bella_palm_,__Neanthebella_palm_,__Dwarf_palm.jpg
✅ Downl

In [16]:
import pandas as pd
import os

# Load dataset
df = pd.read_csv("plant_123.csv")

# Ensure column exists
if "common_name" not in df.columns or "class_id" not in df.columns:
    raise ValueError("⚠️ Missing 'common_name' or 'class_id' columns in CSV!")

# Create label folder
os.makedirs("dataset/labels/train", exist_ok=True)

# Generate YOLO Labels
for _, row in df.iterrows():
    common_name = str(row["common_name"]).strip().replace(" ", "_")  # Normalize filename
    label_path = f"dataset/labels/train/{common_name}.txt"

    # Full image bounding box: centered at (0.5, 0.5) with width=1, height=1
    yolo_label = f"{row['class_id']} 0.5 0.5 1.0 1.0\n"

    # Write label file
    with open(label_path, "w") as f:
        f.write(yolo_label)

print("✅ YOLO Labels Generated Successfully!")


ValueError: ⚠️ Missing 'common_name' or 'class_id' columns in CSV!