In [1]:
import requests
import pandas as pd
import json
import warnings

import os

In [2]:
# Load the plant list CSV (looked up in the current working directory) into a
# pandas DataFrame.
CUR_DIR = os.getcwd()
DB_LIST_PATH = os.path.join(CUR_DIR, 'plant_list.csv')
# Column layout used for the fallback frame when no CSV exists yet.
DB_LIST_ENTRIES = ['id', 'common_name', 'scientific_name', 'other_name', 'family', 'hybrid', 'authority', 'subspecies', 'cultivar', 'variety', 'species_epithet', 'genus', 'regular_url', 'medium_url', 'small_url', 'thumbnail']

# Load the file if it exists; otherwise start from an empty frame.
if os.path.exists(DB_LIST_PATH):
    plant_list_df = pd.read_csv(DB_LIST_PATH)
    # Report how many rows were read.
    print(f"Loaded {len(plant_list_df)} entries from {DB_LIST_PATH}.")
else:
    # Without this assignment `plant_list_df` would be undefined and every
    # later cell that references it would fail with a NameError.
    plant_list_df = pd.DataFrame(columns=DB_LIST_ENTRIES)
    print("No plant list found.")

Loaded 381 entries from c:\Users\steph\OneDrive\Studium_Uni_Tuebingen\Master Informatik\01_SCMU\Project\plant_list.csv.


In [3]:
# Keep only the plants that have every image URL variant; a row missing any
# one of the four columns below is discarded.
url_columns = ['regular_url', 'medium_url', 'small_url', 'thumbnail']
plant_list_df = plant_list_df.dropna(axis='index', how='any', subset=url_columns)
print(f"{len(plant_list_df)} entries remaining after removing rows with 'null' URLs.")

359 entries remaining after removing rows with 'null' URLs.


In [4]:
# Download the image behind each row's 'small_url' into plant_img/small/,
# saving it as "<id>.jpg". Rows whose URL equals the placeholder below are
# skipped (reported as "No free access").
UPGRADE_PLACEHOLDER_URL = "https://perenual.com/storage/image/upgrade_access.jpg"
SMALL_IMG_DIR = os.path.join(CUR_DIR, 'plant_img', 'small')
# (connect, read) timeouts in seconds; requests waits forever without one,
# so a single stalled server would otherwise hang the whole cell.
REQUEST_TIMEOUT = (5, 30)
CHUNK_SIZE = 1024

# Create the target directory if it doesn't exist yet.
os.makedirs(SMALL_IMG_DIR, exist_ok=True)

# Walk the dataframe row by row and fetch each image.
for _, row in plant_list_df.iterrows():
    small_url = row['small_url']
    if small_url == UPGRADE_PLACEHOLDER_URL:
        print(f"No free access for {row['common_name']}")
        continue
    try:
        # The context manager releases the streamed connection on every path,
        # including the non-200 branch where the body is never read.
        with requests.get(small_url, stream=True, timeout=REQUEST_TIMEOUT) as response:
            if response.status_code == 200:
                with open(os.path.join(SMALL_IMG_DIR, f"{row['id']}.jpg"), 'wb') as file:
                    for chunk in response.iter_content(CHUNK_SIZE):
                        file.write(chunk)
                print(f"Downloaded {small_url} for {row['common_name']}")
            else:
                print(f"Failed to download {small_url} for {row['common_name']}")
    except Exception as e:
        # Best effort: report the failure (timeouts included) and move on to
        # the next plant instead of aborting the whole run.
        print(f"Error downloading {small_url} for {row['common_name']}: {e}")

Downloaded https://perenual.com/storage/species_image/425_abutilon_hybridum/small/24527654869_ac712f58c7_b.jpg for flowering-maple
Downloaded https://perenual.com/storage/species_image/426_abutilon_hybridum_bella_red/small/abutilon-mallow-flower-red.jpg for flowering-maple
Downloaded https://perenual.com/storage/species_image/427_abutilon_moonchimes/small/Abutilon_x_hybridum_27Moonchimes27_Flower_2800px.jpg for flowering-maple
Downloaded https://perenual.com/storage/species_image/428_abutilon_pictum_gold_dust/small/Redvein_Abutilon.jpg for flowering-maple
Downloaded https://perenual.com/storage/species_image/434_acalypha_wilkesiana/small/24945289366_5035e5d0a9_b.jpg for Jacob's coat
Downloaded https://perenual.com/storage/species_image/540_adenium_obesum/small/9244335137_6d662ed77c_b.jpg for desert rose
Downloaded https://perenual.com/storage/species_image/543_adiantum_capillus-veneris/small/49636882698_cb4fa4c685_b.jpg for maidenhair fern
Downloaded https://perenual.com/storage/specie

In [5]:
# Write the current dataframe back to the CSV path it is loaded from,
# overwriting the previous file. The index is left out so no extra column
# is written.
plant_list_df.to_csv(path_or_buf=DB_LIST_PATH, index=False)
print(f"Updated {DB_LIST_PATH} with {len(plant_list_df)} entries.")

Updated c:\Users\steph\OneDrive\Studium_Uni_Tuebingen\Master Informatik\01_SCMU\Project\plant_list.csv with 359 entries.
