In [32]:
import requests
import pandas as pd
import json
import warnings

import os

In [33]:
DB_DETAILS_ENTRIES = [
    'id',
    'common_name',
    'scientific_name',
    'family',
    'origin',
    'type',
    'dimensions'
    'cycle',
    'watering',
    'watering_frequency_value',
    'watering_frequency_unit',
    'plant_anatomy',
    'pruning_month',
    'pruning_count',
    'pruning_interval',
    'seeds',
    'propagation',
    'flowers',
    'flowering_season',
    'sunlight',
    'soil',
    'pest_susceptibility',
    'cones',
    'fruits',
    'edible_fruit',
    'fruiting_season',
    'harvest_season',
    'harvest_method',
    'leaf',
    'edible_leaf',
    'growth_rate',
    'maintenance',
    'medicinal',
    'poisonous_to_humans',
    'poisonous_to_pets',
    'drought_tolerant',
    'salt_tolerant',
    'thorny',
    'invasive',
    'rare',
    'tropical',
    'cuisine',
    'indoor',
    'care_level',
    'description',
    'xWateringPeriod',
    'xWateringBasedTemperature',
    'xWateringPhLevel',
    'xSunlightDuration'
]

In [34]:
# load .csv files to pandas dataframe
CUR_DIR = os.getcwd()
DB_LIST_PATH = os.path.join(CUR_DIR, 'filtered_plant_list.csv')
DB_DETAILS_PATH = os.path.join(CUR_DIR, 'plant_details.csv')
DB_LIST_ENTRIES = ['id', 'common_name', 'scientific_name', 'other_name', 'family', 'hybrid', 'authority', 'subspecies', 'cultivar', 'variety', 'species_epithet', 'genus', 'regular_url', 'medium_url', 'small_url', 'thumbnail']


# check if file exists and load it, else create a new dataframe
if os.path.exists(DB_LIST_PATH):
    plant_list_df = pd.read_csv(DB_LIST_PATH)
    # print number of rows
    print(f"Loaded {len(plant_list_df)} entries from {DB_LIST_PATH}.")
else:
    print("No plant list found.")

if os.path.exists(DB_DETAILS_PATH):
    plant_details_df = pd.read_csv(DB_DETAILS_PATH)
    # print number of rows
    print(f"Loaded {len(plant_details_df)} entries from {DB_DETAILS_PATH}.")
else:
    plant_details_df = pd.DataFrame(columns=DB_DETAILS_ENTRIES)
    print("No plant details found.")

Loaded 132 entries from c:\Users\steph\OneDrive\Studium_Uni_Tuebingen\Master Informatik\01_SCMU\Project\filtered_plant_list.csv.
No plant details found.


In [None]:
PERENUAL_API_URL = "https://perenual.com/api/v2/species/details/"
PERENUAL_API_KEY = "sk-CIqR67f7e96325b299717"

ids_in_details_db = plant_details_df['id'].tolist()
ids_in_list_db = plant_list_df['id'].tolist()

missing_ids = list(set(ids_in_list_db) - set(ids_in_details_db))

print(f"Missing ids: {missing_ids}")

data = {}

for id in missing_ids:
    url = f"{PERENUAL_API_URL}{id}?key={PERENUAL_API_KEY}"
    print(f"requesting {url}")

    payload = {}
    headers = {}

    response = requests.request("GET", url, headers=headers, data=payload)

    if response.status_code == 200:
        # get the data
        data = response.json()

        

        row_details = pd.DataFrame([{
            'id': data['id'],
            'common_name': data.get('common_name', "null"),
            'scientific_name': data.get('scientific_name', "null"),
            'family': data.get('family', "null"),
            'origin': data.get('origin', "null"),
            'type': data.get('type', "null"),
            'dimensions': data.get('dimensions', "null"),
            'cycle': data.get('cycle', "null"),
            'watering': data.get('watering', "null"),
            'watering_frequency_value': data.get('watering_frequency_value', "null"),
            'watering_frequency_unit': data.get('watering_frequency_unit', "null"),
            'plant_anatomy': data.get('plant_anatomy', "null"),
            'pruning_month': data.get('pruning_month', "null"),
            'pruning_count': data.get('pruning_count', "null"),
            'pruning_interval': data.get('pruning_interval', "null"),
            'seeds': data.get('seeds', "null"),
            'propagation': data.get('propagation', "null"),
            'flowers': data.get('flowers', "null"),
            'flowering_season': data.get('flowering_season', "null"),
            'sunlight': data.get('sunlight', "null"),
            'soil': data.get('soil', "null"),
            'pest_susceptibility': data.get('pest_susceptibility', "null"),
            'cones': data.get('cones', "null"),
            'fruits': data.get('fruits', "null"),
            'edible_fruit': data.get('edible_fruit', "null"),
            'fruiting_season': data.get('fruiting_season', "null"),
            'harvest_season': data.get('harvest_season', "null"),
            'harvest_method': data.get('harvest_method', "null"),
            'leaf': data.get('leaf', "null"),
            'edible_leaf': data.get('edible_leaf', "null"),
            'growth_rate': data.get('growth_rate', "null"),
            'maintenance': data.get('maintenance', "null"),
            'medicinal': data.get('medicinal', "null"),
            'poisonous_to_humans': data.get('poisonous_to_humans', "null"),
            'poisonous_to_pets': data.get('poisonous_to_pets', "null"),
            'drought_tolerant': data.get('drought_tolerant', "null"),
            'salt_tolerant': data.get('salt_tolerant', "null"),
            'thorny': data.get('thorny', "null"),
            'invasive': data.get('invasive', "null"),
            'rare': data.get('rare', "null"),
            'tropical': data.get('tropical', "null"),
            'cuisine': data.get('cuisine', "null"),
            'indoor': data.get('indoor', "null"),
            'care_level': data.get('care_level', "null"),
            'description': data.get('description', "null"),
            'xWateringPeriod': data.get('xWateringPeriod', "null"),
            'xWateringBasedTemperature': data.get('xWateringBasedTemperature', "null"),
            'xWateringPhLevel': data.get('xWateringPhLevel', "null"),
            'xSunlightDuration': data.get('xSunlightDuration', "null")
        }])

        print(plant_details_df.columns[plant_details_df.columns.duplicated()])
        print(row_details.columns[row_details.columns.duplicated()])

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            plant_details_df = pd.concat([plant_details_df, row_details], ignore_index=True)

    elif response.status_code == 429:
        print("Max API requests reached. Try again tomorrow.")
        break

    else:
        print("Some error occurred.")
        break

# save the dataframes to .csv files
print("Saving dataframe to .csv files...")
plant_details_df.to_csv(DB_DETAILS_PATH, index=False)
print("Dataframe saved.")


Missing ids: [1024, 1025, 1031, 2568, 1036, 1038, 2244, 540, 543, 546, 549, 551, 552, 1597, 1603, 1130, 1133, 625, 626, 628, 1147, 1149, 1150, 2193, 667, 1192, 1195, 1196, 1197, 1198, 1199, 1203, 1716, 1208, 1209, 1211, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 2242, 710, 711, 712, 713, 714, 715, 1222, 717, 1223, 1224, 1226, 721, 2773, 2774, 727, 728, 2263, 2775, 2272, 2274, 2275, 747, 2288, 2289, 2290, 2294, 2295, 1272, 2829, 2322, 2323, 1820, 1821, 1822, 2354, 1845, 1846, 1847, 1848, 1855, 2885, 1864, 2891, 1868, 1873, 855, 856, 861, 1891, 2915, 2954, 2955, 2956, 2957, 2958, 2959, 2961, 2962, 2963, 2965, 2976, 2468, 425, 426, 427, 428, 1457, 434, 1971, 1469, 1470, 1471, 2498, 2501, 1993, 1999, 2000, 2528, 2529, 2530, 2531, 2532, 2533, 1001, 2030, 1023]
requesting https://perenual.com/api/v2/species/details/1024?key=sk-CIqR67f7e96325b299717
Index([], dtype='object')
Index([], dtype='object')
requesting https://perenual.com/api/v2/species/details/1025?key=sk-CIqR67f7e96325b

NameError: name 'current_page' is not defined

In [36]:
# save the dataframes to .csv files
print("Saving dataframe to .csv files...")
plant_details_df.to_csv(DB_DETAILS_PATH, index=False)
print("Dataframe saved.")

Saving dataframe to .csv files...
Dataframe saved.
