In [1]:
import requests
import pandas as pd
import json
import warnings

import os

In [2]:
# Load the filtered plant list (.csv) into a pandas DataFrame.
# The file is looked up in the current working directory of the kernel.
CUR_DIR = os.getcwd()
DB_LIST_PATH = os.path.join(CUR_DIR, 'filtered_plant_list.csv')


# Load the file if it exists; otherwise fall back to an empty DataFrame so that
# `plant_list_df` is always defined (the 'id' column is the one read further
# down in this notebook via plant_list_df['id']).
if os.path.exists(DB_LIST_PATH):
    plant_list_df = pd.read_csv(DB_LIST_PATH)
    # Report how many rows were loaded.
    print(f"Loaded {len(plant_list_df)} entries from {DB_LIST_PATH}.")
else:
    print("No plant list found.")
    plant_list_df = pd.DataFrame(columns=['id'])

Loaded 132 entries from c:\Users\steph\OneDrive\Studium_Uni_Tuebingen\Master Informatik\01_SCMU\Project\filtered_plant_list.csv.


In [5]:
# Column order of the flattened plant-details table.
# Entries are grouped by theme for readability only; the list order is the
# column order of the resulting DataFrame.
DF_COLUMNS = [
    # identity / taxonomy
    'id', 'common_name', 'scientific_name', 'other_name',
    'family', 'species_epithet', 'genus', 'origin',
    # growth habit
    'type', 'cycle', 'propagation',
    # hardiness range (flattened from the nested "hardiness" object)
    'hardiness_min', 'hardiness_max',
    # care
    'watering', 'sunlight', 'pruning_month', 'maintenance', 'growth_rate',
    # traits
    'drought_tolerant', 'salt_tolerant', 'thorny', 'invasive', 'tropical',
    'care_level',
    # plant parts
    'flowers', 'cones', 'fruits', 'edible_fruit', 'leaf', 'edible_leaf',
    # usage / safety
    'cuisine', 'medicinal', 'poisonous_to_humans', 'poisonous_to_pets',
    # free text
    'description',
]

In [11]:
def _plant_record(plant_id, details):
    """Flatten one plant-details JSON object into a row dict keyed by DF_COLUMNS.

    Parameters
    ----------
    plant_id
        The id taken from the plant list; stored in the 'id' column (the
        value of any 'id' key inside ``details`` is not used).
    details : dict
        The parsed content of one ``plant_details/<id>.json`` file.

    Returns
    -------
    dict
        One entry per name in DF_COLUMNS, in that order. Keys missing from
        ``details`` get the placeholder string 'null'. 'hardiness_min' and
        'hardiness_max' are read from the nested ``details['hardiness']``
        object ('min' / 'max'); if that object is missing or not a dict, both
        fall back to 'null' instead of raising.
    """
    record = {column: details.get(column, 'null') for column in DF_COLUMNS}
    record['id'] = plant_id

    hardiness = details.get('hardiness')
    if not isinstance(hardiness, dict):
        hardiness = {}
    record['hardiness_min'] = hardiness.get('min', 'null')
    record['hardiness_max'] = hardiness.get('max', 'null')
    return record


ids = plant_list_df['id'].tolist()

# Empty frame with the target schema. It is not filled in this cell; it is
# kept defined in case other cells of the notebook reference it.
merged_df = pd.DataFrame(columns=DF_COLUMNS)

# ids from the plant list for which no JSON details file was found
ids_without_details = []
# one flattened dict per plant whose details file exists
detail_records = []

for plant_id in ids:
    # Details are expected at <CUR_DIR>/plant_details/<id>.json
    json_file_path = os.path.join(CUR_DIR, 'plant_details', f'{plant_id}.json')

    if not os.path.exists(json_file_path):
        ids_without_details.append(plant_id)
        continue

    with open(json_file_path, 'r') as f:
        data = json.load(f)

    detail_records.append(_plant_record(plant_id, data))

# Build the frame once from all records instead of concatenating row by row;
# this processes every id (not just the first) and avoids the pandas warning
# about concatenating with an empty frame, so no warning filter is needed.
plant_details_df = pd.DataFrame(detail_records, columns=DF_COLUMNS)

# Print the entire first row of the dataframe (if there is one).
if plant_details_df.empty:
    print("No plant detail files found.")
else:
    print(plant_details_df.iloc[0])

id                                                                   425
common_name                                              flowering-maple
scientific_name                                      [Abutilon hybridum]
other_name                                                            []
family                                                         Malvaceae
species_epithet                                                 hybridum
genus                                                           Abutilon
origin                                                   [Garden origin]
type                                                 Broadleaf evergreen
cycle                                                          Perennial
propagation            [Stem Propagation, Root Cutting, Air Layering ...
hardiness_min                                                          9
hardiness_max                                                         10
watering                                           