# Plant phenotyping (sideview) - single-file notebook
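This notebook reproduces the main processing pipeline of the repository in a stepwise, interactive way (single process, no workers).
It reads `config.json`, creates `metadata.csv` if it is missing, segments the plants, analyses their shape, saves a `parameters.csv` per plant and optionally combines them into `all_plants.csv`. A colour-analysis helper (`analyse_colour`) is defined but not called by the driver cell, and the optional growth-video creation of the original repository is not part of the cells below.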

Notes: make sure `config.json` paths are correct and you have installed dependencies (see next cell).

In [None]:
# Install requirements (uncomment and run if needed)
# !pip install -r requirements.txt
# If plantcv fails via pip, follow PlantCV installation instructions for your platform.

In [None]:
import glob
import json
import logging
import os
from pathlib import Path

import cv2
import ipywidgets as widgets
import pandas as pd
from IPython.display import display

try:
    from plantcv import plantcv as pcv
except ImportError:
    # analyse_shape / analyse_colour test for ``pcv is None``.
    pcv = None

DEFAULT_PATH = Path("config.default.json")
LOCAL_PATH = Path("config.local.json")


def load_cfg():
    """Return the default config overlaid with the local config.

    Each of ``DEFAULT_PATH`` and ``LOCAL_PATH`` is read only if it exists.
    The merge is a plain ``dict.update``, so a top-level key found in the
    local file replaces the whole value from the default file.

    Returns:
        dict: The merged configuration; empty when neither file exists.
    """
    merged = {}
    # Order matters: the local file is applied last so it wins.
    for source in (DEFAULT_PATH, LOCAL_PATH):
        if source.exists():
            merged.update(json.loads(source.read_text()))
    return merged

cfg = load_cfg()

# Example fields from the config (adjust names/types to your config).
# Every widget starts from the merged config value and falls back to the
# default given here when the key is absent.
blur_slider = widgets.FloatSlider(value=cfg.get("blur_sigma", 1.0), min=0.0, max=10.0, step=0.1, description='blur_sigma')
threshold_slider = widgets.IntSlider(value=cfg.get("threshold", 128), min=0, max=255, step=1, description='threshold')
min_area_text = widgets.IntText(value=cfg.get("min_area", 100), description='min_area')
use_morph_toggle = widgets.Checkbox(value=cfg.get("use_morphology", True), description='use_morphology')

save_button = widgets.Button(description="Save config.local.json", button_style='success')
# print() calls made inside a widget callback are not reliably rendered under
# the cell (JupyterLab routes them to the log console), so the confirmation
# message is captured in an Output widget that is part of the form.
save_status = widgets.Output()


def on_save_clicked(b):
    """Store the current widget values in ``config.local.json``.

    Keys that are already present in the local file but have no widget are
    kept, so saving from this form does not discard other local overrides.

    Args:
        b: The clicked button (passed by ipywidgets, not used).
    """
    local_cfg = {}
    if LOCAL_PATH.exists():
        try:
            local_cfg = json.loads(LOCAL_PATH.read_text())
        except json.JSONDecodeError:
            # An unreadable local file is replaced by the values saved below.
            local_cfg = {}
    if not isinstance(local_cfg, dict):
        local_cfg = {}
    local_cfg.update({
        "blur_sigma": float(blur_slider.value),
        "threshold": int(threshold_slider.value),
        "min_area": int(min_area_text.value),
        "use_morphology": bool(use_morph_toggle.value),
    })
    LOCAL_PATH.write_text(json.dumps(local_cfg, indent=2))
    save_status.clear_output()
    with save_status:
        print(f"Saved to {LOCAL_PATH}")


save_button.on_click(on_save_clicked)

ui = widgets.VBox([blur_slider, threshold_slider, min_area_text, use_morph_toggle, save_button, save_status])
display(ui)

In [None]:
# Pipeline configuration used by all following cells.
# Point config_path at another file if config.json is not in the working directory.
config_path = 'config.json'
config = json.loads(Path(config_path).read_text())
config

In [None]:
# Small utility functions (adapted from utils.py)
def create_folder(root, foldername):
    """Create ``root/foldername`` if it does not exist and return its path.

    Args:
        root: Parent directory.
        foldername: Name of the sub-folder. Values that are not path-like
            (for example numeric plant ids read from a CSV file) are
            converted with ``str`` before joining.

    Returns:
        The joined folder path.
    """
    if not isinstance(foldername, (str, bytes, os.PathLike)):
        # os.path.join raises TypeError for ints / numpy integers.
        foldername = str(foldername)
    folder = os.path.join(root, foldername)
    # exist_ok makes a separate isdir() check unnecessary.
    os.makedirs(folder, exist_ok=True)
    return folder

def load_image(image_path, rotate=None):
    """Read an image with OpenCV and optionally rotate it.

    Args:
        image_path: Path handed to ``cv2.imread``.
        rotate: Rotation code handed to ``cv2.rotate``; ``None`` skips the rotation.

    Returns:
        The loaded (and possibly rotated) image.

    Raises:
        FileNotFoundError: If ``cv2.imread`` returns ``None``.
    """
    loaded = cv2.imread(image_path)
    if loaded is None:
        raise FileNotFoundError(f'Unable to read image: {image_path}')
    return loaded if rotate is None else cv2.rotate(loaded, rotate)

def resize_image(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping the aspect ratio.

    Args:
        image: Array whose first two dimensions are (rows, columns).
        width: Target width in pixels; takes precedence when both are given.
        height: Target height in pixels, used only when ``width`` is ``None``.
        inter: Interpolation flag passed to ``cv2.resize``.

    Returns:
        The resized image, or ``image`` itself when neither size is given.
    """
    src_h, src_w = image.shape[:2]
    if width is None and height is None:
        return image
    if width is not None:
        scale = width / float(src_w)
        target = (width, int(src_h * scale))
    else:
        scale = height / float(src_h)
        target = (int(src_w * scale), height)
    return cv2.resize(image, target, interpolation=inter)

def setup_logger(filename='notebook_run'):
    """Return the ``'plant_notebook'`` logger, configured for console output.

    The logger level is set to INFO on every call. A stream handler is only
    attached when the logger has no handlers yet, so re-running the cell does
    not duplicate log lines.

    Args:
        filename: Accepted but not used by this implementation.

    Returns:
        logging.Logger: The configured logger.
    """
    notebook_logger = logging.getLogger('plant_notebook')
    notebook_logger.setLevel(logging.INFO)
    if notebook_logger.handlers:
        return notebook_logger
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    console.setFormatter(
        logging.Formatter('[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s')
    )
    notebook_logger.addHandler(console)
    return notebook_logger


logger = setup_logger()

In [None]:
# Preprocess: gather metadata.json files into metadata.csv (adapted from preprocess.py)
import re
def atof(text):
    """Return ``text`` as a float when it parses as one, otherwise unchanged."""
    try:
        return float(text)
    except ValueError:
        return text


def natural_keys(text):
    """Build a sort key that orders embedded numbers numerically.

    ``text`` is split on decimal numbers (an optional leading sign is
    dropped). The key alternates text fragments and floats, so ``"img2"``
    sorts before ``"img10"``.

    Only the captured number tokens are converted. Text fragments stay
    strings even when ``float`` would accept them (``"nan"``, ``"inf"``),
    which keeps every key position type-stable and therefore comparable.

    Args:
        text: String to build the key for.

    Returns:
        list: Alternating ``str`` and ``float`` items, starting with ``str``.
    """
    parts = re.split(r'[+-]?([0-9]+(?:[.][0-9]*)?|[.][0-9]+)', text)
    # With one capturing group, re.split yields text at even indices and the
    # captured number at odd indices.
    return [atof(part) if index % 2 else part for index, part in enumerate(parts)]

def gather_metadata_files(config):
    """Collect the JSON files under ``config['data_dir']`` into ``metadata.csv``.

    Every ``*.json`` file found recursively is loaded, tagged with the
    directory it lives in (``FilePath``) and flattened with
    ``pd.json_normalize``. Files that cannot be loaded are logged and skipped.

    Args:
        config: Configuration dict; ``data_dir`` and ``save_dir`` are read.

    Returns:
        pandas.DataFrame: One row per loaded file. When nothing could be
        loaded the empty frame is returned and no ``metadata.csv`` is
        written, so a later run scans again instead of reading an unusable
        cached file.
    """
    pattern = os.path.join(config['data_dir'], '**', '*.json')
    meta_files = sorted(glob.glob(pattern, recursive=True), key=natural_keys)
    meta_list = []
    for meta_filename in meta_files:
        try:
            with open(meta_filename) as f:
                data_dict = json.load(f)
            data_dict['FilePath'] = os.path.dirname(meta_filename)
        except Exception as e:
            # Best effort: one unreadable or unexpected file must not stop the scan.
            logger.warning(f'Failed to load {meta_filename}: {e}')
            continue
        meta_list.append(data_dict)
    meta_df = pd.json_normalize(meta_list)
    if meta_df.empty:
        logger.warning(f"No metadata could be loaded from {config['data_dir']}; metadata.csv was not written")
        return meta_df
    if 'Datetime' in meta_df.columns:
        # Values that do not match the expected format become NaT.
        meta_df['Datetime'] = pd.to_datetime(meta_df['Datetime'], format='%Y-%m-%d, %H:%M:%S.%f', errors='coerce')
    # json_normalize flattens nested keys to dotted names; use the short names.
    meta_df.rename(columns={'Extra.Angle': 'Angle', 'Extra.Height': 'Height', 'Extra.ConfigFile': 'ConfigFile'}, inplace=True)
    out_path = config['save_dir']
    os.makedirs(out_path, exist_ok=True)
    meta_df.to_csv(os.path.join(out_path, 'metadata.csv'), index=False)
    return meta_df

# Reuse metadata.csv when it exists, otherwise build it from the JSON files.
meta_csv = os.path.join(config['save_dir'], 'metadata.csv')
if os.path.exists(meta_csv):
    meta_df = pd.read_csv(meta_csv)
else:
    logger.info('metadata.csv not found, creating it by scanning metadata.json files...')
    meta_df = gather_metadata_files(config)

if 'Datetime' not in meta_df.columns:
    # Same exception type the column access below would raise, with a hint.
    raise KeyError("'Datetime' column missing from the metadata; check config['data_dir'] and the metadata JSON files")

# Basic sorting and columns expected by the pipeline.
# A single conversion covers both branches above (it is a no-op on values
# that are already datetimes); unparseable values become NaT.
meta_df['Datetime'] = pd.to_datetime(meta_df['Datetime'], errors='coerce')
meta_df['Date'] = meta_df['Datetime'].dt.date
meta_df.sort_values(by='Datetime', inplace=True)
meta_df.head()

In [None]:
# SegmentPlant class (adapted from segment_plant.py)
import numpy as np
from skimage.measure import LineModelND, ransac
class SegmentPlant:
    """Segment a plant from a BGR image and keep stick/pot bookkeeping.

    ``mask_hsv_threshold`` stores the HSV image that ``remove_stick`` and
    ``calculate_pot_pixels`` read, so it has to be called before them.
    """

    def __init__(self, image):
        """Keep ``image`` as the working image together with an untouched copy."""
        self.image = image
        self.orig_image = image.copy()
        # 'none' until remove_stick classifies the stick as 'bamboo' or 'black'.
        self.stick_type = 'none'
        # Sum of the per-row pixel counts collected by remove_stick for a 'black' stick.
        self.stick_pixels = 0
        # Result of calculate_pot_pixels.
        self.pot_pixels = 0
        # Row at which remove_stick saw the mask widen abruptly; stays 0 if no such row exists.
        self.start_row_plant = 0

    def white_balance_patch(self, patch):
        """White-balance ``self.image`` against a reference patch.

        Each channel is divided by its mean over the patch, the result is
        rescaled so that its maximum maps to 255 and stored back as ``uint8``.

        Args:
            patch: ``(x0, y0, x1, y1)`` pixel bounds of the reference patch.
        """
        rec_x0, rec_y0, rec_x1, rec_y1 = patch
        img_white_patch = self.image[rec_y0:rec_y1, rec_x0:rec_x1, :]
        # NOTE(review): an empty patch or a channel mean of 0 produces NaN/inf here.
        img_wb = (1.0 * self.image / img_white_patch.mean(axis=(0, 1))).astype('float32')
        img_wb /= img_wb.max()
        self.image = (255 * img_wb).astype('uint8')

    def mask_hsv_threshold(self, plant_value_thr=70, plant_saturation_thr=20):
        """Threshold the image in HSV space and return a binary mask.

        A pixel is a candidate when its value is below ``plant_value_thr`` or
        its saturation is above ``plant_saturation_thr``. The candidate mask is
        opened and closed with a 3x3 kernel, everything outside it is painted
        white, and the returned mask is 255 where all three channels of that
        image lie in ``[0, 200]``.

        Side effect: stores the HSV image in ``self.img_hsv``.
        """
        self.img_hsv = cv2.cvtColor(self.image, cv2.COLOR_BGR2HSV)
        mask_low_value = self.img_hsv[:, :, 2] < plant_value_thr
        mask_high_saturation = self.img_hsv[:, :, 1] > plant_saturation_thr
        mask_hsv = 255 * (mask_low_value | mask_high_saturation).astype('uint8')
        strel = np.ones((3, 3), np.uint8)
        mask_hsv = cv2.morphologyEx(cv2.morphologyEx(mask_hsv, cv2.MORPH_OPEN, strel), cv2.MORPH_CLOSE, strel)
        img_masked = cv2.bitwise_and(self.image, self.image, mask=mask_hsv)
        img_masked[mask_hsv == 0] = (255, 255, 255)
        return cv2.inRange(img_masked, (0, 0, 0), (200, 200, 200))

    def select_plant_coco(self, mask, min_coco_size=10000, coco_from_bottom=20, coco_from_center=500):
        """Pick the connected component that looks like the plant.

        A component qualifies when its area exceeds ``min_coco_size``, its
        bounding box ends within ``coco_from_bottom`` rows of the image bottom
        and the horizontal centre of its bounding box is closer than
        ``coco_from_center`` to the image centre.

        Returns:
            ``(coco_mask, starting_row)``: the mask of the single qualifying
            component and the top row of its bounding box, or the unchanged
            ``mask`` and ``0`` when zero or several components qualify.
        """
        coco_n, coco_label_map, coco_stats, coco_centroids = cv2.connectedComponentsWithStats(mask, connectivity=8)
        image_height = self.image.shape[0]
        image_mean_x = self.image.shape[1] / 2
        coco_select = []
        for coco_i in range(1, coco_n):  # label 0 is the background
            stats = coco_stats[coco_i]
            if stats[cv2.CC_STAT_AREA] <= min_coco_size:
                continue
            bbox_ymax = stats[cv2.CC_STAT_TOP] + stats[cv2.CC_STAT_HEIGHT]
            bbox_mean_x = stats[cv2.CC_STAT_LEFT] + stats[cv2.CC_STAT_WIDTH] / 2
            if bbox_ymax > image_height - coco_from_bottom and abs(bbox_mean_x - image_mean_x) < coco_from_center:
                coco_select.append(coco_i)
        if len(coco_select) == 1:
            starting_row = coco_stats[coco_select[0]][cv2.CC_STAT_TOP]
            coco_mask = 255 * (coco_label_map == coco_select[0]).astype('uint8')
        else:
            coco_mask = mask
            starting_row = 0
        return coco_mask, starting_row

    def remove_stick(self, mask, stick_hue_thr=20, stick_min_nr_pixels=50000, stick_ransac_point=1000, starting_row=0):
        """Detect a support stick in ``mask`` and remove it when it is coloured.

        Rows are scanned downwards from ``starting_row - 2``. Once more than
        20 non-empty rows were seen, the first row that is more than 1.5 times
        wider than their mean is stored in ``self.start_row_plant`` and the
        scan stops. If at most 40 non-empty rows were collected, ``mask`` is
        returned unchanged.

        Otherwise mask pixels with hue below ``stick_hue_thr`` are stick-colour
        candidates. With more than ``stick_min_nr_pixels`` candidates the stick
        is typed ``'bamboo'``: a line is fitted through the candidates with
        RANSAC, the stick pixels along that line are subtracted from the mask,
        and rows above ``start_row_plant - 5`` are cleared. With fewer
        candidates the stick is typed ``'black'``, ``self.stick_pixels`` is set
        and ``mask`` is returned unchanged.

        Requires ``mask_hsv_threshold`` to have been called (reads ``self.img_hsv``).

        Returns:
            The mask without the stick, or ``mask`` itself when nothing was removed.
        """
        # Count foreground pixels per row once instead of per pixel in Python.
        row_counts = np.count_nonzero(mask == 255, axis=1)
        num_pixels = []
        running_total = 0
        for i in range(max(0, starting_row - 2), mask.shape[0]):
            row_count = int(row_counts[i])
            if len(num_pixels) > 20 and row_count > 1.5 * (running_total / len(num_pixels)):
                self.start_row_plant = i
                break
            if row_count > 0:
                num_pixels.append(row_count)
                running_total += row_count
        if len(num_pixels) <= 40:
            return mask

        stick_thickness = running_total / len(num_pixels)
        mask_low_hue = (self.img_hsv[:, :, 0] < stick_hue_thr).astype('uint8')
        mask_stick_color = mask & mask_low_hue
        strel = np.ones((5, 5), np.uint8)
        mask_stick_color_clean = cv2.morphologyEx(cv2.morphologyEx(mask_stick_color, cv2.MORPH_OPEN, strel), cv2.MORPH_CLOSE, strel)
        # argwhere yields (row, col); swap the columns to (x, y) for the line fit.
        all_stick_color_points = np.argwhere(mask_stick_color_clean)[:, [1, 0]]
        if len(all_stick_color_points) <= stick_min_nr_pixels:
            self.stick_type = 'black'
            self.stick_pixels = running_total
            return mask

        self.stick_type = 'bamboo'
        # Fit on an evenly spaced subset of the candidate points.
        ransac_samples = min(stick_ransac_point, all_stick_color_points.shape[0])
        data_ids = np.linspace(0, all_stick_color_points.shape[0] - 1, num=ransac_samples).astype('int')
        stick_color_points = all_stick_color_points[data_ids]
        model_robust, inliers = ransac(stick_color_points, LineModelND, min_samples=2, residual_threshold=int(stick_thickness * 1.3) / 2, max_trials=1000)
        line_x = [0, self.image.shape[1]]
        line_y = model_robust.predict_y(line_x).astype('int')
        mask_stick_line = np.zeros(self.image.shape[0:2], dtype='uint8')
        # Plain ints: some OpenCV builds reject numpy integer scalars in point tuples.
        cv2.line(mask_stick_line, (int(line_x[0]), int(line_y[0])), (int(line_x[1]), int(line_y[1])), (255), int(stick_thickness * 1.3))
        mask_stick = 255 * (mask_stick_line & mask & mask_stick_color).astype('uint8')
        mask_stick = cv2.morphologyEx(mask_stick, cv2.MORPH_DILATE, strel)
        mask_without_stick = cv2.subtract(mask, mask_stick)
        filtered_mask = np.zeros(mask_without_stick.shape[:2], np.uint8)
        # Clamp at 0: a negative start would be read as "the last rows only"
        # and wipe almost the whole mask when start_row_plant is below 5.
        first_kept_row = max(0, self.start_row_plant - 5)
        filtered_mask[first_kept_row:, :] = mask_without_stick[first_kept_row:, :]
        return filtered_mask

    def calculate_pot_pixels(self, mask, pot_mask, plant_hue_range):
        """Count mask pixels inside ``pot_mask`` whose hue is outside ``plant_hue_range``.

        The count is stored in ``self.pot_pixels``. Requires
        ``mask_hsv_threshold`` to have been called (reads ``self.img_hsv``).

        Args:
            mask: Binary plant mask.
            pot_mask: Binary mask of the pot region, same size as ``mask``.
            plant_hue_range: ``(min_hue, max_hue)``; hues strictly below the
                first or strictly above the second value are counted.
        """
        pot_masked = cv2.bitwise_and(mask, mask, mask=pot_mask)
        min_hue = self.img_hsv[:, :, 0] < plant_hue_range[0]
        max_hue = self.img_hsv[:, :, 0] > plant_hue_range[1]
        plant_on_pot = min_hue | max_hue
        pot_mask = pot_masked & plant_on_pot
        self.pot_pixels = cv2.countNonZero(pot_mask)

In [None]:
# Analysis functions (from analyse_plant.py)
def analyse_shape(image, plant_mask, plant_max_height, plant_origin, stick_pixels=0, stick_type='none', pot_pixels=0, pixel_to_mm=230/516):
    """Measure shape traits of the segmented plant.

    Args:
        image: BGR image the mask belongs to.
        plant_mask: Binary (0/255) plant mask.
        plant_max_height: First image row of the rectangular region of
            interest, which extends to the bottom of the image.
        plant_origin: ``(x, y)`` reference point; its ``y`` is used to correct
            the plant height and the point is drawn on the analysis image.
        stick_pixels: Accepted for interface compatibility; not used here.
        stick_type: ``'black'`` selects the ``'cutto'`` ROI mode for masks
            with more than 200000 pixels, anything else uses ``'partial'``.
        pot_pixels: Pixel count subtracted from the area and the hull area.
        pixel_to_mm: Scale factor applied to lengths (squared for areas).

    Returns:
        ``(plant_img, analysis_img, shape)``: the plant on a white background,
        a copy annotated with hull, top line and origin, and the trait dict.

    Raises:
        ImportError: If PlantCV (``pcv``) is not available.
    """
    if pcv is None:
        raise ImportError('plantcv is required for full analyse_shape functionality')
    plant_img = cv2.bitwise_and(image, image, mask=plant_mask)
    plant_img[plant_mask == 0] = (255, 255, 255)
    id_objects, obj_hierarchy = pcv.find_objects(img=plant_img, mask=plant_mask)
    roi, roi_hierarchy = pcv.roi.rectangle(img=plant_img, x=0, y=plant_max_height, h=plant_mask.shape[0]-plant_max_height, w=plant_mask.shape[1])
    if stick_type == 'black' and cv2.countNonZero(plant_mask) > 200000:
        roi_type = 'cutto'
    else:
        roi_type = 'partial'
    roi_objects, hierarchy3, kept_mask, obj_area = pcv.roi_objects(img=plant_img, roi_contour=roi, roi_hierarchy=roi_hierarchy, object_contour=id_objects, obj_hierarchy=obj_hierarchy, roi_type=roi_type)
    obj, _ = pcv.object_composition(img=plant_img, contours=roi_objects, hierarchy=hierarchy3)
    analysis_img = np.copy(plant_img)
    hull = cv2.convexHull(obj)
    # Area and centre of mass come from the whole mask, not only the ROI objects.
    m = cv2.moments(plant_mask, binaryImage=True)
    area = m['m00'] - pot_pixels
    hull_area = cv2.contourArea(hull) - pot_pixels
    solidity = area / hull_area if hull_area != 0 else 0
    perimeter = cv2.arcLength(obj, closed=True)
    x, y, width, height = cv2.boundingRect(obj)
    # Drop the rows between the plant origin and the bottom of the image.
    height -= (plant_img.shape[0] - plant_origin[1])
    cmx, cmy = (float(m['m10'] / m['m00']), float(m['m01'] / m['m00']))
    center, axes, angle = cv2.fitEllipse(obj)
    major_axis = np.argmax(axes)
    minor_axis = 1 - major_axis
    major_axis_length = float(axes[major_axis])
    minor_axis_length = float(axes[minor_axis])
    eccentricity = float(np.sqrt(1 - (axes[minor_axis] / axes[major_axis]) ** 2))
    # caliper (longest path) calculation omitted here for brevity, keep as zero if not computed
    caliper_length = 0
    cv2.drawContours(analysis_img, [hull], -1, (255, 0, 255), 5)
    cv2.line(analysis_img, (x, y), (x + width, y), (255, 0, 255), 5)
    # plant_origin is a list when it comes from JSON; pass OpenCV a tuple of plain ints.
    origin_point = tuple(int(v) for v in plant_origin)
    cv2.circle(analysis_img, origin_point, 20, (255, 0, 255), -1)
    shape = {
        'area': area * (pixel_to_mm**2) / 100,
        'convex_hull_area': hull_area * (pixel_to_mm**2) / 100,
        'solidity': solidity,
        'perimeter': perimeter * pixel_to_mm / 10,
        'width': width * pixel_to_mm,
        'height': height * pixel_to_mm,
        'longest_path': caliper_length * pixel_to_mm,
        'center_of_mass_x': cmx,
        'center_of_mass_y': cmy,
        'convex_hull_vertices': len(hull),
        'ellipse_center_x': center[0],
        'ellipse_center_y': center[1],
        'ellipse_major_axis': major_axis_length * pixel_to_mm,
        'ellipse_minor_axis': minor_axis_length * pixel_to_mm,
        'ellipse_major_angle': float(angle),
        'ellipse_eccentricity': float(eccentricity)
    }
    return plant_img, analysis_img, shape

def analyse_colour(image, plant_mask, colourspaces='all'):
    """Run PlantCV's colour analysis on the masked plant.

    Args:
        image: Image passed to ``pcv.analyze_color`` as ``rgb_img``.
        plant_mask: Binary mask selecting the plant pixels.
        colourspaces: Forwarded as the ``colorspaces`` argument.

    Returns:
        The result of ``pcv.analyze_color``, or ``None`` when PlantCV
        (``pcv``) is not available.
    """
    if pcv is not None:
        return pcv.analyze_color(rgb_img=image, mask=plant_mask, colorspaces=colourspaces, label='plant')
    return None

In [None]:
# Postprocess class (adapted from postprocess.py)
class PostProcessResults:
    """Combine the per-plant ``parameters.csv`` files of a results folder."""

    def __init__(self, results_folder):
        """Remember the folder that holds one sub-folder per plant."""
        self.results_folder = results_folder

    def combine_csvs(self):
        """Concatenate every ``<results_folder>/*/parameters.csv``.

        The combined table gets a parsed ``datetime`` column and a ``date``
        column and is written to ``<results_folder>/all_plants.csv``.

        Returns:
            pandas.DataFrame: The combined table with a fresh 0..n-1 index.
            When no ``parameters.csv`` exists an empty frame is returned and
            nothing is written (``pd.concat`` would raise on an empty list).
        """
        result_files = sorted(glob.glob(os.path.join(self.results_folder, '*', 'parameters.csv')))
        if not result_files:
            return pd.DataFrame()
        all_plants_df = pd.concat((pd.read_csv(file) for file in result_files), ignore_index=True)
        all_plants_df['datetime'] = pd.to_datetime(all_plants_df['datetime'], errors='coerce')
        all_plants_df['date'] = all_plants_df['datetime'].dt.date
        all_plants_df.to_csv(os.path.join(self.results_folder, 'all_plants.csv'), index=False)
        return all_plants_df

    def summary_values(self):
        """Return the contents of ``all_plants.csv``, creating the file first if needed.

        Returns:
            pandas.DataFrame: The combined results with ``datetime`` parsed,
            or an empty frame when there are no results to combine.
        """
        all_plants = os.path.join(self.results_folder, 'all_plants.csv')
        if not os.path.exists(all_plants):
            self.combine_csvs()
        if not os.path.exists(all_plants):
            # combine_csvs found nothing to write.
            return pd.DataFrame()
        df = pd.read_csv(all_plants)
        df['datetime'] = pd.to_datetime(df['datetime'], errors='coerce')
        # further summarization can be added here
        return df

In [None]:
# Helper: process one scanning date (adapted from worker.process_scanning_date)
def process_scanning_date(date_df, config, segmented_save_folder=None, error_folder=None):
    """Segment and analyse every scan (row) of one scanning date.

    For each row the first PNG (in sorted order) under ``<FilePath>/Data`` is
    read and transposed, segmented with ``SegmentPlant`` and measured with
    ``analyse_shape``. Rows without an image, with an unreadable image or with
    a failing shape analysis are logged and skipped.

    Args:
        date_df: Metadata rows to process; the columns ``FilePath``,
            ``PlantId``, ``Datetime`` and ``Angle`` are read.
        config: Pipeline configuration; ``segmentation_pars`` and
            ``pixel_to_mm`` are read.
        segmented_save_folder: When not ``None``, the segmented plant image of
            every analysed scan is written to this folder.
        error_folder: Accepted for interface compatibility; not used here.

    Returns:
        ``(date_dict_list, analysis_images)``: one trait dict and one annotated
        image (resized to a height of 370 px) per analysed scan.
    """
    analysis_images = []
    date_dict_list = []
    for _, row in date_df.iterrows():
        # Sorted so the chosen image does not depend on directory listing order.
        image_files = sorted(glob.glob(os.path.join(row['FilePath'], 'Data', '*.png')))
        if not image_files:
            logger.info(f"No images found in {row['FilePath']}, scan skipped")
            continue
        image_file = image_files[0]
        try:
            # cv2.imread returns None for unreadable files; transpose then raises.
            img_bgr = cv2.transpose(cv2.imread(image_file))
        except Exception as e:
            logger.info(f'Unable to read {image_file}, skipping: {e}')
            continue
        seg_pars = config['segmentation_pars']
        segmenter = SegmentPlant(img_bgr)
        segmenter.white_balance_patch(seg_pars['wb_patch'])
        mask_plant_stick = segmenter.mask_hsv_threshold(seg_pars['plant_value_thr'], seg_pars['plant_saturation_thr'])
        coco_mask, starting_row = segmenter.select_plant_coco(mask_plant_stick, seg_pars['min_coco_size'], seg_pars['coco_from_bottom'], seg_pars['coco_from_center'])
        plant_mask = segmenter.remove_stick(coco_mask, seg_pars['stick_hue_thr'], seg_pars['stick_min_nr_pixels'], seg_pars['stick_ransac_point'], starting_row=starting_row)
        pot_mask_path = seg_pars.get('pot_mask', None)
        if pot_mask_path and os.path.exists(pot_mask_path):
            pot_mask = cv2.imread(pot_mask_path, 0)
            try:
                # NOTE(review): the key is spelled 'hue_range_pop' in this notebook; confirm it matches config.json.
                segmenter.calculate_pot_pixels(plant_mask, pot_mask, seg_pars['hue_range_pop'])
            except Exception as e:
                logger.info('Pot mask mismatch or error when calculating pot pixels: %s' % e)
        try:
            plant_img, analysis_img, shape = analyse_shape(image=segmenter.image, plant_mask=plant_mask, plant_max_height=segmenter.start_row_plant, plant_origin=seg_pars['plant_origin'], stick_pixels=segmenter.stick_pixels, stick_type=segmenter.stick_type, pot_pixels=segmenter.pot_pixels, pixel_to_mm=config['pixel_to_mm'])
        except Exception as e:
            logger.info(f'Error analysing {image_file}: {e}')
            continue
        if segmented_save_folder is not None:
            # NOTE(review): the original file-name expression was truncated in this
            # copy of the notebook; plant id, scan time and angle are used here.
            # Confirm against segment naming expected by downstream tools.
            scan_time = pd.Timestamp(row['Datetime'])
            time_label = 'unknown_time' if pd.isna(scan_time) else scan_time.strftime('%Y%m%d_%H%M%S')
            img_name = f"{row['PlantId']}_{time_label}_{row['Angle']}.png"
            cv2.imwrite(os.path.join(segmented_save_folder, img_name), plant_img)
        output_trait_dict = {'plant_id': row['PlantId'], 'datetime': row['Datetime'], 'angle': row['Angle'], **shape}
        analysis_img = resize_image(analysis_img, height=370)
        # NOTE(review): the subscript was truncated in this copy; 'Angle' is assumed
        # from the 'angle:' label and the trait dict above.
        cv2.putText(analysis_img, f"angle: {row['Angle']/10}", (4, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0,0,0), 1, cv2.LINE_AA)
        analysis_images.append(analysis_img)
        date_dict_list.append(output_trait_dict)
    return date_dict_list, analysis_images

In [None]:
# Driver cell: process all plants (single-process).
# WARNING: this can take significant time depending on dataset size.
save_root = config['save_dir']
os.makedirs(save_root, exist_ok=True)
error_folder = create_folder(save_root, 'errors') if config.get('save_errors', True) else None
individual_mode = config.get('individual_plants_mode', False)
selected_plants = config.get('selected_plants', [])
plants = meta_df['PlantId'].unique()
for plant_id in plants:
    if individual_mode and plant_id not in selected_plants:
        continue
    plant_df = meta_df.loc[meta_df['PlantId'] == plant_id]
    # Plant ids read from CSV may be numeric; folder names have to be strings.
    save_folder = create_folder(save_root, str(plant_id))
    segmented_save_folder = create_folder(save_folder, 'segmented_plants') if config.get('save_segmented_plants', False) else None
    one_plant_param_list = []
    # Rows whose Datetime could not be parsed have no Date and are skipped.
    for current_date in plant_df['Date'].dropna().unique():
        date_df = plant_df.loc[plant_df['Date'] == current_date]
        date_dict_list, analysis_images = process_scanning_date(date_df, config, segmented_save_folder, error_folder)
        if not date_dict_list:
            continue
        one_plant_param_list.append(pd.DataFrame(date_dict_list))
    if not one_plant_param_list:
        logger.info(f'No data for plant {plant_id}, skipping save')
        continue
    one_plant_df = pd.concat(one_plant_param_list, ignore_index=True)
    parameters_path = os.path.join(save_folder, 'parameters.csv')
    one_plant_df.to_csv(parameters_path, index=False)
    # The path is built outside the f-string: reusing the enclosing quote
    # character inside a replacement field is a SyntaxError before Python 3.12.
    logger.info(f'Finished {plant_id}, saved parameters to {parameters_path}')

# Optional: gather results
if config.get('gather_results', True):
    pp = PostProcessResults(save_root)
    try:
        pp.combine_csvs()
    except ValueError as e:
        # pd.concat raises ValueError when there is no parameters.csv to combine.
        logger.info(f'No per-plant results to combine: {e}')
    else:
        logger.info('Postprocessing complete')

## How to run
1. Inspect `config.json` and update `data_dir` and `save_dir` paths.
2. In a terminal or inside the first notebook cell, install requirements if you haven't: `pip install -r requirements.txt`.
3. Open this notebook in Jupyter and run cells top-to-bottom.

Notes:
- The notebook is single-process and easier to debug. For large datasets consider re-enabling multiprocessing outside the notebook.
- PlantCV is required for full `analyse_shape` functionality; if missing the notebook will raise an ImportError at that step.