In [1]:
import os
from pathlib import Path
import dateutil.parser as dparser
import pandas as pd
import numpy as np
import datetime
import tomllib

In [28]:
# Notebook-wide pandas display settings, applied one option at a time.
_DISPLAY_OPTIONS = {
    'display.width': 200,
    'display.max_colwidth': 100,
    'display.max_rows': 60,
    'display.min_rows': 20,
}
for _option_name, _option_value in _DISPLAY_OPTIONS.items():
    pd.set_option(_option_name, _option_value)

# Uncomment to show every row instead of truncating long frames.
# pd.set_option("display.max_rows", None)

# Dataset root directories come from a TOML file resolved against the current
# working directory; tomllib.load needs the file opened in binary mode.
with open('./dataset_locations.toml', 'rb') as f:
    config = tomllib.load(f)

In [23]:
# Canonical (dash-separated, zero-padded) variety names and their one-letter codes.
_VARIETY_LETTERS = {
    'Haines': 'A',
    'CNJ05-64-9': 'B',
    'CNJ05-73-39': 'C',
    'CNJ05-80-2': 'D',
    'CNJ06-22-10': 'E',
    'CNJ06-3-1': 'F',
    'CNJ12-30-24': 'G',
    'CNJ14-31-142': 'H',
}

# Maps every spelling of a variety/plot name used in this notebook to its letter.
# The lowercase aliases are derived from the canonical names so the two sets
# cannot drift apart; 'Haines' has no separate underscore spelling, so it is
# listed only once (the original literal repeated that key).
var_to_letter = {
    # uppercase
    **_VARIETY_LETTERS,
    # lowercase
    **{name.lower(): letter for name, letter in _VARIETY_LETTERS.items()},
    # with underscore, no leading zeroes
    'CNJ_5_64_9': 'B',
    'CNJ_5_73_39': 'C',
    'CNJ_5_80_2': 'D',
    'CNJ_6_22_10': 'E',
    'CNJ_6_3_1': 'F',
    'CNJ_12_30_24': 'G',
    'CNJ_14_31_142': 'H',
    # berrywise only
    # NOTE(review): differs from 'CNJ_12_30_24' by a single digit yet maps to
    # its own letter -- confirm this is a distinct plot and not a typo.
    'CNJ_12_20_24': 'I',
}

In [40]:
# Root directory of the bog 2 daily photos, taken from the loaded config.
bog_2_root = config['bog_2_daily_photos_root']

# One sub-directory per variety. Each entry carries a leading '/' because
# get_bog_2_df joins it to bog_2_root by plain string concatenation.
varieties = [
    '/' + variety_name
    for variety_name in (
        'CNJ05-64-9',
        'CNJ05-73-39',
        'CNJ05-80-2',
        'CNJ06-22-10',
        'CNJ06-3-1',
        'CNJ14-31-142',
        'CNJ12-30-24',
        'Haines',
    )
]

def get_bog_2_df():
    """Index the bog 2 daily photos into a DataFrame, one row per file.

    Iterates the directory ``bog_2_root + variety`` for every entry of the
    module-level ``varieties`` list (non-file entries are skipped) and derives
    each column from the lower-cased file name.

    Returns:
        pandas.DataFrame with the columns

        * ``filename``  -- lower-cased file name
        * ``rel_path``  -- last two components of the file's path (original case)
        * ``plot``      -- lower-cased variety entry without its leading '/'
        * ``date``      -- ``datetime.date`` parsed from the file name
        * ``fungicide`` -- 'treatment', 'control' or None when neither marker is present
        * ``file_type`` -- 'jpg', 'cr2' or None when neither marker is present

    Raises:
        Propagates whatever ``dparser.parse`` raises when the tokens left in
        the file name do not contain a recognisable date.
    """
    columns = ['filename', 'rel_path', 'plot', 'date', 'fungicide', 'file_type']
    # Substrings removed from the file name before the date is parsed.
    noise_tokens = ('.', 'jpg', 'ctrl', 'control', 'trt1', 'trt', 'cr2')

    # Collect rows in a list and build the frame once; appending with
    # df.loc[len(df)] re-allocates the frame for every file.
    rows = []

    for variety in varieties:
        plot = variety.lower()[1:]
        for path in Path(bog_2_root + variety).iterdir():
            if not path.is_file():
                continue
            filename = path.parts[-1].lower()
            rel_path = str(Path(*path.parts[-2:]))

            if 'trt' in filename:
                fungicide = 'treatment'
            elif 'ctrl' in filename or 'control' in filename:
                fungicide = 'control'
            else:
                fungicide = None

            # 'jpg' is checked first so it still wins when both markers occur,
            # as before. The explicit None branch stops an unrecognised file
            # from raising UnboundLocalError or inheriting the previous
            # file's type.
            if 'jpg' in filename:
                file_type = 'jpg'
            elif 'cr2' in filename:
                file_type = 'cr2'
            else:
                file_type = None

            date_str = filename
            for token in noise_tokens:
                date_str = date_str.replace(token, '')
            # Treat '-' and '_' as separators and keep only the last three
            # fields, which are then handed to the fuzzy date parser.
            date_str = ' '.join(date_str.replace('-', '$').replace('_', '$').split('$')[-3:])

            date = dparser.parse(date_str, fuzzy=True).date()

            rows.append([filename, rel_path, plot, date, fungicide, file_type])

    return pd.DataFrame(rows, columns=columns)

if __name__ == '__main__':
    # Build the bog 2 photo index and report how many rows it contains.
    bog_2_df = get_bog_2_df()
    # print('num control pics:', bog_2_df[(bog_2_df.file_type == 'jpg') & (bog_2_df.fungicide == 'control')].shape[0])
    # print('num trt pics:', bog_2_df[(bog_2_df.file_type == 'jpg') & (bog_2_df.fungicide == 'treatment')].shape[0])
    # display(bog_2_df)

    # Number of rows recorded for each distinct date, in order of first appearance.
    unique_dates = bog_2_df['date'].unique()
    images_per_date = [
        bog_2_df.loc[bog_2_df['date'] == day].shape[0]
        for day in unique_dates
    ]

    # print(f'{len(unique_dates)=}')
    # print(f'{images_per_date=}')
    # print(f'{sum(images_per_date)/16=}')
    print(bog_2_df.shape[0])

830


In [27]:
# berry_wise_root = config['berry_wise_root']
# def get_berry_wise_df():
#     berry_wise_df = pd.DataFrame(columns=['filename', 'plot', 'date', 'fungicide', 'file_type', 'is_rotten'])
#     plot_dirs = list(Path(berry_wise_root).iterdir())

#     for plot_dir in Path(berry_wise_root).iterdir():
#         for berry_dir in plot_dir.iterdir():

#             berry_files = list(berry_dir.iterdir())
#             plots = [p.parts[-3].replace('_TRT', '').replace('_CTRL', '') for p in berry_files]
#             dates = [int(p.stem) for p in berry_files]
#             fungicides = ['treatment' if 'TRT' in p.parts[-3] else 'control' for p in berry_files]
#             file_types = ['png' for _ in berry_files]

#             new_rows = pd.DataFrame({
#                 'filename': berry_files,
#                 'plot': plots,
#                 'date': dates,
#                 'fungicide': fungicides,
#                 'file_type': file_types
#             })

#             berry_wise_df = pd.concat([berry_wise_df, new_rows], ignore_index=True)

#     # manually filter out CNJ_6_3_1_TRT because the annotations are inaccurate
#     berry_wise_df = berry_wise_df[~((berry_wise_df['plot']=='CNJ_6_3_1') & (berry_wise_df['fungicide']=='treatment'))]
#     return berry_wise_df

def get_berry_wise_df(berry_wise_root=None, letter_map=None):
    """Index the berry-wise crops into a DataFrame, one row per file.

    Layout read under the root directory::

        rot.txt                        lines of "<plot dir name>: <track>, <track>, ..."
        <plot dir>/<track dir>/<file>  track dir name and file stem must be integers

    A plot directory whose name contains 'TRT' is labelled 'treatment', any
    other one 'control'; the plot name is the directory name with '_TRT' /
    '_CTRL' removed.

    Args:
        berry_wise_root: Dataset root directory. Defaults to
            ``config['berry_wise_root']``.
        letter_map: Mapping from plot name to the letter used in
            ``track_unique``. Defaults to the module-level ``var_to_letter``.

    Returns:
        pandas.DataFrame with the columns

        * ``filename``     -- ``pathlib.Path`` of the file
        * ``plot``         -- plot name without the treatment suffix
        * ``fungicide``    -- 'treatment' or 'control'
        * ``track``        -- track directory name as an int
        * ``track_unique`` -- ``"<track><letter>_<trt|ctrl>"``
        * ``date``         -- file stem as an int
        * ``file_type``    -- always 'png' (the extension is not inspected)
        * ``is_rotten``    -- True when the track is listed for its plot
          directory in rot.txt

        Rows for plot 'CNJ_6_3_1' under treatment are dropped; the index is
        not reset afterwards.

    Raises:
        ValueError: A non-empty track directory name or a file stem is not an
            integer, or a non-blank rot.txt line does not contain exactly one ':'.
        KeyError: A plot with at least one file is missing from ``letter_map``.
    """
    if berry_wise_root is None:
        berry_wise_root = config['berry_wise_root']
    if letter_map is None:
        letter_map = var_to_letter
    root = Path(berry_wise_root)

    # rot.txt: plot directory name -> set of rotten track names (as strings).
    rot_dict = {}
    with open(root / "rot.txt", 'r') as f:
        for line in f:
            if not line.strip():
                continue
            key, tracks = line.split(":")
            rot_dict[key.strip()] = {t.strip() for t in tracks.split(",")}

    columns = ['filename', 'plot', 'fungicide', 'track', 'track_unique', 'date', 'file_type', 'is_rotten']
    # Rows are accumulated in a list and turned into a frame once; calling
    # pd.concat per directory copies the whole frame on every iteration.
    records = []

    for plot_dir in root.iterdir():
        if plot_dir.is_file():
            continue
        raw_key = plot_dir.name
        plot = raw_key.replace('_TRT', '').replace('_CTRL', '')
        fungicide = 'treatment' if 'TRT' in raw_key else 'control'
        # Built outside the f-string: reusing the same quote character inside
        # an f-string expression is a SyntaxError before Python 3.12.
        suffix = 'ctrl' if fungicide == 'control' else 'trt'
        rotten_tracks = rot_dict.get(raw_key, set())

        for berry_dir in plot_dir.iterdir():
            if not berry_dir.is_dir():
                continue
            berry_files = list(berry_dir.iterdir())
            if not berry_files:
                # Nothing to record, so the directory name is never validated.
                continue

            track = int(berry_dir.name)
            track_unique = f'{track}{letter_map[plot]}_{suffix}'
            is_rotten = berry_dir.name in rotten_tracks

            for berry_file in berry_files:
                records.append({
                    'filename': berry_file,
                    'plot': plot,
                    'fungicide': fungicide,
                    'track': track,
                    'track_unique': track_unique,
                    'date': int(berry_file.stem),
                    'file_type': 'png',
                    'is_rotten': is_rotten,
                })

    berry_wise_df = pd.DataFrame(records, columns=columns)

    # manually filter out CNJ_6_3_1_TRT because the annotations are inaccurate
    inaccurate = (berry_wise_df['plot'] == 'CNJ_6_3_1') & (berry_wise_df['fungicide'] == 'treatment')
    return berry_wise_df[~inaccurate]

if __name__ == "__main__":
    # Build the berry-wise index and show it as the cell's output.
    # NOTE(review): `display` is not imported anywhere in this file; presumably
    # it is the builtin injected by the IPython/Jupyter kernel, in which case
    # this line raises NameError when run as a plain script -- confirm.
    berry_wise_df = get_berry_wise_df()
    display(berry_wise_df)

Unnamed: 0,filename,plot,fungicide,track,track_unique,date,file_type,is_rotten
0,/home/ronan/Documents/Datasets/cranberry/berry_wise/SAM_CROPS/images/masks/CNJ_5_73_39_TRT/2/24.png,CNJ_5_73_39,treatment,2,2C_trt,24,png,True
1,/home/ronan/Documents/Datasets/cranberry/berry_wise/SAM_CROPS/images/masks/CNJ_5_73_39_TRT/2/16.png,CNJ_5_73_39,treatment,2,2C_trt,16,png,True
2,/home/ronan/Documents/Datasets/cranberry/berry_wise/SAM_CROPS/images/masks/CNJ_5_73_39_TRT/2/10.png,CNJ_5_73_39,treatment,2,2C_trt,10,png,True
3,/home/ronan/Documents/Datasets/cranberry/berry_wise/SAM_CROPS/images/masks/CNJ_5_73_39_TRT/2/18.png,CNJ_5_73_39,treatment,2,2C_trt,18,png,True
4,/home/ronan/Documents/Datasets/cranberry/berry_wise/SAM_CROPS/images/masks/CNJ_5_73_39_TRT/2/30.png,CNJ_5_73_39,treatment,2,2C_trt,30,png,True
5,/home/ronan/Documents/Datasets/cranberry/berry_wise/SAM_CROPS/images/masks/CNJ_5_73_39_TRT/2/7.png,CNJ_5_73_39,treatment,2,2C_trt,7,png,True
6,/home/ronan/Documents/Datasets/cranberry/berry_wise/SAM_CROPS/images/masks/CNJ_5_73_39_TRT/2/20.png,CNJ_5_73_39,treatment,2,2C_trt,20,png,True
7,/home/ronan/Documents/Datasets/cranberry/berry_wise/SAM_CROPS/images/masks/CNJ_5_73_39_TRT/2/4.png,CNJ_5_73_39,treatment,2,2C_trt,4,png,True
8,/home/ronan/Documents/Datasets/cranberry/berry_wise/SAM_CROPS/images/masks/CNJ_5_73_39_TRT/2/33.png,CNJ_5_73_39,treatment,2,2C_trt,33,png,True
9,/home/ronan/Documents/Datasets/cranberry/berry_wise/SAM_CROPS/images/masks/CNJ_5_73_39_TRT/2/32.png,CNJ_5_73_39,treatment,2,2C_trt,32,png,True


In [5]:
# ronan_photos_root = config['ronan_photos_root']
# varieties = [
#     '/control/CNJ05-80-2_Control',
#     '/control/CNJ06-22-10_Control',
#     '/control/CNJ06-3-1_Control',
#     '/control/CNJ14-31-142_Control',
#     '/control/CNJHaines_Control',
#     '/treatment/CNJ05-80-2',
#     '/treatment/CNJ06-22-10',
#     '/treatment/CNJ06-3-1',
#     '/treatment/CNJ14-31-142',
#     '/treatment/CNJHaines',
# ]

# ronan_photos_df = pd.DataFrame(columns=['filename', 'rel_path', 'plot', 'date', 'fungicide', 'file_type'])

# for v in varieties:
#     v_dir = Path(ronan_photos_root+v)
#     if not v_dir.is_dir():
#         print(f'{v_dir} does not exist. Skipping.')
#         continue
#     for path in v_dir.rglob("*"):
#         if not path.is_file():
#             continue

#         if 'treatment' in v:
#             fungicide = 'treatment'
#         elif 'control' in v:
#             fungicide = 'control'

#         plot = v.split("/")[-1].replace("Control", '').replace("_", '').lower()

#         date = dparser.parse(path.parts[-2], fuzzy=True).date()

#         filename = path.parts[-1].lower()
#         rel_path = str(Path(*path.parts[-2:]))

#         if 'cr2' in filename:
#             file_type = 'cr2'
#         elif 'cr3' in filename:
#             file_type = 'cr3'
#         elif 'jpg' in filename:
#             file_type = 'jpg'

#         ronan_photos_df.loc[len(ronan_photos_df)] = [filename, rel_path, plot, date, fungicide, file_type]

# if __name__ == '__main__':
#     print('num control pics:', ronan_photos_df[(ronan_photos_df.file_type == 'jpg') & (ronan_photos_df.fungicide == 'control')].shape[0])
#     print('num trt pics:', ronan_photos_df[(ronan_photos_df.file_type == 'jpg') & (ronan_photos_df.fungicide == 'treatment')].shape[0])

In [6]:
# groups = ronan_photos_df.groupby(['plot', 'date', 'fungicide'])

# if __name__ == '__main__':
#     display(groups['filename'].apply(list))