In [1]:
import pandas as pd
import numpy as np
import glob
import os
import datetime
import re
from datetime import datetime

# %cd /Volumes/SEB_USB/Season_10/

In [2]:
def get_date_from_string(text):
    """Extract the first ``YYYY-MM-DD`` date embedded in a string.

    Args:
        text: String to search (the callers in this notebook pass a CSV
            file's base name).

    Returns:
        A ``datetime.date`` parsed from the first ``YYYY-MM-DD`` match.

    Raises:
        ValueError: If ``text`` contains no ``YYYY-MM-DD`` pattern, or if the
            matched digits do not form a valid calendar date.
    """
    match = re.search(r'\d{4}-\d{2}-\d{2}', text)
    if match is None:
        # Fail with the offending string instead of an opaque
        # "'NoneType' object has no attribute 'group'".
        raise ValueError(f'No YYYY-MM-DD date found in {text!r}')

    return datetime.strptime(match.group(), '%Y-%m-%d').date()


# --------------------------------------------------
def get_ps2_dataframe(csv_path):
    """Load every CSV matching a glob pattern into one date-stamped DataFrame.

    Each file is read with ``pd.read_csv``, given a ``date`` column parsed
    from the ``YYYY-MM-DD`` in its file name, and the per-file frames are
    concatenated.

    Args:
        csv_path: Glob pattern selecting the CSV files to load.

    Returns:
        The concatenated DataFrame, without the ``Unnamed: 0`` column when
        one is present. The per-file row indices are kept as-is, so index
        values may repeat across files.

    Raises:
        ValueError: If no file matches ``csv_path``.
    """
    # glob returns files in arbitrary order; sort for a reproducible row order.
    csv_files = sorted(glob.glob(csv_path))
    if not csv_files:
        raise ValueError(f'No CSV files match pattern {csv_path!r}')

    df_list = []
    for csv in csv_files:
        date_df = pd.read_csv(csv)
        date = get_date_from_string(os.path.basename(csv))
        date_df['date'] = pd.to_datetime(date)
        df_list.append(date_df)

    # 'Unnamed: 0' only exists when the CSVs were written with their index;
    # do not fail on files that were written without it.
    ps2_df = pd.concat(df_list).drop(columns='Unnamed: 0', errors='ignore')

    return ps2_df


# --------------------------------------------------
def clean_plot_column_rgb_flir(rgb_flir_merged):
    """Rewrite the ``plot`` column as a short zero-padded code.

    Every ``plot`` string is split on ``_``; the fields at positions 6 and 8
    (0-based) are each left-padded with zeros to at least two characters and
    concatenated. Presumably these are the range and column numbers of the
    plot label -- confirm against the input files.

    Args:
        rgb_flir_merged: DataFrame with a string ``plot`` column. The column
            is overwritten on this very object.

    Returns:
        The same DataFrame object that was passed in.
    """
    fields = rgb_flir_merged['plot'].str.split('_', expand=True)
    first_code = fields[6].astype(str).str.zfill(2)
    second_code = fields[8].astype(str).str.zfill(2)
    rgb_flir_merged['plot'] = first_code + second_code

    return rgb_flir_merged


# --------------------------------------------------
def clean_plot_column_ps2(ps2_df):
    """Derive a zero-padded ``plot`` code from ``Plot`` and index the frame by it.

    Every ``Plot`` string is split on single spaces; the fields at positions
    6 and 8 (0-based) are each left-padded with zeros to at least two
    characters and concatenated into a new ``plot`` column.

    Args:
        ps2_df: DataFrame with a string ``Plot`` column. Note that the new
            ``plot`` column is also added to this input object.

    Returns:
        A new DataFrame without the ``Plot`` column, indexed by ``plot``.
    """
    pieces = ps2_df['Plot'].str.split(' ', expand=True)
    padded = [pieces[position].astype(str).str.zfill(2) for position in (6, 8)]
    ps2_df['plot'] = padded[0] + padded[1]

    return ps2_df.drop(columns='Plot').set_index('plot')


# --------------------------------------------------
def get_3d_dataframe(csv_path):
    """Load every CSV matching a glob pattern into one date-stamped DataFrame.

    Each file is read with ``pd.read_csv``, given a ``date`` column parsed
    from the ``YYYY-MM-DD`` in its file name, and the per-file frames are
    concatenated.

    Args:
        csv_path: Glob pattern selecting the CSV files to load.

    Returns:
        The concatenated DataFrame, without the ``Unnamed: 0`` column when
        one is present. The per-file row indices are kept as-is, so index
        values may repeat across files.

    Raises:
        ValueError: If no file matches ``csv_path``.
    """
    # glob returns files in arbitrary order; sort for a reproducible row order.
    csv_files = sorted(glob.glob(csv_path))
    if not csv_files:
        raise ValueError(f'No CSV files match pattern {csv_path!r}')

    df_list = []
    for csv in csv_files:
        date_df = pd.read_csv(csv)
        date = get_date_from_string(os.path.basename(csv))
        date_df['date'] = pd.to_datetime(date)
        df_list.append(date_df)

    # 'Unnamed: 0' only exists when the CSVs were written with their index;
    # do not fail on files that were written without it.
    three_df = pd.concat(df_list).drop(columns='Unnamed: 0', errors='ignore')

    return three_df


# --------------------------------------------------
def add_fieldbook_information(df, fb_path):
    """Attach field-book columns to ``df`` and build a descriptive MultiIndex.

    The field book CSV is read with every column as a string and indexed by
    ``plot``; ``df`` is then left-joined onto it by index, so ``df`` is
    expected to be indexed by the same plot codes.

    Args:
        df: DataFrame indexed by plot code and containing a ``date`` column.
        fb_path: Path to the field book CSV. It must provide the ``plot``
            column plus the remaining index columns listed below.

    Returns:
        The joined DataFrame indexed by date, plot and the field-book
        columns, with ``plant_name`` as an extra last level when ``df`` has
        such a column.
    """
    fieldbook = pd.read_csv(fb_path, dtype='str').set_index('plot')

    index_columns = [
        'date', 'plot', 'year', 'experiment', 'field',
        'treatment', 'rep', 'range', 'column', 'genotype',
    ]
    # Per-plant tables carry a plant identifier; keep it as the last level.
    if 'plant_name' in df.columns:
        index_columns.append('plant_name')

    joined = fieldbook.join(df).reset_index()

    return joined.set_index(index_columns)

# RGB & FLIR

### Open data

In [3]:
# Load the RGB (stereoTop) clustering table, drop its 'treatment' and
# 'genotype' columns, and rewrite the 'plot' labels as zero-padded codes.
rgb = (
    pd.read_csv('Haiquan/stereoTop_full_season_clustering.csv')
    .drop(columns=['treatment', 'genotype'])
    .pipe(clean_plot_column_rgb_flir)
)

# Same for the FLIR table, which also loses its bounding-box coordinates and
# a few other columns before the 'plot' labels are rewritten.
thermal = (
    pd.read_csv('Haiquan/s10_flir_rgb_clustering_v4.csv')
    .drop(columns=[
        'min_x', 'max_x', 'min_y', 'max_y',
        'index', 'treatment', 'genotype', 'bounding_area_m2', 'double_lettuce',
    ])
    .pipe(clean_plot_column_rgb_flir)
)

### Convert date string to datetime

In [4]:
# Parse the 'date' strings of both tables into pandas datetimes (in place).
for frame in (thermal, rgb):
    frame['date'] = pd.to_datetime(frame['date'])

### Set shared index

In [5]:
# Index both tables by plot code, the key the field book is joined on.
rgb, thermal = rgb.set_index('plot'), thermal.set_index('plot')

### Add fieldbook information

In [6]:
# Attach the field-book columns to both tables; the result is indexed by
# date, plot and the field-book columns.
fieldbook_csv = 'lettuce_field_book.csv'
rgb = add_fieldbook_information(rgb, fieldbook_csv)
thermal = add_fieldbook_information(thermal, fieldbook_csv)


### Join RGB and FLIR

In [7]:
# Left-join the thermal columns onto the RGB table by index, then discard
# rows in which both 'median' and 'bounding_area_m2' are missing.
rgb_flir_merged = (
    rgb
    .join(thermal)
    .dropna(subset=['median', 'bounding_area_m2'], how='all')
)

# RGB|FLIR & PSII

### Open PSII data

In [8]:
# Read all per-date PSII CSVs and index the combined table by plot code.
ps2_df = clean_plot_column_ps2(get_ps2_dataframe('./Haiquan/PS2/*.csv'))

### Add fieldbook information

In [9]:
# Attach the field-book columns to the PSII table.
ps2_df = add_fieldbook_information(df=ps2_df, fb_path='lettuce_field_book.csv')

### Merge RGB|FLIR and PSII

In [10]:
# Outer-join PSII onto RGB|FLIR by index, flatten the index back into
# columns, and discard rows where 'bounding_area_m2', 'median' and 'FV/FM'
# are all missing.
merged = (
    rgb_flir_merged
    .join(ps2_df, how='outer')
    .reset_index()
    .dropna(subset=['bounding_area_m2', 'median', 'FV/FM'], how='all')
)

### Get individual plant mapping values

In [11]:
# One row per plant_name (its first occurrence in `merged`), keeping only
# the descriptive columns, indexed by plant_name.
plant_map_columns = [
    'plot', 'year', 'experiment', 'field', 'treatment',
    'rep', 'range', 'column', 'genotype', 'plant_name',
]
individual_plant_map = (
    merged
    .drop_duplicates(subset='plant_name')[plant_map_columns]
    .set_index('plant_name')
)

# RGB|FLIR|PSII & 3D

### Open 3D data

In [12]:
# Read all per-date 3D CSVs, key them by plant_name, and left-join them onto
# the plant map so every 3D row gains its descriptive columns.
three_df = get_3d_dataframe('tda_volumes_full_resolution/*.csv').set_index('plant_name')
three_df = individual_plant_map.join(three_df).reset_index()

### Merge RGB|FLIR|PSII and 3D

In [15]:
# Outer-join the 3D table onto RGB|FLIR|PSII using the same key columns on
# both sides, then discard rows where all four listed measurement columns
# are missing.
join_keys = [
    'date', 'plant_name', 'plot', 'year', 'experiment', 'field',
    'treatment', 'rep', 'range', 'column', 'genotype',
]
final_merged = (
    merged.set_index(join_keys)
    .join(three_df.set_index(join_keys), how='outer')
    .reset_index()
    .dropna(subset=['bounding_area_m2', 'median', 'FV/FM', 'hull_volume'], how='all')
)

### Save final merged file

In [17]:
# Write the merged table to the working directory. The row index is written
# too (pandas default) and ends up as an unnamed first column in the CSV.
output_csv = 'season10_lettuce_gantry_data_full.csv'
final_merged.to_csv(output_csv)