In [1]:
import random
import pandas as pd
import DuckbotExptSetupUtils as exp
import os
import json

### How to use this notebook
 - Enter the details requested in part 1
 - In part 2 each cell will execute a different analysis. Feel free to use and modify based on your own needs.
 
 ##### Assumptions made by this notebook
  - Experimental data are a timeseries of images of duckweed growing in 24 plat wells. Image files are stored in the same directory according to the format "exptname_plate_wellID_yyyymmdd.jpg.
  - Experiment metadata are stored in a .json file, with information on which samples and media are in which wells. 

### Part 1 - Define filenames and paths

In [4]:
#Where is the Json file with the experiment metadata and what is it called?
metadata_dir = '/Users/Orlando/Documents/Github_clones/duckbot/test_data/TestExptDir/'
metadata_filename = 'TestFile.json'
metadata_path = metadata_dir + metadata_filename

#Where are the image files that you want to analyze and what is the experiment ID (should be first part of each image filename)
img_data_dir = "/Users/Orlando/Documents/Github_clones/duckbot/test_data/odl_dataanalaysistest1"
expt_prefix = "TestData"

#Where do you want the figures produced by this script to end up?
output_figure_dir = '/Users/Orlando/Documents/Github_clones/duckbot/test_data/'

### Part 2 - Building a dataframe with percentage duckweed coverage in each image

In [83]:
import pathlib
import re

p = pathlib.Path(img_data_dir)   # current directory, insert your directory here
data_filenames = [x.stem for x in p.glob('*.jpg')]

# for d in data_filenames:
#     plate = re.search('plate(.+?)_well', d).group(1)
#     print(plate)

data_df = pd.DataFrame({"filename": data_filenames})
print(data_df)

def find_plate_well_id(str):
    well_id = re.search('well(.+?)_', str).group(1)
    plate_id = re.search('plate(.+?)_well', str).group(1)
    return(f'Plate_{plate_id}_Well_{well_id}')


data_df['plate_well_id'] = data_df.apply(lambda row: find_plate_well_id(row.filename), axis=1)
# data_df['well_id'] = data_df.apply(lambda row: find_well_id(row.filename), axis=1)
data_df['date'] = data_df.apply(lambda row: row.filename[-10:], axis=1)
# data_df['plate_well_id'] = f'Plate_{data_df['plate_id']}_Well{data_df['well_id']}'
# data_df['plate_well_id'] = data_df.apply(lambda row: f'Plate_{row.plate_id}_Well{row.well_id}')

# data_df'df.apply (lambda row: find_plate_id(row), axis=1)    
print(data_df)

                             filename
0   TestData_plate1_wellA1_2022-07-05
1   TestData_plate1_wellA1_2022-07-03
2   TestData_plate1_wellA1_2022-07-01
3   TestData_plate1_wellA4_2022-07-03
4   TestData_plate1_wellA2_2022-07-05
5   TestData_plate1_wellA3_2022-07-01
6   TestData_plate1_wellA3_2022-07-03
7   TestData_plate1_wellA4_2022-07-01
8   TestData_plate1_wellA4_2022-07-05
9   TestData_plate1_wellA2_2022-07-03
10  TestData_plate1_wellA3_2022-07-05
11  TestData_plate1_wellA2_2022-07-01
                             filename    plate_well_id        date
0   TestData_plate1_wellA1_2022-07-05  Plate_1_Well_A1  2022-07-05
1   TestData_plate1_wellA1_2022-07-03  Plate_1_Well_A1  2022-07-03
2   TestData_plate1_wellA1_2022-07-01  Plate_1_Well_A1  2022-07-01
3   TestData_plate1_wellA4_2022-07-03  Plate_1_Well_A4  2022-07-03
4   TestData_plate1_wellA2_2022-07-05  Plate_1_Well_A2  2022-07-05
5   TestData_plate1_wellA3_2022-07-01  Plate_1_Well_A3  2022-07-01
6   TestData_plate1_wellA3_2022-07-03

In [30]:
#Processing images
with open(metadata_path) as metadata:
    expt_data = json.load(metadata)

plate_set_up = expt_data["sample_info"]
md_df = pd.DataFrame(plate_set_up)
print (md_df[0:])

  genotype media  condition_replicate    plate_well_id    Plate Well
0   Sp7498  Mock                    1  Plate_1_Well_A1  Plate_1   A1
1   Sp7498  Salt                    2  Plate_1_Well_A2  Plate_1   A2
2   Sp7498  Salt                    1  Plate_1_Well_A3  Plate_1   A3
3   Sp7498  Mock                    2  Plate_1_Well_A4  Plate_1   A4


In [87]:
def find_metadata(input_value, shared_column_name, search_df, desired_column):
    df_subset = search_df[search_df[shared_column_name] == input_value][desired_column] #Find the desired column from the matching part of the search_df
    df_as_list = list(df_subset) #Convert to list to wipe clean the index carried over from search dataframe. 
    desired_value = df_as_list[0] #Specify that we want what should be a single value, rather than a whole list with one entry
    return desired_value

data_df['media'] = data_df.apply(lambda row: find_metadata(row.plate_well_id, 'plate_well_id', md_df, 'media'), axis=1)
data_df['genotype'] = data_df.apply(lambda row: find_metadata(row.plate_well_id, 'plate_well_id', md_df, 'genotype'), axis=1)
data_df['replicate'] = data_df.apply(lambda row: find_metadata(row.plate_well_id, 'plate_well_id', md_df, 'condition_replicate'), axis=1)

print(data_df.head())                


                            filename    plate_well_id        date media  \
0  TestData_plate1_wellA1_2022-07-05  Plate_1_Well_A1  2022-07-05  Mock   
1  TestData_plate1_wellA1_2022-07-03  Plate_1_Well_A1  2022-07-03  Mock   
2  TestData_plate1_wellA1_2022-07-01  Plate_1_Well_A1  2022-07-01  Mock   
3  TestData_plate1_wellA4_2022-07-03  Plate_1_Well_A4  2022-07-03  Mock   
4  TestData_plate1_wellA2_2022-07-05  Plate_1_Well_A2  2022-07-05  Salt   

  genotype  replicate  
0   Sp7498          1  
1   Sp7498          1  
2   Sp7498          1  
3   Sp7498          2  
4   Sp7498          2  


### Part 3 - Produce figures

In [2]:
#Building a master dataframe

#### Part 2


In [None]:
#