# Convert delta output to Pandas dataframes

In this notebook we convert Delta 2.0 default output into pandas data frames.

Note:  run this notebook in an environment in which Delta2 has been installed, see [here](https://delta.readthedocs.io/en/latest/usage/installation.html) for instructions.

In [11]:
import pathlib
import numpy as np
import pandas as pd
from delta_postprocess import delta_to_df 
import shutil
import pickle

In [12]:
#FOLDER STRUCTURE
# > Generic experiment name (data_root)
#     > Experiment#1 (data_folders)
#          > Strain#1 (folder)
#               > Postition#1 (images stack)

#Set paths to data folders
data_root = pathlib.Path(pathlib.Path.home(), 'Documents/Image_analysis/Pseudomonas')
data_folders = ['2023.09.27_Psy_AgarPads#13','2023.10.04_Psy_AgarPads#14']

#set output paths
output_root = pathlib.Path(pathlib.Path.home(), 'Documents/Image_analysis/Pseudomonas/Lopez-Pagan2024/Output')
(output_root).mkdir(exist_ok=True) #create output data folder

out_files_dir = output_root / 'data_files_individual_colonies'
(out_files_dir).mkdir(exist_ok=True) #create output data folder

csv_dir = output_root / 'data_files_individual_colonies' / 'csv_files_Psy'
(csv_dir).mkdir(exist_ok=True) #create output data folder

movie_dir = output_root / 'mp4_movies' 
(movie_dir).mkdir(exist_ok=True) #create output data folder

out_dir = output_root / 'data_files'
(out_dir).mkdir(exist_ok=True) #create output data folder

In [13]:
#Move Delta output data from individual experiments to a single place for data analysis
for sub_folder in data_folders:
    
    data_dir = data_root / sub_folder
    
    #find subfolders
    folder_names = [f.name for f in sorted(data_dir.glob('Psy*'))]
    print(folder_names)

    #copy movies 
    for folder in folder_names:
        #get images in subfolder
        movie_names = [f.name for f in sorted((data_dir / folder).glob('*Pos*'))]
        print(movie_names)

        for idx, movie in enumerate(movie_names):        
            #path to current position        
            datafiles = [f.name for f in sorted((data_dir / folder / movie / 'ProcessedData').glob('*.mp4'))]
            mp4_name = '%s_%s.mp4' %(folder,movie)
            
            path_old = data_dir / folder / movie / 'ProcessedData' / datafiles[0]
            path_new = movie_dir / mp4_name
            shutil.copyfile(path_old, path_new)
            
    for folder in folder_names:
        #get images in subfolder
        movie_names = [f.name for f in sorted((data_dir / folder).glob('*Pos*'))]

        for idx, movie in enumerate(movie_names):        
            #path to current position        
            datafiles = [f.name for f in sorted((data_dir / folder / movie / 'ProcessedData').glob('*.pkl'))]
            short_name = '%s_%s' %(folder,movie)
                    
            df = delta_to_df(data_dir / folder / movie / 'ProcessedData' / datafiles[0])
            
            df['strain'] = folder
            df['movie_name'] = short_name
            df['replicate'] = idx
            
            #save data-frame
            save_name = short_name + '.csv'
            df.to_csv(csv_dir / save_name)

['Psy-eGFP', 'Psy-fliC-GFP']
['2023.09.27_Psy_AgarPads#13_Pos41', '2023.09.27_Psy_AgarPads#13_Pos42', '2023.09.27_Psy_AgarPads#13_Pos44', '2023.09.27_Psy_AgarPads#13_Pos47', '2023.09.27_Psy_AgarPads#13_Pos49', '2023.09.27_Psy_AgarPads#13_Pos50', '2023.09.27_Psy_AgarPads#13_Pos51', '2023.09.27_Psy_AgarPads#13_Pos55', '2023.09.27_Psy_AgarPads#13_Pos60', '2023.09.27_Psy_AgarPads#13_Pos62', '2023.09.27_Psy_AgarPads#13_Pos63', '2023.09.27_Psy_AgarPads#13_Pos69', '2023.09.27_Psy_AgarPads#13_Pos71', '2023.09.27_Psy_AgarPads#13_Pos75']
['2023.09.27_Psy_AgarPads#13_Pos01', '2023.09.27_Psy_AgarPads#13_Pos02', '2023.09.27_Psy_AgarPads#13_Pos04', '2023.09.27_Psy_AgarPads#13_Pos08', '2023.09.27_Psy_AgarPads#13_Pos09', '2023.09.27_Psy_AgarPads#13_Pos14', '2023.09.27_Psy_AgarPads#13_Pos15', '2023.09.27_Psy_AgarPads#13_Pos18', '2023.09.27_Psy_AgarPads#13_Pos21', '2023.09.27_Psy_AgarPads#13_Pos23', '2023.09.27_Psy_AgarPads#13_Pos25', '2023.09.27_Psy_AgarPads#13_Pos28', '2023.09.27_Psy_AgarPads#13_Pos31

NameError: name 'movie' is not defined

## Combine dataframes of separate experiments and store to disk

In [4]:
file_list = [pd.read_csv(csv_dir/f.name) for f in sorted(csv_dir.glob('*.csv'))]
df_combined = pd.concat(file_list, ignore_index=True).reset_index()
save_name = 'Psy_combined_data.csv'
df_combined.to_csv(out_dir / save_name)