# Convert Delta output to pandas DataFrames

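In this notebook we convert the default output of Delta 2.0 into pandas DataFrames: each movie is saved as its own csv file, and all csv files are then combined into a single csv file.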

Note: run this notebook in an environment in which Delta2 has been installed; see [here](https://delta.readthedocs.io/en/latest/usage/installation.html) for installation instructions.

In [1]:
import pathlib
import numpy as np
import pandas as pd
from delta_postprocess import delta_to_df 
import shutil
import pickle

In [4]:
# Set paths.
# Input: processed data, one sub-folder per strain (sub-folder names start with 'UJP').
data_dir = pathlib.Path.home() / 'Andreas' / 'processed_Pa3'

# Output locations, all below a common root folder.
root = pathlib.Path(pathlib.Path.home(), 'switchdrive', 'Biozentrum', 'Andreas')
csv_dir = root / 'data_files_individual_colonies' / 'csv_files_Pa'  # one csv file per movie
movie_dir = root / 'mp4_movies' / 'mp4_files_Pa'                    # copies of the mp4 movies
out_dir = root / 'data_files'                                       # combined csv file

# Find strain sub-folders. Only directories are kept, so that a stray file whose
# name happens to start with 'UJP' is not mistaken for a strain folder later on.
folder_names = [f.name for f in sorted(data_dir.glob('UJP*')) if f.is_dir()]
print(folder_names)

['UJP1464', 'UJP315', 'UJP3428']


In [5]:
# Copy movies: the first mp4 file of every movie folder is copied to
# movie_dir and renamed to <strain folder>_<movie folder>.mp4

# shutil.copyfile does not create missing folders, so make sure the target exists
movie_dir.mkdir(parents=True, exist_ok=True)

for folder in folder_names:
    # movie sub-folders of this strain folder (names containing 'TL')
    movie_names = [f.name for f in sorted((data_dir / folder).glob('*TL*'))]

    for movie in movie_names:
        # mp4 files of the current movie folder, in alphabetical order
        mp4_files = sorted((data_dir / folder / movie).glob('*.mp4'))
        if not mp4_files:
            # Indexing an empty list used to raise IndexError and abort the
            # whole batch; report the problem and carry on with the next movie.
            print('WARNING: no mp4 file found in %s, skipping' % (data_dir / folder / movie))
            continue

        mp4_name = '%s_%s.mp4' % (folder, movie)

        # only the first mp4 file is copied, any further ones are ignored
        shutil.copyfile(mp4_files[0], movie_dir / mp4_name)

In [6]:
# Convert the Delta output of every movie into a data frame and save it as
# <strain folder>_<movie folder>.csv in csv_dir

# to_csv does not create missing folders, so make sure the target exists
csv_dir.mkdir(parents=True, exist_ok=True)

for folder in folder_names:
    # movie sub-folders of this strain folder (names containing 'TL')
    movie_names = [f.name for f in sorted((data_dir / folder).glob('*TL*'))]

    # enumerate over ALL movie folders, so that the replicate number of a movie
    # does not change when another movie has to be skipped
    for idx, movie in enumerate(movie_names):
        # pickle files of the current movie folder, in alphabetical order
        pkl_files = sorted((data_dir / folder / movie).glob('*.pkl'))
        if not pkl_files:
            # Indexing an empty list used to raise IndexError and abort the
            # whole batch; report the problem and carry on with the next movie.
            print('WARNING: no pkl file found in %s, skipping' % (data_dir / folder / movie))
            continue

        short_name = '%s_%s' % (folder, movie)

        # only the first pickle file is converted, any further ones are ignored
        df = delta_to_df(pkl_files[0])

        # add metadata identifying where the rows come from
        df['strain'] = folder
        df['movie_name'] = short_name
        df['replicate'] = idx  # position of the movie folder within its strain folder

        # save data frame (the index is written as the first, unnamed column)
        save_name = short_name + '.csv'
        df.to_csv(csv_dir / save_name)

## Combine dataframes of separate experiments and store to disk

In [7]:
# Read every csv file in csv_dir (alphabetical order) and stack them into one data frame.
# Note that ALL csv files in the folder are used, including any left over from earlier runs.
csv_files = sorted(csv_dir.glob('*.csv'))
if not csv_files:
    # pd.concat on an empty list fails with a much less helpful message
    raise FileNotFoundError('no csv files found in %s' % csv_dir)

file_list = [pd.read_csv(f) for f in csv_files]

# NOTE(review): the files are read without index_col, so an index column stored in
# them comes back as a regular 'Unnamed: 0' column; reset_index() then adds an
# 'index' column and to_csv writes the index once more. The column layout is kept
# unchanged here because other notebooks may rely on it - confirm before cleaning up.
df_combined = pd.concat(file_list, ignore_index=True).reset_index()

# to_csv does not create missing folders, so make sure the target exists
out_dir.mkdir(parents=True, exist_ok=True)
save_name = 'pa_combined_data.csv'
df_combined.to_csv(out_dir / save_name)