## 1.0 Experiments clean raw data

## Simple setup
To use the `src` module inside of the notebooks:

In [1]:
%load_ext autoreload
%autoreload 2

## Imports

In [2]:
import janitor
import src
import pandas as pd

## Load raw data

### Identify zip files

In [3]:
experiment_files = {
    zip_file.stem: zip_file
    for zip_file in src.data_raw_dir().glob("*.zip")
}

experiment_files.keys()

dict_keys(['plasmid_lineages', 'chromosome_lineages'])

### Read experiments into a single Data Frame

In [4]:
df = src.read_experiments(
    experiment_files=experiment_files.values(),
    experiment_ids=experiment_files.keys(),
    encoding = "latin"
)

df.head()

Unnamed: 0.1,lineageID,trackID,cellID,motherID,frame,roiID,length,division,dead,GFP,DsRed,file_name_id,experiment_id,Unnamed: 0
0,0.0,20.38-44.0,20.38,20.38,20.0,roi_f20_n38_x463_y290,28.071135,0.0,0.0,202.571,98.776,pBGT-AMP-Pulse_xy01_lineages_all.csv,plasmid_lineages,
1,0.0,20.38-44.0,21.25,20.38,21.0,roi_f21_n25_x465_y290,27.384577,0.0,0.0,192.205,98.439,pBGT-AMP-Pulse_xy01_lineages_all.csv,plasmid_lineages,
2,0.0,20.38-44.0,22.24,20.38,22.0,roi_f22_n24_x466_y290,27.714666,0.0,0.0,188.813,98.337,pBGT-AMP-Pulse_xy01_lineages_all.csv,plasmid_lineages,
3,0.0,20.38-44.0,23.26,20.38,23.0,roi_f23_n26_x467_y291,28.855853,0.0,0.0,186.514,98.405,pBGT-AMP-Pulse_xy01_lineages_all.csv,plasmid_lineages,
4,0.0,20.38-44.0,24.25,20.38,24.0,roi_f24_n25_x467_y291,29.005438,0.0,0.0,188.081,98.698,pBGT-AMP-Pulse_xy01_lineages_all.csv,plasmid_lineages,


## Clean raw data

In [5]:
df = (
    df
    .clean_experiment_names()
    .remove_columns(column_names="unnamed_0")
    .center_frames(by="experiment_id", frame_col="frame")
)
df.head()

Unnamed: 0,lineage_id,track_id,cell_id,mother_id,frame,roi_id,length,division,dead,gfp,dsred,file_name_id,experiment_id
0,0.0,10.8-34.0,10.8,10.8,0,roi_f10_n8_x129_y214,25.849682,0.0,0.0,120.11,108.394,MGGT-AMP-Pulse_xy01_lineages_all.csv,chromosome_lineages
1,0.0,10.8-34.0,11.6,10.8,1,roi_f11_n6_x126_y216,41.415207,1.0,0.0,120.289,108.422,MGGT-AMP-Pulse_xy01_lineages_all.csv,chromosome_lineages
2,0.0,10.8-34.0,12.8,10.8,2,roi_f12_n8_x122_y225,19.348009,0.0,0.0,119.765,107.959,MGGT-AMP-Pulse_xy01_lineages_all.csv,chromosome_lineages
3,0.0,10.8-34.0,13.8,10.8,3,roi_f13_n8_x124_y224,26.301427,0.0,0.0,119.771,108.061,MGGT-AMP-Pulse_xy01_lineages_all.csv,chromosome_lineages
4,0.0,10.8-34.0,14.5,10.8,4,roi_f14_n5_x124_y220,26.475045,0.0,0.0,119.718,107.977,MGGT-AMP-Pulse_xy01_lineages_all.csv,chromosome_lineages


## Save result

In [6]:
df.to_csv(
    path_or_buf=src.data_processed_dir("experiments_data.csv"),
    index=False
)