In [5]:
# dates = ['13023','32423','32623','21823','22223','22323','22423','2723','21523','41923','31823',
#          '31723','31423','41323','41223','5223','5323','5823','51023','51823','42523','42723']
# len(dates)

In [1]:
import os
import pandas as pd
import fnmatch

path = 'Cell Alignment'
pattern = '*.czi'

treatment_options = ['Albumin bound S1P', 'Heparin 2 UmL','Heparin S1P','No Treatment']
flow_options = ['Static', 'DF', 'UF']
times = ['12 hr','0 hr','30 min']
cell_ages = ['P{}'.format(i) for i in range(12)]
dates = ['13023','32423','32623','21823','22223','22323','22423','2723','21523','41923','31823',
         '31723','31423','41323','41223','5223','5323','5823','51023','51823','42523','42723'] # should find a better way to do this

# Initialize an empty list to store dictionaries of file info
file_info_list = []

# Recursively search through the directory and its subdirectories
for root, dirs, files in os.walk(path):
    for filename in fnmatch.filter(files, pattern):
        # Extract treatment and flow from filename
        treatment = 'none'
        flow = 'none'
        time = 'none'
        cell_age = 'none'
        date = 'none'
        for c in treatment_options:
            if c in filename:
                if treatment == 'none':
                    treatment = c
                else:
                    treatment = 'multi'
        for f in flow_options:
            if f in filename:
                if flow == 'none':
                    flow = f
                else:
                    flow = 'multi'
        for c in times:
            if c in filename:
                if time == 'none':
                    time = c
                else:
                    time = 'multi'
        for c in cell_ages:
            if c in filename:
                if cell_age == 'none':
                    cell_age = c
                else:
                    cell_age = 'multi'
        for c in dates:
            if c in filename:
                if date == 'none':
                    date = c
                else:
                    date = 'multi'
        # Create a dictionary of file info and append it to the list
        file_info = {'file_name': filename, 'data_path':root,'treatment': treatment, 'flow': flow, 'time':time,
                     'cell_age':cell_age,'date':date}
        file_info_list.append(file_info)

# Create a pandas DataFrame from the list of dictionaries
df = pd.DataFrame(file_info_list)

# Print the DataFrame
#print(df)
df.insert(loc=0, column='exp_num', value=df.index.values)
df


Unnamed: 0,exp_num,file_name,data_path,treatment,flow,time,cell_age,date
0,0,HCAEC P8_12 hr UF_No Treatment_51023.czi,Cell Alignment/No Treatment/5.10.23,No Treatment,UF,12 hr,P8,51023
1,1,HCAEC P8_12 hr DF_No Treatment_51023.czi,Cell Alignment/No Treatment/5.10.23,No Treatment,DF,12 hr,P8,51023
2,2,HCAEC P8_12 hr Static_No Treatment_51023.czi,Cell Alignment/No Treatment/5.10.23,No Treatment,Static,12 hr,P8,51023
3,3,HCAEC P7_12 hr DF_No Treatment_5823.czi,Cell Alignment/No Treatment/5.8.23,No Treatment,DF,12 hr,P7,5823
4,4,HCAEC P7_12 hr Static_No Treatment_5823.czi,Cell Alignment/No Treatment/5.8.23,No Treatment,Static,12 hr,P7,5823
5,5,HCAEC P7_12 hr UF_No Treatment_5823.czi,Cell Alignment/No Treatment/5.8.23,No Treatment,UF,12 hr,P7,5823
6,6,HCAEC P8_12 hr Static_No Treatment_51823.czi,Cell Alignment/No Treatment/5.18.23,No Treatment,Static,12 hr,P8,51823
7,7,HCAEC P8_12 hr UF_No Treatment_51823.czi,Cell Alignment/No Treatment/5.18.23,No Treatment,UF,12 hr,P8,51823
8,8,HCAEC P8_12 hr DF_No Treatment_51823.czi,Cell Alignment/No Treatment/5.18.23,No Treatment,DF,12 hr,P8,51823
9,9,HCAEC P7_12 hr UF_No Treatment_5223.czi,Cell Alignment/No Treatment/5.2.23,No Treatment,UF,12 hr,P7,5223


In [2]:
df['group'] = df.groupby(['treatment', 'flow']).ngroup()

# Create a new column 'replicate' and populate it with the group number
df['replicate'] = df.groupby(['group'])['date'].rank(method='dense').astype(int)
df

Unnamed: 0,exp_num,file_name,data_path,treatment,flow,time,cell_age,date,group,replicate
0,0,HCAEC P8_12 hr UF_No Treatment_51023.czi,Cell Alignment/No Treatment/5.10.23,No Treatment,UF,12 hr,P8,51023,11,1
1,1,HCAEC P8_12 hr DF_No Treatment_51023.czi,Cell Alignment/No Treatment/5.10.23,No Treatment,DF,12 hr,P8,51023,9,1
2,2,HCAEC P8_12 hr Static_No Treatment_51023.czi,Cell Alignment/No Treatment/5.10.23,No Treatment,Static,12 hr,P8,51023,10,1
3,3,HCAEC P7_12 hr DF_No Treatment_5823.czi,Cell Alignment/No Treatment/5.8.23,No Treatment,DF,12 hr,P7,5823,9,5
4,4,HCAEC P7_12 hr Static_No Treatment_5823.czi,Cell Alignment/No Treatment/5.8.23,No Treatment,Static,12 hr,P7,5823,10,5
5,5,HCAEC P7_12 hr UF_No Treatment_5823.czi,Cell Alignment/No Treatment/5.8.23,No Treatment,UF,12 hr,P7,5823,11,5
6,6,HCAEC P8_12 hr Static_No Treatment_51823.czi,Cell Alignment/No Treatment/5.18.23,No Treatment,Static,12 hr,P8,51823,10,2
7,7,HCAEC P8_12 hr UF_No Treatment_51823.czi,Cell Alignment/No Treatment/5.18.23,No Treatment,UF,12 hr,P8,51823,11,2
8,8,HCAEC P8_12 hr DF_No Treatment_51823.czi,Cell Alignment/No Treatment/5.18.23,No Treatment,DF,12 hr,P8,51823,9,2
9,9,HCAEC P7_12 hr UF_No Treatment_5223.czi,Cell Alignment/No Treatment/5.2.23,No Treatment,UF,12 hr,P7,5223,11,3


In [4]:
df[df['group']==10]

Unnamed: 0,exp_num,file_name,data_path,treatment,flow,time,cell_age,date,group,replicate
2,2,HCAEC P8_12 hr Static_No Treatment_51023.czi,Cell Alignment/No Treatment/5.10.23,No Treatment,Static,12 hr,P8,51023,10,1
4,4,HCAEC P7_12 hr Static_No Treatment_5823.czi,Cell Alignment/No Treatment/5.8.23,No Treatment,Static,12 hr,P7,5823,10,5
6,6,HCAEC P8_12 hr Static_No Treatment_51823.czi,Cell Alignment/No Treatment/5.18.23,No Treatment,Static,12 hr,P8,51823,10,2
11,11,HCAEC P7_12 hr Static_No Treatment_5223.czi,Cell Alignment/No Treatment/5.2.23,No Treatment,Static,12 hr,P7,5223,10,3
14,14,HCAEC P7_12 hr Static_No Treatment_5323.czi,Cell Alignment/No Treatment/5.3.23,No Treatment,Static,12 hr,P7,5323,10,4


In [12]:
df

Unnamed: 0,exp_num,file_name,data_path,treatment,flow,time,cell_age,date,group,replicate
0,0,HCAEC_12 hr UF_P7_Heparin 2 UmL_32423.czi,Cell Alignment/Heparin Treated/3.24.23,Heparin 2 UmL,UF,12 hr,P7,32423,5,4
1,1,HCAEC_12 hr Static_P7_Heparin 2 UmL_32423.czi,Cell Alignment/Heparin Treated/3.24.23,Heparin 2 UmL,Static,12 hr,P7,32423,4,4
2,2,HCAEC_12 hr DF_P7_Heparin 2 UmL_32423.czi,Cell Alignment/Heparin Treated/3.24.23,Heparin 2 UmL,DF,12 hr,P7,32423,3,4
3,3,HCAEC P8_12 hr DF_Heparin 2 UmL_13023.czi,Cell Alignment/Heparin Treated/1.30.23,Heparin 2 UmL,DF,12 hr,P8,13023,3,1
4,4,HCAEC P8_12 hr UF_Heparin 2 UmL_13023.czi,Cell Alignment/Heparin Treated/1.30.23,Heparin 2 UmL,UF,12 hr,P8,13023,5,1
5,5,HCAEC_12 hr Static_P8_Heparin 2 UmL_13023.czi,Cell Alignment/Heparin Treated/1.30.23,Heparin 2 UmL,Static,12 hr,P8,13023,4,1
6,6,HCAEC_12 hr DF_P7_Heparin 2 UmL_1_21523.czi,Cell Alignment/Heparin Treated/2.15.23,Heparin 2 UmL,DF,12 hr,P7,21523,3,2
7,7,HCAEC_12 hr Static_P7_Heparin 2 UmL_1_21523.czi,Cell Alignment/Heparin Treated/2.15.23,Heparin 2 UmL,Static,12 hr,P7,21523,4,2
8,8,HCAEC_12 hr UF_P7_Heparin 2 UmL_1_21523.czi,Cell Alignment/Heparin Treated/2.15.23,Heparin 2 UmL,UF,12 hr,P7,21523,5,2
9,9,HCAEC_12 hr Static_P5_Heparin 2 UmL_2723.czi,Cell Alignment/Heparin Treated/2.7.23,Heparin 2 UmL,Static,12 hr,P5,2723,4,3


In [5]:
df.to_csv('Ron_Cell_Alignment_metadata_9_20_23.csv', index=False)

In [6]:
df['group'].unique()

array([11,  9, 10,  5,  4,  3,  1,  2,  0,  8,  6,  7])