In [1]:
import utils
import numpy as np
import glob 
import os
import pandas as pd

In [2]:
def extract_and_save(ims_file_path: str, valid_surface: int, categories_list: list, save_path: str) -> pd.DataFrame:
    """Gather the per-object statistics of one surface of an Imaris file into a table.

    Args:
        ims_file_path: path of the ``.ims`` file, handed to ``utils.load_ims``.
        valid_surface: position of the wanted surface in the list returned by
            ``utils.get_object_names(..., search_for='Surface')``.
        categories_list: statistic names forwarded to ``utils.create_del_list``
            and ``utils.generate_csv``.
        save_path: currently unused because writing the CSV is disabled (see
            the end of the function); kept so existing callers keep working.

    Returns:
        The dataframe built by ``utils.generate_csv``.
    """

    # load the imaris file
    data = utils.load_ims(ims_file_path)

    # get surface we want to parse
    surface_name = utils.get_object_names(full_data_file=data,
                                          search_for='Surface')[valid_surface]

    # statistics names and statistics values of that surface
    surface_stats_names = utils.get_statistics_names(full_data_file=data, object_name=surface_name)
    surface_stats_values = utils.get_stats_values(full_data_file=data, object_name=surface_name)

    # One *independent* container per key, pre-filled with None for every
    # known statistic id. Sharing a single dict between all keys would make
    # every object report the values of whichever row was written last.
    # NOTE(review): the keys come from `.keys()` of the 'ID_Object' column; if
    # that column is a pandas Series this yields its index labels rather than
    # the object ids themselves - confirm this is intended.
    data_dict = {
        key: dict.fromkeys(surface_stats_names.keys())
        for key in surface_stats_values['ID_Object'].keys()
    }

    # fill the containers row by row
    for index in range(len(surface_stats_values)):
        row = surface_stats_values.iloc[index]

        object_stats = data_dict.get(row['ID_Object'])
        if object_stats is None:
            # no container exists for this object id: the row is skipped
            continue

        # statistics that never receive a value stay None
        object_stats[row['ID_StatisticsType']] = row['Value']

    # invert dictionary + name modifications (cosmetic step)
    inverted_stats_names = utils.invert_stats_dict(surface_stats_names)
    inverted_stats_names = utils.flatten(inverted_stats_names)

    # create a list of stats names (in integer form) we want to remove
    del_list = utils.create_del_list(inverted_stats_names, categories_list)

    # reverse the stats names again such that key=num, value=name
    final_stats_names = {v: k for k, v in inverted_stats_names.items()}

    # build the final table
    dataframe = utils.generate_csv(data_dict=data_dict,
                                   del_list=del_list,
                                   stats_names=final_stats_names,
                                   categories_list=categories_list)

    # NOTE: saving is intentionally left disabled, so save_path is not used
    #dataframe.to_csv(save_path)

    return dataframe

In [8]:
def generate_available_categories(ims_file_path: str, valid_surface: int,
                                  output_path: str = 'stats_categories.txt') -> list:
    """List the statistic names available for one surface and write them to a text file.

    Args:
        ims_file_path: path of the ``.ims`` file, handed to ``utils.load_ims``.
        valid_surface: position of the wanted surface in the list returned by
            ``utils.get_object_names(..., search_for='Surface')``.
        output_path: text file that receives one statistic name per line;
            defaults to ``'stats_categories.txt'``.

    Returns:
        The statistic names, i.e. the keys of the inverted and flattened
        statistics dictionary, in the same order as they are written.
    """

    # load the imaris file
    data = utils.load_ims(ims_file_path)

    # get surface we want to parse
    surface_name = utils.get_object_names(full_data_file=data,
                                          search_for='Surface')[valid_surface]

    # get all the statistics names
    surface_stats_names = utils.get_statistics_names(full_data_file=data, object_name=surface_name)

    # invert dictionary + name modifications (cosmetic step)
    inverted_stats_names = utils.flatten(utils.invert_stats_dict(surface_stats_names))

    available = list(inverted_stats_names.keys())

    # one name per line
    np.savetxt(output_path, available, fmt='%s')

    # hand the names back so callers do not have to re-read the file
    return available

In [9]:
# statistics to export: the three position axes followed by the mean
# intensity of channels 1 to 5
categories = ['Position ' + axis for axis in ('X', 'Y', 'Z')]
categories.extend('Intensity Mean_channel_' + str(channel) for channel in range(1, 6))

# position of the surface to read in the list of 'Surface' objects
valid_surface = 6

# example Imaris file used by the cells below
file_path = '../data/P1 DHBR Roi3 6x6_TileScan_001_Merging_Crop_0_batch.ims'

In [10]:
# Write the statistic names available for the chosen surface to 'stats_categories.txt'.
available_categories = generate_available_categories(file_path, valid_surface)

In [11]:
# Build the statistics table for the example file and preview it.
# The last argument is the save_path; extract_and_save does not write a file
# while its to_csv call is commented out.
df = extract_and_save(file_path, valid_surface, categories, '.')
df.head()

In [7]:
def run(config_path: str):
    """Run the extraction for every ``.ims`` file in the configured directories.

    Args:
        config_path: YAML configuration file read with ``utils.load_yaml``.
            Keys used: ``data_dir`` (a directory or a list of directories),
            ``stats_category_path`` (file read with ``utils.read_txt``) and
            ``valid_surface`` (converted with ``int``).

    For each ``<name>.ims`` file, ``extract_and_save`` is called with
    ``<name>.csv`` in the same directory as its save path.
    """

    # honour the caller's path instead of a hard-coded file name
    config = utils.load_yaml(config_path)

    # directories to scan; tolerate a single directory given as a plain string
    directories = config['data_dir']
    if isinstance(directories, str):
        directories = [directories]

    # get the stats categories
    stats_categories = utils.read_txt(config['stats_category_path'])

    # valid surface
    valid_surface = int(config['valid_surface'])

    for directory in directories:

        # glob already returns paths that include `directory`, so they are used
        # as-is; joining them with `directory` again would duplicate the prefix
        # for relative directories. Sorting keeps the processing order stable.
        for file_path in sorted(glob.glob(os.path.join(directory, '*.ims'))):

            # the csv goes next to the source file
            save_path = os.path.splitext(file_path)[0] + '.csv'

            # extract and save
            extract_and_save(file_path, valid_surface, stats_categories, save_path)

# Run the batch extraction with the settings stored in config.yaml.
run('config.yaml')

In [8]:
# Load the configuration on its own so it can be inspected in the next cells.
yaml = utils.load_yaml('config.yaml')

In [10]:
# Display the parsed configuration.
yaml

{'data_dir': ['../data/'],
 'stats_category_path': 'stats_categories.txt',
 'valid_surface': '6'}

In [12]:
# NOTE(review): in the recorded run this call fails with
# "AttributeError: 'float' object has no attribute 'strip'" (see the output below).
utils.read_txt(yaml['stats_category_path'])

AttributeError: 'float' object has no attribute 'strip'

In [15]:
# Read the category file with pandas as a single column named 'Statistics'
# (no header row) and convert that column to a plain Python list.
data = (
    pd.read_csv(
        yaml['stats_category_path'],
        header=None,
        names=['Statistics'],
    )['Statistics']
    .values
    .tolist()
)