In [None]:
import os
import random
import yaml
from utils import CheckboxSelector, has_formatted_data, format_iam_data, format_uim_v1_data, plot_line
# Base directory scanned for data sources; each sub-directory is offered as one selectable source
root_dir = './data'

In [None]:
"""
Select formatting options
"""

# Candidate data sources: every sub-directory of root_dir, ignoring Python's bytecode cache folder
data_sources = [
    entry
    for entry in os.listdir(root_dir)
    if os.path.isdir(f'{root_dir}/{entry}') and entry != '__pycache__'
]
data_selector = CheckboxSelector(
    data_sources,
    'Select data sources to format',
)
data_selector.display()

# Yes/no choice: should formatted data that already exists be regenerated?
overwrite_selector = CheckboxSelector(
    ['yes', 'no'],
    'Do you want to overwrite existing formatted data?',
)
overwrite_selector.display()

# Yes/no choice: should a few samples be plotted once formatting is done?
view_selector = CheckboxSelector(
    ['yes', 'no'],
    'Do you want to view some samples after formatting?',
)
view_selector.display()


In [None]:
"""
Set selections to variables
"""

# From here on, data_sources holds only the sources ticked in the selector
data_sources = data_selector.get_selected_items()

# Each flag is True only when 'yes' is the one and only box ticked
overwrite, view_output = (
    overwrite_selector.get_selected_items() == ['yes'],
    view_selector.get_selected_items() == ['yes'],
)

In [None]:
"""
IAM Formatting
"""

# Run the IAM formatter only when IAM was selected and either no formatted
# data is reported for it yet or an overwrite was requested
if 'IAM' in data_sources and (not has_formatted_data('IAM') or overwrite):
    format_iam_data()

In [None]:
"""
Custom Data Formatting
"""

# Every selected source other than IAM is a custom source described by its own config.yaml
custom_data_sources = [d for d in data_sources if d != 'IAM']
for data_source in custom_data_sources:
    source_dir = f'{root_dir}/{data_source}'

    # Leave already-formatted sources alone unless an overwrite was requested
    if has_formatted_data(source_dir) and not overwrite:
        continue

    # The 'type' key of the source's config.yaml decides which formatter runs
    with open(f'{source_dir}/config.yaml', 'r') as f:
        config = yaml.safe_load(f)
    data_source_type = config['type']

    if data_source_type == 'uim_v1':
        format_uim_v1_data(source_dir)
    elif data_source_type == 'YOUR CUSTOM DATA TYPE':
        # Extension point: plug in your own formatter here, e.g.
        # format_custom_data_type(data_source)
        pass
    else:
        # Say so explicitly instead of silently producing no formatted data
        print(f"Skipping '{data_source}': unrecognised data source type {data_source_type!r}")

In [None]:
"""
View Outputs
"""

# Plot a random handful of formatted lines from every selected data source
if view_output:
    max_samples = 20  # upper bound on the number of lines plotted per source
    for data_source in data_sources:
        lines_dir = f'{root_dir}/{data_source}/lines'
        print(f'{data_source}------------')

        # A selected source may have no lines directory (e.g. it was never formatted)
        if not os.path.isdir(lines_dir):
            print(f'No lines directory found at {lines_dir}; skipping')
            continue

        line_files = os.listdir(lines_dir)
        # random.sample raises ValueError if asked for more items than exist,
        # so cap the request at the number of files available
        sample_size = min(max_samples, len(line_files))
        for line_file in random.sample(line_files, sample_size):
            plot_line(f'{lines_dir}/{line_file}')
    
        