In [None]:
import os, sys
from pathlib import Path
from itertools import compress
from numpy import unique

In [None]:
# Locate the lab_utils root. The notebook is expected to be run from a folder
# two levels below it, so the root is the grandparent of the working directory.
currpath = os.getcwd()
labutilspath = str(Path(currpath).parents[1])

# Make lab_utils importable. The membership check keeps sys.path free of
# duplicate entries when this cell is re-run in the same kernel.
if labutilspath not in sys.path:
    sys.path.append(labutilspath)

# import the autoscan routines
from autoscan import autoscan

In [None]:
# sub-folder, relative to the base path below, that holds the autoscan data
asdatapath = 'autoscan'

# root folder under which all characterization data is stored
basepath = '/home/urlab/sandbox/data/characterization/'

# full path to the autoscan data: <basepath>/<asdatapath>
datapath = os.path.join(basepath, asdatapath)

## List and correct filenames (enforce patterns)

1. Read the _csv_ files in the `datapath`
1. Find whether each file has the word `before` or `after`; if not, then add `before` as default
1. Fix all file names such that they follow `probe_before|after_side.csv` 
1. Create a dataframe that describes the data
1. Add a column with links and save (`io.wrangle(save=True, link=True)`)

In [None]:
# File sorter for the autoscan data folder. NOTE: in this notebook the name
# `io` refers to this object, not to the standard-library `io` module.
io = autoscan.file_sorter(
    datapath=datapath,
    labutilspath=labutilspath,
)

# post-processing helper; it is given only the lab_utils location
pp = autoscan.postprocess(labutilspath=labutilspath)

### check basic rock info

1. Load the tags available in `datapath`.
2. Check that every tag exists in `rock_info.rock_dict`; if any is missing, add it.

In [None]:
# flag read by the next cell; stays False unless an unknown tag is found
missing_tag = False

# keep the entries of datapath that neither start with '_' nor contain a '.',
# then take the text before the first '_' of each name as its tag
entries = [name for name in os.listdir(datapath)
           if not (name.startswith('_') or '.' in name)]
tags = unique([name.split('_')[0] for name in entries])

# populate pp.rock_info before querying its rock_dict
pp._get_rock_basics()

# tags found on disk that have no entry in rock_dict
missing_tags = [tag for tag in tags
                if tag not in pp.rock_info.rock_dict.keys()]

if missing_tags:
    missing_tag = True
    print('add the following to rock_info.rock_dict')
    print(missing_tags)

#### update rock_dict
1. update the rock_dict
1. save the rock_dict for future use

In [None]:
if missing_tag:
    # Add the relevant information for the missing sample.
    # NOTE(review): this hard-coded 'sdgm' entry is added whenever *any* tag
    # is reported missing by the previous cell; edit the fields below so they
    # match the tag(s) printed there before running.
    pp.rock_info.add_sample_to_dict(tag = 'sdgm', 
                                    family = 'sandstone',  # was misspelled 'sandsone'
                                    genus = 'layered sandstone',
                                    long_name = 'sdgm sandstone',
                                    origin = 'ksa',
                                    provenance = 'field')

    # update the sample file so that the sample added above is included
    pp.rock_info.update_sample_file()

### summarize available data

In [None]:
# switch off the sorter's dry-run and debug flags before wrangling
io.dryrun = io.debug = False

# wrangle with both `save` and `link` enabled, then preview the first rows
summary = io.wrangle(save=True, link=True)
summary.head()