In [1]:
import os, sys, re
from pathlib import Path
from itertools import compress
from numpy import unique

In [2]:
# Resolve the lab_utils root from the notebook's working directory.
# parents[1] is the directory two levels above the working directory;
# presumably that is the lab_utils root -- confirm if the notebook is moved.
currpath = os.getcwd()
labutilspath = str(Path(currpath).parents[1])

# Make the packages under labutilspath importable. The membership guard keeps
# this cell idempotent: re-running it does not stack duplicate sys.path entries.
if labutilspath not in sys.path:
    sys.path.append(labutilspath)

# import the autoscan routines
from autoscan import autoscan

In [3]:
# Root directory that holds all characterization data
basepath = '/sandbox/data/'

# Location of the autoscan data, relative to basepath
asdatapath = 'autoscan'

# Full path to the autoscan data directory
datapath = os.path.join(basepath, asdatapath)

## List and correct filenames (enforce patterns)

1. Read the _csv_ files in the `datapath`
1. Find whether each file has the word `before` or `after`; if not, then add `before` as default
1. Fix all file names such that they follow `probe_before|after_side.csv` 
1. Create a dataframe that describes the data
1. Add a column with links and save (`io.wrangle(save=True, link=True)`)

In [4]:
# Build the two autoscan helpers used by the following cells:
#   io -- file_sorter bound to the autoscan data directory
#   pp -- postprocess helper
# NOTE: `io` here is the file_sorter instance, not the standard-library io module.
io = autoscan.file_sorter(datapath=datapath, labutilspath=labutilspath)
pp = autoscan.postprocess(labutilspath=labutilspath)

### check basic rock info

1. load the tags available
2. check that the tags exist in `rock_info`; if any are missing, add them

In [5]:
# Flag read by the update cell below; stays False unless a tag is missing.
missing_tag = False

# Candidate tags: the text before the first underscore of every entry in
# datapath whose name neither starts with '_' nor contains a '.'.
tags = unique([
    entry.split('_')[0]
    for entry in os.listdir(datapath)
    if not (entry.startswith('_') or '.' in entry)
])

# Presumably populates pp.rock_info, which is queried right below -- confirm.
pp._get_rock_basics()

# Keep only the tags that have no key in rock_dict.
missing_tags = [tag for tag in tags if tag not in pp.rock_info.rock_dict.keys()]

if missing_tags:
    missing_tag = True
    print('add the following to rock_info.rock_dict')
    print(missing_tags)

#### update rock_dict
1. update the rock_dict
1. save rock_dict for future use

In [6]:
if missing_tag:
    # Gemstone sample definitions as (tag, genus) pairs. Every entry shares the
    # same family, origin and provenance, and its long_name equals its genus,
    # so one table and one loop replace seven near-identical calls.
    # NOTE(review): this list is fixed and is registered whenever any tag is
    # missing, independently of which tags were reported as missing -- confirm
    # that it covers the tags printed by the check above.
    gemstone_samples = (
        ('zgf', 'fossils'),
        ('zgs', 'sedimentary'),
        ('zgm', 'minerals'),
        ('zgt', 'metamorphic'),
        ('zgi', 'igneous'),
        ('zgx', 'generic'),
        ('zgg', 'gemstones'),
    )

    # add the relevant information, one sample per table row
    for sample_tag, genus in gemstone_samples:
        pp.rock_info.add_sample_to_dict(tag=sample_tag,
                                        family='gemstones',
                                        genus=genus,
                                        long_name=genus,
                                        origin='usa',
                                        provenance='field')

    # update the sample file with the samples added above
    pp.rock_info.update_sample_file()

### summarize available data

In [7]:
# Turn off the file sorter's dryrun and debug flags before wrangling.
io.dryrun = False
io.debug = False

# Wrangle the data files (save and link both enabled) and report the count.
summary = io.wrangle(save=True, link=True)
print('\nnumber of files: %d' % len(summary))

# TODO: add experiment type (not implemented in this cell)

summary.head()


number of files: 959


Unnamed: 0,sample_tag,subsample_tag,sample_code,sample_family,probe,side,instance,experiment,fname,relroot
0,sccaco_002,,sc,carbonate,vels,e,before,,vels_before.csv,sccaco_002/base/processed/vels_before.csv
1,sccaco_002,,sc,carbonate,ftir,e,before,,ftir_before.csv,sccaco_002/base/processed/ftir_before.csv
2,bg_134,,bg,sandstone,ftir,e,before,,ftir_before_e.csv,bg_134/base/processed/ftir_before_e.csv
3,bg_134,,bg,sandstone,vels,c,before,,vels_before_c.csv,bg_134/base/processed/vels_before_c.csv
4,bg_134,,bg,sandstone,ftir,f,before,,ftir_before_f.csv,bg_134/base/processed/ftir_before_f.csv
