In [5]:
import copy
import pathlib

import numpy as np
import pandas as pd

from tiled.client import from_uri
from tiled.examples.xdi import read_xdi
from tiled.queries import Key

from aimmdb.schemas import ExperimentalXASMetadata
from aimmdb.schemas import FEFFInputMetadata
from aimmdb.schemas import FEFFOutputMetadata

In [6]:
from tiled.client import from_uri
client = from_uri("http://localhost:8000/api")

ConnectError: [WinError 10061] No connection could be made because the target machine actively refused it

In [7]:
# data path
data_path = pathlib.Path("D:/BNL/AIMMDB/65272_C_007")

In [8]:
def read_dat(path):
    return pd.read_csv(path, sep="\t", header=None, names=["omega, e, k, mu, mu0, chi"])

In [9]:
def lower_case_dict(d):
    out = {}
    modified = False

    for k, v in d.items():
        if isinstance(v, dict):
          v, modified_ = lower_case_dict(v)
          modified = modified or modified_
        if isinstance(k, str) and not k.islower():
            out[k.lower()] = v
            modified = True
        else:
            out[k] = v

    return out, modified

def load_FeffData(data_path):
    """
    Load FEFF dataset into a dataframe parsing sample information from metadata
    """
    files = list(data_path.rglob("*.dat"))
    print(f"found {len(files)} dat files to ingest")

    data_list = []

    for f in files:
        name = f.stem
        #need to parse metadata from the file name
        df_feff, metadata = read_dat(str(f))
        fields = metadata.pop("fields")
        fields, _ = lower_case_dict(fields)
        metadata.update({k.lower(): v for k, v in fields.items()})
        metadata["sample"].setdefault("prep", None)
        
        try:
          facility_name = metadata["facility"]["name"]
        except:
          metadata["facility"] = {"name" : None}
        try:
          beamline_name = metadata["beamline"]["name"]
        except:
          metadata["beamline"] = {"name" : None}

        data_list.append(
            {
                "name": f.stem,
                "file": str(f),
                "metadata": metadata,
                "columns" : tuple(df_feff)
            }
        )

    df = pd.DataFrame(data_list)

    return df

In [10]:
# read through all the files and extract some metadata
feff = load_FeffData(pathlib.Path("D:/BNL/AIMMDB/65272_C_007"))
feff

found 1 dat files to ingest


ValueError: not enough values to unpack (expected 2, got 1)

In [11]:
def ingest_feff(client, df, verbose=False):
    """
    Upload the FEFF dataset to database
    """

    for (name, prep), g in df.groupby(["sample.name", "sample.prep"]):
        if verbose:
            print(f"{name}: {prep}, {len(g)}")

        sample_id = client.write_sample({"name" : name, "prep" : prep})

        for i, row in g.iterrows():
            feff_df, _ = read_dat(row.file)
            metadata = row.metadata
            metadata["dataset"] = "feff"
            metadata["sample_id"] = sample_id
            client["uid"].write_dataframe(feff_df, metadata=metadata, specs=["FEFF"])

In [12]:
print("starting ingestion...")
ingest_feff(client, feff, verbose=True)
print("finished.")

starting ingestion...


NameError: name 'client' is not defined

In [13]:
FEFFInputMetadata.schema()

{'title': 'FEFFInputMetadata',
 'type': 'object',
 'properties': {'element': {'$ref': '#/definitions/XDIElement'},
  'measurement_type': {'default': 'xas',
   'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},
  'dataset': {'title': 'Dataset', 'type': 'string'},
  'sample_id': {'title': 'Sample Id', 'type': 'string'},
  'input_script': {'title': 'Input Script', 'type': 'string'}},
 'required': ['element', 'dataset', 'sample_id', 'input_script'],
 'definitions': {'XDIElement': {'title': 'XDIElement',
   'type': 'object',
   'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},
    'edge': {'title': 'Edge', 'type': 'string'}},
   'required': ['symbol', 'edge']},
  'MeasurementEnum': {'title': 'MeasurementEnum',
   'description': 'An enumeration.',
   'enum': ['xas', 'rixs'],
   'type': 'string'}}}

In [14]:
FEFFOutputMetadata.schema()

{'title': 'FEFFOutputMetadata',
 'type': 'object',
 'properties': {'element': {'$ref': '#/definitions/XDIElement'},
  'measurement_type': {'default': 'xas',
   'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},
  'dataset': {'title': 'Dataset', 'type': 'string'},
  'sample_id': {'title': 'Sample Id', 'type': 'string'},
  'output_script': {'title': 'Output Script', 'type': 'string'}},
 'required': ['element', 'dataset', 'sample_id', 'output_script'],
 'definitions': {'XDIElement': {'title': 'XDIElement',
   'type': 'object',
   'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},
    'edge': {'title': 'Edge', 'type': 'string'}},
   'required': ['symbol', 'edge']},
  'MeasurementEnum': {'title': 'MeasurementEnum',
   'description': 'An enumeration.',
   'enum': ['xas', 'rixs'],
   'type': 'string'}}}

In [15]:
# we will enforce that XAS metadata satisfies the following schema
ExperimentalXASMetadata.schema()

{'title': 'ExperimentalXASMetadata',
 'type': 'object',
 'properties': {'element': {'$ref': '#/definitions/XDIElement'},
  'measurement_type': {'default': 'xas',
   'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},
  'dataset': {'title': 'Dataset', 'type': 'string'},
  'sample_id': {'title': 'Sample Id', 'type': 'string'},
  'facility': {'$ref': '#/definitions/FacilityMetadata'},
  'beamline': {'$ref': '#/definitions/BeamlineMetadata'}},
 'required': ['element', 'dataset', 'facility', 'beamline'],
 'definitions': {'XDIElement': {'title': 'XDIElement',
   'type': 'object',
   'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},
    'edge': {'title': 'Edge', 'type': 'string'}},
   'required': ['symbol', 'edge']},
  'MeasurementEnum': {'title': 'MeasurementEnum',
   'description': 'An enumeration.',
   'enum': ['xas', 'rixs'],
   'type': 'string'},
  'FacilityMetadata': {'title': 'FacilityMetadata',
   'type': 'object',
   'properties': {'name': {'title': 'Name', 'type': '

In [None]:
client["uid"]

In [None]:
# with the correct metadata we can write to the server
# NOTE this doesn't prevent you from writing garbage but does help
df = pd.DataFrame({"a" : np.random.rand(100), "b" : np.random.rand(100)})
metadata = {"dataset" : "feff", "foo" : "bar", "element" : {"symbol" : "Au", "edge" : "K"}, "facility" : {"name" : "ALS"}, "beamline" : {"name" : "8.0.1"}}
node = client["uid"].write_dataframe(df, metadata=metadata, specs=["FEFF"])
node