In [None]:
import os
import json
import gzip
from zipfile import ZipFile
from io import StringIO, BytesIO
from numpy import where
from scipy.interpolate import interp1d, interp2d
from pandas import to_numeric, read_csv
from mpcontribs.client import Client, Attachment
from tqdm.notebook import tqdm
from decimal import Decimal
from pathlib import Path

In [None]:
# Project name and API client shared by every cell below.
# Alternative project/host, left commented out:
# name = "magmom_cobalt_copper_cerium"
# client = Client(host="lightsources-api.materialsproject.org")
# Active settings: the "sandbox" project with a default-constructed client.
# NOTE(review): presumably Client() takes host and API key from its own defaults/environment - confirm.
name = "sandbox"
client = Client()

In [None]:
# add sample info to project.other
# client.projects.update_entry(pk=name, project={
#     "other": {
#         "sample1": {"name": "CMSI-2-10_1", "description": "something about this sample"},
#         "sample3": {"name": "CMSI-2-10_3", "description": "something about this sample"}
#     }
# }).result()

In [None]:
# Declare every column up front so the project can be initialized later with a
# fixed column order and the right unit per column ("" = no unit, None = no
# unit applies).
elements = ["Co", "Cu", "Ce"]
columns = {
    # sample position
    **{f"position.{axis}": "mm" for axis in ["x", "y"]},
    # composition of each element
    **{f"composition.{element}": "%" for element in elements},
    # min/max of every spectrum type, grouped by element
    **{
        f"{element}.{spectrum}.{m}": ""
        for element in elements
        for spectrum in ["XAS", "XMCD"]
        for m in ["min", "max"]
    },
    # component columns
    "tables": None,
    "attachments": None,
}
#columns

In [None]:
# result of an analysis run
# The archive location can be overridden with the MPCONTRIBS_DATASET_ZIP
# environment variable so the notebook is not tied to a single machine; the
# original local path stays the default.
zip_path = os.environ.get(
    "MPCONTRIBS_DATASET_ZIP",
    "/Users/patrick/GoogleDriveLBNL/MaterialsProject/gitrepos/mpcontribs-data/dataset.zip",
)
# Left open on purpose: later cells read the individual members from it.
zip_file = ZipFile(zip_path, "r")

In [None]:
# composition/concentration table: one row per measured X/Y position with one
# percentage column per element (the tabs only align the literal and are
# stripped before parsing)
ctable = read_csv(StringIO("""
X,		Y,		Co,		Cu,		Ce
-8.5,	37.6,	46.2,	5.3,	39.3
-8.5,	107.8,	70.0,	8.9,	15.5
-7.2,	37.9,	61.2,	4.3,	34.1
-7.2,	107.0,	88.2,	6.5,	12.3
-6.3,	37.9,	66.0,	20.2,	14.89
-6.3,	107.9,	88.7,	2.7,	9.5
-6.1,	35.4,	66.0,	20.2,	14.89
-6.1,	105.4,	88.7,	2.7,	9.5
-5.7,	34.8,	48.3,	12.5,	39.3
-5.7,	104.8,	54.9,	19.1,	15.5
-5.0,	37.1,	48.8,	8.7,	43.7
-5.0,	107.1,	64.8,	16.9,	19.2
""".replace('\t', '')))

# index the rows by an "<X>/<Y>" label; X and Y themselves stay as columns
ctable = ctable.set_index(
    (ctable["X"].astype('str') + '/' + ctable["Y"].astype('str')).rename("x/y position [mm]")
)
ctable.columns.name = "element"
ctable.attrs.update({
    "name": "Composition Table",
    "meta": {"X": "category", "Y": "continuous"},  # for plotly
    "labels": {"value": "composition [%]"},
})

In [None]:
def get_concentration_functions(composition_table):
    """Return one concentration-lookup callable per element column.

    Parameters
    ----------
    composition_table : pandas.DataFrame
        Needs the position columns ``X`` and ``Y``, one column of percentages
        per element, and ``attrs["meta"]``. Every column whose name is not a
        key of ``attrs["meta"]`` is treated as an element.

    Returns
    -------
    dict
        Maps the element name to a callable ``f(x, y)`` that returns the
        concentration as a fraction (percentage / 100).

        * ``meta["X"] == "category"``: ``x`` picks one of the distinct ``X``
          values and ``y`` is interpolated with ``interp1d`` between the rows
          of that category. ``x`` may be a number or a numeric string in any
          notation (``-8.5``, ``"-8.5"``, ``"-08.50"``); ``y`` may be a
          number, an array, or a numeric string.
        * otherwise: an ``interp2d`` object over all rows.

    Raises
    ------
    KeyError
        By the returned callables (category mode) when ``x`` is not one of
        the table's ``X`` values, including when it is not numeric at all.
    """
    meta = composition_table.attrs["meta"]
    elements = [col for col in composition_table.columns if col not in meta]
    x = composition_table["X"].values
    y = composition_table["Y"].values
    categorical = meta["X"] == "category"
    categories = composition_table["X"].unique()
    functions = {}

    def category_key(value):
        # canonical dictionary key of an X category, e.g. -8.5 -> "-08.50"
        return "{:06.2f}".format(float(value))

    def make_lookup(interpolators):
        # bind this element's interpolators now; a bare closure over the loop
        # variable would see only the last element
        def lookup(a, b):
            try:
                key = category_key(a)
            except (TypeError, ValueError):
                # not numeric: report it like any other unknown category so
                # callers keep a single exception type to handle
                raise KeyError(a) from None
            if isinstance(b, str):
                # positions parsed from file names arrive as text
                b = float(b)
            return interpolators[key](b)

        return lookup

    for el in elements:
        concentration = to_numeric(composition_table[el].values) / 100.0

        if categorical:
            interpolators = {}
            for category in categories:
                rows = where(x == category)
                interpolators[category_key(category)] = interp1d(
                    to_numeric(y[rows]), to_numeric(concentration[rows])
                )
            functions[el] = make_lookup(interpolators)

        else:
            # float() only accepts single values, so the coordinate arrays are
            # converted element-wise instead.
            # NOTE(review): interp2d is deprecated in recent SciPy releases and
            # raises when called from SciPy 1.14 on - confirm the installed
            # version before relying on this branch.
            functions[el] = interp2d(to_numeric(x), to_numeric(y), concentration)

    return functions

# build the lookups first: they read the X and Y columns that are removed next
conc_funcs = get_concentration_functions(ctable)
# from here on the table only carries the element columns
ctable.drop(columns=["X", "Y"], inplace=True)
ctable

In [None]:
# Attachments are addressed by a "files/..." name.
# NOTE(review): the original comment describes these as paths to gzipped JSON
# files - confirm that is what Attachment.from_data produces.
# The global-params attachment is identical for every contribution across the project.
global_params = Attachment.from_data(
    "files/global-params",
    {
        "transfer_fields": [
            "I_Norm0",
            "Magnet Field",
            "Energy",
            "Y",
            "Z",
            "filename_scannumber",
        ],
        "labelcols": ["Y", "Z"],
    },
)

# one analysis-params attachment per contribution and element
def analysis_params(identifier, element):
    """Build the analysis-parameters attachment for one contribution/element.

    Parameters
    ----------
    identifier : str
        Contribution identifier; part of the attachment name and also stored
        in the payload.
    element : str
        Element the parameters apply to; part of the attachment name and
        stored in every per-element step.

    Returns
    -------
    The object returned by ``Attachment.from_data`` for the name
    ``files/analysis-params__<identifier>__<element>`` and a dict holding one
    entry of settings per analysis step.
    """
    scan_index = "XMCD Index"
    settings = {
        "get_xas": dict(
            element=element,
            pre_edge=(695, 701),
            post_edge=(730, 739),
        ),
        "get_xmcd": dict(
            L3_range=(705, 710),
            L2_range=(718, 722),
        ),
        "Remove BG (polynomial)": dict(
            element=element,
            degree=1,
            step=0,
            xmcd_bg_subtract=True,
            scanindex_column=scan_index,
        ),
        "normalize_set": dict(
            element=element,
            scanindex_column=scan_index,
        ),
        "collapse_set": dict(
            columns_to_keep=["Energy", "Y", "Z"],
        ),
        "plot_spectrum": dict(
            element=element,
            E_lower=695,
            E_upper=760,
        ),
        # added for testing to ensure different attachment contents
        "gather_final_op_param_values": dict(identifier=identifier),
    }
    return Attachment.from_data(
        f"files/analysis-params__{identifier}__{element}", settings
    )

In [None]:
contributions = []

for idx, info in enumerate(tqdm(zip_file.infolist())):
    # archives can list folders as entries of their own; they hold no spectrum
    if info.is_dir():
        continue

    # file names for test data of format: <element>_<x>_<y>.csv
    # TODO include sample or scan-id in filenames to deduce identifier?
    # assign a fake sample id (cycling through five) for testing here
    # Only the base name is parsed so that a folder prefix inside the archive
    # cannot end up in the element, and splitting at most twice from the right
    # yields exactly <element>, <x>, <y>.
    fn = Path(info.filename).stem
    element, x, y = fn.rsplit("_", 2)
    sample = f"CMSI-2-10_{idx%5}"
    identifier = f"{sample}__{x}_{y}"

    # tables and attachments for Co
    # Cu/Ce added via update later - see below
    raw = zip_file.read(info.filename)
    df = read_csv(BytesIO(raw))[["Energy", "XAS", "XMCD"]].set_index("Energy")
    df.index.name = "Energy [eV]"
    df.columns.name = "spectral type"
    df.attrs["name"] = f"{element}-XAS/XMCD"
    df.attrs["title"] = f"XAS and XMCD Spectra for {element}"
    df.attrs["labels"] = {"value": "a.u."}
    params = analysis_params(identifier, element)

    # build contribution
    contrib = {"project": name, "identifier": identifier, "is_public": True, "data": {}}
    # TODO auto-convert data.timestamp field in API to enable sorting/filtering
    contrib["data"]["position"] = {k: f"{v} mm" for k, v in zip(["x", "y"], [x, y])}
    contrib["data"]["composition"] = {}

    # x stays a string because it selects a category; y is interpolated and
    # therefore has to be a number (a malformed y fails loudly right here)
    y_value = float(y)

    for el, conc_func in conc_funcs.items():
        try:
            contrib["data"]["composition"][el] = f"{conc_func(x, y_value) * 100.} %"
        except (KeyError, ValueError):
            # KeyError: x is not a position of the composition table
            # ValueError: y lies outside the range covered for that position
            continue

    if not contrib["data"]["composition"]:
        print(f"Could not determine composition for {identifier}!")
        continue

    # formula from the rounded percentages, e.g. Co46Cu5Ce39
    contrib["formula"] = "".join(
        "{}{}".format(el, int(round(Decimal(comp.split()[0]))))
        for el, comp in contrib["data"]["composition"].items()
    )

    contrib["data"][element] = {
        spectrum: {"min": df[spectrum].min(), "max": df[spectrum].max()}
        for spectrum in ["XAS", "XMCD"]
    }

    # adding ctable and global_params to every contribution
    # ctable could be the same for different subsets of contributions
    contrib["tables"] = [ctable, df]
    contrib["attachments"] = [global_params, params]
    contributions.append(contrib)

#     if len(contributions) > 2:
#         break

# len(contributions)
#contributions

In [None]:
# Reset and re-populate the project.
# NOTE(review): delete_contributions presumably removes every contribution of
# the project - make sure `name` points at a disposable project before running.
client.delete_contributions(name)
# register the columns defined earlier (predetermined order and units)
client.init_columns(name, columns)
# only the first five contributions are submitted
client.submit_contributions(contributions[:5], ignore_dupes=True)

In [None]:
# list the project's contributions, restricted to the fields needed to check
# the submitted components
client.contributions.queryContributions(project=name, _fields=[
    "id", "identifier", "tables", "attachments", "notebook"
]).result()

In [None]:
# IDs of the project's contributions (empty set when none are reported)
ids = client.get_all_ids({"project": name})[name].get("ids", set())
cids = ",".join(ids)
# request the notebook build for exactly these contributions
url = f"{client.url}/notebooks/build?cids={cids}"
# NOTE(review): `http` is presumably the HTTPie command line tool and has to be installed - confirm.
!http "$url"

### Subsequent Analyses = Contribution Updates

In [None]:
# Fake the spectra of the other two elements: reuse each contribution's
# spectrum table and shift its x-axis by 10 per element.
fake_tables = {}

for contrib in contributions:
    identifier = contrib["identifier"]
    base_spectra = contrib["tables"][1]  # index 0 holds the composition table
    shifted = []
    for idx, element in enumerate(elements[1:]):
        df = base_spectra.copy()
        df.index = df.index.astype("float") + 10 * (idx + 1)
        df.attrs.update({
            "name": f"{element}-XAS/XMCD",
            "title": f"XAS and XMCD Spectra for {element}",
        })
        shifted.append(df)
    fake_tables[identifier] = shifted

In [None]:
# map each identifier to the ID of its stored contribution
# (the identifier list could be just the subset for which an analysis was run)
identifiers = [entry["identifier"] for entry in contributions]

resp = client.contributions.queryContributions(
    project=name,
    identifier__in=identifiers[:5],
    _fields=["id", "identifier"],
).result()

mapping = dict((entry["identifier"], entry["id"]) for entry in resp["data"])
print(mapping)

In [None]:
# iteratively add spectra/tables for each element to contributions
# example for a single identifier and element
identifier = identifiers[0]
element_index = 1  # position in `elements`; 0 (Co) is part of the initial submission
component_index = element_index + 1 # index in contribution's component list
element = elements[element_index]
pk = mapping[identifier]
# fake_tables holds one table per entry of elements[1:], so its lists are
# shifted by one against `elements`; without the -1 the Cu update would pick
# up the table faked for Ce
df = fake_tables[identifier][element_index - 1]
assert df.attrs["name"] == f"{element}-XAS/XMCD", "table does not belong to the selected element"
params = analysis_params(identifier, element)

contrib = {
    "id": pk,
    "data": {element: {
        spectrum: {"min": df[spectrum].min(), "max": df[spectrum].max()}
        for spectrum in ["XAS", "XMCD"]
    }},
    "tables": [None] * component_index + [df],  # ensure correct index for update
    "attachments": [None] * component_index + [params],
}

In [None]:
# submit the single-element entry built above; it carries the "id" of the stored contribution
# NOTE(review): presumably the "id" key is what turns this into an update rather than a new entry - confirm.
client.submit_contributions([contrib])

In [None]:
# fetch the contribution again to inspect the result of the update
client.get_contribution(pk)

In [None]:
# NOTE(review): hard-coded table ID, presumably from an earlier submission -
# replace it with an ID from the current run before executing.
client.get_table('608a5a1ddce158e132083323').display()

In [None]:
# NOTE(review): hard-coded attachment ID, presumably from an earlier
# submission - replace it with an ID from the current run before executing.
client.get_attachment("608a5a1edce158e132083329").info()