# Publish Dataset to the MDF
Publishes our data to the Materials Data Facility (MDF).

In [1]:
from mdf_connect_client import MDFConnectClient
from batdata import __version__ as bd_version
from pathlib import Path

## Configuration

In [2]:
# Globus endpoint of my desktop
source_endpoint = '0c3771de-f236-11ec-b3c1-15403b7b75ed'
# Directory (under the current working directory) that is searched for the HDF5 files to publish
data_path = Path.cwd().joinpath('data')

## Create the client
This will handle authentication with the Materials Data Facility

In [3]:
# Default-constructed client; the cells below attach the metadata and the data source to it
client = MDFConnectClient()

## Add basic metadata
Authors, titles, related publications

In [13]:
# Count the HDF5 files in each subdirectory; both totals are interpolated into the dataset description
n_refined = sum(1 for h5_file in data_path.joinpath('refined').glob("*.h5"))
n_other = sum(1 for h5_file in data_path.joinpath('other').glob("*.h5"))

In [14]:
# Attach the citation metadata (title, authors, affiliation, description, related DOI) to the submission.
# The description embeds the cell counts computed above and the installed batdata version.
client.create_dc_block(
    'Dataset of NMC battery Tests from CAMP, 2023 Release',
    authors=[
        'Logan Ward',
        'Joseph Kubal',
        'Susan J. Babinec',
        'Wenquan Lu',
        'Allison Dunlop',
        'Steve Trask',
        'Andrew Jansen',
        'Noah H. Paulson'
    ],
    # A single affiliation string is given for the whole author list
    affiliations='Argonne National Laboratory',
    description='Collection of longevity tests of Nickel-Manganese-Cobalt (NMC) batteries from the '
                'Cell Analysis, Modeling, and Prototyping (CAMP) Facility at Argonne National Laboratory. '
                f'The dataset contains all {n_refined} cells used by Paulson et al. to study the effect of feature '
                f'engineering on machine learning models to predict the life of batteries, and {n_other} cells that '
                'failed to meet acceptance criteria for that study. '  # spelling fixed: was "acceptence"
                f'Each cell is stored in the HDF5 format of Argonne\'s Battery Data Toolkit v{bd_version}, '
                'which includes battery metadata, the raw signal from the testing equipment, and '
                'per-cycle summaries of battery performance.',
    # DOI passed as the related publication for this dataset
    related_dois=['10.1016/j.jpowsour.2022.231127']
)

In [15]:
# NOTE(review): presumably the short identifier MDF uses for this submission — confirm against the MDF Connect docs
client.set_source_name('camp_2023')

In [16]:
# Show the description as stored on the client, with its f-string fields filled in, to proofread it
print(client.dc['descriptions'][0]['description'])

Collection of longevity tests of Nickel-Manganese-Cobalt (NMC) batteries from the Cell Analysis, Modeling, and Prototyping (CAMP) Facility at Argonne National Laboratory. The dataset contains all 278 cells used by Paulson et al. to study the effect of feature engineering on machine learning models to predict the life of batteries, and 291 cells that failed to meet acceptence criteria for that study. Each cell is stored in the HDF5 format of Argonne's Battery Data Toolkit v0.1.0, which includes battery metadata, the raw signal from the testing equipment, and per-cycle summaries of battery performance.


## Add Data Source
Tell MDF where to get the data from

In [8]:
# Globus URI: the endpoint ID immediately followed by the absolute local path of the data directory
client.add_data_source(
    'globus://' + source_endpoint + str(data_path.absolute())
)

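**TODO**: Pull the submission trigger, i.e., call `client.submit_dataset()` once the metadata and data source above have been verified.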