# Publish Dataset to the MDF
Publishes our data to the Materials Data Facility

In [1]:
from mdf_connect_client import MDFConnectClient
from batdata import __version__ as bd_version
from shutil import rmtree
from pathlib import Path

Configuration

In [2]:
# Globus endpoint of my desktop, which hosts the files to be published
source_endpoint = '0c3771de-f236-11ec-b3c1-15403b7b75ed'

# Directory (relative to where the notebook is launched) holding the data to upload
data_path = Path.cwd().joinpath('data')

## Clean the directories
Remove temporary files placed by Jupyter

In [3]:
# Everything under `data_path` is what gets uploaded, so search the whole tree
# for Jupyter checkpoint folders. The previous pattern ('*data/**/...') only
# looked inside immediate children whose name ends in "data", which skips
# folders such as `hdf5/` as well as `data_path/.ipynb_checkpoints` itself.
#
# The matches are collected into a list first so that directories are not
# deleted out from under a directory walk that is still in progress.
checkpoint_dirs = sorted(data_path.rglob('.ipynb_checkpoints'))
for path in checkpoint_dirs:
    if not path.is_dir():
        # Already gone, e.g. it was nested inside a checkpoint folder removed earlier
        continue
    rmtree(path)
    print(f'Removed: {path.relative_to(Path.cwd())}')

## Create the client
This will handle authentication with the Materials Data Facility

In [4]:
# Single client object that the cells below use to assemble the metadata,
# register the data location, submit the dataset, and poll its status.
client = MDFConnectClient()

## Add basic metadata
Authors, titles, related publications

In [17]:
# Count the HDF5 files in each category; both counts are interpolated into the
# dataset description built in the next cell.
n_refined = len(list((data_path / 'hdf5' / 'refined').glob("*.h5")))
n_other = len(list((data_path / 'hdf5' / 'other').glob("*.h5")))

# Sanity check before publishing: stop here, reporting the actual counts, if the
# number of cells on disk is not the number expected for this release.
expected_total = 601
assert n_refined + n_other == expected_total, (
    f'Expected {expected_total} cells, found {n_refined} refined + {n_other} other '
    f'= {n_refined + n_other}'
)

In [18]:
# Build the citation-style metadata (title, authors, affiliation, description,
# related DOI) for the dataset. The description is assembled from adjacent
# string literals; the f-string pieces pull in the cell counts computed above
# and the Battery Data Toolkit version imported at the top of the notebook.
client.create_dc_block(
    'Dataset of NMC battery Tests from CAMP, 2023 Release',
    authors = [
        'Logan Ward',
        'Joseph Kubal',
        'Susan J. Babinec',
        'Wenquan Lu',
        'Allison Dunlop',
        'Steve Trask',
        'Bryant Polzin',
        'Andrew Jansen',
        'Noah H. Paulson'
    ],
    affiliations='Argonne National Laboratory',
    description='Collection of cycle life tests of Li-ion batteries from the '
                'Cell Analysis, Modeling, and Prototyping (CAMP) Facility at Argonne National Laboratory. '
                f'The dataset contains all {n_refined} cells used by Paulson et al. to study the effect of feature '
                'engineering on machine learning models to predict the life of batteries, which were selected because they '
                'have a graphite anode, used charging rates below 1C, and were tested for at least 100 cycles. '
                f'We also include {n_other} cells that '
                'failed to meet acceptance criteria for that study. '  # spelling fixed: was "acceptence"
                f'Each cell is stored in the HDF5 format of Argonne\'s Battery Data Toolkit v{bd_version}, '
                'which includes battery metadata, the raw signal from the testing equipment, and '
                'per-cycle summaries of battery performance.',
    related_dois=['10.1016/j.jpowsour.2022.231127']  # Paper by Paulson et al. referenced in the description -- TODO confirm DOI
)

In [19]:
# Short name for the submission; it appears as the prefix of the `source_id`
# reported after submission (e.g. 'camp_2023_v3.5' in the output further down).
client.set_source_name('camp_2023')

In [20]:
# Proof-read the assembled description exactly as it is stored on the client
description_text = client.dc['descriptions'][0]['description']
print(description_text)

Collection of cycle life tests of Li-ion batteries from the Cell Analysis, Modeling, and Prototyping (CAMP) Facility at Argonne National Laboratory. The dataset contains all 300 cells used by Paulson et al. to study the effect of feature engineering on machine learning models to predict the life of batteries, which were selected because they have a graphite anode, used charging rates below 1C, and were tested for at least 100 cycles. We also include 301 cells that failed to meet acceptence criteria for that study. Each cell is stored in the HDF5 format of Argonne's Battery Data Toolkit v0.1.0, which includes battery metadata, the raw signal from the testing equipment, and per-cycle summaries of battery performance.


## Add Data Source
Tell MDF where to get the data from

In [9]:
# Globus location of the data: the endpoint ID followed by the absolute path on that endpoint
data_source_url = f'globus://{source_endpoint}{data_path.absolute()}'
client.add_data_source(data_source_url)

# Submit the Dataset
Send it to the Materials Data Facility to be processed

In [10]:
# Tag the submission with the "MDF Open" organization to mark that we want it to go to MDF
client.add_organization("MDF Open")

Print out what is about to be submitted

In [11]:
# Display the assembled submission (left as the cell's last expression so the
# notebook renders it) for a final review before it is sent.
client.get_submission()

{'dc': {'titles': [{'title': 'Dataset of NMC battery Tests from CAMP, 2023 Release'}],
  'creators': [{'creatorName': 'Ward, Logan',
    'familyName': 'Ward',
    'givenName': 'Logan',
    'affiliations': ['Argonne National Laboratory']},
   {'creatorName': 'Kubal, Joseph',
    'familyName': 'Kubal',
    'givenName': 'Joseph',
    'affiliations': ['Argonne National Laboratory']},
   {'creatorName': 'Babinec, Susan J.',
    'familyName': 'Babinec',
    'givenName': 'Susan J.',
    'affiliations': ['Argonne National Laboratory']},
   {'creatorName': 'Lu, Wenquan',
    'familyName': 'Lu',
    'givenName': 'Wenquan',
    'affiliations': ['Argonne National Laboratory']},
   {'creatorName': 'Dunlop, Allison',
    'familyName': 'Dunlop',
    'givenName': 'Allison',
    'affiliations': ['Argonne National Laboratory']},
   {'creatorName': 'Trask, Steve',
    'familyName': 'Trask',
    'givenName': 'Steve',
    'affiliations': ['Argonne National Laboratory']},
   {'creatorName': 'Polzin, Bryant'

In [12]:
# Send the submission to MDF. `update=True` presumably marks this as a new
# version of an existing dataset -- the status log below shows the earlier
# 'camp_2023_v3.4' submission being cancelled -- TODO confirm against the client docs.
result = client.submit_dataset(update=True)
# Show the response; it carries the `source_id` used to poll status in the next cell
result

{'source_id': 'camp_2023_v3.5',
 'success': True,
 'error': None,
 'status_code': 202}

In [16]:
# Report the processing status of the submission made above; re-run this cell to
# refresh it while the submission is still processing.
client.check_status(result['source_id'])


Status of submission camp_2023_v3.5 (Dataset of NMC battery Tests from CAMP, 2023 Release)
Submitted by Logan Ward at 2023-01-25T19:57:56.015687Z

Submission initialization was successful: The following submissions will be cancelled: ['camp_2023_v3.4'].
Cancellation of previous submissions was successful.
Connect data download was successful: 9324 files will be grouped and extracted (from 0 archives).
Data transfer to primary destination was successful.
Metadata extraction was successful: 601 metadata records extracted out of 9324 file groups.
Dataset curation is in progress.
MDF Search ingestion has not started yet.
Data transfer to secondary destinations has not started yet.
MDF Publish publication has not started yet.
Citrine upload has not started yet.
Materials Resource Registration has not started yet.
Post-processing cleanup has not started yet.

This submission is still processing.

