My data is currently stored as JSON. Let's convert it into a form that the PC-GAN notebook (PyTorch) can read: one `.pts` file of atom positions and one `.seg` file of element labels per structure.

# Get raw data

In [10]:
import json
import os
import sys

In [3]:
raw_data_folder = 'gaspy/raw_data/'


def _load_json(fname):
    """Return the parsed contents of the JSON file `fname` inside `raw_data_folder`."""
    with open(raw_data_folder + fname, 'r') as file_handle:
        return json.load(file_handle)


co_documents = _load_json('co_data.json')
h_documents = _load_json('h_data.json')

# Group the loaded documents under the adsorbate they belong to
documents = dict(CO=co_documents, H=h_documents)

In [4]:
# Peek at the first CO document to see what fields a document carries.
# (`docs` is not defined at this point on a fresh kernel, so index `documents` directly.)
documents['CO'][0]

{u'adsorbate': u'CO',
 u'atoms': {u'atoms': [{u'charge': 0.0,
    u'index': 0,
    u'magmom': 0.0,
    u'momentum': [0.0, 0.0, 0.0],
    u'position': [4.950828302364764, 7.456137729876762, 20.64137890207189],
    u'symbol': u'C',
    u'tag': 1},
   {u'charge': 0.0,
    u'index': 1,
    u'magmom': 0.0,
    u'momentum': [0.0, 0.0, 0.0],
    u'position': [4.937282334162113, 7.478494685664874, 21.81204637521394],
    u'symbol': u'O',
    u'tag': 1},
   {u'charge': 0.0,
    u'index': 2,
    u'magmom': 0.0,
    u'momentum': [0.0, 0.0, 0.0],
    u'position': [4.62461883579917, 8.797517786901787, 11.609944634852008],
    u'symbol': u'Ca',
    u'tag': 0},
   {u'charge': 0.0,
    u'index': 3,
    u'magmom': 0.0,
    u'momentum': [0.0, 0.0, 0.0],
    u'position': [6.645185404401976, 3.6846857888129585, 14.482524119130769],
    u'symbol': u'Ca',
    u'tag': 0},
   {u'charge': 0.0,
    u'index': 4,
    u'magmom': 0.0,
    u'momentum': [0.0, 0.0, 0.0],
    u'position': [0.779673721372297, 8.87095788

# Save processed data

In [6]:
# Element symbols listed in order of increasing atomic number (H=1 ... Am=95),
# one periodic-table row (or partial row) per line.
_symbols_by_number = (
    'H He Li Be B C N O F Ne '
    'Na Mg Al Si P S Cl Ar '
    'K Ca Sc Ti V Cr Mn Fe Co Ni Cu Zn Ga Ge As Se Br Kr '
    'Rb Sr Y Zr Nb Mo Tc Ru Rh Pd Ag Cd In Sn Sb Te I Xe '
    'Cs Ba La Ce Pr Nd Pm Sm Eu Gd Tb Dy Ho Er Tm Yb Lu '
    'Hf Ta W Re Os Ir Pt Au Hg Tl Pb Bi Po At Rn '
    'Fr Ra Ac Th Pa U Np Pu Am'
).split()

# Map each symbol to its 1-based position in the list above
elements = dict((symbol, number)
                for number, symbol in enumerate(_symbols_by_number, 1))

In [30]:
data_folder = 'gaspy/data/'


def _write_point_cloud(atoms, subfolder, doc_hash):
    """Write one structure as a `.pts` coordinate file plus a `.seg` label file.

    The `.pts` file gets one line per atom holding its space-separated position;
    the `.seg` file gets one line per atom holding its number from `elements`.
    Both files list the atoms in the same order.

    Args:
        atoms:      Iterable of atom dictionaries, each with a 'position'
                    (sequence of coordinates) and a 'symbol' key.
        subfolder:  Folder name under `data_folder`, including the trailing
                    slash (e.g. 'CO_relaxed/').
        doc_hash:   String used as the base file name of both output files.
    Raises:
        KeyError:   If an atom's symbol is missing from `elements`.
    """
    points_folder = data_folder + subfolder + 'points/'
    labels_folder = data_folder + subfolder + 'points_label/'

    # open(..., 'w') does not create missing folders, so make them first.
    # (isdir + makedirs instead of exist_ok=True keeps this Python 2 compatible.)
    for folder in (points_folder, labels_folder):
        if not os.path.isdir(folder):
            os.makedirs(folder)

    # Resolve every label up front so an unknown symbol raises before any file is written
    labels = [elements[atom['symbol']] for atom in atoms]

    with open(points_folder + doc_hash + '.pts', 'w') as file_handle:
        for atom in atoms:
            file_handle.write(' '.join(map(str, atom['position'])) + '\n')
    with open(labels_folder + doc_hash + '.seg', 'w') as file_handle:
        for element_num in labels:
            file_handle.write(str(element_num) + '\n')


# `.values()` works on both Python 2 and 3 (`.iteritems()` is Python 2 only),
# and the dictionary key is not needed because each document names its own adsorbate.
for docs in documents.values():
    for doc in docs:
        subfolder_prefix = doc['adsorbate']

        # Name the files after a hash of the whole document. The modulo maps the
        # possibly negative result of hash() onto a non-negative number.
        # NOTE: on Python 3, str hashes are salted per process unless PYTHONHASHSEED
        # is fixed, so these names are only reproducible between runs on Python 2
        # (or with a fixed seed).
        doc_hash = str(hash(json.dumps(doc, sort_keys=True)) % ((sys.maxsize + 1) * 2))

        # Write the relaxed structures
        _write_point_cloud(doc['atoms']['atoms'],
                           subfolder_prefix + '_relaxed/',
                           doc_hash)

        # Write the unrelaxed structures
        _write_point_cloud(doc['initial_configuration']['atoms']['atoms'],
                           subfolder_prefix + '_unrelaxed/',
                           doc_hash)