In [1]:
import gzip
import json
import os
import pickle as pkl
import time

import numpy as np
import pandas as pd

# Look to gist.github or pymatgen docs for examples
from pymatgen import MPRester
from pymatgen import Composition

from matminer.featurizers.base import MultipleFeaturizer
from matminer.featurizers import composition as cf

# Obtain data from the Materials Project API (MAPI) via its REST interface

In [2]:
# Materials Project API key.
# Read it from the PMG_MAPI_KEY environment variable so the secret never has to be
# pasted into the notebook (and scrubbed again before publishing). Falls back to
# an empty string, which is what this cell used to hard-code.
APIkey = os.environ.get('PMG_MAPI_KEY', '')

In [3]:
# Create the Materials Project REST client, passing the key defined in the previous cell.
mpr = MPRester(api_key=APIkey)

In [4]:
# Fetch Materials Project entries for the formula 'ZrAlNi' twice:
#   data  - restricted to the 'density' property
#   data1 - no `prop` filter; a later cell reads the 'cif' field of its first entry
data = mpr.get_data('ZrAlNi',prop='density')
data1 = mpr.get_data('ZrAlNi')

In [5]:
# Example query: entries whose element list includes Hf, Al and Ni ('$all'),
# returning only the material id, density and pretty formula of each hit.
hf_al_ni_criteria = {'elements': {'$all': ['Hf', 'Al', 'Ni']}}
hf_al_ni_fields = ['material_id', 'density', 'pretty_formula']
mpr.query(hf_al_ni_criteria, properties=hf_al_ni_fields)

[{'density': 6.0990192647020605,
  'material_id': 'mp-1025457',
  'pretty_formula': 'HfAl5Ni2'},
 {'density': 9.372035219242056,
  'material_id': 'mp-1078767',
  'pretty_formula': 'HfAlNi'},
 {'density': 9.530657163365866,
  'material_id': 'mp-5748',
  'pretty_formula': 'HfAlNi2'}]

In [6]:
# Display the CIF text stored under the 'cif' key of the first unfiltered ZrAlNi entry.
data1[0]['cif']

"# generated using pymatgen\ndata_ZrAlNi\n_symmetry_space_group_name_H-M   'P 1'\n_cell_length_a   3.50853034\n_cell_length_b   6.90772970\n_cell_length_c   6.90716467\n_cell_angle_alpha   120.00270371\n_cell_angle_beta   90.00000000\n_cell_angle_gamma   90.00000000\n_symmetry_Int_Tables_number   1\n_chemical_formula_structural   ZrAlNi\n_chemical_formula_sum   'Zr3 Al3 Ni3'\n_cell_volume   144.97034775\n_cell_formula_units_Z   3\nloop_\n _symmetry_equiv_pos_site_id\n _symmetry_equiv_pos_as_xyz\n  1  'x, y, z'\nloop_\n _atom_site_type_symbol\n _atom_site_label\n _atom_site_symmetry_multiplicity\n _atom_site_fract_x\n _atom_site_fract_y\n _atom_site_fract_z\n _atom_site_occupancy\n  Zr  Zr1  1  0.500000  0.592952  1.000000  1\n  Zr  Zr2  1  0.500000  0.407024  0.407037  1\n  Zr  Zr3  1  0.500000  0.999988  0.592963  1\n  Al  Al4  1  0.000000  0.249448  0.000000  1\n  Al  Al5  1  -0.000000  0.750544  0.750578  1\n  Al  Al6  1  -0.000000  0.999967  0.249422  1\n  Ni  Ni7  1  0.500000  0.0

# Try getting density from MAPI

In [7]:
# Load the local, gzip-compressed pickle holding the density dataset.
# NOTE(review): unpickling executes arbitrary code - only load files you trust.
density_path = './datasets/density_data.pkl.gz'
with gzip.open(density_path, mode='rb') as density_file:
    densityData = pkl.load(density_file)

In [8]:
# Collect the distinct element combinations found in the dataset; each one is
# later used to grab all MP alloys containing the same elements.
comps = densityData['comp']
compSet = set()

for composition in comps:
    # Iterating the as_dict() mapping yields its keys (the element symbols).
    # `tern` is deliberately left as a cell-level name: the next cell displays it.
    tern = frozenset(composition.as_dict())
    compSet.add(tern)

In [9]:
# Inspect `tern` as left behind by the loop in the previous cell, i.e. the element
# set of the last composition processed. Despite the name it is not necessarily a
# ternary - the set can hold more than three elements.
list(tern)

['Zr', 'Cu', 'Al', 'Ag']

In [10]:
# Query the Materials Project once per element combination in `compSet` and gather
# every hit into a single table, `MAPIdf`.
cols = ['pretty_formula', 'density', 'material_id']
dataDict = dict()  # not used in this cell; kept so the notebook namespace is unchanged
failLog = set()    # element combinations for which the query returned no entries
records = []       # one dict per matching MP entry, accumulated across all queries
for tern in compSet:
    matches = mpr.query({'elements': {'$all': list(tern)}}, properties=cols)
    if len(matches) == 0:
        failLog.add(tern)
    else:
        records.extend(matches)

# Build the frame once from all records instead of calling DataFrame.append inside
# the loop: append copies the whole frame on every call (quadratic) and was removed
# in pandas 2.0. `columns=cols` keeps the column order and still yields an empty,
# correctly-labelled frame when nothing matched. The result has a fresh 0..n-1 index
# rather than one that restarts for every query.
MAPIdf = pd.DataFrame(records, columns=cols)

print('Out of {} compositions, {} ternaries had no examples, found {} other items'.format(len(compSet), len(failLog), len(MAPIdf)))

# add Composition object
MAPIdf['comp'] = [Composition(x) for x in MAPIdf['pretty_formula']]

Out of 251 compositions, 183 ternaries had no examples, found 5026 other items


In [11]:
# Composition featurizers bundled into one MultipleFeaturizer, listed one per line.
featurizers = [
    cf.Stoichiometry(),
    cf.ElementProperty.from_preset("magpie"),
    cf.ValenceOrbital(props=['avg']),
    cf.IonProperty(fast=True),
    cf.YangSolidSolution(),
    cf.AtomicPackingEfficiency(),
]
base_featurizer = MultipleFeaturizer(featurizers)

In [12]:
# Featurization of the MP compositions is switched off by default (the original
# cell was guarded with `if False:`); set the flag to True to run it.
RUN_FEATURIZATION = False

if RUN_FEATURIZATION:
    # `%%time` is a cell magic and is only valid as the first line of a cell, so
    # the timing is done in plain Python instead.
    start = time.perf_counter()
    X_mapi = base_featurizer.featurize_many(MAPIdf['comp'], ignore_errors=True)
    # astype returns a new array; assign it so the float conversion actually sticks.
    X_mapi = np.array(X_mapi).astype(float)
    print('Computed {} features'.format(X_mapi.shape[1]))
    print('Wall time: {:.1f} s'.format(time.perf_counter() - start))

In [13]:
# Persist the MP density table as a gzip-compressed pickle.
mapi_density_path = './datasets/MAPI_density_data.pkl.gz'
with gzip.open(mapi_density_path, mode='wb') as density_out:
    pkl.dump(MAPIdf, density_out)
# Saving the feature matrix is disabled while featurization is switched off:
#with gzip.open('./datasets/MAPI_density_features.pkl.gz', 'wb') as fx:
#    pkl.dump(X_mapi, fx)