In [16]:
import numpy as np
import os
import pandas as pd
import gzip
import pickle as pkl
from itertools import combinations

from pymatgen import MPRester 
    # Look to gist.github or pymatgen docs for examples
from pymatgen import Composition
import json

from matminer.featurizers.base import MultipleFeaturizer
from matminer.featurizers import composition as cf

# Obtain data from MAPI via RESTurl

In [2]:
# Keep private, scrub when publishing
APIkey = 'TDC0utdkGv6EMolT'

In [3]:
mpr = MPRester(api_key=APIkey)

In [4]:
data = mpr.get_data('ZrAlNi',prop='density')
data1 = mpr.get_data('ZrAlNi')

In [5]:
mpr.query(['Ag'][0], properties=['material_id', 'density', 'pretty_formula'])

[{'density': 9.909384947524844,
  'material_id': 'mp-8566',
  'pretty_formula': 'Ag'},
 {'density': 9.888476577052216,
  'material_id': 'mp-10597',
  'pretty_formula': 'Ag'},
 {'density': 9.948341484985864,
  'material_id': 'mp-124',
  'pretty_formula': 'Ag'},
 {'density': 9.922633071705896,
  'material_id': 'mp-989737',
  'pretty_formula': 'Ag'}]

In [9]:
mpr.query({'elements':{'$size':3, '$all':['Cu', 'Al', 'Ni']}}, properties=['material_id', 'density', 'pretty_formula','energy'])

[{'density': 0.39860429617432785,
  'energy': -10.46073226,
  'material_id': 'mp-1095860',
  'pretty_formula': 'Al2CuNi'},
 {'density': 0.6139380274508028,
  'energy': -10.63099917,
  'material_id': 'mp-1096066',
  'pretty_formula': 'AlCuNi2'},
 {'density': 5.785421822945918,
  'energy': -99.45244755000002,
  'material_id': 'mvc-2226',
  'pretty_formula': 'AlCu3(NiO3)4'},
 {'density': 5.783943989953349,
  'energy': -50.47093414,
  'material_id': 'mvc-7674',
  'pretty_formula': 'BaAlCuNiO5'},
 {'density': 5.042235453759157,
  'energy': -149.40831635,
  'material_id': 'mvc-864',
  'pretty_formula': 'Sr2AlCu2NiO7'}]

In [10]:
data1[0]

{'band_gap': 0.0,
 'cif': "# generated using pymatgen\ndata_ZrAlNi\n_symmetry_space_group_name_H-M   'P 1'\n_cell_length_a   3.50853034\n_cell_length_b   6.90772970\n_cell_length_c   6.90716467\n_cell_angle_alpha   120.00270371\n_cell_angle_beta   90.00000000\n_cell_angle_gamma   90.00000000\n_symmetry_Int_Tables_number   1\n_chemical_formula_structural   ZrAlNi\n_chemical_formula_sum   'Zr3 Al3 Ni3'\n_cell_volume   144.97034775\n_cell_formula_units_Z   3\nloop_\n _symmetry_equiv_pos_site_id\n _symmetry_equiv_pos_as_xyz\n  1  'x, y, z'\nloop_\n _atom_site_type_symbol\n _atom_site_label\n _atom_site_symmetry_multiplicity\n _atom_site_fract_x\n _atom_site_fract_y\n _atom_site_fract_z\n _atom_site_occupancy\n  Zr  Zr1  1  0.500000  0.592952  1.000000  1\n  Zr  Zr2  1  0.500000  0.407024  0.407037  1\n  Zr  Zr3  1  0.500000  0.999988  0.592963  1\n  Al  Al4  1  0.000000  0.249448  0.000000  1\n  Al  Al5  1  -0.000000  0.750544  0.750578  1\n  Al  Al6  1  -0.000000  0.999967  0.249422  1\n 

# Try getting density from MAPI

In [11]:
with gzip.open('./datasets/density_data.pkl.gz', 'rb') as fd:
    densityData = pkl.load(fd)

In [12]:
# make set of all ternary systems, grab all MP alloys in same ternary
comps = densityData['comp']
compSet = set()

def getAlloySet(data):
    '''Returns set of alloy element sets, iterating over 
        all items in ['composition'] column
        
    Args: 
        data (list-like): compositions
    Returns:
        (set): each item of the returned set is a set with strings 
                denoting element symbols
    '''
    alloys=set()
    
    for comp in data.tolist():
        curr = frozenset(comp.as_dict().keys())
        alloys.add(curr)
    
    return alloys

for x in comps: # Generate all ternaries to look through
    tern = frozenset(x.as_dict().keys())
    compSet.add(tern)

In [13]:
matches = mpr.query({'elements':{'$all':list(frozenset({'Ag', 'Hg', 'Au'}))}}, properties=['material_id', 'density', 'pretty_formula'])

In [14]:
issueComps = {frozenset({'Ag', 'Hg', 'Au'}), frozenset({'Al', 'Co','Rh'}), frozenset({'V', 'Cr', 'Mo'})}
matchedComps = {frozenset(Composition(x['pretty_formula']).as_dict().keys()) for x in matches}
print(issueComps)
print(matchedComps)
issueComps & matchedComps

{frozenset({'Rh', 'Al', 'Co'}), frozenset({'Au', 'Hg', 'Ag'}), frozenset({'Cr', 'Mo', 'V'})}
{frozenset({'Au', 'Hg', 'Ag'})}


{frozenset({'Ag', 'Au', 'Hg'})}

In [34]:
a = frozenset({'Ag','Hg','Au'})
b = frozenset({'Ag'})
comb = combinations(b, 1)
for i in comb:
    print(i)
    
frozenset(i)

('Ag',)


frozenset({'Ag'})

In [38]:
cols = ['pretty_formula', 'density', 'material_id']
MAPIdf = pd.DataFrame(columns=cols)
dataDict = dict()
failLog = set()

issueComps = {frozenset({'Ag', 'Hg', 'Au'}), frozenset({'Al', 'Co','Rh'}), frozenset({'V', 'Cr', 'Mo'})}

seenComps = set()

for comp in compSet:
    # Look for all data within the defined ternaries
    # iterate through all lengths of combinations
    for n in range(len(comp)):
        for c in combinations(comp, n): # single elements
            # look for composition with exact length of combination and containing elements in comb
            matches = mpr.query({'elements':{'$size':len(c), '$all':list(c)}}, 
                                properties=['material_id', 'density', 'pretty_formula'])
            seenComps.add(frozenset(c))

            # format and add to database
            if len(matches) == 0:
                #print('no data for: {}'.format(tern))
                failLog.add(tern)
            else:
                MAPIdf = MAPIdf.append(pd.DataFrame(matches))
      
print('Out of {} compositions, {} ternaries had no examples, found {} other items'.format(len(compSet), len(failLog), len(MAPIdf)))
        
# add Composition object
MAPIdf['comp'] = [Composition(x) for x in MAPIdf['pretty_formula']]

Out of 251 compositions, 1 ternaries had no examples, found 132 other items


In [39]:
MAPIdf

Unnamed: 0,density,material_id,pretty_formula,comp
0,8.902616,mp-998890,Cu,(Cu)
1,8.930286,mp-989695,Cu,(Cu)
2,8.935841,mp-989782,Cu,(Cu)
3,8.886169,mp-1010136,Cu,(Cu)
4,0.174855,mp-1056079,Cu,(Cu)
5,8.517518,mp-1059259,Cu,(Cu)
6,4.577375,mp-1120774,Cu,(Cu)
7,8.888270,mp-30,Cu,(Cu)
0,6.981483,mp-79,Zn,(Zn)
0,1.765008,mp-1056702,Mg,(Mg)


In [41]:
mapiCompSet = getAlloySet(MAPIdf['comp'])

In [47]:
len(MAPIdf)

5026

In [54]:
base_featurizer = MultipleFeaturizer([cf.Stoichiometry(), cf.ElementProperty.from_preset("magpie"),
                                 cf.ValenceOrbital(props=['avg']), cf.IonProperty(fast=True),
                                cf.YangSolidSolution(), cf.AtomicPackingEfficiency()])

In [None]:
if True:
    %%time
    X_mapi = base_featurizer.featurize_many(MAPIdf['comp'], ignore_errors=True)
    X_mapi = np.array(X_mapi)
    X_mapi.astype(float)
    print('Computed {} features'.format(X_mapi.shape[1]))

Wall time: 0 ns


HBox(children=(IntProgress(value=0, description='MultipleFeaturizer', max=5026), HTML(value='')))

In [13]:
with gzip.open('./datasets/MAPI_density_data.pkl.gz', 'wb') as fp:
    pkl.dump(MAPIdf, fp)
#with gzip.open('./datasets/MAPI_density_features.pkl.gz', 'wb') as fx:
#    pkl.dump(X_mapi, fx)

In [14]:
matches = mpr.query({'nelements':3}, properties=['material_id', 'density', 'pretty_formula'])
len(matches)

41751