In [12]:
%matplotlib inline
from matplotlib import pyplot as plt
from matminer.featurizers.base import MultipleFeaturizer, StackedFeaturizer
from matminer.featurizers import composition as cf
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import GridSearchCV, ShuffleSplit, LeaveOneGroupOut, cross_val_score, learning_curve, KFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Imputer, LabelEncoder
from sklearn.metrics import roc_curve, auc, r2_score, make_scorer
from sklearn import metrics
from pymatgen import Composition
import pickle as pkl
import pandas as pd
import numpy as np
import gzip
import os
import copy

In [14]:
# Combined composition featurizer used by every probe cell below.
# The sub-featurizers are applied in the order listed, so this order
# also fixes the order of the entries in the resulting feature vector.
base_featurizer = MultipleFeaturizer([
    cf.Stoichiometry(),
    cf.ElementProperty.from_preset("magpie"),
    cf.ValenceOrbital(props=['avg']),
    cf.IonProperty(fast=True),
    cf.YangSolidSolution(),
    cf.AtomicPackingEfficiency(),
])

In [22]:
# Baseline case: a Zr-Al binary that is expected to featurize cleanly
# (no warnings, no NaN/inf entries in the feature vector).
compValid = Composition('Zr55Al10')
print(compValid)

# Featurize the composition and display the full vector as the cell output.
Xvalid = base_featurizer.featurize(compValid)
Xvalid

Zr55 Al10


array([ 2.00000000e+00,  8.60026145e-01,  8.47845735e-01,  8.46187469e-01,
        8.46154640e-01,  8.46153849e-01,  1.30000000e+01,  4.00000000e+01,
        2.70000000e+01,  3.58461538e+01,  7.02958580e+00,  4.00000000e+01,
        4.40000000e+01,  7.30000000e+01,  2.90000000e+01,  4.84615385e+01,
        7.55029586e+00,  4.40000000e+01,  2.69815386e+01,  9.12240000e+01,
        6.42424614e+01,  8.13405444e+01,  1.67258479e+01,  9.12240000e+01,
        9.33470000e+02,  2.12800000e+03,  1.19453000e+03,  1.94422615e+03,
        3.11001893e+02,  2.12800000e+03,  4.00000000e+00,  1.30000000e+01,
        9.00000000e+00,  5.38461538e+00,  2.34319527e+00,  4.00000000e+00,
        3.00000000e+00,  5.00000000e+00,  2.00000000e+00,  4.69230769e+00,
        5.20710059e-01,  5.00000000e+00,  1.21000000e+02,  1.75000000e+02,
        5.40000000e+01,  1.66692308e+02,  1.40591716e+01,  1.75000000e+02,
        1.33000000e+00,  1.61000000e+00,  2.80000000e-01,  1.37307692e+00,
        7.28994083e-02,  

In [20]:
# Single-element composition: featurization fails outright with a ValueError
# instead of returning a vector. The failure is the point of this cell, so the
# error is caught and shown rather than left uncaught, which would stop a
# top-to-bottom run of the notebook before the remaining cells execute.
compSingle = Composition('Ti1')
try:
    Xsingle = base_featurizer.featurize(compSingle)
except ValueError as err:
    # No feature vector was produced; make that explicit so a stale value
    # from an earlier run of this cell cannot linger in the kernel.
    Xsingle = None
    print('ValueError: {}'.format(err))


invalid value encountered in double_scalars



ValueError: Found array with 0 sample(s) (shape=(0, 103)) while a minimum of 1 is required.

In [19]:
# Fe-W binary probe: featurizing it emits a divide-by-zero warning and,
# per the original note, the resulting vector holds a single inf entry.
compAlloy = Composition('Fe1 W1')

# Featurize the composition and display the full vector as the cell output.
Xalloy = base_featurizer.featurize(compAlloy)
Xalloy


divide by zero encountered in double_scalars



array([ 2.00000000e+00,  7.07106781e-01,  6.29960525e-01,  5.74349177e-01,
        5.52044757e-01,  5.35886731e-01,  2.60000000e+01,  7.40000000e+01,
        4.80000000e+01,  5.00000000e+01,  2.40000000e+01,  2.60000000e+01,
        5.10000000e+01,  5.50000000e+01,  4.00000000e+00,  5.30000000e+01,
        2.00000000e+00,  5.10000000e+01,  5.58450000e+01,  1.83840000e+02,
        1.27995000e+02,  1.19842500e+02,  6.39975000e+01,  5.58450000e+01,
        1.81100000e+03,  3.69500000e+03,  1.88400000e+03,  2.75300000e+03,
        9.42000000e+02,  1.81100000e+03,  6.00000000e+00,  8.00000000e+00,
        2.00000000e+00,  7.00000000e+00,  1.00000000e+00,  6.00000000e+00,
        4.00000000e+00,  6.00000000e+00,  2.00000000e+00,  5.00000000e+00,
        1.00000000e+00,  4.00000000e+00,  1.32000000e+02,  1.62000000e+02,
        3.00000000e+01,  1.47000000e+02,  1.50000000e+01,  1.32000000e+02,
        1.83000000e+00,  2.36000000e+00,  5.30000000e-01,  2.09500000e+00,
        2.65000000e-01,  

In [21]:
# Five-component Zr-Al-Cu-Ni-S probe.
# Open question from the original note: does the vector contain a single NaN?
compAlloy2 = Composition('Zr64.13 Al10 Cu15.75 Ni10.12 S2')
print(compAlloy2)

# Featurize the composition and display the full vector as the cell output.
Xalloy2 = base_featurizer.featurize(compAlloy2)
Xalloy2

Zr64.13 Al10 Cu15.75 Ni10.12 S2


array([5.00000000e+00, 6.62554634e-01, 6.33419438e-01, 6.28861687e-01,
       6.28730751e-01, 6.28725542e-01, 1.30000000e+01, 4.00000000e+01,
       2.70000000e+01, 3.39932353e+01, 7.55321217e+00, 4.00000000e+01,
       4.40000000e+01, 8.80000000e+01, 4.40000000e+01, 5.24807843e+01,
       1.06641705e+01, 4.40000000e+01, 2.69815386e+01, 9.12240000e+01,
       6.42424614e+01, 7.62643845e+01, 1.88109832e+01, 9.12240000e+01,
       3.88360000e+02, 2.12800000e+03, 1.73964000e+03, 1.81815978e+03,
       3.89608889e+02, 2.12800000e+03, 4.00000000e+00, 1.60000000e+01,
       1.20000000e+01, 6.79382353e+00, 3.51309614e+00, 4.00000000e+00,
       3.00000000e+00, 5.00000000e+00, 2.00000000e+00, 4.51107843e+00,
       6.14794906e-01, 5.00000000e+00, 1.05000000e+02, 1.75000000e+02,
       7.00000000e+01, 1.56633627e+02, 2.30948132e+01, 1.75000000e+02,
       1.33000000e+00, 2.58000000e+00, 1.25000000e+00, 1.52752059e+00,
       2.48372457e-01, 1.33000000e+00, 1.00000000e+00, 2.00000000e+00,
      

In [28]:
# Look up the label of the feature that sits seventh from the end of the
# combined feature vector.
# NOTE(review): presumably this is the position of the inf/NaN entry seen in
# the alloy probes above -- confirm against the full (untruncated) vectors.
base_featurizer.feature_labels()[-7]

'Yang omega'