In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matminer.datasets import load_dataset
from pymatgen.core.composition import Composition
from figrecipes import PlotlyFig

In [3]:
df = pd.read_csv("cleaned_data.csv")

In [4]:
#print(data)

## Visualizing the Data in Scatter Plots

In [5]:
#pf = PlotlyFig(data, mode='notebook')

# basic matrix:
#pf.scatter_matrix(cols=['Lowest distortion', 'Formation energy [eV/atom]', 'Band gap [eV]', 'Vacancy energy [eV/O atom]'])

## Adding composition-based features

In [6]:
from matminer.featurizers.conversions import StrToComposition
df = StrToComposition().featurize_dataframe(df, "Chemical formula")
df.head()

StrToComposition:   0%|          | 0/4914 [00:00<?, ?it/s]

Unnamed: 0,Chemical formula,A,B,Radius A [ang],Radius B [ang],Lowest distortion,Formation energy [eV/atom],Stability [eV/atom],Volume per atom [A^3/atom],Band gap [eV],a [ang],b [ang],c [ang],alpha [deg],beta [deg],gamma [deg],Vacancy energy [eV/O atom],composition
0,Ac2O3,Ac,Ac,1.12,1.12,cubic,-2.732,0.848,20.836,0.332,4.705,4.705,4.705,90.0,90.0,90.0,3.15,"(Ac, O)"
1,AcAgO3,Ac,Ag,1.12,0.95,orthorhombic,-1.957,-0.055,14.485,0.0,5.779,6.077,8.248,90.0,90.0,90.0,0.817,"(Ac, Ag, O)"
2,AcAlO3,Ac,Al,1.12,0.54,cubic,-3.532,-0.11,11.487,4.307,3.858,3.858,3.858,90.0,90.0,90.0,6.695,"(Ac, Al, O)"
3,AcAsO3,Ac,As,1.12,0.52,orthorhombic,-2.398,0.224,14.355,0.0,5.78,6.012,8.262,90.0,90.0,90.0,3.634,"(Ac, As, O)"
4,AcAuO3,Ac,Au,1.12,0.93,orthorhombic,-2.006,-0.056,15.19,0.745,5.899,6.75,7.63,90.0,90.0,90.0,0.807,"(Ac, Au, O)"


In [None]:
# Lets plot offline (the default) first. An html file will be created.
#pf.xy((df['poisson_ratio'], df['elastic_anisotropy']), labels='formula')

In [7]:
df.to_csv("composition_feature.csv")

## Adding element-property features

In [8]:
from matminer.featurizers.composition import ElementProperty

ep_feat = ElementProperty.from_preset(preset_name="magpie")
df = ep_feat.featurize_dataframe(df, col_id="composition")  # input the "composition" column to the featurizer
df.head()

ElementProperty:   0%|          | 0/4914 [00:00<?, ?it/s]

Unnamed: 0,Chemical formula,A,B,Radius A [ang],Radius B [ang],Lowest distortion,Formation energy [eV/atom],Stability [eV/atom],Volume per atom [A^3/atom],Band gap [eV],...,MagpieData range GSmagmom,MagpieData mean GSmagmom,MagpieData avg_dev GSmagmom,MagpieData mode GSmagmom,MagpieData minimum SpaceGroupNumber,MagpieData maximum SpaceGroupNumber,MagpieData range SpaceGroupNumber,MagpieData mean SpaceGroupNumber,MagpieData avg_dev SpaceGroupNumber,MagpieData mode SpaceGroupNumber
0,Ac2O3,Ac,Ac,1.12,1.12,cubic,-2.732,0.848,20.836,0.332,...,0.0,0.0,0.0,0.0,12.0,225.0,213.0,97.2,102.24,12.0
1,AcAgO3,Ac,Ag,1.12,0.95,orthorhombic,-1.957,-0.055,14.485,0.0,...,0.0,0.0,0.0,0.0,12.0,225.0,213.0,97.2,102.24,12.0
2,AcAlO3,Ac,Al,1.12,0.54,cubic,-3.532,-0.11,11.487,4.307,...,0.0,0.0,0.0,0.0,12.0,225.0,213.0,97.2,102.24,12.0
3,AcAsO3,Ac,As,1.12,0.52,orthorhombic,-2.398,0.224,14.355,0.0,...,0.0,0.0,0.0,0.0,12.0,225.0,213.0,85.4,88.08,12.0
4,AcAuO3,Ac,Au,1.12,0.93,orthorhombic,-2.006,-0.056,15.19,0.745,...,0.0,0.0,0.0,0.0,12.0,225.0,213.0,97.2,102.24,12.0


In [9]:
ep_feat.citations()

['@article{ward_agrawal_choudary_wolverton_2016, title={A general-purpose machine learning framework for predicting properties of inorganic materials}, volume={2}, DOI={10.1038/npjcompumats.2017.28}, number={1}, journal={npj Computational Materials}, author={Ward, Logan and Agrawal, Ankit and Choudhary, Alok and Wolverton, Christopher}, year={2016}}']

In [10]:
df.to_csv("element_property.csv")

## Add more composition-based features

In [11]:
from matminer.featurizers.conversions import CompositionToOxidComposition
from matminer.featurizers.composition import OxidationStates

df = CompositionToOxidComposition().featurize_dataframe(df, "composition")

os_feat = OxidationStates()
df = os_feat.featurize_dataframe(df, "composition_oxid")
df.head()

CompositionToOxidComposition:   0%|          | 0/4914 [00:00<?, ?it/s]

OxidationStates:   0%|          | 0/4914 [00:00<?, ?it/s]

Unnamed: 0,Chemical formula,A,B,Radius A [ang],Radius B [ang],Lowest distortion,Formation energy [eV/atom],Stability [eV/atom],Volume per atom [A^3/atom],Band gap [eV],...,MagpieData maximum SpaceGroupNumber,MagpieData range SpaceGroupNumber,MagpieData mean SpaceGroupNumber,MagpieData avg_dev SpaceGroupNumber,MagpieData mode SpaceGroupNumber,composition_oxid,minimum oxidation state,maximum oxidation state,range oxidation state,std_dev oxidation state
0,Ac2O3,Ac,Ac,1.12,1.12,cubic,-2.732,0.848,20.836,0.332,...,225.0,213.0,97.2,102.24,12.0,"(Ac3+, O2-)",-2,3,5,3.535534
1,AcAgO3,Ac,Ag,1.12,0.95,orthorhombic,-1.957,-0.055,14.485,0.0,...,225.0,213.0,97.2,102.24,12.0,"(Ac3+, Ag3+, O2-)",-2,3,5,3.273268
2,AcAlO3,Ac,Al,1.12,0.54,cubic,-3.532,-0.11,11.487,4.307,...,225.0,213.0,97.2,102.24,12.0,"(Ac3+, Al3+, O2-)",-2,3,5,3.273268
3,AcAsO3,Ac,As,1.12,0.52,orthorhombic,-2.398,0.224,14.355,0.0,...,225.0,213.0,85.4,88.08,12.0,"(Ac3+, As3+, O2-)",-2,3,5,3.273268
4,AcAuO3,Ac,Au,1.12,0.93,orthorhombic,-2.006,-0.056,15.19,0.745,...,225.0,213.0,97.2,102.24,12.0,"(Ac3+, Au3+, O2-)",-2,3,5,3.273268


In [12]:
df.to_csv("more_composition_feat.csv")

## Adding some structure based features

In [16]:
from matminer.featurizers.structure import DensityFeatures

df_feat = DensityFeatures()
df = df_feat.featurize_dataframe(df, "Chemical formula")  # input the structure column to the featurizer
df.head()

DensityFeatures:   0%|          | 0/4914 [00:00<?, ?it/s]

AttributeError: 'str' object has no attribute 'density'
TO SKIP THESE ERRORS when featurizing specific compounds, set 'ignore_errors=True' when running the batch featurize() operation (e.g., featurize_many(), featurize_dataframe(), etc.).

In [17]:
df.to_csv("structure_feat.csv")