In [1]:
from __future__ import annotations

import pandas as pd

from maml.describers import SiteElementProperty
from maml.models import AtomSets

# the data contains the MP formation energy data
# 100 formula - formation energy
df = pd.read_csv("./mp_ef_100.csv")

## Use MEGNet element features for MP Ef regression

In [2]:
from megnet.data.crystal import get_elemental_embeddings
from pymatgen.core import Element

embeddings = get_elemental_embeddings()
embeddings = {Element(i).Z: j for i, j in embeddings.items()}

In [3]:
describer = SiteElementProperty(embeddings, output_weights=True)
model = AtomSets(
    describer=describer,  # output weights gives element fractions
    is_embedding=False,  # we are using the pre-trained element embeddings
    compile_metrics=["mae"],
    loss="mse",
    is_classification=False,
)

2021-10-28 09:11:21.677552: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
features = describer.transform(df["formula"][:80])

In [5]:
model.fit(features, df["Ef"][:80], epochs=10)

2021-10-28 09:11:21.883447: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f8911b25850>

In [6]:
test_features = describer.transform(df["formula"][80:])
loss, metric = model.evaluate(test_features, df["Ef"][80:], True)



In [7]:
print(f"The MAE is {metric:.3f} eV/atom")

The MAE is 1.413 eV/atom


## No transfer learning for Ef regression

In [8]:
describer = SiteElementProperty(output_weights=True)
model = AtomSets(describer=describer, is_embedding=True, compile_metrics=["mae"], loss="mse", is_classification=False)

In [9]:
features = describer.transform(df["formula"][:80])
model.fit(features, df["Ef"][:80], epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f8913278490>

In [10]:
test_features = describer.transform(df["formula"][80:])
loss, metric = model.evaluate(test_features, df["Ef"][80:], True)



In [11]:
print(f"The MAE is {metric:.3f} eV/atom")

The MAE is 1.859 eV/atom


## Use MEGNet element features for metallicity classification

In [16]:
df_metal = pd.read_csv("mp_is_metal_100.csv")

In [17]:
df_metal

Unnamed: 0,formula,is_metal
0,KMnO2,False
1,Cr3Ni,True
2,CsRbAs,True
3,SiSn,False
4,Ca3P2,False
...,...,...
95,K2RhAuF6,False
96,CsK2SbF6,False
97,K2LiTaCl6,True
98,K2NaTaCl6,True


In [18]:
describer = SiteElementProperty(embeddings, output_weights=True)
model = AtomSets(
    describer=describer,  # output weights gives element fractions
    is_embedding=False,  # we are using the pre-trained element embeddings
    compile_metrics=["AUC"],
    loss="binary_crossentropy",
    is_classification=True,
)

In [20]:
features = describer.transform(df["formula"][:80])
model.fit(features, df["is_metal"][:80], epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f89165edc10>

In [21]:
test_features = describer.transform(df["formula"][80:])
loss, metric = model.evaluate(test_features, df["is_metal"][80:], True)



In [22]:
print(f"The AUC is {metric:.3f}")

The AUC is 0.677


## No transfer learning for metallicity classification

In [23]:
describer = SiteElementProperty(output_weights=True)
model = AtomSets(
    describer=describer, is_embedding=True, compile_metrics=["AUC"], loss="binary_crossentropy", is_classification=True
)

In [24]:
features = describer.transform(df["formula"][:80])
model.fit(features, df["is_metal"][:80], epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f89174ac1c0>

In [25]:
test_features = describer.transform(df["formula"][80:])
loss, metric = model.evaluate(test_features, df["is_metal"][80:], True)



In [26]:
print(f"The AUC is {metric:.3f}")

The AUC is 0.525
