In [2]:
from utility.utility_general import *
from collections import Counter
# from predict_synthesis import *
import requests
from time import sleep
import tqdm
import crystals_tools
from IPython.display import display
from ase.formula import Formula
from ase.io import read as ase_read
from io import StringIO

from utility import util_tf


In [None]:
import os

from pymatgen.ext.matproj import MPRester

# The Materials Project API key is a credential and must not be stored in the
# notebook. It is read from the PMG_MAPI_KEY environment variable instead.
# When the variable is unset, None is passed and MPRester is expected to fall
# back to pymatgen's own PMG_MAPI_KEY setting (e.g. ~/.pmgrc.yaml) -- see the
# pymatgen MPRester documentation.
# NOTE(review): a key was previously committed in this cell; treat it as
# leaked and regenerate it on the Materials Project dashboard.
api_key = os.environ.get("PMG_MAPI_KEY")

mpr: MPRester = MPRester(api_key)

def get_battery_data(self, formula_or_batt_id):
    """Return battery records for a battery id or a chemical formula.

    Written as an unbound method: ``self`` must provide ``_make_request``,
    which is called with the path ``/battery/<formula_or_batt_id>``. Its
    result is returned unchanged.

    Args:
        formula_or_batt_id: Battery id or formula to look up.

    Returns:
        Whatever ``self._make_request`` returns for that path.

    Examples:
        mpr.get_battery_data("mp-300585433")
        mpr.get_battery_data("LiFePO4")
    """
    # An f-string is used instead of the % operator: % treats a tuple argument
    # as a list of format arguments rather than as the value to insert.
    return self._make_request(f'/battery/{formula_or_batt_id}')

# Attach the helper to the MPRester class so it can be called as a method on
# any instance, e.g. mpr.get_battery_data("LiFePO4").
MPRester.get_battery_data = get_battery_data

In [5]:
# Query the Materials Project for the formula 'V2O5' through the `mpr` client.
# This needs network access; the following cell works from a pickled copy of
# the result stored under tmp/.
data = mpr.get_data('V2O5')

In [3]:
from pathlib import Path

# Local cache of the raw Materials Project result, so the analysis can be
# re-run without depending on the remote query.
RAW_CACHE_FILE = 'tmp/V2O5_raw_mp.pkl'

if Path(RAW_CACHE_FILE).exists():
    data = load_var(RAW_CACHE_FILE) # 58 data points
else:
    # No cache yet (e.g. a fresh checkout): persist the `data` fetched by the
    # query cell instead of failing on a missing file.
    Path(RAW_CACHE_FILE).parent.mkdir(parents=True, exist_ok=True)
    save_var(data, RAW_CACHE_FILE)

In [3]:
# Report how many entries are held in `data`.
n_collected = len(data)
print('Number of collected data: ', n_collected)

Number of collected data:  58


In [4]:
# Materials Project ids treated as experimentally existing; rows whose
# 'material_id' is in this list get experiment=True when the frame is built.
experimentally_exists = [
    'mp-25279',
    'mp-754670',
    'mp-25288',
    'mp-510568',
    'mp-1104615',
    'mp-542844',
    'mp-1178826',
    'mp-624689',
]

In [5]:
# One row per entry in `data`; the untouched record is kept in 'query'.
df = pd.DataFrame({'query': data})

# Scalar fields copied straight from the top level of each record.
for field in ('energy_per_atom', 'e_above_hull', 'material_id', 'full_formula'):
    df[field] = [entry[field] for entry in df['query']]

# Space-group details sit in the nested 'spacegroup' dict:
# (column name in df, key inside entry['spacegroup']).
for column, key in (
    ('sgn', 'number'),
    ('hall', 'hall'),
    ('crystal_system', 'crystal_system'),
    ('symbol', 'symbol'),
):
    df[column] = [entry['spacegroup'][key] for entry in df['query']]

# Flag the ids listed in `experimentally_exists` and move them to the top.
df['experiment'] = df['material_id'].isin(experimentally_exists)
df = df.sort_values('experiment', ascending=False)

# Parse each record's CIF text into an ASE Atoms object. This is done after
# sorting so the list lines up with the final row order.
df['atoms'] = [
    ase_read(StringIO(entry['cif']), format='cif')
    for entry in df['query']
]

df

Unnamed: 0,query,energy_per_atom,e_above_hull,material_id,full_formula,sgn,hall,crystal_system,symbol,experiment,atoms
29,"{'energy': -103.63725465, 'energy_per_atom': -...",-7.402661,0.039592,mp-1104615,V4O10,2,-P 1,triclinic,P-1,True,"(Atom('V', [2.8973492269345766, 5.333355758664..."
51,"{'energy': -208.02069668, 'energy_per_atom': -...",-7.429311,0.012942,mp-25288,V8O20,62,-P 2ac 2n,orthorhombic,Pnma,True,"(Atom('V', [2.7144735, 8.907559839399, 10.1779..."
14,"{'energy': -102.32647035, 'energy_per_atom': -...",-7.309034,0.133219,mp-624689,V4O10,59,P 2 2ab -1ab,orthorhombic,Pmmn,True,"(Atom('V', [1.4421545, 3.735567159696, 1.78359..."
46,"{'energy': -103.3426698, 'energy_per_atom': -7...",-7.381619,0.060634,mp-542844,V4O10,15,-C 2yc,monoclinic,C2/c,True,"(Atom('V', [7.656589140310443, 4.3194922300652..."
47,"{'energy': -51.20295963, 'energy_per_atom': -7...",-7.314709,0.127544,mp-1178826,V2O5,12,-C 2y,monoclinic,C2/m,True,"(Atom('V', [1.9628695893368011, 0.496278715538..."
1,"{'energy': -104.06382799, 'energy_per_atom': -...",-7.433131,0.009122,mp-754670,V4O10,59,P 2 2ab -1ab,orthorhombic,Pmmn,True,"(Atom('V', [0.0, 0.531171342919, 1.70007574128..."
55,"{'energy': -104.19153977, 'energy_per_atom': -...",-7.442253,0.0,mp-25279,V4O10,59,P 2 2ab -1ab,orthorhombic,Pmmn,True,"(Atom('V', [0.0, 2.9346624387689997, 7.5026582..."
56,"{'energy': -103.77266521, 'energy_per_atom': -...",-7.412333,0.02992,mp-510568,V4O10,11,-P 2yb,monoclinic,P2_1/m,True,"(Atom('V', [2.7107849999999996, 5.174873298553..."
52,"{'energy': -774.34388625, 'energy_per_atom': -...",-7.374704,0.067549,mp-1245120,V30O75,1,P 1,triclinic,P1,False,"(Atom('V', [12.919200685363656, 11.44055405050..."
40,"{'energy': -764.45962847, 'energy_per_atom': -...",-7.280568,0.537399,mp-1245283,V30O75,1,P 1,triclinic,P1,False,"(Atom('V', [0.6849783562529564, 0.199785116458..."


In [6]:
# Rows that repeat an earlier (symbol, full_formula) pair; the first
# occurrence of each pair is not counted (pandas default keep='first').
duplicate_mask = df.duplicated(subset=['symbol', 'full_formula'])
print('Number of duplicates:', int(duplicate_mask.sum()))

Number of duplicates: 44


# Evaluations

## CAE+MLP

In [7]:
import tensorflow as tf
# Select the device with index 0 through the project helper. The recorded
# output of this cell lists CPU:0 and GPU:0 as the selected resources.
util_tf.tf_select_gpu(0)

Total Num. GPUs Available:  6
6 Physical GPUs, 1 Logical GPU

Selected resources:
/physical_device:CPU:0
/physical_device:GPU:0


In [8]:
from tensorflow.keras.models import load_model
import config
import image_generator

# Load the saved encoder model, then load the weights stored separately.
cae_mlp_dir = config.cae_mlp_model_dir
encoder = load_model(f'{cae_mlp_dir}/encoder.h5')
encoder.load_weights(f'{cae_mlp_dir}/encoder_weights.h5')

# Feed the 'atoms' column through the project's image generator and run the
# encoder over every batch on GPU 0.
generator = image_generator.ImageGeneratorDataFrame(df[['atoms']], target_col='atoms')
n_batches = len(generator)
with tf.device('/device:GPU:0'):
    # use_multiprocessing / workers / max_queue_size are left at their defaults.
    lsr = encoder.predict(generator, steps=n_batches, verbose=1)

# Score the encoder output with the pickled classifier.
clf = load_var(f'{config.cae_mlp_model_clf_dir}/classifier_class.pkl')
yp = clf.predict_proba(lsr)

# 'yp ...' stores the raw score, 'ypl ...' the label derived from it:
# +1 for scores >= 0.5, -1 below (sign(0) + 0.5 is positive, so a score of
# exactly 0.5 maps to +1).
# NOTE(review): assumes the generator yields rows in df order -- confirm.
cae_mlp_scores = yp.flatten()
df['yp cae-mlp'] = cae_mlp_scores
df['ypl cae-mlp'] = np.sign(np.sign(cae_mlp_scores - 0.5) + .5)



## CNN

In [9]:
# NOTE(review): this rebinds `load_model` to the standalone `keras` package,
# while the CAE+MLP cell imports it from `tensorflow.keras` -- confirm the mix
# is intended. `image_generator` is not imported here and comes from that
# earlier cell.
from keras.models import load_model
import config

# Load the saved CNN model, then the weights of checkpoint 0006.
cnn_dir = config.cnn_model_dir
cnn_model = load_model(f'{cnn_dir}/model.h5')
cnn_model.load_weights(f'{cnn_dir}/weights0006.h5')

# Fresh generator over the same 'atoms' column, consumed once by predict().
generator = image_generator.ImageGeneratorDataFrame(df[['atoms']], target_col='atoms')
n_batches = len(generator)
# use_multiprocessing / workers / max_queue_size are left at their defaults.
yp = cnn_model.predict(generator, steps=n_batches, verbose=1)

# 'yp cnn' stores the raw score, 'ypl cnn' the label derived from it:
# +1 for scores >= 0.5, -1 below.
# NOTE(review): assumes the generator yields rows in df order -- confirm.
cnn_scores = yp.flatten()
df['yp cnn'] = cnn_scores
df['ypl cnn'] = np.sign(np.sign(cnn_scores - 0.5) + .5)



In [11]:
# Persist the frame, including the prediction columns added by the model
# cells, so the results can be reloaded without re-running inference.
df.to_pickle('tmp/V2O5.pkl')

In [6]:
# Reload the cached frame from the path df.to_pickle writes to.
# NOTE(review): unpickling can execute code embedded in the file; only load
# pickles that were produced by this notebook.
df = pd.read_pickle('tmp/V2O5.pkl')

Unnamed: 0,query,energy_per_atom,e_above_hull,material_id,full_formula,sgn,hall,crystal_system,symbol,experiment,atoms,yp cae-mlp,ypl cae-mlp,yp cnn,ypl cnn
29,"{'energy': -103.63725465, 'energy_per_atom': -...",-7.402661,0.039592,mp-1104615,V4O10,2,-P 1,triclinic,P-1,True,"(Atom('V', [2.8973492269345766, 5.333355758664...",0.999997,1.0,0.999844,1.0
51,"{'energy': -208.02069668, 'energy_per_atom': -...",-7.429311,0.012942,mp-25288,V8O20,62,-P 2ac 2n,orthorhombic,Pnma,True,"(Atom('V', [2.7144735, 8.907559839399, 10.1779...",0.99837,1.0,0.998334,1.0
14,"{'energy': -102.32647035, 'energy_per_atom': -...",-7.309034,0.133219,mp-624689,V4O10,59,P 2 2ab -1ab,orthorhombic,Pmmn,True,"(Atom('V', [1.4421545, 3.735567159696, 1.78359...",0.9997,1.0,0.978988,1.0
46,"{'energy': -103.3426698, 'energy_per_atom': -7...",-7.381619,0.060634,mp-542844,V4O10,15,-C 2yc,monoclinic,C2/c,True,"(Atom('V', [7.656589140310443, 4.3194922300652...",0.408365,-1.0,0.314881,-1.0
47,"{'energy': -51.20295963, 'energy_per_atom': -7...",-7.314709,0.127544,mp-1178826,V2O5,12,-C 2y,monoclinic,C2/m,True,"(Atom('V', [1.9628695893368011, 0.496278715538...",0.998105,1.0,0.778465,1.0
1,"{'energy': -104.06382799, 'energy_per_atom': -...",-7.433131,0.009122,mp-754670,V4O10,59,P 2 2ab -1ab,orthorhombic,Pmmn,True,"(Atom('V', [0.0, 0.531171342919, 1.70007574128...",0.990838,1.0,0.999984,1.0
55,"{'energy': -104.19153977, 'energy_per_atom': -...",-7.442253,0.0,mp-25279,V4O10,59,P 2 2ab -1ab,orthorhombic,Pmmn,True,"(Atom('V', [0.0, 2.9346624387689997, 7.5026582...",0.999971,1.0,0.999839,1.0
56,"{'energy': -103.77266521, 'energy_per_atom': -...",-7.412333,0.02992,mp-510568,V4O10,11,-P 2yb,monoclinic,P2_1/m,True,"(Atom('V', [2.7107849999999996, 5.174873298553...",0.524704,1.0,0.94762,1.0
52,"{'energy': -774.34388625, 'energy_per_atom': -...",-7.374704,0.067549,mp-1245120,V30O75,1,P 1,triclinic,P1,False,"(Atom('V', [12.919200685363656, 11.44055405050...",0.999988,1.0,0.999726,1.0
40,"{'energy': -764.45962847, 'energy_per_atom': -...",-7.280568,0.537399,mp-1245283,V30O75,1,P 1,triclinic,P1,False,"(Atom('V', [0.6849783562529564, 0.199785116458...",1.0,1.0,0.999795,1.0
