In [6]:
# Enable IPython's autoreload extension in mode 2, so edited modules are reloaded before code runs.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from evaluator import Evaluator

In [20]:
# Descriptor labels; these are zipped against slices of the evaluator's
# descriptor vector when the records are assembled, so order matters.
organic_desc_names = [
    'homo',
    'lumo',
    'dipole',
    'atomization',
    'r_gyr',
    'total_mass',
]
cation_desc_names = [
    'electron_affinity',
    'ionization_energy',
    'mass',
    'electronegativity',
]
# Anions carry the same descriptor labels; kept as an independent list object.
anion_desc_names = list(cation_desc_names)

# Candidate components; every organic/cation/anion combination is enumerated
# when the space is built, in the order listed here.
organics = [
    'acetamidinium',
    'ammonium',
    'azetidinium',
    'butylammonium',
    'dimethylammonium',
    'ethylammonium',
    'formamidinium',
    'guanidinium',
    'hydrazinium',
    'hydroxylammonium',
    'imidazolium',
    'isopropylammonium',
    'methylammonium',
    'propylammonium',
    'tetramethylammonium',
    'trimethylammonium',
]
cations = ['Ge', 'Sn', 'Pb']
anions = ['F', 'Cl', 'Br', 'I']


In [21]:
# Build the CP space: one dict per (organic, cation, anion) combination,
# each value wrapped in a single-element list. Organics vary slowest and
# anions fastest.
vectors = [
    {'organic': [organic], 'cation': [cation], 'anion': [anion]}
    for organic in organics
    for cation in cations
    for anion in anions
]
        

In [22]:
# Show the first entry to check the dict layout.
vectors[0]

{'organic': ['acetamidinium'], 'cation': ['Ge'], 'anion': ['F']}

In [24]:
# Assemble one flat record per entry of `vectors`: the three component labels,
# the 'slow' and 'fast' evaluator values (stored as 'hse06' and 'gga'), and the
# slow evaluator's descriptor vector split into three prefixed groups.
#
# Descriptor vector layout consumed below (order matters):
#   organic descriptors (homo, lumo, dipole, atomization, r_gyr, total_mass)
#   anion descriptors   (electron_affinity, ionization_energy, mass, electronegativity)
#   cation descriptors  (electron_affinity, ionization_energy, mass, electronegativity)

all_data = []

# Slice boundaries come from the label lists rather than hard-coded 6 / 10,
# so the slices and the column names cannot drift apart.
organic_end = len(organic_desc_names)
anion_end = organic_end + len(anion_desc_names)

for vector in vectors:
    # NOTE(review): both evaluators are rebuilt on every iteration, as before.
    # If Evaluator construction is stateless they could be created once
    # outside the loop -- TODO confirm against evaluator.py.
    eval_exp = Evaluator(type='slow')
    val_exp, desc_exp = eval_exp(vector)

    # Only the value from the fast evaluator is kept; its descriptor vector
    # is intentionally discarded.
    eval_cheap = Evaluator(type='fast')
    val_cheap, _desc_cheap = eval_cheap(vector)

    data_point = {
        'organic': vector['organic'][0],
        'anion': vector['anion'][0],
        'cation': vector['cation'][0],
        'hse06': val_exp,
        'gga': val_cheap,
    }

    # (column prefix, labels, matching slice of the descriptor vector)
    descriptor_groups = (
        ('organic', organic_desc_names, desc_exp[:organic_end]),
        ('anion', anion_desc_names, desc_exp[organic_end:anion_end]),
        ('cation', cation_desc_names, desc_exp[anion_end:]),
    )
    for prefix, names, values in descriptor_groups:
        # zip stops at the shorter sequence, so extra trailing values are ignored.
        for name, val in zip(names, values):
            data_point['-'.join([prefix, name])] = val

    all_data.append(data_point)
    
    

In [25]:
# Turn the list of record dicts into a table (one row per record), report its
# shape, and preview the first five rows.
df = pd.DataFrame(data=all_data)
print(df.shape)
df.head(n=5)

(192, 19)


Unnamed: 0,organic,anion,cation,hse06,gga,organic-homo,organic-lumo,organic-dipole,organic-atomization,organic-r_gyr,organic-total_mass,anion-electron_affinity,anion-ionization_energy,anion-mass,anion-electronegativity,cation-electron_affinity,cation-ionization_energy,cation-mass,cation-electronegativity
0,acetamidinium,F,Ge,5.1528,3.8145,-0.49721,-0.224,1.3334,-1.725934,1.344149,59.092,3.401189,17.42282,18.998,4.0,1.232676,7.8994,72.63,1.8
1,acetamidinium,Cl,Ge,3.7624,2.8878,-0.49721,-0.224,1.3334,-1.725934,1.344149,59.092,3.612724,12.96764,35.45,3.0,1.232676,7.8994,72.63,1.8
2,acetamidinium,Br,Ge,2.9861,2.2149,-0.49721,-0.224,1.3334,-1.725934,1.344149,59.092,3.363588,11.81381,79.904,2.8,1.232676,7.8994,72.63,1.8
3,acetamidinium,I,Ge,2.3129,1.6986,-0.49721,-0.224,1.3334,-1.725934,1.344149,59.092,3.059046,10.45126,126.9,2.5,1.232676,7.8994,72.63,1.8
4,acetamidinium,F,Sn,4.3173,3.2079,-0.49721,-0.224,1.3334,-1.725934,1.344149,59.092,3.401189,17.42282,18.998,4.0,1.11207,7.3439,118.71,1.8


In [26]:
# Persist the table to 'perovskites.pkl'. The context manager guarantees the
# file handle is flushed and closed even if pickling raises; the original
# left the handle open until garbage collection.
with open('perovskites.pkl', 'wb') as pkl_file:
    pickle.dump(df, pkl_file)