In [1]:
import pandas as pd
import numpy as np
import pymatgen as mg
import json
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from pymatgen.analysis.local_env import JMolNN
from itertools import combinations
%matplotlib inline

In [2]:
def load(index, kind='train', data_dir='data'):
    """Load one sample's geometry file and rebuild it as a structure object.

    Reads ``<data_dir>/<kind>/<index>/geometry.json``, decodes the JSON into a
    dict and passes that dict to ``mg.core.Structure.from_dict``.

    Parameters
    ----------
    index : int or str
        Sample identifier; it is formatted into the path as the name of the
        sample's sub-directory.
    kind : str, optional
        Name of the sub-directory under ``data_dir`` (default ``'train'``).
    data_dir : str, optional
        Root directory containing the ``kind`` sub-directories. Defaults to
        ``'data'``, the location that used to be hard-coded, so existing
        calls behave exactly as before.

    Returns
    -------
    object
        Whatever ``mg.core.Structure.from_dict`` returns for the decoded dict.

    Raises
    ------
    IOError / OSError
        If the geometry file cannot be opened.
    ValueError
        If the file does not contain valid JSON.
    """
    path = '{}/{}/{}/geometry.json'.format(data_dir, kind, index)
    with open(path) as f:
        d = json.load(f)
    # Only the decoded dict is needed from here on, so the structure is built
    # after the file handle has been released.
    return mg.core.Structure.from_dict(d)

In [3]:
# Load sample 1 (default kind 'train'); the bare name on the last line makes
# the notebook display it.
structure = load(1)
structure

Structure Summary
Lattice
    abc : 9.9522999999999993 8.5512999999999995 9.1775000000000002
 angles : 90.002600000000001 90.002300000000005 90.0017
 volume : 781.05208091340478
      A : 9.9522999919813007 0.0 -0.00039951092713025771
      B : -0.00025373775587410888 8.5512999874310154 -0.0003880456847134828
      C : 0.0 0.0 9.1775000000000002
PeriodicSite: Ga (1.6089, 7.2764, 6.3832) [0.1617, 0.8509, 0.6956]
PeriodicSite: Al (6.5849, 7.2528, 6.3658) [0.6617, 0.8482, 0.6937]
PeriodicSite: Al (3.4358, 1.2598, 1.7946) [0.3452, 0.1473, 0.1956]
PeriodicSite: Ga (8.4118, 1.2362, 1.7773) [0.8452, 0.1446, 0.1937]
PeriodicSite: Ga (0.9559, 2.9989, 1.8014) [0.0961, 0.3507, 0.1963]
PeriodicSite: Al (5.9319, 2.9753, 1.7840) [0.5960, 0.3479, 0.1944]
PeriodicSite: Al (4.0888, 5.5373, 6.3764) [0.4109, 0.6475, 0.6948]
PeriodicSite: Al (9.0648, 5.5136, 6.3591) [0.9108, 0.6448, 0.6930]
PeriodicSite: Al (0.9172, 5.6224, -0.0094) [0.0922, 0.6575, -0.0010]
PeriodicSite: Ga (5.8933, 5.5988, -0.0267) [0.5

In [4]:
# Read the training CSV into a DataFrame.
df_train = pd.read_csv(filepath_or_buffer='data/train.csv')

In [5]:
# Take the first row of the training frame as a Series, then display it.
row = df_train.iloc[0, :]
row

id                             1.0000
spacegroup                    33.0000
number_of_total_atoms         80.0000
percent_atom_al                0.6250
percent_atom_ga                0.3750
percent_atom_in                0.0000
lattice_vector_1_ang           9.9523
lattice_vector_2_ang           8.5513
lattice_vector_3_ang           9.1775
lattice_angle_alpha_degree    90.0026
lattice_angle_beta_degree     90.0023
lattice_angle_gamma_degree    90.0017
formation_energy_ev_natom      0.0680
bandgap_energy_ev              3.4387
Name: 0, dtype: float64

In [9]:
from pymatgen.core.periodic_table import Element

In [26]:
# One row per element, built from each Element's `.data` dict.
df_elements = pd.DataFrame(
    [Element(sym).data for sym in ['Al', 'Ga', 'In', 'O']]
)

# Property columns selected for display.
use_cols = [
    'Atomic mass',
    'Atomic radius',
    'Boiling point',
    'Common oxidation states',
    'Ionic radii',
    'Liquid range',
    'Melting point',
    'Molar volume',
    'Thermal conductivity',
    'Van der waals radius',
    'Velocity of sound',
    'X',
]

# Other candidate properties (presumably still to be sourced — confirm):
# IE1, IE2, electron affinity, covalent bond radius, pseudopotential radius,
# density, heat of fusion, heat of sublimation, specific heat
# HOMO, LUMO
df_elements[use_cols]

Unnamed: 0,Atomic mass,Atomic radius,Boiling point,Common oxidation states,Ionic radii,Liquid range,Melting point,Molar volume,Thermal conductivity,Van der waals radius,Velocity of sound,X
0,26.981539,1.25,2792 K,[3],{'3': 0.675},1858.53 K,933.47 K,10.00 cm<sup>3</sup>,235 W m<sup>-1</sup> K<sup>-1</sup>,1.84,5100 m s<sup>-1</sup>,1.61
1,69.723,1.3,2477 K,[3],{'3': 0.76},2174.09 K,302.91 K,11.80 cm<sup>3</sup>,29 W m<sup>-1</sup> K<sup>-1</sup>,1.87,2740 m s<sup>-1</sup>,1.81
2,114.818,1.55,2345 K,[3],{'3': 0.94},1915.25 K,429.75 K,15.76 cm<sup>3</sup>,82 W m<sup>-1</sup> K<sup>-1</sup>,1.93,1215 m s<sup>-1</sup>,1.78
3,15.9994,0.6,90.2 K,[-2],{'-2': 1.26},35.4 K,54.8 K,17.36 cm<sup>3</sup>,0.02658 W m<sup>-1</sup> K<sup>-1</sup>,1.52,317.5 m s<sup>-1</sup>,3.44


In [31]:
# Element symbols whose rows are shown below.
list_elements = ['Al', 'Ga', 'In', 'O']

# Read the elemental property table, using its first column as the index.
df_sk = pd.read_csv('data/elemental_data.csv', index_col=0)

# Row selection by label; all columns are kept.
df_sk.loc[list_elements]

Unnamed: 0,bp,cp_g,cp_mol,e_fusion,e_vapor,ea,group,ie1,ie2,kai-a,kai-p,mass,mol_vol,mp,period,ratom,rcov,rho,rps-d,rps-p,rps-s,rvdw,thermal_cond,z
Al,2792.0,0.897,24.2,10.7,290.8,42.5,13.0,577.5,1816.7,1.613,1.61,26.9815,1e-05,933.47,3.0,118.0,1.24,2.7,,0.905,0.77,1.84,2.37,13.0
Ga,2477.0,0.373,26.03,5.59,256.06,28.9,13.0,578.8,1979.3,1.756,1.81,69.723,1.2e-05,302.91,4.0,136.0,1.23,5.904,0.17,0.935,0.76,1.87,0.406,31.0
In,2345.0,0.233,26.74,3.26,226.35,28.9,13.0,558.3,1820.7,1.656,1.78,114.818,1.6e-05,429.75,5.0,156.0,1.42,7.31,0.36,1.11,0.94,1.93,0.816,49.0
O,90.2,0.918,29.378,0.222,3.4109,141.0,16.0,1313.9,3388.3,3.61,3.44,15.9994,0.011196,54.8,2.0,48.0,0.64,1.429,,0.18,0.285,1.52,0.000267,8.0
