In [1]:
from pandas import read_csv
import numpy as np

In [2]:
# Ordering of elements in the dataframe
# ['K', 'Rb', 'Cs', 'MA', 'FA', 'Ca', 'Sr', 'Ba', 'Ge', 'Sn', 'Pb', 'Cl', 'Br', 'I']

# Electronic and Ionic Polarizabilities of B site ions
# Ref : https://www.tandfonline.com/doi/suppl/10.1080/00268976.2018.1535143?scroll=top&role=tab
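# Intended units: 10^-24 cm3 (see NOTE below)
# The electronic dipole polarizability of an atom is the response of the atom's electric field to an 
# externally applied electric field.
# These are static dipole polarizabilities of neutral atoms.
# Values in the reference table are given in atomic units. 1 a.u. = 0.14818471 * 1e-24 cm3
# NOTE(review): the values in the dictionary below are described as being in cm3, but the numbers
# (e.g. Ca = 160.8) look like unconverted atomic-unit values -- confirm the units against the reference.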

# Static dipole polarizability of each neutral B-site atom, keyed by element symbol.
# Entries are listed in the same order as the B-site columns of the dataframe
# (Ca, Sr, Ba, Ge, Sn, Pb).
dipole_polarizability_B = dict(
    Ca=160.8,
    Sr=197.2,
    Ba=272,
    Ge=40,
    Sn=53,
    Pb=47,
)

# Excluded
# 'Fe':55.85, 'Mg':24.31, 'Mn':54.94, 'F':18.99

In [3]:
# Data Frame functions

def insert_features(dataframe, col_idx, col_name, col_data):
    """Add one column to ``dataframe`` at a given position and return the frame.

    The insertion is done with ``DataFrame.insert``, so the frame passed in is
    modified in place; the returned object is that same frame, not a copy.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame that receives the new column.
    col_idx : int
        Positional index at which the column is placed.
    col_name : str
        Label of the new column.
    col_data : array-like or scalar
        Values stored in the new column.

    Returns
    -------
    pandas.DataFrame
        The input ``dataframe``, now containing the extra column.
    """
    dataframe.insert(loc=col_idx, column=col_name, value=col_data)
    return dataframe

In [4]:
# Base name of the dataset; the raw table is read from '<which_csv>_orig.csv'
which_csv = 'PBE_data'
df = read_csv(which_csv + '_orig.csv', header=0, skiprows=None)

# Properties added
# 1. Tolerance Factor
# 2. Octahedral Factor
# 3. Total mass of unit cell
# 4. A site molecules dipole moment
# 5. B and X site neutral atoms static dipole polarizability

# Stoichiometric coefficient columns sit at positions 5..18 of the table:
# the first 5 are A-site ions, the next 6 are B-site ions, the last 3 are X-site ions.
all_coeffs = df.iloc[:, 5:19]
all_ions = all_coeffs.columns.tolist()

# Ion labels per site
a_ions, b_ions, x_ions = all_ions[:5], all_ions[5:11], all_ions[11:14]

# Coefficient sub-tables per site
a_ion_coeffs = all_coeffs.iloc[:, :5]
b_ion_coeffs = all_coeffs.iloc[:, 5:11]
x_ion_coeffs = all_coeffs.iloc[:, 11:14]

# Echo the labels and coefficients of each site, A then B then X
for site_ions, site_coeffs in ((a_ions, a_ion_coeffs),
                               (b_ions, b_ion_coeffs),
                               (x_ions, x_ion_coeffs)):
    print(site_ions)
    print(site_coeffs)

# Insert B dipole polarizability
########################################################################

# Step 1 : Calculate avg property for MHP by multiplying propval with stoichiometric coeff
# The polarizabilities are looked up by B-site column name, so the result does not
# depend on the dictionary being written in the same order as the CSV columns or on
# the number of B-site ions. A B-site column with no entry in the dictionary raises
# KeyError instead of silently pairing coefficients with the wrong element.
prop_vals = np.array([dipole_polarizability_B[ion] for ion in b_ions]).reshape(1, -1)
coeffs_mat = b_ion_coeffs.to_numpy()
# (1, n_ions) . (n_ions, n_rows) -> (1, n_rows); transposed to one value per row
avg_prop_vals = np.dot(prop_vals, coeffs_mat.transpose()).transpose()

# Step 2 : Insert properties
df = insert_features(df, 43, 'B_electric_dipole_polarizability_neutral_atom', avg_prop_vals)

# Step 3 : Save to csv 
df.to_csv(which_csv + '.csv', header=True, index=False)

# Insert TF
########################################################################

# Step 1 : Tolerance factor = (r_A + r_X) / (sqrt(2) * (r_B + r_X)), evaluated per row
a_x_radius_sum = df['A_ion_rad'] + df['X_ion_rad']
b_x_radius_sum = df['B_ion_rad'] + df['X_ion_rad']
tf = a_x_radius_sum / ((2 ** 0.5) * b_x_radius_sum)

# Step 2 : Add the result to the table as column 56
df = insert_features(dataframe=df, col_idx=56, col_name='Tolerance Factor', col_data=tf)

# Step 3 : Write the updated table to '<which_csv>.csv' (header row, no index column)
df.to_csv(which_csv + '.csv', index=False, header=True)

# Insert OF
########################################################################

# Step 1 : Octahedral factor = r_B / r_X, evaluated per row
of = df['B_ion_rad'] / df['X_ion_rad']

# Step 2 : Add the result to the table as column 57
df = insert_features(dataframe=df, col_idx=57, col_name='Octahedral Factor', col_data=of)

# Step 3 : Write the updated table to '<which_csv>.csv' (header row, no index column)
df.to_csv(which_csv + '.csv', index=False, header=True)


['K', 'Rb', 'Cs', 'MA', 'FA']
       K   Rb   Cs   MA   FA
0    0.0  0.0  0.0  1.0  0.0
1    0.0  0.0  0.0  1.0  0.0
2    0.0  0.0  0.0  1.0  0.0
3    0.0  0.0  0.0  1.0  0.0
4    0.0  0.0  0.0  1.0  0.0
..   ...  ...  ...  ...  ...
489  0.0  0.0  0.0  0.0  1.0
490  0.0  0.0  0.0  0.0  1.0
491  0.0  0.0  0.0  1.0  0.0
492  0.0  0.0  0.0  1.0  0.0
493  0.0  0.0  0.0  0.0  1.0

[494 rows x 5 columns]
['Ca', 'Sr', 'Ba', 'Ge', 'Sn', 'Pb']
      Ca   Sr   Ba   Ge   Sn   Pb
0    0.0  0.0  0.0  1.0  0.0  0.0
1    0.0  0.0  0.0  1.0  0.0  0.0
2    0.0  0.0  0.0  1.0  0.0  0.0
3    0.0  0.0  0.0  0.0  1.0  0.0
4    0.0  0.0  0.0  0.0  1.0  0.0
..   ...  ...  ...  ...  ...  ...
489  0.0  0.0  0.0  1.0  0.0  0.0
490  0.0  0.0  0.0  0.0  1.0  0.0
491  0.0  0.0  0.0  0.0  0.0  1.0
492  0.0  1.0  0.0  0.0  0.0  0.0
493  0.0  1.0  0.0  0.0  0.0  0.0

[494 rows x 6 columns]
['Cl', 'Br', 'I']
        Cl   Br      I
0    1.000  0.0  0.000
1    0.000  1.0  0.000
2    0.000  0.0  1.000
3    1.000  0.0  0.

In [None]:
# Kept so that `plt` stays available to any later cell; this cell itself only needs numpy.
import matplotlib.pyplot as plt

# Read the HSE table; header=1 takes the column names from the second line of the file.
hse_data = read_csv('HSE_data.csv', header=1, skiprows=0)
# Values of the 4th column (presumably the band gap, going by the variable name -- confirm)
gap_data = hse_data.iloc[:,3].values

# Histogram of the values over `nbins` equal-width bins
nbins = 50
counts, bin_edges = np.histogram(gap_data, bins=nbins)

print(counts)
print(bin_edges)

# Flag every sample that lies in a bin holding at least `count_threshold` samples.
count_threshold = 3
# np.digitize numbers bins from 1, while `counts` is indexed from 0, so shift by one.
# The maximum value lands in digitize bin nbins + 1, although np.histogram counts it in
# its last bin (closed on the right); clipping folds it back into that last bin.
bin_idx = np.clip(np.digitize(gap_data, bin_edges) - 1, 0, nbins - 1)
mask = counts[bin_idx] >= count_threshold
print(mask)
print(gap_data)