# Large screening
**Instruction**  
The aim of this notebook is to visualize the results of the large-scale screening.  
The following code requires the result file of the screening predicted by the NNP.

In [None]:
import glob
import pandas as pd
from utility import *
import matplotlib.pyplot as plt

In [None]:
# Read the screening results into a DataFrame.
# NOTE(review): 'FILE_NAME.csv' looks like a placeholder; point it at the real result file.
df = pd.read_csv('FILE_NAME.csv')
print(df.shape)
# Trailing bare expression: shows the first rows when run as a notebook cell.
df.head()

In [None]:
# Relative deviation (%) of 'opt_V' from 'exp_V'; rows deviating by more than
# 20 % are removed.
# The deviation is kept in a local Series instead of being appended as a column
# and then removed positionally (`iloc[:, :-1]`): the positional drop silently
# removes an unrelated column whenever 'deltaV' is not the last one.
delta_v = (df['opt_V'] - df['exp_V']) / df['exp_V'] * 100
df = (
    df[delta_v.abs() <= 20]
    .drop(columns='deltaV', errors='ignore')  # the result never carries 'deltaV'
    .reset_index(drop=True)
)
df.shape

In [None]:
import warnings
# Suppress every warning from here on; the filter is process-wide, not limited
# to this cell.
warnings.simplefilter('ignore')
import pandas as pd
import numpy as np

def calculate_modulus(row):
    """Compute the modulus values for one row of the screening table.

    The 36 entries at positions 1..36 of ``row`` are reshaped into a 6x6
    array and handed to ``stiffnesstensor2modulus``.

    Parameters
    ----------
    row : pandas.Series
        One row of the results DataFrame, as supplied by
        ``df.apply(calculate_modulus, axis=1)``.
        NOTE(review): assumes positions 1..36 hold the tensor components in
        row-major order -- confirm against the CSV layout.

    Returns
    -------
    pandas.Series
        The values returned by ``stiffnesstensor2modulus``, indexed by the
        module-level ``moduli`` labels. All entries are NaN when any returned
        value is negative or when the computation raises.
    """
    nan_result = pd.Series([np.nan] * len(moduli), index=moduli)
    try:
        # .iloc makes the positional slice explicit, independent of labels.
        tensor = np.array(row.iloc[1:37]).reshape(6, 6)
        res = stiffnesstensor2modulus(tensor)
        if any(value < 0 for value in res):  # negative values are rejected
            return nan_result
        return pd.Series(res, index=moduli)
    except Exception:
        # Best effort: a failing row becomes NaN and is dropped later.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate so a long apply() can still be interrupted.
        return nan_result

# Labels for the values produced by calculate_modulus: E_*, K_* and G_* with
# the suffixes V, R, H, RH, followed by n_H and A_L.
moduli = [
    f'{prefix}_{suffix}'
    for prefix in ('E', 'K', 'G')
    for suffix in ('V', 'R', 'H', 'RH')
]
moduli += ['n_H', 'A_L']

# Evaluate every row, append the new columns, and keep only complete rows.
modulus_df = df.apply(calculate_modulus, axis=1)
df = pd.concat([df, modulus_df], axis=1).dropna().reset_index(drop=True)

In [None]:
# Keep only the first 5000 rows for the plots below.
# `.copy()` detaches the subset from the full table, so the later column
# assignment on `df` acts on an independent frame rather than on a slice.
print('Total', df.shape)
df = df.iloc[:5000, :].copy()
print('Used', df.shape)

In [None]:
# Font settings applied to all subsequent figures.
plt.rcParams.update({"font.family": 'Arial', "font.size": 12})
def visuzalize(df_calc, figname=None):
    """Draw one histogram per modulus label in a 4x4 grid.

    A panel is created for every label in the module-level ``moduli`` list.
    Each panel is annotated with the maximum, mean and minimum of its column.

    Parameters
    ----------
    df_calc : pandas.DataFrame
        Table holding one column for each label in ``moduli``.
    figname : str, optional
        When given, the figure is also saved to this path at 300 dpi.
    """
    fig = plt.figure(figsize=(12, 12))
    for i, item in enumerate(moduli):
        ax = fig.add_subplot(4, 4, i + 1)
        y_calc = df_calc[item]
        ax.hist(y_calc, color='skyblue', ec='k', bins=30)

        if 'E' in item or 'K' in item or 'G' in item:
            # Brace the two-letter subscript so mathtext renders "RH" as one
            # subscript; the loop variable itself is left untouched.
            label = item[0] + '_{RH}' if 'RH' in item else item
            ax.set(xlim=(0, 70))
            ax.set(title=f'${label}$', xlabel=f'Pred. ${label}$ (GPa)', ylabel='Frequency')
        elif 'n_H' in item:
            # These panels are histograms of predicted values, so they take
            # the same "Pred." / "Frequency" labels as the panels above
            # (previously "Exp." / "Calc.").
            ax.set(title='$\u03BD$', xlabel='Pred. $\u03BD$', ylabel='Frequency')
        else:
            ax.set(title='$A$', xlabel='Pred. $A$', ylabel='Frequency')

        text = f'''Max: {y_calc.max():.2f}\nMean: {y_calc.mean():.2f}\nMin: {y_calc.min():.2f}'''
        # The n_H panel gets its annotation on the left, all others mid-panel.
        x_text = 0.05 if 'n' in item else 0.5
        ax.text(x_text, 0.95, text, transform=ax.transAxes,
                horizontalalignment='left', verticalalignment='top')

    fig.tight_layout()
    if figname is not None:
        fig.savefig(figname, dpi=300)
    fig.show()

In [None]:
# Plot the histogram grid for the current table and save it to the given file.
visuzalize(df, 'SAVE_FILE_NAME.png')

In [None]:
# Exp-Pred plot of relaxed cell volume and the histogram
df['deltaV'] = (df['opt_V'] - df['exp_V']) / df['exp_V'] * 100
color = 'skyblue'
fig = plt.figure(figsize=(8, 4.5))

# Left panel: 'opt_V' against 'exp_V' with a dashed y = x reference line.
ax = fig.add_subplot(1, 2, 1)
ax.scatter(df['exp_V'], df['opt_V'], c=color, ec='k', linewidth=0.5)
ax.plot([0, 13000], [0, 13000], linestyle='dashed', c='k')
# Raw strings: the LaTeX backslashes (\m..., \D...) are invalid escape
# sequences in normal string literals and raise SyntaxWarning on Python 3.12+.
ax.set(xlabel=r'exp. $V$ ($\mathrm{\mathring{A}}^3$)',
       ylabel=r'opt. $V$ ($\mathrm{\mathring{A}}^3$)')

# Right panel: distribution of the relative volume deviation.
ax = fig.add_subplot(1, 2, 2)
ax.hist(df['deltaV'], bins=25, ec='k', color=color)
ax.set(xlabel=r'$\Delta$V (%)', ylabel='Frequency')

plt.tight_layout()
plt.savefig('SAVE_FILE_NAME.png', dpi=300)

In [None]:
# Histogram of E_RH shown over the 0-100 GPa axis range
plt.rcParams.update({"font.family": 'Arial', "font.size": 12})
fig, ax = plt.subplots(figsize=(4, 5))
ax.hist(df['E_RH'], bins=80, color='skyblue', ec='k', linewidth=0.5)
ax.set_xlabel('$E_{RH}$ (GPa)')
ax.set_ylabel('Frequency')
ax.set_xlim(0, 100)
fig.tight_layout()
fig.savefig('SAVE_FILE_NAME.png', dpi=300)

In [None]:
# Same E_RH histogram, zoomed to 25-100 GPa with the count axis capped at 8
plt.rcParams.update({"font.family": 'Arial', "font.size": 18})
fig, ax = plt.subplots(figsize=(4, 3))
ax.hist(df['E_RH'], bins=80, color='skyblue', ec='k', linewidth=0.5)
ax.set_xlabel('$E_{RH}$ (GPa)')
ax.set_ylabel('Frequency')
ax.set_xlim(25, 100)
ax.set_ylim(0, 8)
fig.tight_layout()
fig.savefig('SAVE_FILE_NAME.png', dpi=300)

In [None]:
# Same E_RH histogram, zoomed to the 0-20 GPa range
plt.rcParams.update({"font.family": 'Arial', "font.size": 18})
fig, ax = plt.subplots(figsize=(4, 3))
ax.hist(df['E_RH'], bins=80, color='skyblue', ec='k', linewidth=0.5)
ax.set_xlabel('$E_{RH}$ (GPa)')
ax.set_ylabel('Frequency')
ax.set_xlim(0, 20)
fig.tight_layout()
fig.savefig('SAVE_FILE_NAME.png', dpi=300)

In [1]:
# df.to_csv('csv/Large_screening.csv')