In [1]:
"""Author: Sameer Kesava
This code is for analysing the statistical significance of the fit parameters obtained from fitting dielectric function models
to spectroscopic ellipsometry data in CompleteEASE software (J.A.Woollam Company) by estimating p-values"""

'Author: Sameer Kesava\nThis code is for analysing the statistical significance of the fit parameters obtained from fitting dielectric function models\nto spectroscopic ellipsometry data in CompleteEASE software (J.A.Woollam Company) by estimating p-values'

In [2]:
import pandas as pd
import numpy as np
from pandas import ExcelWriter
from scipy import stats
import pdb

In [3]:
pd.options.display.max_columns = 100

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [4]:
before_fit_df = pd.read_excel('Values_Errors.xlsx', sheet_name='before_fit', index_col = 0)
#reads in the MSE, parameter values and corresponding standard errors

No_of_points = 1067
#total number of data points (varies with the range of fit)

before_fit_df.replace(np.nan, 0, inplace = True)
before_fit_df.rename(columns = {'VALUE': 'BEF_VALUE'}, inplace = True)
before_fit_df

Unnamed: 0_level_0,BEF_VALUE,ERROR BAR
PARAMETER,Unnamed: 1_level_1,Unnamed: 2_level_1
MSE,1.752,0.0
Amp1,3.235782,0.076963
Br1,0.7059,0.01159
En1,5.633,0.009451
Amp2,5.345734,0.084727
Br2,0.17,0.00471
En2,4.854,0.001664
Amp3,0.398332,0.107625
Br3,0.2612,0.05888
En3,5.821,0.015


In [5]:
#df.dropna(inplace=True)
before_fit_df['errorpercent'] = before_fit_df['ERROR BAR'] / before_fit_df['BEF_VALUE']*100

before_fit_df['P-value'] = 2*(1 - stats.t.cdf( np.abs(before_fit_df['BEF_VALUE']) / before_fit_df['ERROR BAR'],No_of_points))
#estimating p-value using cumulative distribution function corresponding to t-statistic

before_fit_df

Unnamed: 0_level_0,BEF_VALUE,ERROR BAR,errorpercent,P-value
PARAMETER,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
MSE,1.752,0.0,0.0,0.0
Amp1,3.235782,0.076963,2.378498,0.0
Br1,0.7059,0.01159,1.641876,0.0
En1,5.633,0.009451,0.167779,0.0
Amp2,5.345734,0.084727,1.584944,0.0
Br2,0.17,0.00471,2.770588,0.0
En2,4.854,0.001664,0.034281,0.0
Amp3,0.398332,0.107625,27.019019,0.0002256364
Br3,0.2612,0.05888,22.542113,1.010775e-05
En3,5.821,0.015,0.257688,0.0


In [6]:
before_fit_df_2 = before_fit_df.copy()

index_pos = list()
for i in before_fit_df_2.index:
    index_pos.append(before_fit_df_2.index.get_loc(i)-1)
    
before_fit_df_2['Index_Pos'] = index_pos

before_fit_df_2[np.abs(before_fit_df_2['P-value']) <= 0.05] = ''
#setting threshold for p-value to check for significance of the fit parameter - can use 1% or 5%

before_fit_df_2

Unnamed: 0_level_0,BEF_VALUE,ERROR BAR,errorpercent,P-value,Index_Pos
PARAMETER,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MSE,,,,,
Amp1,,,,,
Br1,,,,,
En1,,,,,
Amp2,,,,,
Br2,,,,,
En2,,,,,
Amp3,,,,,
Br3,,,,,
En3,,,,,


In [7]:
before_fit_df_3 = before_fit_df.copy()
before_fit_df_3[np.abs(before_fit_df_3['P-value']) <= 0.00001] = ''
#using 0.001% threshold as an example
before_fit_df_3

Unnamed: 0_level_0,BEF_VALUE,ERROR BAR,errorpercent,P-value
PARAMETER,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
MSE,,,,
Amp1,,,,
Br1,,,,
En1,,,,
Amp2,,,,
Br2,,,,
En2,,,,
Amp3,0.398332,0.107625,27.019,0.000225636
Br3,0.2612,0.05888,22.5421,1.01077e-05
En3,,,,


In [8]:
index_pos = list()
for i in before_fit_df_3.index:
    index_pos.append(before_fit_df_3.index.get_loc(i)-1)
    
before_fit_df_3['Index_Pos'] = index_pos

pd.set_option('display.max_rows', None)
before_fit_df_3

Unnamed: 0_level_0,BEF_VALUE,ERROR BAR,errorpercent,P-value,Index_Pos
PARAMETER,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MSE,,,,,-1
Amp1,,,,,0
Br1,,,,,1
En1,,,,,2
Amp2,,,,,3
Br2,,,,,4
En2,,,,,5
Amp3,0.398332,0.107625,27.019,0.000225636,6
Br3,0.2612,0.05888,22.5421,1.01077e-05,7
En3,,,,,8


In [9]:
#Use the Correlation_Matrix_Filter file to check for correlations between different parameters