In [None]:
run statistic.ipynb

In [None]:
#
# Post-fit analysis package: 
# simple estimators of effective masses, ratio plots for 3pt correlators. 
#
import sys
import numpy as np
import re
import os
import pandas as pds
import math

# Post-fit analysis class
#   is2pt: True / False / string for 2pt data / non-2pt data / the identifier (string) of 2pt data
class ML_Analyze_PDF: 
    def __init__(self, params):
        self.keys = []# ['p', 'z', 't']
        self.scount = 0
        self.seriesID = None #'s'+str(self.scount)
        self.atables = [] #index = self.seriesID, columns = self.keys)
        self.afuncs = {}
        try:
            self.is2pt = params['is2pt']
            if (self.is2pt is not True) and (self.is2pt is not False):
                self.is2pt = str(params['is2pt'])
        except:
            self.is2pt = str(None)
        self.NA = np.NAN
        self.table_open = False
        self.DEBUG = False
        return
    
    # Add in customized features of the data for each set of fit, stored in keys (and elem)
    # keys: names of data features, string, dictionary or list
    # elem: values of data features
    def extend_table(self, keys = None, elem = None):
        if self.scount == 0:
            return
        if elem is None:
            if keys is None:
                return
            if isinstance(keys, dict):
                for key in keys:
                    if key in self.keys:
                        print("ML_Analyze_PDF Warning: no overwriting to exsisting entry {:}".format(key))
                    else:
                        if keys[key] is not None:
                            self.wkrtable[-1].insert(key, keys[key])
                        self.keys.append(key)
            else:
                print("ML_Analyze_PDF Error: try to extend table with unidentified elements!")
                sys.exit(1)
            return
        if isinstance(keys, str):
            if elem is not None:
                self.wkrtable[-1].insert(len(self.wkrtable[-1].columns), keys, elem)
                if keys not in self.keys:
                    self.keys.append(keys)
        else:
            for key in keys:
                if key in self.keys:
                    print("ML_Analyze_PDF Warning: no overwrite to exsisting entry {:}".format(key))
                else:
                    if elem[key] is not None:
                        self.wkrtable[-1].insert(key, elem[key])
                    self.keys.append(key)
        return
    
    # add to the post-fit history a new set of fit identified with:
    #  NT: temporal lattice size
    #  ndata / ntrn / nbc: number of total data/ training data/ bias correction data 
    #  pztY: list of data Y's ids: momentum, z-link, time slices
    #  pztX: list of data X's ids: momentum, z-link, time slices
    def add_table(self, NT, ndata, ntrn, nbc, pztY, pztX=None):
        if self.table_open:
            self.merge_table()
        self.seriesID = 's'+str(self.scount)
        self.wkrtable = []
        pzY = []
        tY = []
        if isinstance(NT, int) is False:
            NTl = []
        else:
            NTl = NT
        for tg in pztY:
            #flag = True
            ytag = tg.split('+t+')
            pz = ytag[0]
            if pz in pzY:
                continue
            pzY.append(pz)
            if isinstance(NTl, list):
                NTl.append(NT[pz])
            t=[]
            for tg2 in pztY:
                if pz+'+t+' in tg2 and (int(tg2.split('+')[3]) not in t):
                    t.append(int(tg2.split('+')[3]))
            t.sort()
            tY.append(t)
            del t
        #self.atables.append(pds.DataFrame(data=[NT, ndata, ntrn, nbc+ntrn, list(pztX), tY], index = pzY, 
                #                                  columns=['NT', 'ndata', 'ntrn', 'nlbl', 'pztX', 'tY']))
        if pztX is None:
            pztXl = None
        elif isinstance(pztX, dict):
            pztXl = [list(pztX[pz]) for pz in pztX]
        else:
            pztXl = [pztX]
        self.tableframe = pds.DataFrame(data={'NT': NTl, 'ndata': ndata, 'ntrn': ntrn, 'nlbl': nbc+ntrn, 
                                              'pztX': pztXl, 'tY': tY}, index = pzY) 
                                             #     columns=['NT', 'ndata', 'ntrn', 'nlbl', 'pztX', 'tY'])
        self.keyf = ['NT', 'ndata', 'ntrn', 'nlbl', 'pztX', 'tY']
        self.keys = []
        if False:#if pztX is not None:
            self.keyf.append('pztX') 
        self.table_open = True
        self.datatmp = {}
        return
    
    # Append to the current set of fit the fit results
    def append_table(self):
        self.wkrtable.append(pds.DataFrame(self.tableframe))
        self.keys = list(self.keyf)
        if len(self.datatmp) > 0:
            #for key in self.datatmp:
             #   del self.datatmp[key]
            del self.datatmp
            self.datatmp = {}
        return
    
    # Merge all fits from the current set of fit
    def merge_table(self):
        if self.table_open is False:
            print("Warning: No work table to merge!")
            return
        if len(self.wkrtable) > 1:
            self.atables.append(self.wkrtable[0].append(list(self.wkrtable[1:])))
        else:
            self.atables.append(self.wkrtable[0])
        self.scount += 1
        del self.wkrtable
        #self.wkrtable = []
        del self.keys
        del self.keyf
        if len(self.datatmp) > 0:
            #for key in self.datatmp:
             #   del self.datatmp[key]
            del self.datatmp
        self.table_open = False
        return
      
    def setup_ffunc(self, func, key = None):
        if isinstance(func, dict):
            for key in func:
                self.afuncs[key] = func[key]
        elif key is None:
            print("ML_Analyze_PDF Error: try to add functions with unidentified names!")
            sys.exit(1)
        else:
            if isinstance(key, (tuple, list)): 
                assert(len(func)==len(key))
                for k in key:
                    self.afuncs[k] = func[k]
            else:
                self.afuncs[key] = func
        return
    
    # Perform bias correction on the covariance of the predicted Y's; tag: list of data id's
    def biascorrt_cov(self, tag, indx = None):
        if indx is None:
            indx = -1
        if 'ycovR' not in self.wkrtable[indx].columns:
            print("ML_Analyze_PDF Error: Missing data covariance matrix!")
            sys.exit(1)
        if 'ycovI' not in self.wkrtable[indx].columns:
            print("ML_Analyze_PDF Error: Missing data covariance matrix!")
            sys.exit(1)
        tab = self.wkrtable[indx]
        if 'bcycovR' not in tab.columns:
            tab.insert(len(tab.columns), 'bcycovR', None)
        if 'bcycovI' not in tab.columns:
            tab.insert(len(tab.columns), 'bcycovI', None)
        for tg in tag:
            pz = tg.split('+t+')[0]
            ri = tg.split('+')[-1]
            if tab.loc[pz, 'bcycov'+ri] is not None:
                continue
            cov = tab.loc[pz,'ycov'+ri]
            if cov is None: 
                continue
            ny = int(cov.shape[0]/2)
            assert(cov.shape[1] == cov.shape[0] == 2*ny)
            tab['bcycov'+ri][pz] = (2.*cov[:ny, :ny] + cov[ny:, ny:] - cov[:ny,ny:] - cov[ny:,:ny])
                       #  -np.matmul(cov[:ny,ny:], cov[ny:,:ny])-np.matmul(cov[ny:,:ny], cov[:ny,ny:]))
        if self.DEBUG:
            print(self.wkrtable[indx])
        return
    
    # Add to the data storage for post-fit analysis: 
    #   tag: data ids
    #   data: list of data (same length as tag)
    #   dscale: data normalization factors (scalar, or vector of the same length as tag)
    #   dtrn / dbc / dunlbl : lists of training / bias correction / unlabeled data, an alternative to 'data'
    #   pred: True / False for predicted / observed data
    #   NT: temporal lattice size
    #   is2pt: if the data are 2pt correlators
    #   overwrite: True / False for overwriting / no overwriting of existing data w. identical id
    #   indx: None for working on the most recent fit
    def add_data(self, tag, data=None, dscale = None, dtrn=None, dbc=None, dunlbl=None, pred=True, NT=None, pztX=None, is2pt = False, overwrite = False, indx = None):
        if is2pt and (self.is2pt is False):
            return
        if indx is None:
            indx = -1
        tab = self.wkrtable[indx]
        if pred is False:
            name = 'Yobs'
        else:
            name = 'Ypdt'
        if name not in self.datatmp:
            self.datatmp[name] = {}
        elif overwrite:
            del self.datatmp[name] 
            self.datatmp[name] = {}
        else:
            #tag1 = list(tag)
            tag=list(tag)
            for i in range(len(tag)):
                if tag[i].split('+t+')[0] in self.datatmp[name]:
                    tag[i] = None
                elif is2pt and (self.is2pt is not True):
                    if tag[i].split('+')[1] != self.is2pt:
                        tag[i] = None
        # add tags to the rows
        tag1 = []
        for i in range(len(tag)):
            tg = tag[i]
            if tg is None:
                continue
            tag1.append(tg)
            pz = tg.split('+t+')[0]
            if pz not in tab.index:
                tY = []
                for key in tag:
                    if key is None:
                        continue
                    if pz+'+' in key and (int(key.split('+')[3]) not in tY):
                        tY.append(int(key.split('+')[3]))
                try:
                    NTl = NT[pz]
                except:
                    NTl = NT
                if isinstance(NTl, int) is False:
                    NTl =  tab.iloc[-1,0]
                if pztX is None:
                    pztXl = None
                elif isinstance(pztX, dict):
                    pztXl = [list(pztX[pz])]
                else:
                    pztXl = [pztX]
                tab = tab.append(pds.DataFrame(data={'NT': NTl, 'ndata': tab.iloc[0,1], 'ntrn': tab.iloc[0,2], 'nlbl': tab.iloc[0,3], 
                                              'pztX': pztXl, 'tY': [tY]}, index = [pz]))
       # del tab
        if dscale is None:
            dscale = 1.0
        for i in range(len(tag)):
            tg = tag[i]
            if tg is None:
                continue
            #print(tg)
            if isinstance(dscale, dict):
                scale = dscale[tg]
            else:
                try:
                    scale = dscale[i]
                except:
                    scale = dscale
            #print("scale = {:}".format(scale))
            pzt = tg.split('+t+')
            tri = pzt[1].split('+')
            pz = pzt[0]
            assert(pz in tab.index)
            t = int(tri[0])
            if tri[1] == 'R':
                ri = 0
            else:
                ri = 1
            if pz not in self.datatmp[name]:                
                self.datatmp[name][pz] = {} #{'R': list(ini), 'I': list(ini)}
            if tri[1] not in self.datatmp[name][pz]:
                self.datatmp[name][pz][tri[1]] = [[self.NA for n in range(tab.loc[pz, 'ndata'])] for m in range(tab.loc[pz, 'NT'])]
            if data is not None:
                self.datatmp[name][pz][tri[1]][t] = np.array(np.array(list(data[i]))*scale).tolist()
                continue
            if dtrn is not None:
                #print(list(dtrn[i]))
                self.datatmp[name][pz][tri[1]][t][:tab.loc[pz, 'ntrn']] = np.array(np.array(list(dtrn[i]))*scale).tolist()
            if dbc is not None:
                self.datatmp[name][pz][tri[1]][t][tab.loc[pz, 'ntrn']:tab.loc[pz, 'nlbl']] = np.array(np.array(list(dbc[i]))*scale).tolist()
            if dunlbl is not None:
                self.datatmp[name][pz][tri[1]][t][tab.loc[pz, 'nlbl']:] = np.array(np.array(list(dunlbl[i]))*scale).tolist()
        if False:#if is2pt:
            print(self.datatmp[name])
        self.wkrtable[indx] = pds.DataFrame(tab)
        if False: #for n in self.datatmp:
            for pz in self.datatmp[n]:
                for ri in self.datatmp[n][pz]:
                    for t in range(len(self.datatmp[n][pz][ri])):
                        print("{:}+{:}+t+{:}+{:}: {:}".format(n, pz, t, ri, self.datatmp[n][pz][ri][t]))
        del tab
        if (data is not None) or ((dtrn is not None) and (dbc is not None) and (dunlbl is not None)):
            self.add_mean(tag=tag1, pred=pred, indx=indx)
            self.add_cov(tag=tag1, pred=pred, indx=indx)
        return
    
    # Add the data mean to the list of features
    # tag: data id's 
    # data: corresponding data, None for using stored data
    # pred: None / True / False for calculating bias-corrected mean, direct predicted mean, and observed mean
    def add_mean(self, tag, data=None, pred=None, indx=None):
        if indx is None:
            indx = -1
        tab = self.wkrtable[indx]
        if pred is False:
            Yname = 'Yobs'
            name = 'Yobsmean'
        else:
            Yname = 'Ypdt'
            if pred is None:
                name = 'Ymean'
            else:
                name = 'Ypdtmean'
        if (data is None) and (Yname not in self.datatmp): #tab.columns):
            print("Error: Missing data for adding mean of data {:}".format(Yname))
            sys.exit(1)
        if pred is None:
            if (data is None) and ('Yobs' not in self.datatmp):
                print("Error: Missing observed data for adding mean of data {:}".format(Yname))
                sys.exit(1)
        if data is not None:
            if Yname in self.datatmp: #tab.columns:
                print("Warning: overwriting existing data!")
            self.add_data(tag, data=data, pred=pred, indx=indx)
        #dic = {'R': list(ini), 'I': list(ini)}
        if name+'R' not in tab.columns:
            tab.insert(len(tab.columns), name+'R', None)
        if name+'I' not in tab.columns:
            tab.insert(len(tab.columns), name+'I', None)
     #   for key in tab.index:
      #      tab.loc[key, name] = np.array([None for i in range(tab.loc[key, 'NT']*2)])
        for tg in tag:
            pz = tg.split('+t+')[0]
            if pz not in self.datatmp[Yname]:#if tab.loc[pz, Yname] is None:
                continue
            if pred is None:
                 if pz not in self.datatmp['Yobs']:#if tab.loc[pz,'Yobs'] is None:
                        continue
            tri = tg.split('+')
            t=int(tri[3])
            if isinstance(tab.loc[pz, name+tri[-1]], list) is False: # is None:
                #tab.loc[pz, name] = pds.Series({'R': list(ini), 'I': list(ini)})
                tab[name+tri[-1]][pz] = [self.NA for i in range(tab.loc[pz, 'NT'])]
            if tri[-1] == 'R':
                ri = 0
            else:
                ri = 1
            if pred is None:
                tab[name+tri[-1]][pz][t] = np.nanmean(np.array(
                self.datatmp[Yname][pz][tri[-1]][t][tab.loc[pz,'nlbl']:])) + np.nanmean(np.array(
                self.datatmp['Yobs'][pz][tri[-1]][t][tab.loc[pz,'ntrn']:tab.loc[pz,'nlbl']]))-np.nanmean(np.array(
                self.datatmp[Yname][pz][tri[-1]][t][tab.loc[pz,'ntrn']:tab.loc[pz,'nlbl']]))
            else:
                tab[name+tri[-1]][pz][t] = np.nanmean(np.array(self.datatmp[Yname][pz][tri[-1]][t]))
        return
    
    # Add the covariance matrix to the list of data features 
    def add_cov(self, tag, data=None, pred=None, indx=None):
        if indx is None:
            indx = -1
        tab =  self.wkrtable[indx]
        if pred is True:
            name = 'ycovpdt'
            Yname = 'Ypdt'
        elif pred is False:
            name = 'ycovobs'
            Yname = 'Yobs'
        else:
            Yname = 'Ypdt'
            name = 'ycov'
        if Yname not in self.datatmp: #tab.columns: 
            if data is None:
                print("Error: Missing data for generating covariance matrix!")
                sys.exit(1)
            self.add_data(tag, data=data, pred=pred, indx=indx)
        if name+'R' not in tab.columns:
            tab.insert(len(tab.columns), name+'R', None)
        if name+'I' not in tab.columns:
            tab.insert(len(tab.columns), name+'I', None)
        for key in tab.index:
            if key not in self.datatmp[Yname]:#if tab.loc[key, Yname] is None:
                continue
            if pred is None:
                if key not in self.datatmp['Yobs']: #if tab.loc[key, 'Yobs'] is None:
                    continue
            if name == 'ycov':
                lst = list(self.datatmp['Ypdt'][key]['R'])
                lst.extend(self.datatmp['Yobs'][key]['R'])
                tab[name+'R'][key] = np.cov(np.array(lst)) 
                print('shape of ycovR: {:}'.format(tab[name+'R'][key].shape))
                del lst
                lst = list(self.datatmp['Ypdt'][key]['I'])
                lst.extend(self.datatmp['Yobs'][key]['I'])
                tab[name+'I'][key] = np.cov(np.array(lst))
                print('shape of ycovI: {:}'.format(tab[name+'I'][key].shape))
                del lst
            else:
                tab[name+'R'][key] = np.cov(np.array(list(self.datatmp[Yname][key]['R']))) 
                tab[name+'I'][key] = np.cov(np.array(list(self.datatmp[Yname][key]['I'])))
        #del self.wkrtable[indx]
        #self.wkrtable[indx] = tab
        if self.DEBUG::
                print(self.wkrtable[indx])
        return
                
    def apply_ffunc(self, func):
        """Fit the stored means with a function registered by ``setup_ffunc``.

        func: looked up in ``self.afuncs``; the program terminates via
            ``sys.exit(1)`` (after printing an empty line) when it is absent.

        NOTE(review): this method looks unfinished and cannot run as written:
          * ``indx`` is used below but is neither a parameter nor assigned;
          * ``self.biascorrt_cov()`` is called without its required ``tag``;
          * ``lsq_fit`` is not defined in the visible source - presumably it
            comes from an external fitting package; confirm;
          * ``func`` is used as a dictionary key, as a string ('fit.'+func)
            and as a pair (func[0], func[1]);
          * ``DataFrame.insert`` takes (loc, column, value); two arguments
            are passed;
          * ``ct`` is never advanced and the loop variable ``i`` is unused.
        """
        if func not in self.afuncs:
            print()
            sys.exit(1)
        # time slices grouped by the first two '+'-separated fields of each tag
        lyt = {}
        if 'pztY' in self.keys:
            for pzt in self.wkrtable[indx].loc['pztY']:
                lst = pzt.split('+')
                ky = lst[0]+'+'+lst[1]
                if ky not in lyt:
                    lyt[ky] = []
                lyt[ky].append(int(lst[2]))
        # fit results per group, two entries each (the loop below runs twice)
        fit = {}
        if 'ycov' in self.keys and 'ymean' in self.keys:
            self.biascorrt_cov()
            ct = 0
            for ky in lyt:
                lt = 2*len(lyt[ky])
                fit[ky] = []
                for i in range(2):
                    # every second entry of the mean / covariance is taken
                    mean = self.wkrtable[indx].loc['ymean'][ct:ct+lt:2]
                    cov =self.wkrtable[indx].loc['bcycov'][ct:ct+lt:2, ct:ct+lt:2]
                    fit[ky].append(lsq_fit(data=(lyt[ky], mean, cov), 
                                      prior=func[1], fcn=func[0]))
        self.wkrtable[-1].insert('fit.'+func, fit)
        return
    
    # Calculate the effective mass of 2pt correlators 
    # pred: True / False for predicted / observed data estimators
    def effmass(self, tag, pred=True, indx=None):
        if indx is None:
            indx = -1
        tab = self.wkrtable[indx]
        if pred:
            Yname = 'Y'
            cname = 'bcycov'
        else:
            Yname = 'Yobs'
            cname = 'ycovobs'
        name = Yname+'mean'
        if (Yname not in self.datatmp) and (name+'R' not in tab.columns) and (name+'I' not in tab.columns):
            print('Missing {:} data for effective mass calcualtions!'.format(Yname))
            sys.exit(1)
        if (name+'R' not in tab.columns) and (name+'I' not in tab.columns):
            if pred:
                self.add_mean(tag, data=None, pred=None, indx=indx)
            else:
                self.add_mean(tag, data=None, pred=False, indx=indx)
        if pred:
            rname0 = 'effmass'
        else:
            rname0 = 'effmassobs'
        if pred and (cname+'R' not in tab.columns) and (cname+'I' not in tab.columns):
            self.biascorrt_cov(tag, indx)
        for ri in ['R', 'I']:
            if rname+ri not in tab.columns:
                #lst = [[[self.NA, self.NA] for i in range(tab.loc[k, 'NT']-1)] for k in tab.index]
                tab.insert(len(tab.columns), rname+ri, None)#lst)
        name0 = name
        cname0 = cname
        for tg in tag: 
            # skip 3pt data
            if 'T' in tg:
                continue
            if (self.is2pt is not True) and self.is2pt != tg.split('+')[1]:
                continue
            pz = tg.split('+t+')[0]
            t = int(tg.split('+')[3])
            ri = tg.split('+')[-1]
            rname = rname0+ri
            name = name0+ri
            cname = cname0+ri
            if isinstance(tab.loc[pz, rname], list) is False:
                tab[rname][pz] = [(self.NA, self.NA) for i in range(tab.loc[pz, 'NT']-1)]
            if t == tab.loc[pz, 'NT']-1 or (tab.loc[pz, name][t] is self.NA) or (tab.loc[pz, name][t+1] is self.NA):
                continue
            try:
                Emean = math.log(tab.loc[pz,name][t] / tab.loc[pz,name][t+1])
            except:
                Emean = self.NA
            if Emean is self.NA:
                continue
            try:
                Amean = tab.loc[pz,name][t] * math.exp(Emean*t)
            except:
                tab[rname][pz][t] = (Emean, self.NA)
                continue
            H = np.array([[0.,0.],[0.,0.]])
            try:
                for i in range(2):
                    sigma2 = tab.loc[pz,cname][t+i,t+i]
                    if self.DEBUG:
                        print(sigma2)
                        print(Emean)
                    H[0,0] += 2.*math.exp(-2.*Emean*float(t+i))/sigma2
                    H[0,1] += 2.*(tab.loc[pz,name][t+i]-2.*math.exp(-Emean*float(t+i))*Amean)*float(t+i)*math.exp(-Emean*float(t+i))/sigma2
                    H[1,1] += 2.*Amean*float(t+i)**2*math.exp(-Emean*float(t+i))*(2.*Amean*math.exp(-Emean*float(t+i))-tab.loc[pz,name][t+i])/sigma2
            except:
                tab[rname][pz][t] = (Emean, self.NA)
                continue
            H[1,0] = H[0,1]
            if self.DEBUG:
                print(H)
            try:
                err = np.linalg.inv(H)
                print(err)
                tab[rname][pz][t] = (Emean, math.sqrt(err[1,1]/float(tab.loc[pz,'ndata']-tab.loc[pz,'nlbl'])))
            except:
                tab[rname][pz][t] = (Emean, self.NA)
        if self.DEBUG:
            print(self.wkrtable[indx])
        return

    # Calculate the ratio of 3pt and 2pt data
    # d2pt: 2pt data served as the denominator, None for using stored data
    def ratio3ptn2pt(self, tag, d2pt=None, pred=True, indx=None):
        if (self.is2pt is True) or (self.is2pt is False):
            return
        if indx is None:
            indx = -1
        tab = self.wkrtable[indx]
        if pred:
            Yname = 'Ypdt'
            mname = 'Ymean'
            rname = 'Y3ptdv2pt'
        else:
            Yname = 'Yobs'
            mname = 'Yobsmean'
            rname = 'Y3ptdv2ptobs'
        if (Yname not in self.datatmp) and (mname+'R' not in tab.index) and (mname+'I' not in tab.index):
            print("Error: Missing data for 3pt&2pt ratios!")
            sys.exit(1)
        if (mname+'R' not in tab.index) and (mname+'I' not in tab.index):
            if pred:
                self.add_mean(tag=tag, pred=None, indx=indx)
            else:
                self.add_mean(tag=tag, pred=False, indx=indx)
        for ri in ['R', 'I']:
            if rname+ri not in tab.columns:
                tab.insert(len(tab.columns), rname+ri, None)
        for tg in tag: 
            pz = tg.split('+t+')[0]
            p = pz.split('+')[0]
            z = pz.split('+')[1]
            t = int(tg.split('+')[3])
            ri = tg.split('+')[-1]
            if self.is2pt == z:
                continue
            #3pt
            if pz not in tab.index:
                continue
            #find 2pt
            if d2pt is None:
                if p+'+'+self.is2pt in tab.index:
                    print("Using stored data as denominator in 3pt&2pt ratios at p,z= {:} , {:}\n".format(p, z))
                    try:
                        d2ptn = self.datatmp[Yname][p+'+'+self.is2pt][ri][t]
                        d2ptm = tab.loc[p+'+'+self.is2pt, mname+ri][t]
                    except:
                        d2ptn = self.datatmp['Yobs'][p+'+'+self.is2pt][ri][t]
                        d2ptm = tab.loc[p+'+'+self.is2pt, 'Yobsmean'+ri][t]
                else:
                    print("Error: Missing 2pt data for 3pt&2pt ratios at p= {:}".format(p))
                    sys.exit(1)
            else:
                d2ptn = d2pt[int(ri=='I')]
                d2ptm = np.array(d2ptn).mean()
            name = rname+ri
            if isinstance(tab.loc[pz, name], list) is False: 
                tab[name][pz] = [(self.NA, self.NA) for i in range(tab.loc[pz, 'NT'])]
            if pred:
                extcov = np.cov(np.array([self.datatmp[Yname][pz][ri][t], d2ptn, 
                                          self.datatmp['Yobs'][pz][ri][t], self.datatmp['Yobs'][p+'+'+self.is2pt][ri][t]]))
                assert(list(extcov.shape)==[4,4])
                cov = (2.*extcov[:2, :2] + extcov[2:, 2:] - extcov[:2,2:] - extcov[2:,:2])
                      #   -np.matmul(extcov[:2,2:], extcov[2:,:2])-np.matmul(extcov[2:,:2], extcov[:2,2:]))
            else:
                cov = np.cov(np.array([self.datatmp[Yname][pz][ri][t], d2ptn]))
            df = np.array([[1.0/d2ptm, -tab.loc[pz, mname+ri][t]/d2ptm**2]])
            err = np.matmul(np.matmul(df, cov), df.T)/math.sqrt(float(tab.loc[pz,'ndata']-tab.loc[pz,'nlbl']))
            tab[name][pz][t] = (tab.loc[pz, mname+ri][t]/d2ptm, err)
        return
    
    # Print effective masses
    # pf: Output data filename
    # ppf: Output plot filename
    def print_effmass(self, indx=None, pf = None, ppf=None):
        if indx is None:
            idx = -1
        elif isinstance(indx, str):
            idx = 0
        else:
            idx = indx
        name = {'main': 'effmass', 'description': 'Effective masses', 'complex': True}
        if 'effmass' in self.wkrtable[idx]:
            self.print_predTF(name, True, indx, pf, ppf)
        if 'effmassobs' in self.wkrtable[idx]:
            self.print_predTF(name, False, indx, pf, ppf)
        return
    
    # Print ratio of 3pt and 2pt data
    def print_ratio3ptn2pt(self, indx=None, pf = None, ppf=None):
        if indx is None:
            indx = -1
        elif isinstance(indx, str):
            idx = 0
        else:
            idx = indx
        name = {'main': 'Y3ptdv2pt', 'description': 'Ratio of 3pt & 2pt correlations', 'complex': True}
        if 'Y3ptdv2pt' in self.wkrtable[idx]:
            self.print_predTF(name, True, indx, pf, ppf)
        if 'Y3ptdv2ptobs' in self.wkrtable[idx]:
            self.print_predTF(name, False, indx, pf, ppf)
        return
        
    
    def print_predTF(self, name, pred, indx, pf, ppf):
        if name['complex']: 
            ril = [('R', ' (Real)'), ('I', ' (Imag)')]
        else:
            ril = [('', '')]
        for ri in ril:
            self.print_predTFRI(ri, name, pred, indx, pf, ppf)
        return
    
    def print_predTFRI(self, ri, name, pred, indx, pf, ppf):
        try:
            self.pltcid += 1
        except:
            self.pltcid = 0
        if pred:
            rname = name['main']+ri[0]
            rkey = ' Predicted'
        else:
            rname = name['main']+'obs'+ri[0]
            rkey = ' Observed'
        print("\n {:}{:}{:}: \n   p    z    t    mean \t err \n".format(name['description'], ri[1], rkey))
        if pf is not None:
            pf.write("\n {:}{:}{:}: \n   p    z    t    mean \t err \n".format(name['description'], ri[1], rkey))
        if indx is None:
            indx = -1
        if isinstance(indx, str):
            em = {}
            plt = pds.DataFrame()
            for tab in self.wkrtable:
                for key in tab.index:
                    if tab.loc[key, rname] is not None:
                        try:
                            em[key].append(tab.loc[key, rname])
                        except:
                            em[key] = [tab.loc[key, rname]]
            for key in em:
                yl = []
                errl = []
                tl = []
                for t in range(len(em[key][0])):
                    if em[key][0][t][0] is self.NA:
                        continue
                    ema = np.array([em[key][i][t][0] for i in range(len(em[key]))])
                    emm = np.nanmean(np.array(ema[1:]))
                    #ems = np.nanstd(np.array(ema[1:]))#/math.sqrt(float(len(ema))-1.0)
                    eme = np.array([em[key][i][t][1] for i in range(len(em[key]))])
                    ems = np.nanmean(np.array(eme[1:]))
                    if indx == 'JK':
                        #ems *= float(len(ema)-1)/math.sqrt(float(len(ema)))
                        emm = float(len(ema))*ema[0]-(float(len(ema))-1.0)*emm
                        ems = float(len(eme))*eme[0]-(float(len(eme))-1.0)*ems
                    elif indx == 'BS':
                        emm = 2.*ema[0] - emm
                        if eme[0] is not self.NA:
                            ems = 2.*eme[0] - ems
                    print("{:}    {:}    {:}    {:} \t {:}".format(key.split('+')[0], key.split('+')[1], t, emm, ems))
                    if emm is self.NA:
                        del ema
                        del eme
                        continue
                    if pf is not None:
                        pf.write("{:}    {:}    {:}    {:} \t {:}\n".format(key.split('+')[0], key.split('+')[1], t, emm, ems))
                    plt.insert(len(plt.columns), key+'+t'+str(t), ema)
                    yl.append(emm)
                    errl.append(ems)
                    tl.append(t)
                    del ema
                    del eme
            #if ppf is not None:
            Dataframe.boxplot(None, data=plt, title=name['description']+ri[1]+rkey, out=ppf)
            #Dataframe.errbarplot(tl, yl, errl, self.pltcid, name['description']+ri[1]+rkey, ppf)
        else:
            tab = self.wkrtable[indx]
            for key in tab.index:
                if tab.loc[key, rname] is not None:
                    if ppf is not None:
                        tl = []
                    for t in range(len(tab.loc[key, rname])):
                        if tab.loc[key, rname][t][0] != self.NA:
                            form = "{:}    {:}    {:}    {:} \t {:}\n".format(key.split('+')[0], key.split('+')[1], t, 
                                                                      tab.loc[key, rname][t][0], 
                                                                      tab.loc[key, rname][t][1])
                            print(form)
                            if pf is not None:
                                pf.write(form)
                            if ppf is not None:
                                tl.append(t)
                    y = [tab.loc[key, rname][t][0] for t in tl]
                    yerr = [tab.loc[key, rname][t][1] for t in tl]
                    Dataframe.errbarplot(tl, y, yerr, self.pltcid, name['description']+ri[1]+rkey, ppf)        
        return
    

        
    
    
    
    
    
# Maybe it's not needed     
class PDF_Functions:
    """Parameter container for multi-exponential model functions.

    nexp: number of exponential terms.
    ``par`` holds the parameter lists under the names 'A', 'E', 'V' and
    'Vi' (all None until assigned by the caller); ``prior`` starts as a copy
    of it.  ``prop2prop`` is the 2pt model bound to this container.
    """

    def __init__(self, nexp):
        self.nexp = nexp
        self.par = {'A': None, 'E': None, 'V': None, 'Vi': None}
        self.prior = dict(self.par)
        self.prop2prop = PDF_Functions.twopt(self)

    class twopt:
        """Sum of exponentials ``Y(x) = sum_n A[n] * exp(-E[n] * x)``.

        ``x`` must be set to the list of abscissae before evaluation; ``y``
        is kept as a data slot and is not used by the methods below.
        """

        def __init__(self, base):
            self._base = base  # owning PDF_Functions (parameters, nexp)
            self.x = None
            self.y = None

        def Y(self):
            """Return the model value at every point of ``self.x``."""
            amp = self._base.par['A']
            ene = self._base.par['E']
            # nexp is a count, so the terms are indexed with range()
            return [sum((amp[n]*math.exp(-ene[n]*x)
                         for n in range(self._base.nexp)), 0.)
                    for x in self.x]

        def dYd(self):
            """Return first derivatives summed over ``self.x``.

            The result has one list of length ``nexp`` per parameter name;
            entry n of 'A' holds sum_x exp(-E[n] x) and entry n of 'E' holds
            sum_x -x A[n] exp(-E[n] x).  The other names stay zero.
            """
            nexp = self._base.nexp
            amp = self._base.par['A']
            ene = self._base.par['E']
            fy = {k: [0. for i in range(nexp)] for k in self._base.par}
            for x in self.x:
                for n in range(nexp):
                    decay = math.exp(-ene[n]*x)
                    fy['A'][n] += decay
                    fy['E'][n] += -x*amp[n]*decay
            return fy

        def d2Yd(self):
            """Return the container for second derivatives.

            Besides one zero-initialised list per parameter name, a list is
            created for every ordered pair of distinct names (key = the two
            names concatenated).
            NOTE(review): as in the original, only the 'A' and 'E' entries
            are filled, and with the same first-derivative sums as ``dYd``;
            the pair entries stay zero - the second-derivative formulas
            still look unfinished, confirm the intended content.
            """
            nexp = self._base.nexp
            amp = self._base.par['A']
            ene = self._base.par['E']
            names = list(self._base.par)
            fy = {}
            for k in names:
                fy[k] = [0. for i in range(nexp)]
                for l in names:
                    if l != k:
                        fy[k+l] = [0. for i in range(nexp)]
            for x in self.x:
                for n in range(nexp):
                    decay = math.exp(-ene[n]*x)
                    fy['A'][n] += decay
                    fy['E'][n] += -x*amp[n]*decay
            return fy
            
    
    
class ML_PostAnalyze_PDF:
    """Gather the text reports of the ML PDF analysis into one pandas table.

    Entries read from the ``params`` mapping:
        'tdset'  (required) identifier of the data set; also used as the
                 file-name prefix when a directory is scanned.
        'outdir' (required) stored in ``self.adir``.
        'indir'  directory whose top level is scanned for report files
                 named ``<tdset><any character><digits>...``.
        'infile' file name or list of file names; only used when 'indir'
                 is absent.
        'keys'   result categories, default ('p', 'z', 't').
        'plot' and 'plot.xxx'
                 plot feature dictionaries
                 ({'type', 'xlabel', 'ylabel', 'err', 'colors', ...}),
                 copied into the list ``self.plot``.

    After construction ``self.data_table`` holds the parsed content of all
    report files (see setup_datatable).

    Raises:
        KeyError: if 'tdset' or 'outdir' is missing, or if neither 'indir'
            nor 'infile' is given.
    """
    def __init__(self, params):
        self.headline = 'PDF data analysis with ML:'
        self.tagX = 'Training'
        self.tagY = 'Target'
        self.tagAnly = 'Analysis result:'
        # NOTE(review): 'ndata.lb' does not match the 'lbl' entry used in the
        # data table; left unchanged because nothing visible reads keylist.
        self.keylist = ('ndata.tr', 'ndata.bc', 'ndata.lb', 'ndata.unlbl', 'score', 'p', 'z', 't')
        # Categorize results according to keys: 'p', 'z', 't', etc.
        try:
            self.keys = params['keys']
        except KeyError:
            # intrinsic keys
            self.keys = ('p', 'z', 't')
        self.ml = params['tdset']
        # Plot features: the default set 'plot' first, then every 'plot.xxx'.
        self.plot = []
        try:
            self.plot.append(dict(params['plot']))
        except (KeyError, TypeError, ValueError):
            print("Warning: no default feature as 'plot'\n")
        for pk in params:
            if isinstance(pk, str) and pk.startswith('plot.'):
                self.plot.append(dict(params[pk]))
        try:
            self.ddir = params['indir']
            self.dfile = []
        except KeyError:
            self.ddir = None
            self.dfile = params['infile']
        self.adir = params['outdir']
        if self.ddir is not None:
            # Only the files directly inside the directory are considered;
            # the default covers a missing or unreadable directory.
            _, _, filenames = next(os.walk(self.ddir), (self.ddir, [], []))
            # The character between the identifier and the digits is not
            # restricted (the pattern uses '.', as before).
            wanted = re.compile('^' + self.ml + r'.\d+')
            # Sorted so that the table does not depend on the listing order.
            for f in sorted(filenames):
                if wanted.search(f) is not None:
                    self.dfile.append(f)
        if isinstance(self.dfile, str):
            self.dfile = [self.dfile]
        self.setup_datatable()

    def setup_datatable(self):
        """Parse every file in ``self.dfile`` and store the result.

        ``self.data_table`` becomes a DataFrame built from a dictionary of
        dictionaries of lists: the columns are 'p', 'z', 't', 'ndata',
        'score', 'meanPrdt', 'stdPrdt', 'meanObsv' and 'stdObsv'; the rows
        are the inner labels ('x'/'y', 'tr'/'bc'/'lbl'/'unlbl', 'R'/'I');
        each filled cell is the list of values collected for that pair.
        The 'ndata' values are integers, everything else is kept as the
        text token found in the report.
        """
        data = {
            'p': {'x': [], 'y': []},
            'z': {'x': [], 'y': []},
            't': {'x': [], 'y': []},
            'ndata': {'tr': [], 'bc': [], 'lbl': [], 'unlbl': []},
            'score': {'R': [], 'I': []},
            'meanPrdt': {'R': [], 'I': []},
            'stdPrdt': {'R': [], 'I': []},
            'meanObsv': {'R': [], 'I': []},
            'stdObsv': {'R': [], 'I': []},
        }
        for file in self.dfile:
            # Without an input directory the entries of dfile are used as
            # given ('infile' case).
            path = file if self.ddir is None else os.path.join(self.ddir, file)
            with open(path, 'r') as pf:
                self._read_report(pf, data)
        self.data_table = pds.DataFrame(data=data)
        return

    @staticmethod
    def _listed(line, name):
        """Return the items of the bracketed list following ``name= ``."""
        found = re.search(name + r'= \[(.*?)\]', line)
        if found is None:
            return []
        return [item.strip() for item in found.group(1).split(',') if item.strip()]

    @staticmethod
    def _single(line, name, token):
        """Return every ``token`` match that directly follows ``name= ``."""
        return re.findall(r'(?<=' + name + r'= )(?:' + token + r')', line)

    def _read_report(self, lines, data):
        """Append the content of one report (an iterable of lines) to ``data``.

        NOTE(review): the layout is inferred from the tags and patterns this
        parser looks for - confirm against the code that writes the reports:
          * a line equal to ``self.headline`` opens a block;
          * lines containing 'data' and 'Target' carry ``p= <int>`` /
            ``z= <int|None>`` or ``t= <int>``; lines containing 'data' and
            'Training' carry the bracketed lists ``p= [...]``, ``z= [...]``
            or ``t= [...]``;
          * a line containing ``self.tagAnly`` carries the three counts
            ``<tr>/<bc>/<unlbl>``;
          * afterwards a 'score' line carries ``[<real> <imag>]`` and each
            ``Y<id> <mean> +- <std> <sep> <mean> +- <std>`` line carries the
            predicted and the observed value; an even id is stored as the
            real part, an odd id as the imaginary part.
        Lines that do not fit are skipped.
        """
        readpz = False
        readt = False
        readd = False
        dflag = 0   # number of result lines still expected
        for line in lines:
            if readd:
                if 'score' in line:
                    found = re.search(r'\[(.*?)\]', line)
                    pair = found.group(1).split() if found else []
                    if len(pair) >= 2:
                        data['score']['R'].append(pair[0])
                        data['score']['I'].append(pair[1])
                if '+-' in line and dflag > 0:
                    found = re.search(r'Y\d+ (.*)', line)
                    yy = found.group(1).split() if found else []
                    if len(yy) >= 7:
                        # The id comes from the first word of the line; the
                        # countdown is the fallback when it has no digits.
                        label = re.search(r'\d+', line.split()[0])
                        yid = int(label.group(0)) if label else dflag
                        part = 'R' if yid % 2 == 0 else 'I'
                        data['meanPrdt'][part].append(yy[0])
                        data['stdPrdt'][part].append(yy[2])
                        data['meanObsv'][part].append(yy[4])
                        data['stdObsv'][part].append(yy[6])
                        dflag -= 1
                if dflag == 0:
                    readd = False
                    readt = True
            if readt and 'data' in line:
                if self.tagY in line:
                    ll = self._single(line, 't', r'\d+')
                    dflag *= len(ll)
                    data['t']['y'].extend(ll)
                elif self.tagX in line:
                    data['t']['x'].extend(self._listed(line, 't'))
                readt = False
            if readpz and 'data' in line:
                if self.tagY in line:
                    ll = self._single(line, 'p', r'-?\d+')
                    dflag *= len(ll)
                    data['p']['y'].extend(ll)
                    data['z']['y'].extend(self._single(line, 'z', r'-?\d+|None'))
                elif self.tagX in line:
                    data['p']['x'].extend(self._listed(line, 'p'))
                    data['z']['x'].extend(self._listed(line, 'z'))
                    readpz = False
                    readt = True
            # Lines read from a file keep their newline, hence the strip().
            if line.strip() == self.headline:
                readpz = True
                readt = False
                dflag = 2 # Real & Imag
            if self.tagAnly in line:
                readd = True
                found = re.search(r'\d+/\d+/\d+', line)
                if found is not None:
                    ntr, nbc, nunlbl = (int(v) for v in found.group(0).split('/'))
                    data['ndata']['tr'].append(ntr)
                    data['ndata']['bc'].append(nbc)
                    data['ndata']['unlbl'].append(nunlbl)
                    data['ndata']['lbl'].append(ntr + nbc)

    def print_data(self, keys):
        """Placeholder: accepts ``keys`` and does nothing yet."""
        return


                                                