In [None]:
run statistic.ipynb

In [None]:
run io.ipynb

In [None]:
run data_anal.ipynb

In [None]:
# Wrapper of ML kernels for hadron correlators on the lattice
import sys
import os
import datetime
import numpy as np
import yaml as yl
import sklearn.ensemble as sle
import sklearn.tree as slt
import matplotlib.pyplot as plot
import math
from itertools import product



# Algorithm-dependent parameters
# Fallback tolerance for gradient boosting when 'lntol' is not configured
TOL = 1.0e-4
# Fallback min_samples_split for decision trees
MIN_SSPLIT = 2
# Minimum number of fits (default value of ML_Regression.nfit)
MIN_FIT = 1
# Minimum number of training data (checked against a scalar 'ntrn')
MIN_TRAIN = 10
# Minimum number of bias correction (bc) data (checked against a scalar 'nbc')
MIN_BC = 10
# Minimum number of unlabeled data (not referenced in this section)
MIN_PREDICT = 10
# Default of ML_Regression.print: print the accumulated results after each fit
PRINT = False
# When True, run() calls show_fit() after each fit series
SHOW = True

# Class that sets up and runs the ML fits
#   pfile: YAML parameter file (passed to the constructor)
class ML_Regression:
    def __init__(self, pfile):
        """Read the run parameters and set up the data pool for the ML fits.

        pfile: YAML parameter source passed to the YAML parser (``main``
            hands in an open file object).

        Keys read here -- required: 'pdir', 'mlml', 'momentum_Y', 'z_Y',
        'momentum_X', 'z_X', 'dts_X', 'ntrn', 'nbc'; optional: 'tdset',
        'nfit', 'tdpar', 'odir', 'analysis', 'effmass', 'ratio', 'ts_Y',
        'nsrc'.  The whole mapping is also passed on to Data_IO and, when
        'analysis' is set, to ML_Analyze_PDF.

        Raises KeyError for a missing required key, TypeError when the
        parameter source does not hold a mapping, and AssertionError when
        'ntrn' / 'nbc' fail their size checks.
        """
        # safe_load: the parameter file is plain data, and yaml.load() without
        # an explicit Loader is rejected by current PyYAML releases.
        params = yl.safe_load(pfile)
        if not isinstance(params, dict):
            raise TypeError('parameter file must contain a YAML mapping')

        # model list: GradientBoost, DecisionTree, RandomForest, DeepLearning(using Keras)
        self.mllist = ['GB', 'DT', 'RF', 'DL']
        # pick training data set: Jackknife, Bootstrap, Random
        self.tdlist = ['JK', 'BS', 'RM']
        self.nfit = MIN_FIT
        self.print = PRINT
        self.pre_anly = True
        self.fitter_DL = None

        # Training subset identifier: an entry of self.tdlist, or an int
        # offset of the first labeled row (default 0).
        self.tdset = params.get('tdset', 0)
        if self.tdset in self.tdlist:
            self.nfit = params.get('nfit', MIN_FIT)
            if self.tdset == 'JK':
                # Jackknife start index; setup_data() increments tdpar
                # before using it, hence the -1.
                self.itdpar = params.get('tdpar', 0)
                self.tdpar = self.itdpar - 1
                self.jk = False
            elif self.tdset == 'BS':
                # one extra fit: the first fit uses the unresampled split
                self.nfit += 1
                self.bs = False
        elif not isinstance(self.tdset, int):
            print('Warning: Unknown training data subset identifier! Set to 0\n')
            self.tdset = 0

        if 'odir' in params:
            self.odir = params['odir']
            self.osave = True
        else:
            self.osave = False
        # The result and plot files used by this class are the ones held by
        # self.database (database.ofile / database.pltfile).
        self.ofile = None
        self.pltfile = None

        self.pdir = params['pdir']
        self.mlml = params['mlml']
        # Model hyper-parameters are read from the file <pdir>/<mlml>.
        with open(self.pdir+'/'+self.mlml, 'r') as pdfile:
            self.pf = yl.safe_load(pdfile)

        self.anal = params.get('analysis', False)
        # Defined unconditionally so later stages can test these flags even
        # when no post-analysis was requested.
        self.save_tmpfits = False
        self.effmass = False
        self.ratio = False
        if self.anal:
            self.post_anal = ML_Analyze_PDF(params)
            self.save_tmpfits = True
            self.effmass = params.get('effmass', False)
            self.ratio = params.get('ratio', False)

        # list of Y momentum
        self.prY = params['momentum_Y']
        # list of Y z's
        self.zrY = params['z_Y']
        self.prX = params['momentum_X']
        self.zrX = params['z_X']
        # list of Y time slices; [None] when absent or empty
        self.tr = params.get('ts_Y')
        if self.tr is None:
            self.tr = [None]
        # list of X & Y time differences
        self.dtr = params['dts_X']
        # number of sources per configuration
        self.tfold = params.get('nsrc', 0)

        def sample_counts(value, minimum):
            # A scalar applies to every momentum entry; a sequence must
            # provide one count per entry of momentum_X.
            if isinstance(value, (int, str)):
                assert(int(value) >= minimum)
                return [int(value)]
            assert(len(value) == len(self.prX))
            return value

        self.ntrn = sample_counts(params['ntrn'], MIN_TRAIN)
        self.nbc = sample_counts(params['nbc'], MIN_BC)
        if self.tdset == 'JK':
            # Jackknife runs (training size + 1) fits.  The training size of
            # a given fit series is only chosen in inifit(), so start from
            # the first configured value.
            self.nfit = self.ntrn[0] + 1

        # read in data stored in data pool 'database'
        # params[]:
        #     format (data format, 'raw','pdf');
        #     binsize (data bin);
        #     ddir (data files directory)
        #     dfile.x (X data filename);
        #     dfile.y (Y data filename);
        self.database = Data_IO(params, self.tdset)
        self.data = None
        self.ndata = 0
        self.model = None
        self.fitnow = None
        self.isset = False
        self.errscale = None
        self.date = datetime.datetime.today().strftime('%m%d%Y')
        
    # Initialize fits
    # Select correlator characteristics, p (momentum), z (Wilson link length), t (sink time slice), etc.,
    #        data sets, and assign the fit model
    def inifit(self, fdparX, fdparY, indx):
        """Load the data for one fit series and build a fresh model.

        fdparX, fdparY: selection parameters passed to
            database.select_data() for the 'X' and 'Y' data (run() passes
            [momenta, z's, time slices, time differences]).
        indx: index into the ntrn / nbc lists (the first entry is used when
            the index is out of range); also passed to
            database.dfile_mkheader() when output is saved.
        """
        self.ftag = None
        self.dtag = None
        self.p = None
        self.c = None
        self.y = None
        # del_data() must be *called*: it releases the previous data set and
        # always returns True (testing the bound method released nothing).
        if self.del_data():
            self.data = {}
            _, self.ftag, self.data['Y'], self.dmeanY, self.dstdY = self.database.select_data(fdparY, 'Y')
            self.dtX, self.dtag, self.data['X'], self.dmeanX, self.dstdX = self.database.select_data(fdparX, 'X')
        self.ndata = self.data['X'].shape[0]
        self.nx = self.data['X'].shape[1]
        self.ny = self.data['Y'].shape[1]
        print("Shape of X / Y : {:} / {:}".format(self.nx, self.ny))
        try:
            self.trn = self.ntrn[indx]
        except (IndexError, KeyError, TypeError):
            self.trn = self.ntrn[0]
        try:
            self.bc = self.nbc[indx]
        except (IndexError, KeyError, TypeError):
            self.bc = self.nbc[0]
        self.lbl = self.trn + self.bc
        self.unlbl = self.ndata - self.lbl
        # ceil(ndata / lbl) in integer arithmetic
        self.bin = (self.ndata + self.lbl - 1) // self.lbl
        self.data_reorder()
        if self.tdset == 'JK' or self.tdset == 'RM':
            # these modes keep one fixed unlabeled set for the whole series
            self.N = self.data['X'][self.lbl:]
            self.P = self.data['Y'][self.lbl:]
        # Restart the resampling state: every series begins with the plain
        # (unresampled) split, which analyze() takes as the reference fit.
        if self.tdset == 'JK':
            self.nfit = self.trn + 1
            self.jk = False
            self.tdpar = getattr(self, 'itdpar', 0) - 1
        elif self.tdset == 'BS':
            self.bs = False
        if self.model is not None:
            del self.model
        self.model = self.make_model()
        self.isset = False
        self.errscale = None
        self.fitnow = 0
        if self.osave:
            self.database.dfile_mkheader(indx)
        if self.anal:
            print("Adding table")
            self.post_anal.add_table(self.database.NT, self.ndata, self.trn, self.bc, pztY=self.ftag, pztX=self.dtag)
            print("Finishing adding table")
            
    def isfit(self):
        return self.fitnow < self.nfit
        
    # Reorder data (scatter)
    def data_reorder(self):
        tmpx = self.data['X']
        tmpy = self.data['Y']
        self.data['X'] = []
        self.data['Y'] = []
        for i in range(self.bin):
            self.data['X'].extend(tmpx[i::self.bin])
            self.data['Y'].extend(tmpy[i::self.bin])
        del tmpx
        del tmpy

    # build up the fit model
    def make_model(self):
        assert(self.mlml in self.mllist) 
        if self.mlml == 'GB':
            return self.make_model_GB()
        elif self.mlml == 'DT':
            return self.make_model_DT()
        elif self.mlml == 'RF':
            return self.make_model_RF()
        else:
            return self.make_model_DL()
            
    # Gradient Boosting
    def make_model_GB(self):
        """Build one GradientBoostingRegressor per Y component.

        Reads self.pf: 'nestimator' and 'lnrate' are required (KeyError when
        missing); 'lossfunc', 'max_depth' and 'lntol' are optional and
        default to 'ls', 3 and TOL.

        Returns a list of self.ny unfitted regressors.
        """
        self.GB_nestimator = self.pf['nestimator']
        self.GB_lnrate = self.pf['lnrate']
        # NOTE(review): recent scikit-learn releases name the least-squares
        # loss 'squared_error'; set 'lossfunc' in the parameter file if the
        # installed version no longer accepts 'ls'.
        self.GB_lossfunc = self.pf.get('lossfunc', 'ls')
        # Training fraction; recorded only -- the regressors below are built
        # with subsample=1.0.
        self.GB_ssample = float(self.trn) / float(self.ndata)
        self.GB_mdth = self.pf.get('max_depth', 3)
        self.GB_tol = self.pf.get('lntol', TOL)
        return [sle.GradientBoostingRegressor(loss=self.GB_lossfunc,
                                              learning_rate=self.GB_lnrate,
                                              n_estimators=self.GB_nestimator,
                                              max_depth=self.GB_mdth,
                                              subsample=1.0,
                                              tol=self.GB_tol)
                for _ in range(self.ny)]
        
    # Decision Tree
    def make_model_DT(self):
        """Build one DecisionTreeRegressor per Y component.

        Reads the optional self.pf keys 'criterion' (default 'mse'),
        'min_samples_split' (default MIN_SSPLIT) and 'max_depth'.  As
        before, 'max_depth' is only honoured when 'min_samples_split' is
        not configured.

        Returns a list of self.ny unfitted regressors.
        """
        # NOTE(review): recent scikit-learn releases name this criterion
        # 'squared_error'; set 'criterion' in the parameter file if the
        # installed version no longer accepts 'mse'.
        self.DT_ctr = self.pf.get('criterion', 'mse')
        if 'min_samples_split' in self.pf:
            self.DT_mspt = self.pf['min_samples_split']
            self.DT_mdth = None
        else:
            # min_samples_split must be a number, never None, so fall back
            # to the module default even when only max_depth is given.
            self.DT_mspt = MIN_SSPLIT
            self.DT_mdth = self.pf.get('max_depth')
        return [slt.DecisionTreeRegressor(criterion=self.DT_ctr,
                                          max_depth=self.DT_mdth,
                                          min_samples_split=self.DT_mspt)
                for _ in range(self.ny)]
            
    # Random Forest
    def make_model_RF(self):
        # FIXME
        return None
    
    def make_model_DL(self):
        """Create the DL_Regression fitter, keep it in self.fitter_DL and return its model."""
        fitter = DL_Regression(self.pf, self.ny)
        self.fitter_DL = fitter
        return fitter.model
    
    def make_oheader(self, parY, parX, indx):
        """Ask the data pool to write its output header.

        parY, parX and indx are accepted but not used; dfile_mkheader() is
        called without arguments.
        """
        self.database.dfile_mkheader()
    
    # set up data: Train [X,Y]; BC [B,C]; Unlabeled [N,P]
    def setup_data(self):
        self.fitnow += 1
        if self.save_tmpfits: 
            self.post_anal.append_table()
        if self.isset:
            return True
        if isinstance(self.tdset, int):
            try:
                self.X = self.data['X'][:,self.tdset:self.tdset+self.trn]
                self.Y = self.data['Y'][:,self.tdset:self.tdset+self.trn]
                self.B = self.data['X'][self.tdset+self.trn:self.tdset+self.trn+self.bc]
                self.C = self.data['Y'][self.tdset+self.trn:self.tdset+self.trn+self.bc]
            except:
                print('Labeled data subset out of index range!\n')
                sys.exit()
            self.N = list(self.data['X'][self.tdset+self.lbl:])
            self.P = list(self.data['Y'][self.tdset+self.lbl:])
            if self.tdset > 0:
                self.N.append(self.data['X'][:self.tdset])
                self.P.append(self.data['Y'][:self.tdset])
            self.tdset+=1
        else:
            if self.tdset == 'JK':
                if self.jk: 
                    self.X = []
                    self.Y = []
                    self.tdpar+=1
                    if self.tdpar < 0 or self.tdpar > self.trn:
                        return self.isset
                    if self.tdpar > 0:
                        self.X.extend(self.data['X'].slice(None,self.tdpar))
                        self.Y.extend(self.data['Y'].slice(None,self.tdpar))
                    self.X.append(self.data['X'].slice(self.tdpar+1,self.trn+1))
                    self.Y.append(self.data['Y'].slice(self.tdpar+1,self.trn+1))
                    self.B = []
                    self.C = []
                    self.B.extend(self.data['X'][self.trn+1:self.lbl])
                    self.B.extend(self.data['X'][self.tdpar])
                    self.C.extend(self.data['Y'][self.trn+1:self.lbl])
                    self.C.extend(self.data['Y'][self.tdpar])
                else:
                    self.X = self.data['X'][:self.trn]
                    self.Y = self.data['Y'][:self.trn]
                    self.B = self.data['X'][self.trn:self.lbl]
                    self.C = self.data['Y'][self.trn:self.lbl]
                    self.N = self.data['X'][self.lbl:]
                    self.P = self.data['Y'][self.lbl:]
                    self.jk = True
            elif self.tdset == 'BS':
                if self.bs:
                    self.X = []
                    self.Y = []
                    self.B = []
                    self.C = []
                    self.N = []
                    self.P = []
                    for i in range(self.trn):
                        r = np.random.randint(0,self.lbl)
                        self.X.append(self.data['X'][r])
                        self.Y.append(self.data['Y'][r])
                    for i in range(self.bc):
                        r = np.random.randint(0,self.lbl)
                        self.B.append(self.data['X'][r])
                        self.C.append(self.data['Y'][r])
                    for i in range(self.unlbl):
                        r = np.random.randint(self.lbl,self.ndata)
                        self.N.append(self.data['X'][r])
                        self.P.append(self.data['Y'][r])  
                else:
                    self.X = self.data['X'][:self.trn]
                    self.Y = self.data['Y'][:self.trn]
                    self.B = self.data['X'][self.trn:self.lbl]
                    self.C = self.data['Y'][self.trn:self.lbl]
                    self.N = self.data['X'][self.lbl:]
                    self.P = self.data['Y'][self.lbl:]
                    self.bs = True
            else:
                pm = np.random.permutation(self.lbl)
                self.X = [self.data['X'][pm[i]] for i in range(self.trn)]
                self.B = [self.data['X'][pm[i]] for i in range(self.trn,self.lbl)]
                self.Y = [self.data['Y'][pm[i]] for i in range(self.trn)]
                self.C = [self.data['Y'][pm[i]] for i in range(self.trn,self.lbl)]
        self.isset = True
        return self.isset
        
    def del_data(self):
        if self.data == None:
            return True
        del self.data['X'][:]
        del self.data['Y'][:]
        self.data = None
        del self.dmeanX
        del self.dmeanY
        del self.dstdY
        del self.dstdX
        self.dmeanX = None
        self.dmeanY = None
        self.dstdY = None
        self.dstdX = None
        return True
    
    # clean up data
    def cleanup_data(self):
        if self.isset is False:
            return 
        if self.tdset in self.tdlist:
            del self.X[:]
            del self.Y[:]
            del self.B[:]
            del self.C[:]
        if self.tdset == 'BS' or isinstance(self.tdset, int):
            del self.N[:]
            del self.P[:]
        self.isset = False
        return
    
    # Plot data statistics: histograms, density, correlations
    def data_statistic(self, data, tag = None):
        """Print summary statistics of a data set and draw its diagnostic plots.

        data: data handed to Dataframe together with tag.
        tag: optional labels forwarded to Dataframe.

        Prints describe() and the block size / autocorrelation returned by
        autocorr(0.1), then calls hist, density and covplot for the column
        range starting at self.nx.
        """
        frame = Dataframe(data, tag=tag)
        print(frame.data.describe())
        blocksize, autocorr = frame.autocorr(0.1)
        print('Data blocksize {:d} with autocorrelation {:f} \n'.format(blocksize, autocorr))
        from_nx = (self.nx, None)
        frame.hist(from_nx)
        frame.density(from_nx)
        frame.covplot(prange=((0, self.nx), from_nx))
        return
        

    # Prediction bias Correction
    def bias_crrt(self):
        """Predict with the fitted models and apply the bias correction.

        Stores the model predictions on the bias-correction, unlabeled and
        training inputs in self.c, self.p and self.y.  For each Y component
        the corrected mean is

            mean(prediction on unlabeled) + mean(C) - mean(prediction on B).

        Its variance estimate is built from the covariance of predictions
        and observations over the full data set.  On the first call of a fit
        series the error scale is stored in self.errscale, printed and, when
        output is saved, appended to database.ofile.  When temporary fits
        are saved the data are also handed to the post-analysis object.

        Returns (obc, err): arrays of length ny with the corrected means and
        the square roots of their variance estimates.
        """
        def predict(rows):
            # stack the per-component prediction vectors, component by component
            return np.array([self.model[i].predict(
                self.database.select_T(self.dtag, self.dtX, self.ftag[i], rows).tolist())
                for i in range(self.ny)])

        self.c = predict(self.B)
        self.p = predict(self.N)
        # Convert by value: wrapping the raw buffer misreads non-float64 data.
        C_arr = np.array(self.C, dtype=float).reshape(self.bc, self.ny).T
        self.y = predict(self.X)
        # Predictions for the whole data set
        predata = predict(self.data['X'])
        print(predata.shape)
        # rows 0..ny-1: predictions, rows ny..2ny-1: observations
        omat = np.vstack((predata, np.array(self.data['Y'], dtype=float).T))
        # generate covariance matrix, predictions first
        ocov = np.cov(omat)
        print('shape of OCOV: {:}'.format(ocov.shape))
        # Bias correction equation
        obc = np.array([self.p[i].mean() + C_arr[i].mean() - self.c[i].mean() for i in range(self.ny)])
        for o in obc:
            print(o)
        # variance ratio prediction / observation
        s2 = np.array([ocov[i,i]/ocov[i+self.ny,i+self.ny] for i in range(self.ny)])
        # squared correlation between prediction and observation
        r = np.array([(ocov[i,i+self.ny]**2/(ocov[i,i]*ocov[i+self.ny,i+self.ny])) for i in range(self.ny)])
        # variance of the observations
        osigma = np.array([ocov[i+self.ny,i+self.ny] for i in range(self.ny)])
        srs2r = np.array([math.sqrt(s2[i]*r[i]) for i in range(self.ny)])
        scale = s2*self.lbl/self.unlbl + (1.+s2-2.*srs2r)*self.lbl/self.bc
        bcvar = osigma/self.lbl*scale
        # the analysis flags only exist when post-analysis was configured
        if getattr(self, 'save_tmpfits', False):
            self.post_anal.add_data(tag=self.ftag, dscale=self.dmeanY,
                                    dtrn=np.array(self.Y).T, dbc=np.array(self.C).T,
                                    dunlbl=np.array(self.P).T, pred=False, overwrite=True)
            # NOTE(review): pred=None here, while the other calls use
            # True/False -- confirm against ML_Analyze_PDF.add_data.
            self.post_anal.add_data(tag=self.ftag, dscale=self.dmeanY,
                                    dtrn=self.y, dbc=self.c,
                                    dunlbl=self.p, pred=None, overwrite=True)
            if getattr(self, 'effmass', False):
                self.post_anal.effmass(tag=self.ftag, pred=False)
                self.post_anal.effmass(tag=self.ftag, pred=True)
            if getattr(self, 'ratio', False):
                self.post_anal.add_data(tag=self.dtag, dscale=self.dmeanX, dtrn=np.array(self.X).T,
                                        dbc=np.array(self.B).T, dunlbl=np.array(self.N).T,
                                        pred=False, NT=self.database.NT, is2pt=True)
                self.post_anal.ratio3ptn2pt(tag=self.ftag, pred=False)
                self.post_anal.ratio3ptn2pt(tag=self.ftag, pred=True)
        if self.errscale is None:
            self.errscale = scale/self.lbl
            self.errscale = np.array([math.sqrt(self.errscale[i]) for i in range(self.ny)])
            corr = [math.sqrt(r[i]) for i in range(self.ny)]
            print("Correlation coefficient between Pred. & Obsd. {:}".format(corr))
            print("error scale: {:}".format(self.errscale))
            if self.osave:
                with open(self.database.ofile, 'a+') as pf:
                    pf.write("Correlation coefficient between Pred. & Obsd. {:}\n".format(corr))
                    pf.write("error scale: {:}\n".format(self.errscale))
        return (obc, np.array([math.sqrt(bcvar[i]) for i in range(self.ny)]))
    
    def imp_prdt(self, bc):
        return bc
        
    # Post-fits analysis
    def analyze(self, bc, pd):
        """Combine the fits of one series and report the result.

        bc: list of bias_crrt() results; not used here.
        pd: list with one (mean, error) pair per fit, each entry of length
            ny (fit() passes the imp_prdt() outputs).

        Stores (score, mean, std) in self.res, prints the summary, appends
        it to database.ofile when output is saved, and merges the
        post-analysis tables when analysis is enabled.
        """
        scr = np.array([self.model[i].score(
            self.database.select_T(self.dtag, self.dtX, self.ftag[i], self.data['X']).tolist(),
            np.array(self.data['Y']).T.tolist()[i]) for i in range(self.ny)])
        if self.tdset == 'BS' or self.tdset == 'JK':
            # pd[0] is the reference fit on the plain split; the remaining
            # entries are the resampled fits.  Keep their means only.
            oimp = np.array([fit[0] for fit in pd[1:]], dtype=float).reshape(-1, self.ny).T
            omean = np.array([oimp[i].mean() for i in range(self.ny)])
            ostd = np.array([oimp[i].std() for i in range(self.ny)])
            if self.tdset == 'BS':
                print(pd[0][0])
                obc = 2.*pd[0][0]-omean
            else:
                obc = self.lbl*pd[0][0]-(self.lbl-1)*omean
                # NOTE(review): len(oimp) is the number of Y components, not
                # the number of jackknife samples -- confirm this factor.
                ostd *= (len(oimp)-1)**2
        else:
            # One column per fit.  (pd[:][0] is just pd[0], which mixed the
            # first fit's mean with its error.)
            oimp = np.array([fit[0] for fit in pd], dtype=float).reshape(-1, self.ny).T
            obc = np.array([oimp[i].mean() for i in range(self.ny)])
            if len(pd) > 1:
                ostd = np.array([oimp[i].std() for i in range(self.ny)])
            else:
                # A single fit has no spread; report its own bias-corrected
                # error estimate instead.
                ostd = np.array(pd[0][1], dtype=float)
        self.res = (scr, obc, ostd)
        oy = np.array(self.data['Y']).T
        oymean = np.array([oy[i].mean() for i in range(self.ny)]) * self.dmeanY
        oystd = np.array([oy[i].std() for i in range(self.ny)]) * self.dstdY/math.sqrt(self.unlbl)

        # The same report goes to stdout and, optionally, to the output file.
        header = ("\n Analysis result: {:d} * {:d} data {:d}/{:d}/{:d} tr/bc/unlbl; \n "
                  "            \t {:} (re)sampling {:d} fits \n "
                  "            \t score = {:}; \n"
                  "            \t Y's \t Predicted Mean / Observed: \n").format(
                      self.nx, self.ndata, self.trn, self.bc, self.unlbl,
                      self.tdset, self.nfit,
                      scr)
        IR = ['R', 'I']
        rows = ["\t Y{:d}.{:s} \t {:8e} +- {:8e} / {:8e} +- {:8e} ".format(
                    i // 2, IR[i % 2],
                    obc[i]*self.dmeanY[i], ostd[i]*self.dmeanY[i],
                    oymean[i], oystd[i])
                for i in range(self.ny)]
        print(header)
        for row in rows:
            print(row)
        print('\n\n')
        if self.osave:
            with open(self.database.ofile, 'a+') as pf:
                pf.write(header)
                for row in rows:
                    pf.write(row + "\n")
                if self.anal:
                    if self.effmass:
                        self.post_anal.print_effmass(self.tdset, pf, ppf=self.database.pltfile)
                    if self.ratio:
                        self.post_anal.print_ratio3ptn2pt(self.tdset, pf, ppf=self.database.pltfile)
                pf.write("\n\n\n")
        if self.anal:
            print("Merge tables")
            self.post_anal.merge_table()
            print("Finished merging tables")
    
    def print_fit(self, pd):
        for r in pd:
            print(r)
    
    # Do fits
    def fit(self):
        res = []
        bc = []
        pd = []
        n=0
        while self.isfit(): #for i in range(self.nfit):
            if self.setup_data():
                # Print data statistics
                if False:#n == 1:
                    print("Training data statistic\n")
                    data = {}
                    data['X'] = np.array(self.X).reshape(self.trn, self.nx)
                    data['Y'] = np.array(self.Y).reshape(self.trn, self.ny)
                    self.data_statistic(data, {'X': self.dtag, 'Y': self.ftag})
                    print("Bias correction data statistic\n")
                    data['X'] = np.array(self.B).reshape(self.bc, self.nx)
                    data['Y'] = np.array(self.C).reshape(self.bc, self.ny)
                    self.data_statistic(data, {'X': self.dtag, 'Y': self.ftag})
                    print("Unlabeled data statistic\n")
                    data['X'] = np.array(self.N).reshape(self.unlbl, self.nx)
                    data['Y'] = np.array(self.P).reshape(self.unlbl, self.ny)
                    self.data_statistic(data, {'X': self.dtag, 'Y': self.ftag})
                # yield to DL fitter
                if self.fitter_DL is not None:
                    res.append(self.fitter_DL.fit(self.X, self.Y, self.B, self.C))
                else:
                    print("ny = {:}".format(self.ny))
                    res.append(np.array([self.model[i].fit(
                        self.database.select_T(self.dtag, self.dtX, self.ftag[i], self.X).tolist(), 
                        np.array(self.Y).T.tolist()[i]) for i in range(self.ny)]))
                #print(res[-1])
                    bc.append(self.bias_crrt())
                    print(bc[-1])
                    pd.append(self.imp_prdt(bc[-1]))
                    print(pd[-1])
                if False:#n == 1:
                    print("Predicted data statistics\n")
                    data = {'X': np.array(self.N).reshape(self.unlbl,self.nx), 'Y': np.array(self.p).T}
                    self.data_statistic(data, {'X': self.dtag, 'Y': self.ftag})
                self.cleanup_data()
                if self.print:
                    self.print_fit(pd)
                n+=1
        if self.anal: 
            self.analyze(bc, pd)

    def show_fit(self):
        return
            
    # runs over loops of input parameters
    def run(self):
        tlist = (tuple, list)
        print("Running ML...\n")
        for pi in range(len(self.prY)):
            if isinstance(self.prY[pi], tlist):
                py = list(self.prY[pi])
            else:
                py = [self.prY[pi]]
            if isinstance(self.prX[pi], tlist):
                px = list(self.prX[pi])
            else:
                px = [self.prX[pi]]
            if isinstance(self.zrY[pi], tlist):
                zy = list(self.zrY[pi])
            else:
                zy = [self.zrY[pi]]
            if isinstance(self.zrX[pi], tlist):
                zx = list(self.zrX[pi])
            else:
                zx = [self.zrX[pi]]
            for ti in range(len(self.tr)):
                if isinstance(self.tr[ti], tlist):
                    ty = list(self.tr[ti])
                else:
                    ty = [self.tr[ti]]
                if None in ty:
                    ty = None
                try:
                    if isinstance(self.dtr[ti], tlist):
                        dtx = self.dtr[ti]
                    else:
                        dtx = self.dtr
                except:
                    dtx = self.dtr
                if False:
                    tx = []
                    for t,dt in product(ty, dtx):
                        tx.append(t+dt)
                    ttx = np.array(tx)
                    ttx.sort()
                    del tx
                    tx = []
                    tx.append(ttx[0])
                    for i in range(1, ttx.size):
                        if ttx[i] != tx[-1]:
                            tx.append(ttx[i])
                parX = [px, zx, ty, dtx]
                parY = [py, zy, ty, None]
                print(py)
                print(zy)
                self.inifit(parX, parY, pi)
                if False:#self.pre_anly:
                    self.data_statistic(self.data)  
                self.fit()
                if SHOW:
                    self.show_fit()
   

def main(pfile):
    """Build the ML_Regression driver from the parameter source, run it, return 0."""
    ML_Regression(pfile).run()
    return 0
        
if __name__ == '__main__':
    # The context manager closes the parameter file even if the run raises.
    with open('input_param_filename', 'r') as pfile:
        main(pfile)
