# In [None]:
#
# Visualization of data statistics
# 
import numpy as np
import pandas as pd
import matplotlib.pyplot as mplot
from matplotlib.figure import Figure 
#from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib import colors as mcolors
import seaborn as sbn

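# Data frame:
#   data: dictionary holding the X samples under key `xl` and the Y samples under key `yl`;
#         data[xl] and data[yl] are matrices of size #samples * #variables
#         (data[yl] may also be a flat sequence of scalars, i.e. a single Y variable)
#   tag:  optional dictionary of X & Y variable names, keyed by `xl` / `yl`;
#         the names are used as the column labels of the data in the output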
class Dataframe:
    """Table of X/Y samples with block averaging and quick-look plots.

    The samples are stored in ``self.data`` (a ``pandas.DataFrame``) with the
    X variables in the first ``nx`` columns followed by the ``ny`` Y variables.

    Attributes:
        nx:    number of X variables (columns).
        ny:    number of Y variables (columns).
        ndata: number of samples passed to the constructor.  It is NOT
               changed by ``blockdata``; use ``len(self.data)`` for the
               current number of rows.
        data:  the ``pandas.DataFrame`` holding X columns then Y columns.
        clist: list of all column labels (X labels followed by Y labels).
        block: block size given to the most recent ``blockdata`` call
               (1 initially).
    """

    # Number of columns drawn per figure by hist / density / boxplot.
    _COLS_PER_FIGURE = 10
    # Edge length (in variables) of one heat-map tile drawn by covplot.
    _TILE = 16

    def __init__(self, data, tag=None, xl='X', yl='Y'):
        """Build the table from a dictionary of samples.

        Args:
            data: mapping with the X samples under key ``xl`` and the Y
                  samples under key ``yl``.  ``data[xl]`` must be
                  matrix-like (#samples * #variables); ``data[yl]`` may be
                  matrix-like or a flat sequence of scalars (one Y variable).
            tag:  optional mapping with the column names under the same keys
                  ``xl`` / ``yl``.  A single name may be given as a plain
                  string.  When omitted, columns are named ``<xl>-<i>`` and
                  ``<yl>-<i>``.
            xl:   key of the X entries in ``data`` and ``tag``.
            yl:   key of the Y entries in ``data`` and ``tag``.

        Raises:
            ValueError: if the X data is empty or not matrix-like, if the Y
                data is neither 1-D nor 2-D, or if the number of names in
                ``tag`` does not match the number of variables.
        """
        x = np.array(data[xl])
        if x.ndim < 2 or x.shape[0] == 0:
            raise ValueError("data[{!r}] must be a non-empty matrix of size "
                             "#samples * #variables".format(xl))
        self.ndata = x.shape[0]
        self.nx = x.shape[1]
        # Raises ValueError if trailing dimensions do not collapse to (ndata, nx).
        x = x.reshape(self.ndata, self.nx)

        y = np.array(data[yl])
        if y.ndim == 1:
            # A flat sequence of scalars is a single Y variable.
            y = y.reshape(-1, 1)
        elif y.ndim != 2:
            raise ValueError("data[{!r}] must be a flat sequence or a matrix "
                             "of size #samples * #variables".format(yl))
        self.ny = y.shape[1]
        print("Dataframe: ndata / nx / ny is {:d} {:d} {:d}\n".format(self.ndata, self.nx, self.ny))

        if tag is None:
            xlist = ['{}-{}'.format(xl, i) for i in range(self.nx)]
            ylist = ['{}-{}'.format(yl, i) for i in range(self.ny)]
        else:
            xlist = self._as_labels(tag[xl])
            ylist = self._as_labels(tag[yl])
        if len(xlist) != self.nx or len(ylist) != self.ny:
            raise ValueError("tag provides {} X / {} Y names for {} X / {} Y "
                             "variables".format(len(xlist), len(ylist), self.nx, self.ny))

        self.data = pd.DataFrame(x, columns=xlist)
        for i, name in enumerate(ylist):
            # insert() refuses duplicate column names, so a Y name can never
            # silently overwrite an X column.
            self.data.insert(self.nx + i, name, y[:, i])
        self.clist = xlist + ylist
        self.block = 1

    @staticmethod
    def _as_labels(names):
        """Return ``names`` as a list, treating a plain string as ONE label."""
        return [names] if isinstance(names, str) else list(names)

    @staticmethod
    def _bounds(crange, ncols):
        """Return the (first, last) column positions described by ``crange``.

        A missing or unusable lower bound becomes 0, a missing or unusable
        upper bound becomes ``ncols``; the upper bound is clamped to ``ncols``.
        """
        try:
            first = int(crange[0])
        except (TypeError, IndexError, KeyError, ValueError):
            first = 0
        try:
            last = min(int(crange[1]), ncols)
        except (TypeError, IndexError, KeyError, ValueError):
            last = ncols
        return first, last

    @staticmethod
    def _pair(prange, index, default):
        """Return ``prange[index]`` as a 2-tuple with ``None`` -> ``default``.

        Falls back to ``(default, default)`` when ``prange`` is None or the
        entry does not hold at least two values.
        """
        try:
            pair = tuple(prange[index])
            first, second = pair[0], pair[1]
        except (TypeError, IndexError, KeyError):
            return (default, default)
        return (default if first is None else first,
                default if second is None else second)

    @staticmethod
    def _free_path(out, kind):
        """Return the first unused name among ``<out><kind>.pdf``,
        ``<out><kind>.1.pdf``, ``<out><kind>.2.pdf``, ...
        """
        import os  # local import: keeps this class independent of the file's import block

        suffix = '.pdf'
        n = 1
        while os.path.exists(out + kind + suffix):
            suffix = '.' + str(n) + '.pdf'
            n += 1
        return out + kind + suffix

    # Calculate autocorrelations
    #   Return the block size, the autocorrelation
    #   tol: maximum autocorrelation
    def autocorr(self, tol):
        """Find a lag at which no X column is autocorrelated above ``tol``.

        Starting at lag 1, the lag is increased (by a step that grows with
        the excess over ``tol``) until the largest positive autocorrelation
        among the X columns is no greater than ``tol``.  Negative and
        undefined (NaN) autocorrelations count as 0.  Y columns are not
        examined.

        Args:
            tol: maximum accepted autocorrelation, strictly between 0 and 1.

        Returns:
            ``(block, at)``: the lag found and the largest positive
            autocorrelation of the X columns at that lag.

        Raises:
            ValueError: if ``tol`` is not strictly between 0 and 1.
        """
        if not 1. > tol > 0.:
            raise ValueError("tol must satisfy 0 < tol < 1, got {!r}".format(tol))
        at = 1.0
        block = 0
        while at > tol:
            block = 1 if block == 0 else block + int((at - tol) * 5 + 1)
            lagged = [self.data.iloc[:, i].autocorr(block) for i in range(self.nx)]
            # 'a > 0.0' is False for NaN, so undefined values are skipped.
            at = max([0.0] + [a for a in lagged if a > 0.0])
        return block, at

    # Block data with block size 'blk'
    def blockdata(self, blk):
        """Replace the data by averages over consecutive blocks of ``blk`` rows.

        Rows left over after the last complete block are dropped.  Nothing is
        averaged when ``blk <= 1``.  ``self.block`` is set to ``blk`` in all
        cases; ``self.ndata`` is left untouched.

        Args:
            blk: integer block size.

        Raises:
            ValueError: if ``blk`` is larger than the current number of rows.
        """
        if blk > 1:
            nrows = len(self.data)  # current rows; differs from self.ndata after an earlier blocking
            nblocks = nrows // blk
            if nblocks == 0:
                raise ValueError("block size {} exceeds the {} available rows".format(blk, nrows))
            used = nblocks * blk
            block_id = np.arange(used) // blk
            means = self.data.iloc[:used].groupby(block_id).mean()
            self.data = pd.DataFrame(means.to_numpy(), columns=self.clist)
        self.block = blk

    # Plot histograms
    #   crange: range of variables using pure digital position location in the data table
    #   out: output filename, None for no store of the output
    def hist(self, crange=None, out=None):
        """Plot histograms of the columns, ten columns per figure.

        Args:
            crange: optional ``[first, last]`` column positions (``last``
                    exclusive); missing bounds default to all columns.
            out:    output file stem.  When given, every figure is written as
                    one page of ``<out>.hist.pdf`` (or ``<out>.hist.<n>.pdf``
                    if that name is already taken).  None disables saving.
        """
        first, last = self._bounds(crange, self.nx + self.ny)
        bins = max(1, len(self.data) // 4)  # at least one bin for tiny tables
        pdf = PdfPages(self._free_path(out, '.hist')) if out is not None else None
        try:
            for lo in range(first, last, self._COLS_PER_FIGURE):
                hi = min(lo + self._COLS_PER_FIGURE, last)
                axes = self.data.iloc[:, lo:hi].plot.hist(subplots=True, bins=bins,
                                                          title='Data(X Y) histgram')
                if pdf is not None:
                    # Save the figure pandas drew on, and do it before show():
                    # some backends discard the figure once it has been shown.
                    pdf.savefig(np.ravel(axes)[0].get_figure())
                mplot.show()
        finally:
            if pdf is not None:
                pdf.close()

    # Density plot
    #   data: pandas.DataFrame; None for using stored data
    def density(self, crange=None, out=None, data=None):
        """Plot kernel density estimates of the columns, ten per figure.

        Args:
            crange: optional ``[first, last]`` column positions (``last``
                    exclusive); missing bounds default to all columns.
            out:    output file stem.  When given, every figure is written as
                    one page of ``<out>.kde.pdf`` (or ``<out>.kde.<n>.pdf``
                    if that name is already taken).  None disables saving.
            data:   ``pandas.DataFrame`` to plot; None uses the stored data.
        """
        if data is None:
            data = self.data
        first, last = self._bounds(crange, len(data.columns))
        pdf = PdfPages(self._free_path(out, '.kde')) if out is not None else None
        try:
            for lo in range(first, last, self._COLS_PER_FIGURE):
                hi = min(lo + self._COLS_PER_FIGURE, last)
                ax = data.iloc[:, lo:hi].plot.density()
                if pdf is not None:
                    pdf.savefig(ax.get_figure())  # before show(), see hist()
                mplot.show()
        finally:
            if pdf is not None:
                pdf.close()

    # Box plot
    #   title for the plot title
    def boxplot(self, crange=None, data=None, title=None, out=None):
        """Draw box plots of the columns, ten columns per figure.

        Args:
            crange: optional ``[first, last]`` column positions (``last``
                    exclusive); missing bounds default to all columns.
            data:   ``pandas.DataFrame`` to plot; None uses the stored data.
            title:  title placed on every figure.
            out:    output file stem.  When given, EACH figure is written to
                    its own file: ``<out>.box.pdf``, ``<out>.box.1.pdf``, ...
                    (the first names not yet in use).  None disables saving.
        """
        if data is None:
            data = self.data
        first, last = self._bounds(crange, len(data.columns))
        for lo in range(first, last, self._COLS_PER_FIGURE):
            hi = min(lo + self._COLS_PER_FIGURE, last)
            ax = data.iloc[:, lo:hi].plot.box(title=title)
            if out is not None:
                # Save before show(): afterwards the current figure may be blank.
                ax.get_figure().savefig(self._free_path(out, '.box'), format='pdf')
            mplot.show()

    # Error bar plot
    #   x / y / err: x labels, y values, y errors
    #   clr: color code
    @staticmethod
    def errbarplot(x, y, err, clr=None, title=None, out=None):
        """Draw an error-bar plot on the current matplotlib figure.

        Declared static so that it works both as
        ``Dataframe.errbarplot(x, y, err)`` and on an instance.

        Args:
            x:     x positions or labels.
            y:     y values.
            err:   y errors.
            clr:   integer index into the list of matplotlib base + CSS4
                   color names (wraps around); anything unusable selects
                   the first color.
            title: optional plot title.
            out:   output file stem.  When given, the figure is written to
                   ``<out>.err.pdf`` (or ``<out>.err.<n>.pdf`` if that name
                   is already taken).  None disables saving.
        """
        palette = list(dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS))
        try:
            pick = int(clr) % len(palette)
        except (TypeError, ValueError):
            pick = 0
        mplot.errorbar(x, y, yerr=err, ecolor=palette[pick])
        if title is not None:
            # errorbar() has no 'title' argument; the title belongs to the axes.
            mplot.title(title)
        if out is not None:
            # Save before show(): afterwards the current figure may be blank.
            mplot.savefig(Dataframe._free_path(out, '.err'), format='pdf')
        mplot.show()

    # Plot covariance matrix
    #   prange: [[row min, column min], [row max, column max]] using pure digital position location
    def covplot(self, prange=None, out=None):
        """Print and plot the correlation matrix of the columns as heat maps.

        The matrix is ``self.data.corr()``, i.e. correlation coefficients
        rather than raw covariances.  It is drawn in tiles of at most
        16 x 16 variables, one figure per tile.

        Args:
            prange: optional ``[[row_min, col_min], [row_max, col_max]]``
                    positions selecting a sub-matrix (maxima exclusive).
                    A ``None`` entry, or a missing/malformed pair, selects
                    the full extent for that bound.
            out:    output file stem.  When given, every tile is written as
                    one page of ``<out>.corr.pdf`` (or ``<out>.corr.<n>.pdf``
                    if that name is already taken).  None disables saving.
        """
        nvar = self.nx + self.ny
        lower = self._pair(prange, 0, 0)
        upper = self._pair(prange, 1, nvar)
        corrl = self.data.corr().iloc[lower[0]:upper[0], lower[1]:upper[1]]
        print(corrl)
        palette = sbn.diverging_palette(220, 20, as_cmap=True)
        # Tile over what was actually selected; iloc may have clipped the request.
        nrows, ncols = corrl.shape
        pdf = PdfPages(self._free_path(out, '.corr')) if out is not None else None
        try:
            for c0 in range(0, ncols, self._TILE):
                for r0 in range(0, nrows, self._TILE):
                    tile = corrl.iloc[r0:r0 + self._TILE, c0:c0 + self._TILE]
                    # A fresh figure per tile keeps heat maps from piling up
                    # on the same axes when show() does not clear them.
                    fig, ax = mplot.subplots()
                    sbn.heatmap(tile, cmap=palette, ax=ax)
                    if pdf is not None:
                        pdf.savefig(fig)  # before show(), see hist()
                    mplot.show()
        finally:
            if pdf is not None:
                pdf.close()
    