# Notebook to plot persistence from a dataframe produced by Pers_in_visit


In [None]:
from astropy.io import fits
import glob, os, shutil, pickle, bz2, gc
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import sigmaclip
from scipy.optimize import curve_fit
from scipy.special import gammaincc, gamma
from astropy.wcs import WCS
from astropy.stats import histogram
from itertools import product
from multiprocessing import Pool

%matplotlib notebook

In [None]:
# Show the current working directory (a bare `pwd` relies on IPython automagic, i.e. it runs `%pwd`)
pwd

In [None]:
# The project dir (hard-coded absolute path; edit this to relocate the analysis)
pdir = '/user/gennaro/Functional_work/WFC3_persistence/py_progs/short_term_persistence/'

# The sub-directories are built with os.path.join so that the trailing '/' of
# pdir does not produce a doubled separator. The final empty component keeps a
# trailing separator, so file names can still be appended with a plain '+'.

#The mosaic dir
mdir = os.path.join(pdir, 'Mosaic_hi_res_folder', '')

#The dir to save/load the Persistence curves dataframes
sdir = os.path.join(pdir, 'PD_dataframes_dir', '')


In [None]:
#Single and double exponential models to be fitted to the data

def decay1(t,a1,t1):
    """Single exponential decay, a1*exp(-t/t1).

    t  : time (scalar or array)
    a1 : amplitude at t = 0
    t1 : e-folding time, in the same units as t
    """
    return a1*np.exp(-t/t1)

def intdec1(t,a1,t1):
    """Mean of a1*exp(-t/t1) over each interval between consecutive entries of t.

    t is treated as an array of interval edges; the result has one element
    fewer than t (one value per [t[i], t[i+1]] interval).
    """
    lo, hi = t[:-1], t[1:]
    # Antiderivative of a1*exp(-t/t1) is -a1*t1*exp(-t/t1)
    drop = np.exp(-hi/t1)-np.exp(-lo/t1)
    return (-a1*t1)*drop/(hi-lo)
    
def decay2(t,a1,t1,a2,t2):
    """Sum of two exponential decays, a1*exp(-t/t1) + a2*exp(-t/t2)."""
    first  = a1*np.exp(-t/t1)
    second = a2*np.exp(-t/t2)
    return first+second

def intdec2(t,a1,t1,a2,t2):
    """Mean of the double exponential a1*exp(-t/t1) + a2*exp(-t/t2) over each
    interval between consecutive entries of t (t holds the interval edges).

    Returns an array with one element fewer than t.
    """
    lo, hi = t[:-1], t[1:]

    def _component_mean(amp, tau):
        # Interval mean of amp*exp(-t/tau), from its antiderivative -amp*tau*exp(-t/tau)
        return (-amp*tau)*(np.exp(-hi/tau)-np.exp(-lo/tau))/(hi-lo)

    return _component_mean(a1, t1) + _component_mean(a2, t2)

#Single exponential models plus a constant

def intdec1_plusconst(t,a1,t1,q):
    """Interval mean of a1*exp(-t/t1) + q between consecutive entries of t.

    t holds the interval edges; q is the constant offset added to every
    interval. Returns an array with one element fewer than t.
    """
    lo, hi = t[:-1], t[1:]
    exp_mean = (-a1*t1)*(np.exp(-hi/t1)-np.exp(-lo/t1))/(hi-lo)
    return exp_mean +q

def dec1_plusconst(t,a1,t1,q):
    """Single exponential decay plus a constant: a1*exp(-t/t1) + q."""
    return a1*np.exp(-t/t1)+q


#Shifted power law model

def shpwl(t,t0,A,index):
    """Shifted power law, A * ((t+t0)/1000)**index.

    t0 shifts the time origin; the shifted time is divided by 1000 before
    being raised to `index`, so A is the model value at t+t0 = 1000.
    """
    scaled = (t+t0)/1000
    return A * scaled**index

def intshpwl(t,t0,A,index):
    """Interval-averaged shifted power law between consecutive entries of t.

    t holds the interval edges; the result has one element fewer than t.

    For index != -1 the value is
        A/(1+index) * (u_up**(1+index) - u_dw**(1+index)) / (t_up - t_dw)
    with u = (t+t0)/1000. For index == -1 the logarithmic limit of that same
    expression is returned, A*log(u_up/u_dw)/(t_up - t_dw). The previous
    implementation omitted the division by the interval width in this branch,
    which made the function discontinuous at index = -1.

    NOTE(review): the exact mean of `shpwl` over an interval carries an extra
    factor 1000 (dt = 1000 du) that neither branch applies. It is left as is
    so that fitted amplitudes keep their existing normalization; confirm that
    this is intended.
    """
    tu = t[1:]
    td = t[:-1]

    if (index == -1.):
        # The 1/1000 scaling cancels in the ratio, so it is not needed here
        return A*np.log( (tu+t0)/(td+t0) )/(tu-td)
    else:
        return A/(1+index) * ( ((tu+t0)/1000)**(1+index) - ((td+t0)/1000)**(1+index) )/(tu-td)
    
    
#Schechter like model

def schechter(t,phi,alpha,tstar):
    """Schechter-like function phi * (t/tstar)**alpha * exp(-t/tstar).

    phi   : normalization
    alpha : power-law index of the part that dominates for t << tstar
    tstar : time scale of the exponential cut-off
    """
    scaled_t = t/tstar
    power_part = scaled_t**alpha
    return phi*power_part*np.exp(-scaled_t)

def intschechter(t,phi,alpha,tstar):
    """Integral of phi * x**alpha * exp(-x) between consecutive scaled edges x = t/tstar.

    The integral is written through the regularized upper incomplete gamma
    function Q(a, x) = gammaincc(a, x):
        int_{x_dw}^{x_up} x**alpha * exp(-x) dx
            = Gamma(alpha+1) * (Q(alpha+1, x_dw) - Q(alpha+1, x_up))

    Returns an array with one element fewer than t.

    NOTE(review): unlike the interval-averaged exponential models in this
    notebook, the result is not divided by the interval width and the
    integration variable is x rather than t; confirm that this is intended.
    """
    edges = t/tstar
    shape = alpha+1

    upper_tail_dw = gammaincc(shape,edges[:-1])
    upper_tail_up = gammaincc(shape,edges[1:])

    integral = gamma(shape)*(upper_tail_dw-upper_tail_up)

    return phi*integral


#Geometric median calculation function

from scipy.spatial.distance import cdist, euclidean

def geometric_median(X, eps=1e-5):
    """Geometric median of a set of points by a Weiszfeld-type iteration.

    Parameters
    ----------
    X : array-like, shape (n_points, n_dim)
        The points; converted to a float ndarray internally.
    eps : float
        Convergence threshold on the Euclidean distance between two
        successive estimates.

    Returns
    -------
    (median, D) : tuple
        median : ndarray of shape (n_dim,), the estimate.
        D      : ndarray of shape (n_points, 1), distances from every point to
                 the estimate the last step started from. On convergence this
                 is within `eps` of `median`; in the degenerate case below it
                 is `median` itself.

    A tuple is returned on every path. The previous version returned the bare
    estimate when all points coincided with it, which broke callers that
    unpack two values.
    """
    X = np.asarray(X, dtype=float)
    # Start from the centroid
    y = np.mean(X, 0)

    while True:
        D = cdist(X, [y])
        nonzeros = (D != 0)[:, 0]
        num_zeros = len(X) - np.sum(nonzeros)

        if num_zeros == len(X):
            # Every point sits exactly on the current estimate: nothing to weight
            return y, D

        # Inverse-distance weights over the points not coinciding with y
        Dinv = 1 / D[nonzeros]
        Dinvs = np.sum(Dinv)
        W = Dinv / Dinvs
        T = np.sum(W * X[nonzeros], 0)

        if num_zeros == 0:
            y1 = T
        else:
            # Some points coincide with y: blend T and y so that the zero
            # distances of those points do not enter the weights
            R = (T - y) * Dinvs
            r = np.linalg.norm(R)
            rinv = 0 if r == 0 else num_zeros/r
            y1 = max(0, 1-rinv)*T + min(1, rinv)*y

        if euclidean(y, y1) < eps:
            return y1, D

        y = y1



In [None]:
#Cell to restore the HDF5 store with the dataframes for each visit
#df = pd.DataFrame()

# Per-visit frames are collected in lists and concatenated once at the end.
# DataFrame.append was removed in pandas 2.0 and re-copied the growing frame
# on every iteration.
frames_v = []
frames_l = []

# Read-only access is all this cell needs; the context manager closes the
# file even if one of the reads fails.
with pd.HDFStore(sdir+'DF.h5', mode='r') as store:
    skeys  = store.keys()

    print(skeys)

    # NOTE(review): this assumes the keys come in pairs, one pair per visit,
    # with the per-ramp table at the even position and the per-entry table at
    # the odd position of store.keys() -- confirm against the writer.
    for i in range(len(skeys)//2):
        df_vh = store.get(skeys[i*2+1])
        df_lh = store.get(skeys[i*2])

        print('Number of entries for Visit ',i+1,':',len(df_vh))
        print('Number of unique ramps for Visit ',i+1,':',len(df_lh))

        # Tag every row with its 1-based visit number
        df_vh['Visit'] = i+1
        frames_v.append(df_vh)

        df_lh['Visit'] = i+1
        frames_l.append(df_lh)

# df_v gets a fresh 0..N-1 index; df_l keeps the index stored with each frame
df_v = pd.concat(frames_v,ignore_index=True) if frames_v else pd.DataFrame()
df_l = pd.concat(frames_l,ignore_index=False) if frames_l else pd.DataFrame()


In [None]:
# Row count of the combined per-entry table, then a preview of its first rows
print('Total number of entries',len(df_v))
df_v.head()

In [None]:
# Row count of the combined per-ramp table, then a preview of its first rows
print('Total number of unique ramps',len(df_l))
df_l.head()

In [None]:
# Per-ramp quantities as plain NumPy arrays, all in df_l row order:
# 'Stim' and 'DQ_stim' are read from levels of df_l's index, 'Visit' from a column.
Stim    = df_l.index.get_level_values('Stim').values
Visit   = df_l['Visit'].values
DQ_stim = df_l.index.get_level_values('DQ_stim').values


In [None]:
# Distribution of log10(stimulus) for the ramps with DQ_stim == 0: all visits
# pooled first, then each visit separately on the same bin edges. Labels and a
# legend are attached so that the overlaid histograms can be told apart.
fig = plt.figure()

# Only the bin edges `b` are reused below
n,b,p = plt.hist(np.log10(Stim[DQ_stim == 0]),alpha=0.25,bins=50,label='All visits')

for vv in [1,2,3]:
    plt.hist(np.log10(Stim[(Visit == vv) & (DQ_stim == 0)]),alpha=0.25,bins=b,label='Visit {}'.format(vv))
plt.xlabel('Log10(Fluence)')
plt.ylabel('Number of ramps')
plt.legend()


In [None]:
#The stimulus levels probed
# (log10 of the smallest and largest stimulus over all ramps; no DQ_stim selection is applied here)

print('Minimum stimulus:',np.log10(np.min(Stim)))
print('Maximum stimulus:',np.log10(np.max(Stim)))


In [None]:
#Optional cell: keep only the high-stimulus rows to speed up interactive use (currently disabled)

#df = df[np.log10(df['Stim']) > 6.5]


In [None]:
# Row masks over df_v, combined further down to select subsets of the entries.

# 'deltat' strictly inside a +/-1 window around 25 and around 100
BM_rng25   = (df_v['deltat'] > 24) & (df_v['deltat'] < 26)
BM_rng100  = (df_v['deltat'] > 99) & (df_v['deltat'] < 101)

# Entries whose persistence type is 'EXT'
BM_pext = df_v['Pers_type'].eq('EXT')

# One mask per visit
BM_V1, BM_V2, BM_V3 = (df_v['Visit'].eq(visit) for visit in (1, 2, 3))

# Entries belonging to a ramp with DQ_stim == 0, matched through 'Uniq_multiindex'
g_vvlu = (DQ_stim == 0)
g_umix = df_l.loc[g_vvlu, 'Uniq_multiindex'].values
BM_DQ  = df_v['Uniq_multiindex'].isin(g_umix)


In [None]:
# Edges of the log10(stimulus) bins: each consecutive pair of entries in
# `levels` defines one bin (lower edge inclusive, upper edge exclusive).
#levels = [5.0,5.1,5.5,5.75,6.,6.25,6.5,6.75,7.,7.25,7.5,7.75]
#levels = [6.75,7.,7.25]
levels = list(np.arange(6.9,7.75,0.1))


# Container for the binned median persistence curves.
#   'levels'    : the bin edges defined above
#   'med_times' : per stimulus bin, one list of median times per selection
#   'med_pers'  : per stimulus bin, one list of median 'meancurr' values per selection
#   'Npoints'   : per stimulus bin, one list of per-time-bin point counts per selection
#   'Readme'    : label of each selection; it is indexed by position, so the
#                 active entries must stay in the same order as the masks
#                 appended to gBMs in the loop below (the commented-out labels
#                 pair with the commented-out masks there)
mean_res = {'levels': levels, 
            'med_times':[],
            'med_pers':[],
            'Npoints':[],
            'Readme': [#'ALL',
                       'ALL_DQ',
                      # 'dT~25s',
                      # 'dT~25s & Ptyp == EXT',
                      # 'dT~25s & Ptyp != EXT',
                      # 'dT~25s & Ptyp == EXT & Vis == 1',
                      # 'dT~25s & Ptyp == EXT & Vis == 2',
                      # 'dT~25s & Ptyp == EXT & Vis == 3',
                      # 'dT~25s & Ptyp != EXT & Vis == 1',
                      # 'dT~25s & Ptyp != EXT & Vis == 2',
                      # 'dT~25s & Ptyp != EXT & Vis == 3',
                      #'Visit 1',
                      #'Visit 2',
                      #'Visit 3',
                       'Vis 1_DQ',
                       'Vis 2_DQ',
                       'Vis 3_DQ'
                      # 'Ptyp == EXT & Vis == 1',
                      # 'Ptyp == EXT & Vis == 2',
                      # 'Ptyp == EXT & Vis == 3',
                      # 'Ptyp != EXT & Vis == 1',
                      # 'Ptyp != EXT & Vis == 2',
                      # 'Ptyp != EXT & Vis == 3',
                      # 'dT~100s',
                      # 'dT~100s & Ptyp == EXT',
                      # 'dT~100s & Ptyp != EXT',
                      # 'dT~100s & Ptyp == EXT & Vis == 1',
                      # 'dT~100s & Ptyp == EXT & Vis == 2',
                      # 'dT~100s & Ptyp == EXT & Vis == 3',
                      # 'dT~100s & Ptyp != EXT & Vis == 1',
                      # 'dT~100s & Ptyp != EXT & Vis == 2',
                      # 'dT~100s & Ptyp != EXT & Vis == 3',
                       
                      ]
            }    


# Width of the time bins used to build the median curves
step = 25.


def binned_median_curve(times, values, step, min_points=5):
    """Median curve of `values` against `times` in bins of width `step`.

    Bins are centred on 0, step, 2*step, ... and cover (centre - step/2,
    centre + step/2]; bins are generated while the centre does not exceed
    max(times). Only bins holding more than `min_points` samples are kept.

    Returns three parallel lists, one entry per kept bin:
    median time (np.median), median value (np.nanmedian, so NaN values are
    ignored) and number of samples in the bin.
    """
    times = np.asarray(times)
    values = np.asarray(values)

    tmed, medsig, npts = [], [], []
    if times.size == 0:
        return tmed, medsig, npts

    half = step/2.
    centre = 0.
    tmax = np.max(times)
    while centre <= tmax:
        in_bin = (times > centre-half) & (times <= (centre+half))
        centre += step
        nn = np.sum(in_bin)
        if (nn > min_points):
            medsig.append(np.nanmedian(values[in_bin]))
            tmed.append(np.median(times[in_bin]))
            npts.append(nn)
    return tmed, medsig, npts


# log10 of the stimulus does not depend on the level bin: compute it once
log_stim = np.log10(Stim)

for lev_dw,lev_up in zip(mean_res['levels'][0:-1],mean_res['levels'][1:]):

    print(lev_dw,lev_up)

    # Ramps whose stimulus falls in [lev_dw, lev_up), then the df_v entries
    # belonging to those ramps (matched through 'Uniq_multiindex')
    g_vvlu = ( log_stim >= lev_dw ) & ( log_stim < lev_up )
    g_umix = df_l['Uniq_multiindex'][g_vvlu].values
    BM_lv  = df_v['Uniq_multiindex'].isin(g_umix)

    # Selections to process. The active entries must stay in the same order
    # as the active labels of mean_res['Readme'], which is indexed by position.
    gBMs = []

#    gBMs    =   [BM_lv]
    gBMs.append(BM_lv & BM_DQ)
#    gBMs.append(BM_lv & BM_rng25)
#    gBMs.append(BM_lv & BM_rng25 & BM_pext)
#    gBMs.append(BM_lv & BM_rng25 & (~BM_pext) )
#    gBMs.append(BM_lv & BM_rng25 & BM_pext & BM_V1)
#    gBMs.append(BM_lv & BM_rng25 & BM_pext & BM_V2)
#    gBMs.append(BM_lv & BM_rng25 & BM_pext & BM_V3)
#    gBMs.append(BM_lv & BM_rng25 & (~BM_pext) & BM_V1)
#    gBMs.append(BM_lv & BM_rng25 & (~BM_pext) & BM_V2)
#    gBMs.append(BM_lv & BM_rng25 & (~BM_pext) & BM_V3)
#    gBMs.append(BM_lv & BM_V1)
#    gBMs.append(BM_lv & BM_V2)
#    gBMs.append(BM_lv & BM_V3)
    gBMs.append(BM_lv & BM_V1 & BM_DQ)
    gBMs.append(BM_lv & BM_V2 & BM_DQ)
    gBMs.append(BM_lv & BM_V3 & BM_DQ)
#    gBMs.append(BM_lv & BM_pext & BM_V1)
#    gBMs.append(BM_lv & BM_pext & BM_V2)
#    gBMs.append(BM_lv & BM_pext & BM_V3)
#    gBMs.append(BM_lv & (~BM_pext) & BM_V1)
#    gBMs.append(BM_lv & (~BM_pext) & BM_V2)
#    gBMs.append(BM_lv & (~BM_pext) & BM_V3)
#    gBMs.append(BM_lv & BM_rng100)
#    gBMs.append(BM_lv & BM_rng100 & BM_pext)
#    gBMs.append(BM_lv & BM_rng100 & (~BM_pext) )
#    gBMs.append(BM_lv & BM_rng100 & BM_pext & BM_V1)
#    gBMs.append(BM_lv & BM_rng100 & BM_pext & BM_V2)
#    gBMs.append(BM_lv & BM_rng100 & BM_pext & BM_V3)
#    gBMs.append(BM_lv & BM_rng100 & (~BM_pext) & BM_V1)
#    gBMs.append(BM_lv & BM_rng100 & (~BM_pext) & BM_V2)
#    gBMs.append(BM_lv & BM_rng100 & (~BM_pext) & BM_V3)

    # One curve (times, medians, counts) per selection, for this stimulus bin
    mth = []
    mph = []
    nph = []

    for i,gBM in enumerate(gBMs):
        print(mean_res['Readme'][i],np.sum(gBM),np.sum(BM_lv))

        # Only the two columns that are needed are extracted, once per
        # selection, instead of copying every column of the selected rows
        tmed, medsig, npts = binned_median_curve(df_v.loc[gBM,'tfromstim'].values,
                                                 df_v.loc[gBM,'meancurr'].values,
                                                 step)

        mth.append(tmed)
        mph.append(medsig)
        nph.append(npts)

    mean_res['med_times'].append(mth)
    mean_res['med_pers'].append(mph)
    mean_res['Npoints'].append(nph)
    

In [None]:
#fig = plt.figure(figsize=(18,3*(len(mean_res['levels'])-1)))

# Grid of log-log panels, one per stimulus bin, each showing the median
# persistence curve of the selections listed in name_to_plot.

name_to_plot = ['ALL_DQ','Vis 1_DQ','Vis 2_DQ','Vis 3_DQ']
#name_to_plot = ['ALL']
ind_to_plot = [mean_res['Readme'].index(name) for name in name_to_plot]

ncols = 3
# One panel per consecutive pair of levels. Round up so that a partially
# filled last row still gets its axes (len(levels)//ncols was one row short
# whenever len(levels) % ncols == 2).
npanels = len(mean_res['levels']) - 1
nrows = max(1, -(-npanels // ncols))

plt.style.use('bmh')
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.size'] = 16
plt.rcParams['axes.labelsize'] = 16
plt.rcParams['axes.labelweight'] = 'normal'
plt.rcParams['xtick.labelsize'] = 14
plt.rcParams['ytick.labelsize'] = 14
plt.rcParams['legend.fontsize'] = 12
plt.rcParams['figure.titlesize'] = 16

col_list = ['#909AEE','#8B4500','#008B45','#CD9B1D','#99BB00','#9400D3','#FF00FF','#00688B','#FF3030']


# squeeze=False keeps `ax` two-dimensional even when there is a single row,
# so that ax[l,k] indexing always works
fig, ax = plt.subplots(nrows, ncols, sharex=True, sharey=True, figsize=(6*ncols,3*nrows), squeeze=False)

for i,(lev_dw,lev_up) in enumerate(zip(mean_res['levels'][0:-1],mean_res['levels'][1:])):

    # Panels are filled row by row: k is the column, l the row
    k = i%ncols
    l = i//ncols
    panel = ax[l,k]

    for j in ind_to_plot:
        panel.plot(mean_res['med_times'][i][j],mean_res['med_pers'][i][j],color=col_list[j],
                   label='N: '+'{:3.1e}'.format(1.*np.sum(mean_res['Npoints'][i][j]))+' ; '+mean_res['Readme'][j])

    panel.set_xscale('log')
    panel.set_yscale('log')
    # Label the panel with the centre of its bin; the 80000 divisor is the
    # value the label calls "saturation"
    meanlev = 0.5*(mean_res['levels'][i]+mean_res['levels'][i+1])
    titlestring = 'Log$_{10}$ (Stim.[e$^{-}$]): ' + '{:5.3f}'.format(meanlev) + '; ' + '{:5.1f}'.format(10**meanlev/80000.) +' x saturation' 
    panel.text(0.275,0.85,titlestring,verticalalignment='bottom', horizontalalignment='left',transform=panel.transAxes,bbox={'facecolor':'gray', 'alpha':.1, 'pad':1},fontsize=15)

    panel.legend(fontsize=13,loc=3)

    panel.set_ylim(0.009,25)
    panel.set_xlim(50,10000)

    # Reference line through (1, 700) and (10000, 0.07), i.e. 700/t
    panel.plot(np.array([1,10000]),700*np.array([1,1/10000.]),'--',c='black')

    if l==(nrows-1):
        panel.set_xlabel('Time [s]',fontsize=24)
    if k==0:
        panel.set_ylabel('Mean current [e$^{-}$/s]',fontsize=18)
    # set_axis_bgcolor was removed from matplotlib; set_facecolor replaces it
    panel.set_facecolor('#FFFFFF')

plt.tight_layout(pad=.2)

