# Analysis
In this notebook I analyze the data collected over all the iterations of the reinforcement TICA procedure.

### Import needed modules and useful functions

In [None]:
#-- useful python script for training the DeepTICA cvs --#
from utils import *

#-- to not visualize warnings --#
import warnings
warnings.filterwarnings('ignore')

### Simulation parameters 

In [None]:
# Number of simulation folders in the chain analysed by this notebook
# (the starting "unbias_A/" run plus the nested "bias<i>/" runs).
iterations = 10

# Boltzmann constant; the value matches kB expressed in kJ/(mol K).
kb=0.008314
# Single source of truth for the temperature, so that 'temp' and 'beta'
# below cannot drift apart when the value is changed (presumably kelvin).
TEMPERATURE = 300
#-- SIMULATION PARAMETERS --#
sim_parameters = {
    'temp': TEMPERATURE,
    # inverse thermal energy 1/(kB*T), used to turn the bias into log-weights
    'beta': 1./(TEMPERATURE*kb),
    # passed unchanged to compute_fes together with 'temp'
    'kbt': None,
    #-- parameters to compute the fes --#
    'blocks':2,
    'bandwidth': 0.02,
    'plot_max_fes' :70,
}
#--------------------------------------#

# Prepare input files for plumed

In [None]:
# Build the chain of nested simulation folders:
#   "unbias_A/", "unbias_A/bias1/", "unbias_A/bias1/bias2/", ...
# The list always starts with the unbiased folder and ends up with
# `iterations` entries (one new nesting level per loop step).
folder = "unbias_A/"
List = [folder]
for i in range(1, iterations):
    folder = f"{folder}bias{i}/"
    List.append(folder)

In [None]:
# Scatter of the sampled (phi, psi) points of every simulation folder, drawn on
# top of the FES stored in ../../../angles/.

# The FES and its grid do not depend on the folder: read them once, not per figure.
fes = np.loadtxt("../../../angles/fes.txt",delimiter=" ")
grid0 = np.loadtxt("../../../angles/grid0.txt",delimiter=" ")
grid1 = np.loadtxt("../../../angles/grid1.txt",delimiter=" ")
bounds = np.arange(0, 60, 5.)

# The loop variable is deliberately not called `set`: in a notebook it would
# stay in the global namespace and hide the builtin `set` for every later cell.
for run_dir in List:

    fig,ax = plt.subplots(1,1,figsize=(6,6))
    data = load_dataframe(run_dir+"COLVAR")
    # Columns named "d" followed by anything but a lowercase letter.
    # NOTE: later cells read `descriptors_names` from the notebook namespace,
    # so it must stay a global and this cell has to run before them.
    descriptors_names = data.filter(regex='^d[^a-z]').columns.values
    data.plot.scatter(y="psi",x="phi",ax=ax)

    c = ax.contour(grid0, grid1, fes, bounds, linewidths=3,cmap="gray",linestyles="dashed",
        norm = mpl.colors.BoundaryNorm(bounds, ncolors=len(bounds)-1, clip=False), label="FES [Kj/mol]",
    )
    c.clabel()
    ax.grid()
    ax.set_title(run_dir)
    ax.set_xlabel(r"$\phi$")
    ax.set_ylabel(r"$\psi$")
    plt.tight_layout()

In [None]:
# Time series of psi (left panel) and phi (right panel) for every simulation folder.
# The loop variable is not called `set`, so the builtin stays usable in later cells.
for run_dir in List:
    fig,axs = plt.subplots(1,2,figsize=(6,4))
    data = load_dataframe(run_dir+"COLVAR")
    data.plot.scatter(x="time",y="psi",ax=axs[0])
    data.plot.scatter(x="time",y="phi",ax=axs[1])

    # the folder name is shown on the left panel only
    axs[0].set_title(run_dir)
    plt.tight_layout()

In [None]:
# variance and mean of cvs
# For every biased run (List[1:]) read the CV column "deep<k>.node-0" written in
# its COLVAR and print its unweighted mean and variance.
for k,run_dir in enumerate(List[1:]):
    data = load_dataframe(run_dir+"COLVAR")
    cv = data["deep"+str(k)+".node-0"].values
    bias = data["opes.bias"].to_numpy()
    # log-weights beta*(V - max V): the shift must be applied BEFORE scaling by
    # beta (parentheses were missing here, so only max(V) was multiplied by beta).
    logweight = ( bias-bias.max() )*sim_parameters["beta"]
    # the reweighted versions are left disabled, as in the original analysis
    mean = np.average(cv)#,weights=np.exp(logweight))
    variance = np.cov(cv)#,aweights=np.exp(logweight))
    print("average: ", mean, "\tvariance: ", variance)

In [None]:
# Disabled cell: the code below is wrapped in a string literal, so nothing in it
# is executed. It would draw, for every biased run, a hexbin of the
# "deep<k>.node-0" column over (phi, psi) with the FES contours on top.
'''
# cvs isolines, after simulations
for k,set in enumerate(List[1:]):
    fig,ax = plt.subplots(1,1,figsize=(6,6))
    data = load_dataframe(set+"COLVAR")
    data.plot.hexbin(y="psi",x="phi",C="deep"+str(k)+".node-0",cmap="Set1",ax=ax)#,ax=axs[k])

    fes = np.loadtxt("../../../angles/fes.txt",delimiter=" ")
    grid0 = np.loadtxt("../../../angles/grid0.txt",delimiter=" ")
    grid1 = np.loadtxt("../../../angles/grid1.txt",delimiter=" ")
    bounds = np.arange(0, 60, 5.)
    c = ax.contour(grid0, grid1, fes, bounds, linewidths=3,cmap="gray",linestyles="dashed",
        norm = mpl.colors.BoundaryNorm(bounds, ncolors=len(bounds)-1, clip=False), label="FES [Kj/mol]",
    )
    c.clabel()
    ax.grid()
    ax.set_title(set)
    ax.set_xlabel(r"$\phi$")
    ax.set_ylabel(r"$\psi$")
    plt.tight_layout()
'''

In [None]:
# cvs isolines, before simulations
# For every folder: load the DeepTICA checkpoint saved in it, evaluate the two
# CVs on that folder's COLVAR, then plot their histograms and their value over
# the (phi, psi) plane with the FES contours on top.
# NOTE: relies on `descriptors_names` left in the namespace by an earlier cell.

# The FES and its grid do not depend on the folder: read them once.
fes = np.loadtxt("../../../angles/fes.txt",delimiter=" ")
grid0 = np.loadtxt("../../../angles/grid0.txt",delimiter=" ")
grid1 = np.loadtxt("../../../angles/grid1.txt",delimiter=" ")
bounds = np.arange(0, 60, 5.)

# `run_dir` rather than `set`, so the builtin is not shadowed in later cells.
for run_dir in List:

    data = load_dataframe(run_dir+"COLVAR")
    model = DeepTICA_CV([len(descriptors_names),30,30,2],activation='tanh')
    model.load_checkpoint(run_dir+"deeptica/model_checkpoint.pt")
    X = data[descriptors_names].to_numpy()
    # one forward pass for both outputs (the network was evaluated twice before);
    # after the transpose, row 0 / row 1 are the first / second CV
    cvs = np.transpose(model(torch.Tensor(X)).detach().cpu().numpy())
    data["cv1"] = cvs[0]
    data["cv2"] = cvs[1]

    # normalised histograms of the two CVs
    fig,axs = plt.subplots(1,2,figsize=(12,4),sharey=True)
    data.plot.hist(y="cv1",bins=20,ax=axs[0],density=True,color="b")
    data.plot.hist(y="cv2",bins=20,ax=axs[1],density=True,color="b")

    # CV value over (phi, psi), one panel per CV
    fig,ax = plt.subplots(1,2,figsize=(12,6))
    for axis,cv_name in zip(ax,("cv1","cv2")):
        data.plot.hexbin(y="psi",x="phi",C=cv_name,cmap="Set1",ax=axis)
        c = axis.contour(grid0, grid1, fes, bounds, linewidths=3,cmap="gray",linestyles="dashed",
            norm = mpl.colors.BoundaryNorm(bounds, ncolors=len(bounds)-1, clip=False), label="FES [Kj/mol]",
        )
        # label the contours of BOTH panels (only the last one was labelled before)
        c.clabel()
        axis.grid()
        axis.set_title(run_dir)
        axis.set_xlabel(r"$\phi$")
        axis.set_ylabel(r"$\psi$")
    plt.tight_layout()

In [None]:
# cvs isolines, on all psi-phi space before bias simulations
# Every folder's DeepTICA checkpoint is evaluated on the same data set
# (../../../angles/COLVAR) and shown over (phi, psi) with the FES contours.
# NOTE: relies on `descriptors_names` left in the namespace by an earlier cell.

# Everything below is identical for all folders, so it is loaded once.
fes = np.loadtxt("../../../angles/fes.txt",delimiter=" ")
grid0 = np.loadtxt("../../../angles/grid0.txt",delimiter=" ")
grid1 = np.loadtxt("../../../angles/grid1.txt",delimiter=" ")
bounds = np.arange(0, 60, 5.)
data_complete = load_dataframe("../../../angles/COLVAR")
# assumes this COLVAR contains the same descriptor columns -- TODO confirm
X_complete = data_complete[descriptors_names].to_numpy()

# `run_dir` rather than `set`, so the builtin is not shadowed in later cells.
for run_dir in List:

    model = DeepTICA_CV([len(descriptors_names),30,30,2],activation='tanh')
    model.load_checkpoint(run_dir+"deeptica/model_checkpoint.pt")
    # one forward pass for both outputs; row 0 / row 1 after the transpose are
    # the first / second CV. The two columns are overwritten at every iteration.
    cvs = np.transpose(model(torch.Tensor(X_complete)).detach().cpu().numpy())
    data_complete["cv1"] = cvs[0]
    data_complete["cv2"] = cvs[1]

    fig,ax = plt.subplots(1,2,figsize=(12,6))
    for axis,cv_name in zip(ax,("cv1","cv2")):
        data_complete.plot.hexbin(y="psi",x="phi",C=cv_name,cmap="Set1",ax=axis)
        c = axis.contour(grid0, grid1, fes, bounds, linewidths=3,cmap="gray",linestyles="dashed",
            norm = mpl.colors.BoundaryNorm(bounds, ncolors=len(bounds)-1, clip=False), label="FES [Kj/mol]",
        )
        # label the contours of BOTH panels (only the last one was labelled before)
        c.clabel()
        axis.grid()
        axis.set_title(run_dir)
        axis.set_xlabel(r"$\phi$")
        axis.set_ylabel(r"$\psi$")
    plt.tight_layout()

In [None]:
# different loss functions and trainings
# One figure per folder with the loss curve stored in that folder's checkpoint.
# NOTE: relies on `descriptors_names` left in the namespace by an earlier cell.
# `run_dir` rather than `set`, so the builtin is not shadowed in later cells.
for run_dir in List:

    model = DeepTICA_CV([len(descriptors_names),30,30,2],activation='tanh')
    model.load_checkpoint(run_dir+"deeptica/model_checkpoint.pt")
    fig, ax = plt.subplots(1,1,figsize=(6,4))#,dpi=100)
    plot_model_lossfunction(model,only_loss=ax)
    ax.set_title(run_dir)
    # inside the loop: tight_layout only acts on the current figure, so calling
    # it once after the loop adjusted the last figure only
    plt.tight_layout()

In [None]:
#-- estimation of Free Energy Surface --#
# For every biased run (List[1:]): reweighted 1D free-energy estimate along each
# of the two COLVAR columns whose name starts with "p", both on the same axes.
# `run_dir` rather than `set`, so the builtin is not shadowed in later cells.
for run_dir in List[1:]:

    data = load_dataframe(run_dir+"COLVAR")
    # the legend below assumes the column order is (phi, psi) -- TODO confirm
    s = data.filter(regex="^p").to_numpy()
    bias = data["opes.bias"].to_numpy()
    # log-weights beta*(V - max V); the shift keeps exp() from overflowing
    logweight=( bias-bias.max() )*sim_parameters["beta"]

    fig, ax = plt.subplots(figsize=(6,4))
    for i in range(2):
        fes,grid,bounds,error = compute_fes(s[:,i], weights=np.exp(logweight),
                                            temp=sim_parameters["temp"],
                                            kbt=sim_parameters["kbt"],
                                            blocks=sim_parameters["blocks"],
                                            bandwidth=sim_parameters["bandwidth"],scale_by='range',
                                            plot=True, plot_max_fes=sim_parameters["plot_max_fes"], ax = ax)
    ax.legend([r"$F(\phi)$ estimate",r"$F(\psi)$ estimate"])   
    ax.grid()
    ax.set_title(run_dir)
    ax.set_xlabel(r"$(\phi,\psi)$")
    ax.set_ylabel("FES [Kj/mol]")
    # after title and labels exist, so that they are included in the layout
    plt.tight_layout()

    plt.show()

In [None]:
# load model from folder
#X, t, logweight = data[descriptors_names].values, data['time'].to_numpy(). data["opes.bias"].to_numpy()
#logweight = ( logweight-max(logweight) ) / sim_parameters["temp"]

In [None]:
# Pearson correlations
# Disabled cell: the code below is wrapped in a string literal, so nothing in it
# is executed. It would bar-plot, for each of the two DeepTICA outputs, the
# Pearson correlation with every descriptor column.
# NOTE(review): inside the disabled code the inner loop reuses `k`, overwriting
# the index of the outer loop -- rename one of them before re-enabling it.
'''
for k,set in enumerate(List[:-1]):

    data = load_dataframe(set+"COLVAR")
    model = DeepTICA_CV([len(descriptors_names),30,30,2],activation='tanh')
    model.load_checkpoint(set+"deeptica/model_checkpoint.pt")
    X = data[descriptors_names].to_numpy()
    data["cv1"] = np.transpose(model(torch.Tensor(X)).detach().cpu().numpy())[0]
    data["cv2"] = np.transpose(model(torch.Tensor(X)).detach().cpu().numpy())[1]

    fig,axs = plt.subplots(2,1,figsize=(16,12),sharex=True)
    for k,cv in enumerate(["cv1","cv2"]):
        cols = [cv]
        cols.extend(data[descriptors_names].columns)
        corr = data[cols].corr(method='pearson')
        corr[cv].drop(cv).plot(kind='bar', ax=axs[k], rot=35, color="b",label=r"$C(deep|desc)$")
        axs[k].set_title('Correlation with DeepTICA '+str(k+1))
    plt.tight_layout()
    plt.show()
'''

### FES estimate from cvs  
Obviously, from this first simulation alone it is not possible to find CVs that are able to distinguish all the possible basins. Recall that ours is a **data-driven approach**: the CVs can only resolve what the data have sampled.  


In [None]:
#-- estimation of Free Energy Surface --#
# For every biased run (List[1:]): evaluate the DeepTICA checkpoint saved in the
# folder on that folder's COLVAR and estimate the reweighted 1D free energy
# along each of the two CVs, both on the same axes.
# NOTE: relies on `descriptors_names` left in the namespace by an earlier cell.
# `run_dir` rather than `set`, so the builtin is not shadowed in later cells.
for run_dir in List[1:]:

    data = load_dataframe(run_dir+"COLVAR")
    model = DeepTICA_CV([len(descriptors_names),30,30,2],activation='tanh')
    model.load_checkpoint(run_dir+"deeptica/model_checkpoint.pt")
    X = data[descriptors_names].to_numpy()
    # one forward pass for both outputs (the network was evaluated twice before);
    # after the transpose, row 0 / row 1 are the first / second CV
    cvs = np.transpose(model(torch.Tensor(X)).detach().cpu().numpy())
    data["cv1"] = cvs[0]
    data["cv2"] = cvs[1]
    # every column whose name starts with "cv" (cv1 and cv2 added just above)
    s = data.filter(regex="^cv").to_numpy()
    bias = data["opes.bias"].to_numpy()
    # log-weights beta*(V - max V); the shift keeps exp() from overflowing
    logweight=( bias-bias.max() )*sim_parameters["beta"]
    
    fig, ax = plt.subplots(figsize=(6,6))
    for i in range(2):
        fes,grid,bounds,error = compute_fes(s[:,i], weights=np.exp(logweight),
                                            temp=sim_parameters["temp"],
                                            kbt=sim_parameters["kbt"],
                                            blocks=sim_parameters["blocks"],
                                            bandwidth=sim_parameters["bandwidth"],scale_by='range',
                                            plot=True, plot_max_fes=sim_parameters["plot_max_fes"], ax = ax)
    ax.legend(["F(cv1) estimate","F(cv2) estimate"])   
    ax.grid()
    ax.set_title(run_dir)
    ax.set_xlabel(r"$(cv1,cv2)$")
    ax.set_ylabel("FES [Kj/mol]")
    # after title and labels exist, so that they are included in the layout
    plt.tight_layout()

plt.show()

In [None]:
## attempt to obtain estimate of free energy from all the combined data
# I will not consider the unbias simulation for simplicity
# One frame per biased run; the comprehension variable stays local, so the
# builtin `set` is not shadowed and `alldata` is only ever a DataFrame.
frames = [load_dataframe(run_dir+"COLVAR") for run_dir in List[1:]]
# Rows of all runs stacked with a fresh 0..N-1 index; NaN entries (e.g. columns
# that are missing in some of the frames) are replaced by 0.
alldata = pd.concat(frames,ignore_index=True).replace(np.nan, 0)
# shuffle all the rows; the seed is fixed so that Restart & Run All reproduces
# the same order (and therefore the same block estimates downstream)
alldata = alldata.sample(frac=1, random_state=0)

In [None]:
# estimation of free energy in phi and psi
# Reweighted 1D free-energy estimates from the concatenated data of all biased
# runs, along each of the two columns whose name starts with "p".
# the legend below assumes the column order is (phi, psi) -- TODO confirm
s = alldata.filter(regex="^p").to_numpy()
bias = alldata["opes.bias"].to_numpy()
# log-weights beta*(V - max V); the shift keeps exp() from overflowing
logweight=( bias-bias.max() )*sim_parameters["beta"]

fig, ax = plt.subplots(figsize=(6,6))
for i in range(2):
    fes,grid,bounds,error = compute_fes(s[:,i], weights=np.exp(logweight),
                                        temp=sim_parameters["temp"],
                                        kbt=sim_parameters["kbt"],
                                        blocks=sim_parameters["blocks"],
                                        bandwidth=sim_parameters["bandwidth"],scale_by='range',
                                        plot=True, plot_max_fes=sim_parameters["plot_max_fes"], ax = ax)
ax.legend([r"$F(\phi)$ estimate",r"$F(\psi)$ estimate"])   
ax.grid()
ax.set_xlabel(r"$(\phi,\psi)$")
ax.set_ylabel("FES [Kj/mol]")
# after the labels exist, so that they are included in the layout
plt.tight_layout()