<a href="https://colab.research.google.com/github/physicaone/loss_IG/blob/master/%5B210516%5Dsampling_with_PT7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
# Locate the project data directory: mount Google Drive when running on Colab,
# otherwise fall back to a local "Google Drive" folder.
try:
    from google.colab import drive
except ImportError:
    # Not running on Colab: use the locally synced Drive folder.
    base='Google Drive'
else:
    # On Colab a failing mount should surface instead of silently switching to
    # a local path that does not exist there.
    drive.mount('/content/drive')
    base='drive/MyDrive'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [31]:
import numpy as np
import torch
import torchvision.datasets
import torchvision.models
import torchvision.transforms
import torch.nn.functional as F
from torchvision import datasets, transforms
from torchvision.utils import make_grid
import torch.utils.data
import torch.nn as nn
from itertools import combinations
from itertools import permutations
from datetime import datetime
from tqdm import tqdm, tnrange
import warnings
warnings.filterwarnings("ignore")
import pickle as pkl
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
%matplotlib inline
# np.seterr(divide='ignore', invalid='ignore')
import itertools
import re
from time import sleep

# Define RBM class

In [32]:
class RBM(nn.Module):
    """Restricted Boltzmann machine with tempered conditional distributions.

    Parameters (names are also the state-dict keys):
        v: visible bias, shape (1, n_vis), initialised to ones.
        h: hidden bias, shape (1, n_hid), initialised to zeros.
        W: weights, shape (n_hid, n_vis), initialised from a standard normal.
    """

    def __init__(self, n_vis, n_hid, k, use_cuda):
        """Create a RBM.

        Args:
            n_vis: number of visible units.
            n_hid: number of hidden units.
            k: number of Gibbs steps performed by ``forward``.
            use_cuda: when truthy the parameters are placed on the GPU.
        """
        super(RBM, self).__init__()

        # Values are created on the CPU and moved afterwards (as the original
        # ``.cuda()`` calls did), so the initial weights follow the CPU RNG
        # regardless of the target device.
        target = 'cuda' if use_cuda else 'cpu'
        self.v = nn.Parameter(torch.ones(1, n_vis).to(target))
        self.h = nn.Parameter(torch.zeros(1, n_hid).to(target))
        self.W = nn.Parameter(torch.randn(n_hid, n_vis).to(target))
        self.k = k

    def visible_to_hidden(self, v, beta=1.0):
        """Return sigmoid(beta * (v W^T + h)), the hidden activation probabilities.

        ``beta`` defaults to 1.0 so that ``forward`` can call this without a
        temperature argument.
        """
        return torch.sigmoid(F.linear(v, self.W, self.h)*beta)

    def hidden_to_visible(self, h, beta=1.0):
        """Return sigmoid(beta * (h W + v_bias)), the visible activation probabilities."""
        return torch.sigmoid(F.linear(h, self.W.t(), self.v)*beta)

    def free_energy(self, v):
        """Return the batch mean of -sum(softplus(v W^T + h)) - v . v_bias."""
        v_term = torch.matmul(v, self.v.t())
        w_x_h = F.linear(v, self.W, self.h)
        h_term = torch.sum(F.softplus(w_x_h), dim=1)
        return torch.mean(-h_term - v_term)

    def energy2(self, v, h):
        """Return the joint energy -v.a - v W^T h^T - h.b of a configuration.

        For v of shape (1, n_vis) and h of shape (1, n_hid) the result has
        shape (1, 1).
        """
        return -torch.matmul(v, self.v.t())-torch.matmul(torch.matmul(v, self.W.t()),h.t())-torch.matmul(h, self.h.t())

    def forward(self, v):
        """Run ``self.k`` Gibbs steps from ``v``; return ``(v, v_gibb)``.

        With ``k == 0`` no step is taken and ``v_gibb`` is ``v`` itself.
        """
        h = self.visible_to_hidden(v).bernoulli()
        v_gibb = v
        for _ in range(self.k):
            v_gibb = self.hidden_to_visible(h).bernoulli()
            h = self.visible_to_hidden(v_gibb).bernoulli()
        return v, v_gibb

# Check CUDA availability

In [33]:
# Decide once where tensors live; later cells read both CUDA and device.
CUDA_DEVICE = 0
CUDA = torch.cuda.is_available()
device = 'cuda' if CUDA else 'cpu'
CUDA

False

# Define fundamental functions

In [34]:
# The next two helpers are unrelated to parallel tempering. This one turns a
# decimal integer into its binary digits in list form, e.g. 15 -> [1, 1, 1, 1];
# the one after it is the inverse.
def decimal_to_binary(integer, n_hid):
    """Return the binary digits of ``integer`` as a float tensor of shape (1, n_bits).

    The digits are left-padded with zeros to at least ``n_hid`` entries; a
    number needing more than ``n_hid`` bits keeps all of its digits.
    """
    digits = bin(integer)[2:].zfill(n_hid)
    return torch.tensor([[float(d) for d in digits]])

def binary_to_decimal(list0):
    """Interpret a 1-D tensor of binary digits (most significant first) as an int."""
    bits = list0.tolist()
    # The last digit has weight 2**0, the one before it 2**1, and so on.
    total = sum(bit * 2 ** place for place, bit in enumerate(reversed(bits)))
    return int(total)

# Computes the transition (swap) probability used by parallel tempering.
def swap_prob(i,j, model, list00, list11):
    """Return exp((beta_j - beta_i) * (E_j - E_i)) for replicas ``i`` and ``j``.

    ``list00`` holds the visible states and ``list11`` the hidden states of
    all replicas; energies come from ``model.energy2``. Reads the notebook
    globals ``n_vis``, ``n_hid`` and ``beta_list``. The value is not clipped
    to 1.
    """
    def _energy(idx):
        vis = torch.tensor(list00[idx]).view(1,n_vis)
        hid = torch.tensor(list11[idx]).view(1,n_hid)
        return model.energy2(vis, hid)

    delta_beta = beta_list[j]-beta_list[i]
    delta_energy = _energy(j)-_energy(i)
    return torch.exp(delta_beta*delta_energy)

# Performs a replica swap according to the transition probability.
def swap(list0, list1, model):
    """Attempt one swap between a randomly chosen pair of adjacent replicas.

    ``list0`` (visible states) and ``list1`` (hidden states) are modified in
    place; nothing is returned. The pair (k, k+1) is accepted when
    ``swap_prob`` is at least a uniform random number, which treats any value
    above 1 as certain acceptance. With fewer than two replicas there is no
    neighbour to exchange with, so the call does nothing.
    """
    n_replicas = len(list0)
    if n_replicas < 2:
        return
    k = np.random.randint(0, n_replicas-1)
    if swap_prob(k, k+1, model, list0, list1) >= np.random.rand():
        # Exchanging the references is enough: the samplers replace list
        # entries with fresh tensors rather than mutating them in place.
        list0[k], list0[k+1] = list0[k+1], list0[k]
        list1[k], list1[k+1] = list1[k+1], list1[k]

def P_h(list0):
    """Count occurrences of each element, keyed by the element's ``str()`` form.

    Keys appear in order of first occurrence.
    """
    counts = {}
    for item in list0:
        key = str(item)
        counts[key] = counts.get(key, 0) + 1
    return counts

def prod(L):
    """Return the product of all items of ``L`` (1 for an empty iterable)."""
    running = 1
    for factor in L:
        # New factor on the left, matching the original operand order.
        running = factor * running
    return running

# def Estimate_Z(model0, states):
#     Z=0
#     for i in range(len(states[0])):
#         Z+=torch.exp(-model0.energy2(states[0][i], states[1][i])).detach()
#     return float(Z.detach().numpy())

def get_hist(list00, color='red'):
    """Draw a step histogram with unit-width bins and return (bin centers, counts).

    The bin range extends 30 units beyond the smallest and largest value.
    """
    lower = int(min(list00)-30)
    upper = int(max(list00)+30)
    counts, edges, _ = plt.hist(list00, bins=range(lower, upper, 1), histtype='step', color=color)
    centers = 0.5*(edges[1:]+edges[:-1])
    return centers, counts

def flatten_list(list0):
    """Concatenate the items of every sub-iterable of ``list0`` into one list."""
    flattened = []
    for sublist in list0:
        flattened.extend(sublist)
    return flattened


# Define statistical-physics functions

In [35]:
from scipy.stats import entropy
def Entropy(labels, base=None):
    """Shannon entropy of the empirical distribution of ``labels``.

    ``base`` is forwarded to ``scipy.stats.entropy`` (natural log when None).
    """
    counts = np.unique(labels, return_counts=True)[1]
    return entropy(counts, base=base)

def Entropy0(fullconfigs):
    """Entropy (natural log) of the empirical distribution of ``fullconfigs``.

    Configurations are identified by their ``str()`` form. With k the number
    of occurrences of a configuration and m_k the number of distinct
    configurations occurring exactly k times, the result is
    ``-sum_k (k * m_k / N) * log(k / N)``, N being the number of samples.
    Returns 0 for an empty input.
    """
    # k: how many times each distinct configuration occurs.
    config_count = {}
    for config in fullconfigs:
        key = str(config)
        config_count[key] = config_count.get(key, 0) + 1

    # m_k: how many distinct configurations occur exactly k times.
    kcount = {}
    for k in config_count.values():
        kcount[k] = kcount.get(k, 0) + 1

    N = len(fullconfigs)
    H_s = 0
    # Ascending k keeps the summation order (and rounding) of the original.
    for k in sorted(kcount):
        H_s -= (k*kcount[k]/N)*np.log(k/N)
    return H_s

def FE_mean_std(model0_dict, p):
    """Return ``[mean, std]`` of a per-sample free-energy-like quantity.

    Args:
        model0_dict: mapping with entries 'v', 'h' and 'W' (reshaped below
            using the notebook globals ``n_vis`` and ``n_hid``).
        p: dict mapping a hidden configuration, given as an integer (or a
            string convertible with ``int``), to its occurrence count.
            NOTE(review): P_h() keys are ``str()`` of the samples - confirm
            they are always integer-like.

    For every key the value is appended ``p[key]`` times, so the statistics
    are weighted by the counts.
    """
    FE_list=[]
    for i in range(len(p)):
        # n_vis=len(model0_dict['v'][0]); n_hid=len(model0_dict['h'][0])
        integer=list(p.keys())[i]
        # Hidden configuration as a (1, n_bits) float tensor.
        h=decimal_to_binary(int(integer),n_hid).to(device)
        a=model0_dict['v'].view(1,n_vis).detach()
        b=model0_dict['h'].view(1,n_hid).detach()
        W=model0_dict['W'].view(n_hid,n_vis).detach()
        
        # Product over visible units of sigmoid(-(h W + a)), scaled by 1e5; the
        # same factor is removed again in log space below (presumably to limit
        # underflow - TODO confirm).
        FE=prod(torch.sigmoid(-F.linear(h, W.t(), a)).detach()[0])*100000
        # Multiply by the empirical frequency p[integer] / sum(p).
        FE=FE*(p[integer])
        FE=FE/sum(list(p.values()))
        # log(product * frequency) - b.h
        FE=torch.log(FE.detach())-torch.log(torch.tensor([[100000]]))-torch.dot(b[0],h[0].t())
        for _ in range(p[integer]):
            FE_list.append(FE.detach())
    return [float(torch.mean(torch.tensor(FE_list)).detach().numpy()), float(torch.std(torch.tensor(FE_list)).detach().numpy())]

def dE_mean_std(model0_dict, model1_dict, v_list, h_list):
    """Return ``[mean, std]`` of E1(v, h) - E0(v, h) over paired samples.

    Despite their names, ``model0_dict`` and ``model1_dict`` must be objects
    exposing ``energy2(v, h)``; element ``[0]`` of each returned energy is used.
    """
    differences = []
    for idx, vis in enumerate(v_list):
        vis = vis.float().detach()
        hid = h_list[idx].float().detach()
        e_first = model0_dict.energy2(vis, hid)[0].detach()
        e_second = model1_dict.energy2(vis, hid)[0].detach()
        differences.append(e_second-e_first)
    mean_value = float(torch.mean(torch.tensor(differences)).numpy())
    std_value = float(torch.std(torch.tensor(differences)).numpy())
    return [mean_value, std_value]

def Energy(model0_dict, v_list, h_list):
    """Mean joint energy -v.a - v W^T h - h.b over paired configurations.

    Args:
        model0_dict: mapping with 'v' (1, n_vis), 'h' (1, n_hid) and
            'W' (n_hid, n_vis) tensors.
        v_list, h_list: equally long sequences (or 2-D tensors) whose i-th
            items are the visible / hidden configuration of sample i.

    Returns:
        The mean energy as a Python float (NaN when there are no samples).

    The computation uses torch operations on the device of the model
    tensors; the previous ``np.matmul`` calls only worked through NumPy's
    array wrapping of CPU tensors.
    """
    a = model0_dict['v'].detach()
    b = model0_dict['h'].detach()
    W = model0_dict['W'].detach()
    values = []
    for i in range(len(v_list)):
        v = torch.as_tensor(v_list[i], dtype=a.dtype, device=a.device)
        h = torch.as_tensor(h_list[i], dtype=a.dtype, device=a.device)
        e = -torch.matmul(v, a.t())-torch.matmul(torch.matmul(v, W.t()), h.t())-torch.matmul(h, b.t())
        values.append(e.detach().reshape(-1))
    if not values:
        return float('nan')
    # Average in float64 to avoid accumulating float32 rounding error.
    return float(torch.cat(values).double().mean())
    
def Energy_GPU(model0_dict, v_list0, h_list0):
    """Batched mean joint energy -v.a - v W^T h - h.b over paired samples.

    Args:
        model0_dict: mapping with 'v', 'h' and 'W' tensors; 'W' must hold
            n_hid * n_vis values, the sizes being taken from 'h' and 'v'.
        v_list0, h_list0: equally long sequences of tensors, one visible /
            hidden configuration per sample.

    Returns:
        The mean energy over all samples as ``numpy.float64`` (NaN for no
        samples).

    Runs on the GPU when one is available and on the CPU otherwise. The
    interaction term is a row-wise sum, so no N x N matrix is built, and
    every sample contributes to the mean.
    """
    run_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    a = model0_dict['v'].detach().to(run_device).reshape(-1).float()
    b = model0_dict['h'].detach().to(run_device).reshape(-1).float()
    W = model0_dict['W'].detach().to(run_device).reshape(b.numel(), a.numel()).float()
    if len(v_list0) == 0:
        return np.float64('nan')
    v = torch.stack(list(v_list0)).detach().to(run_device).reshape(len(v_list0), a.numel()).float()
    h = torch.stack(list(h_list0)).detach().to(run_device).reshape(len(h_list0), b.numel()).float()
    # Row n of (v W^T) * h summed over hidden units equals v_n W^T h_n, i.e.
    # the diagonal of (v W^T) h^T without forming the full product.
    interaction = (torch.matmul(v, W.t())*h).sum(dim=1)
    e = -torch.matmul(v, a)-interaction-torch.matmul(h, b)
    return np.float64(e.double().mean().item())

def FE_V(model0, v0):
    """Mean marginal free energy -sum(softplus(v W^T + b)) - v.a over ``v0``.

    ``model0`` is a mapping with 'v', 'h' and 'W'; ``v0`` is converted with
    ``torch.tensor`` and iterated along its first axis. Each item must be
    2-D because the softplus term is summed over ``dim=1``.
    """
    v0 = torch.tensor(v0)
    a = model0['v'].detach()  # visible bias
    b = model0['h'].detach()  # hidden bias
    W = model0['W'].detach()  # weight

    def _free_energy(sample):
        sample = sample.detach()
        v_term = torch.matmul(sample, a.t()).detach()
        pre_activation = F.linear(sample, W, b).detach()
        h_term = torch.sum(F.softplus(pre_activation), dim=1).detach()
        return torch.mean(-h_term - v_term).detach().numpy()

    values = [_free_energy(v0[i]) for i in range(len(v0))]
    return float(np.mean(values))

def sampling_with_PT(fullmodel, eq_step, n_step):
    """Parallel-tempering sampling that records only the replica at index 0.

    One replica per entry of the global ``beta_list`` is updated by Gibbs
    sampling. After ``eq_step`` unrecorded sweeps, each of the ``n_step``
    iterations performs one sweep and one swap attempt, then stores the
    states at index 0 as integers.

    Returns:
        (visible, hidden): two lists of ``n_step`` integers.
    """
    n_replicas = len(beta_list)
    # All replicas start from the all-ones hidden state; the visible entries
    # are placeholders that the first sweep overwrites.
    hidden_states_now0 = [decimal_to_binary(2**n_hid-1, n_hid)]*n_replicas
    visible_states_now0 = [1.]*n_replicas

    def _gibbs_sweep():
        # One hidden -> visible -> hidden update of every replica at its own beta.
        for idx in range(n_replicas):
            beta = beta_list[idx]
            visible_states_now0[idx] = fullmodel.hidden_to_visible(hidden_states_now0[idx].to(device), beta).bernoulli().detach()
            hidden_states_now0[idx] = fullmodel.visible_to_hidden(visible_states_now0[idx].to(device), beta).bernoulli().detach()

    # Equilibration sweeps.
    for _ in range(eq_step):
        _gibbs_sweep()

    coldest_visible0, coldest_hidden0 = [], []
    # Recorded sweeps with replica exchange.
    for _ in range(n_step):
        _gibbs_sweep()
        swap(visible_states_now0, hidden_states_now0, fullmodel)
        coldest_hidden0.append(int(binary_to_decimal(hidden_states_now0[0].view(n_hid))))
        coldest_visible0.append(int(binary_to_decimal(visible_states_now0[0].view(n_vis))))
    return coldest_visible0, coldest_hidden0

# Variant that saves the samples of every temperature (replica) at each step.
def sampling_with_PT2(fullmodel, eq_step, n_step):
    """Parallel-tempering sampling that records all replicas at every step.

    Same procedure as ``sampling_with_PT``, but each recorded step stores one
    integer per entry of the global ``beta_list``, taken right after the
    replica's Gibbs update and before the swap attempt.

    Returns:
        (visible, hidden): two lists of ``n_step`` lists, each inner list
        holding one integer per replica.
    """
    n_replicas = len(beta_list)
    hidden_states_now = [decimal_to_binary(2**n_hid-1, n_hid)]*n_replicas
    visible_states_now = [1.]*n_replicas

    def _update(idx):
        # Hidden -> visible -> hidden Gibbs update of replica idx at its beta.
        beta = beta_list[idx]
        visible_states_now[idx] = fullmodel.hidden_to_visible(hidden_states_now[idx].to(device), beta).bernoulli().detach()
        hidden_states_now[idx] = fullmodel.visible_to_hidden(visible_states_now[idx].to(device), beta).bernoulli().detach()

    # Equilibration sweeps.
    for _ in range(eq_step):
        for idx in range(n_replicas):
            _update(idx)

    visible_list, hidden_list = [], []
    # Recorded sweeps with replica exchange.
    for _ in range(n_step):
        hidden_tmp, visible_tmp = [], []
        for idx in range(n_replicas):
            _update(idx)
            hidden_tmp.append(int(binary_to_decimal(hidden_states_now[idx].view(n_hid))))
            visible_tmp.append(int(binary_to_decimal(visible_states_now[idx].view(n_vis))))
        swap(visible_states_now, hidden_states_now, fullmodel)
        hidden_list.append(hidden_tmp)
        visible_list.append(visible_tmp)
    return visible_list, hidden_list

def Curly_W(model_dict, v, h):
    """Return the list of ``beta[i]*E(x[i+1]) - beta[i+1]*E(x[i+1])`` over adjacent betas.

    ``v`` and ``h`` hold one integer-encoded visible / hidden state per entry
    of the global ``beta_list``; E is ``Energy`` under ``model_dict``.
    """
    def _term(i):
        # Energy of the state stored at index i+1, evaluated once and reused.
        energy = Energy(model_dict, decimal_to_binary(v[i+1], n_vis), decimal_to_binary(h[i+1], n_hid))
        return beta_list[i]*energy-beta_list[i+1]*energy

    return [_term(i) for i in range(len(beta_list)-1)]

def Curly_W_tilde(model_dict, v, h):
    """Return the list of ``beta[i-1]*E(x[i-1]) - beta[i]*E(x[i-1])`` for i >= 1.

    Counterpart of ``Curly_W`` that evaluates the energy of the state stored
    at index i-1 instead of i+1.
    """
    def _term(i):
        energy = Energy(model_dict, decimal_to_binary(v[i-1], n_vis), decimal_to_binary(h[i-1], n_hid))
        return beta_list[i-1]*energy-beta_list[i]*energy

    return [_term(i) for i in range(1, len(beta_list))]

def AISPT(model_dict, v_list, h_list):
    """Free-energy estimate ``-log(mean_n exp(-W_n)) - log(2**(n_vis+n_hid))``.

    ``W_n`` is the sum of ``Curly_W`` for sample n. ``v_list[n]`` and
    ``h_list[n]`` must hold one integer-encoded state per entry of the global
    ``beta_list``.

    The mean of the exponentials is evaluated in log space (log-sum-exp), so
    large work values no longer overflow ``np.exp`` or underflow to
    ``log(0)``.

    Raises:
        ValueError: if ``v_list`` is empty.
    """
    r = len(v_list)
    if r == 0:
        raise ValueError('AISPT needs at least one sample')
    log_weights = np.array([-float(np.sum(Curly_W(model_dict, v_list[n], h_list[n]))) for n in range(r)])
    # log(mean(exp(lw))) = logsumexp(lw) - log(r)
    log_mean_weight = np.logaddexp.reduce(log_weights)-np.log(r)
    return -log_mean_weight -np.log(2**(n_vis+n_hid))

def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)); elementwise for NumPy arrays."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

from scipy.optimize import minimize    
def BARPT(model_dict, v_list, h_list):
    """Minimise ``-log prod_n sigmoid(-W_n - x) * sigmoid(W~_n + x)`` over x.

    ``W_n`` and ``W~_n`` are the sums of ``Curly_W`` and ``Curly_W_tilde``
    for sample n.

    Returns:
        The ``scipy.optimize`` result object of a conjugate-gradient run
        started at x = 30.

    TODO: needs checking (note carried over from the original author).
    """
    r = len(v_list)
    # The work values do not depend on x, so they are computed once instead
    # of on every objective evaluation.
    W = np.array([float(np.sum(Curly_W(model_dict, v_list[n], h_list[n]))) for n in range(r)])
    W_t = np.array([float(np.sum(Curly_W_tilde(model_dict, v_list[n], h_list[n]))) for n in range(r)])

    def function(x):
        # log sigmoid(z) = -logaddexp(0, -z); summing logs avoids the
        # underflow of a product of many probabilities.
        return np.sum(np.logaddexp(0, W+x)+np.logaddexp(0, -W_t-x))

    Init_Point = 30
    # 'xtol' is not an option of the CG solver, so it is no longer passed.
    Optimum = minimize(function, Init_Point, method='CG', options={'disp': True})
    return Optimum
    

In [38]:
# Experiment configuration read as globals by the functions and cells in this notebook.
n_vis=9                                  # number of visible units
n_hid_list=[1, 2, 4, 8, 12, 16]          # hidden-layer sizes to process
T_list=[1.47, 1.78, 2.3, 5.2, 16]        # T values that appear in the data file names
n_beta=5                                 # number of parallel-tempering replicas
n_eq=100                                 # equilibration sweeps before recording
n_sample=200000                          # recorded sweeps per model
# Inverse temperatures from 1 (index 0) down to 0, one per replica.
beta_list=torch.tensor(np.linspace(1,0,n_beta).astype(float)).to(device)

# mn sample 만들기 2 (모든 path)

In [None]:
# For every (T, n_hid): load the nested mn state dicts (indexed [str(m)][str(n)]),
# sample each model with sampling_with_PT2 (all replicas kept at every step)
# and pickle the results as {m: {n: [visible, hidden]}}.
# NOTE: pd.read_pickle unpickles arbitrary objects - only use trusted files.
for T in T_list:
    for n_hid in n_hid_list:
        dict0={}
        models=pd.read_pickle('{base}/loss_IG/3*3/state_dict/n_hid={n_hid}_T={T}_mn.pkl'.format(base=base, n_hid=n_hid, T=T))
        for m in tnrange(10):
            dict0[str(m)]={}
            for n in range(10):
                model1_dict=models[str(m)][str(n)]
                # Fresh RBM of the right size, then load the stored parameters.
                model1=RBM(n_vis=n_vis, n_hid=n_hid, k=1, use_cuda=CUDA)
                model1.load_state_dict(model1_dict)
                a0, b0=sampling_with_PT2(model1, n_eq, n_sample)
                dict0[str(m)][str(n)]=[a0,b0]
        with open('{base}/loss_IG/3*3/samples/3*3_PT{n_beta}_n_hid={n_hid}_T={T}_mn_all.pkl'.format(base=base, n_beta=n_beta, n_hid=n_hid, T=T), 'wb') as f:
            pkl.dump(dict0, f)

# mn model의 샘플 만들기


In [None]:
# Same models as the "_mn_all" sampling, but sampled with sampling_with_PT, which
# records only the replica at index 0. Results go to "*_mn2.pkl" as
# {m: {n: [visible, hidden]}}.
for T in T_list:
    for n_hid in n_hid_list:
        dict0={}
        models=pd.read_pickle('{base}/loss_IG/3*3/state_dict/n_hid={n_hid}_T={T}_mn.pkl'.format(base=base, n_hid=n_hid, T=T))
        for m in tnrange(10):
            dict0[str(m)]={}
            for n in range(10):
                model1_dict=models[str(m)][str(n)]
                model1=RBM(n_vis=n_vis, n_hid=n_hid, k=1, use_cuda=CUDA)
                model1.load_state_dict(model1_dict)
                a0, b0=sampling_with_PT(model1, n_eq, n_sample)
                dict0[str(m)][str(n)]=[a0,b0]
        with open('{base}/loss_IG/3*3/samples/3*3_PT{n_beta}_n_hid={n_hid}_T={T}_mn2.pkl'.format(base=base, n_beta=n_beta, n_hid=n_hid, T=T), 'wb') as f:
            pkl.dump(dict0, f)


# Sample the ten CM_m models (state dicts indexed by str(m)) with
# sampling_with_PT and store {m: [visible, hidden]} in "*_CM_m.pkl".
for T in T_list:
    for n_hid in n_hid_list:
        dict0={}
        models=pd.read_pickle('{base}/loss_IG/3*3/state_dict/n_hid={n_hid}_T={T}_CM_m.pkl'.format(base=base, n_hid=n_hid, T=T))
        for m in tnrange(10):
            # Placeholder entry; it is replaced by the sample lists below.
            dict0[str(m)]={}
            model1_dict=models[str(m)]
            model1=RBM(n_vis=n_vis, n_hid=n_hid, k=1, use_cuda=CUDA)
            model1.load_state_dict(model1_dict)
            a0, b0=sampling_with_PT(model1, n_eq, n_sample)
            dict0[str(m)]=[a0,b0]
        with open('{base}/loss_IG/3*3/samples/3*3_PT{n_beta}_n_hid={n_hid}_T={T}_CM_m.pkl'.format(base=base, n_beta=n_beta, n_hid=n_hid, T=T), 'wb') as f:
            pkl.dump(dict0, f)

# Sample the single CM model of every (T, n_hid) with sampling_with_PT and
# store [visible, hidden] in "*_CM.pkl".
for T in T_list:
    for n_hid in n_hid_list:
        model1_dict=pd.read_pickle('{base}/loss_IG/3*3/state_dict/n_hid={n_hid}_T={T}_CM.pkl'.format(base=base, n_hid=n_hid, T=T))
        model1=RBM(n_vis=n_vis, n_hid=n_hid, k=1, use_cuda=CUDA)
        model1.load_state_dict(model1_dict)
        a0, b0=sampling_with_PT(model1, n_eq, n_sample)
        with open('{base}/loss_IG/3*3/samples/3*3_PT{n_beta}_n_hid={n_hid}_T={T}_CM.pkl'.format(base=base, n_beta=n_beta, n_hid=n_hid, T=T), 'wb') as f:
            pkl.dump([a0,b0], f)

# CM, CM_m, mn의 Free energy 계산

In [None]:
# Free-energy estimates (AISPT) for the mn, CM_m and CM models of every
# (T, n_hid), saved as {'mn': ..., 'CM_m': ..., 'CM': ...}.
# AISPT -> Curly_W indexes each sample by replica (v[i+1]), so every sample
# must hold one entry per beta, which is the layout sampling_with_PT2 returns.
# NOTE(review): the "*_CM_m_all.pkl" and "*_CM_all.pkl" inputs are not written
# by any cell visible in this notebook (the CM sampling cells write
# "*_CM_m.pkl" / "*_CM.pkl" via sampling_with_PT) - confirm where they come from.
for T in T_list:
    for n_hid in n_hid_list:
        dict0={}; dict1={}; dict2={}
        model_dicts_mn=pd.read_pickle('{base}/loss_IG/3*3/state_dict/n_hid={n_hid}_T={T}_mn.pkl'.format(base=base, n_hid=n_hid, T=T))
        model_dicts_CM_m=pd.read_pickle('{base}/loss_IG/3*3/state_dict/n_hid={n_hid}_T={T}_CM_m.pkl'.format(base=base, n_hid=n_hid, T=T))
        model_dicts_CM=pd.read_pickle('{base}/loss_IG/3*3/state_dict/n_hid={n_hid}_T={T}_CM.pkl'.format(base=base, n_hid=n_hid, T=T))
        vh_mn=pd.read_pickle('{base}/loss_IG/3*3/samples/3*3_PT{n_beta}_n_hid={n_hid}_T={T}_mn_all.pkl'.format(base=base, n_beta=n_beta, n_hid=n_hid, T=T))
        vh_CM_m=pd.read_pickle('{base}/loss_IG/3*3/samples/3*3_PT{n_beta}_n_hid={n_hid}_T={T}_CM_m_all.pkl'.format(base=base, n_beta=n_beta, n_hid=n_hid, T=T))
        vh_CM=pd.read_pickle('{base}/loss_IG/3*3/samples/3*3_PT{n_beta}_n_hid={n_hid}_T={T}_CM_all.pkl'.format(base=base, n_beta=n_beta, n_hid=n_hid, T=T))

        for m in tnrange(10):
            dict0[str(m)]={}
            for n in range(10):
                model_dict0=model_dicts_mn[str(m)][str(n)]
                # dict0[str(m)][str(n)]=FE_mean_std(model_dict0, P_h(vh_mn1[str(m)][str(n)][1]+vh_mn2[str(m)][str(n)][1]))[0]
                # Index 0 of a sample entry is the visible list, index 1 the hidden list.
                dict0[str(m)][str(n)]=AISPT(model_dict0, vh_mn[str(m)][str(n)][0], vh_mn[str(m)][str(n)][1])
            model_dict1=model_dicts_CM_m[str(m)]
            # dict1[str(m)]=FE_mean_std(model_dict1, P_h(vh_CM_m[str(m)][1]))[0]
            dict1[str(m)]=AISPT(model_dict1, vh_CM_m[str(m)][0], vh_CM_m[str(m)][1])
        model_dict2=model_dicts_CM
        # dict2=FE_mean_std(model_dict2,P_h(vh_CM[1]))[0]
        dict2=AISPT(model_dict2, vh_CM[0], vh_CM[1])
        with open('{base}/loss_IG/3*3/data/3*3_PT{n_beta}_n_hid={n_hid}_T={T}_FE_AISPT.pkl'.format(base=base, n_beta=n_beta, n_hid=n_hid, T=T), 'wb') as f:
            pkl.dump({'mn':dict0, 'CM_m':dict1, 'CM':dict2}, f)

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

# CM, CM_m, mn의 <E'-E> 계산

In [None]:
# Mean energy differences <E' - E>: the energy under each mn model minus the
# energy under the CM (resp. CM_m) model, averaged over the samples drawn from
# the CM (resp. CM_m) model. Saved as {'CM': {m: {n: value}}, 'CM_m': {...}}.
for T in T_list:
    for n_hid in n_hid_list:
        dE_CM={}; dE_CM_m={}
        model_dict_CM=pd.read_pickle('{base}/loss_IG/3*3/state_dict/n_hid={n_hid}_T={T}_CM.pkl'.format(base=base, n_hid=n_hid, T=T))
        model_dict_CM_m=pd.read_pickle('{base}/loss_IG/3*3/state_dict/n_hid={n_hid}_T={T}_CM_m.pkl'.format(base=base, n_hid=n_hid, T=T))
        model_dict_mn=pd.read_pickle('{base}/loss_IG/3*3/state_dict/n_hid={n_hid}_T={T}_mn.pkl'.format(base=base, n_hid=n_hid, T=T))
        vh_CM=pd.read_pickle('{base}/loss_IG/3*3/samples/3*3_PT{n_beta}_n_hid={n_hid}_T={T}_CM.pkl'.format(base=base, n_beta=n_beta, n_hid=n_hid, T=T))
        vh_CM_m=pd.read_pickle('{base}/loss_IG/3*3/samples/3*3_PT{n_beta}_n_hid={n_hid}_T={T}_CM_m.pkl'.format(base=base, n_beta=n_beta,  n_hid=n_hid, T=T))
        # Decode the integer-encoded CM samples back into 1-D binary tensors.
        vh_CM_v=[decimal_to_binary(vh_CM[0][i],n_vis)[0] for i in range(len(vh_CM[0]))]
        vh_CM_h=[decimal_to_binary(vh_CM[1][i],n_hid)[0] for i in range(len(vh_CM[1]))]
        for m in tnrange(10):
            dE_CM[str(m)]={}; dE_CM_m[str(m)]={}
            # Same decoding for the samples of the m-th CM_m model.
            vh_CM_m_v=[decimal_to_binary(vh_CM_m[str(m)][0][i],n_vis)[0] for i in range(len(vh_CM_m[str(m)][0]))]
            vh_CM_m_h=[decimal_to_binary(vh_CM_m[str(m)][1][i],n_hid)[0] for i in range(len(vh_CM_m[str(m)][1]))]
            for n in range(10):
                dE_CM[str(m)][str(n)]=Energy(model_dict_mn[str(m)][str(n)], vh_CM_v, vh_CM_h)-Energy(model_dict_CM, vh_CM_v, vh_CM_h)
                dE_CM_m[str(m)][str(n)]=Energy(model_dict_mn[str(m)][str(n)], vh_CM_m_v, vh_CM_m_h)-Energy(model_dict_CM_m[str(m)], vh_CM_m_v, vh_CM_m_h)
        with open('{base}/loss_IG/3*3/data/3*3_PT{n_beta}_n_hid={n_hid}_T={T}_dE.pkl'.format(base=base, n_beta=n_beta, n_hid=n_hid, T=T), 'wb') as f:
            pkl.dump({'CM':dE_CM, 'CM_m':dE_CM_m}, f)

In [None]:
# Pairwise energy differences between all 100 mn models: for the samples of
# model i, store Energy(model j) - Energy(model i) under dE[i][j], where i and
# j are two-digit strings.
# NOTE(review): this reads "*_mn.pkl" samples, while the sampling cells visible
# in this notebook write "*_mn2.pkl" and "*_mn_all.pkl" - confirm the file name.
for T in T_list:
    for n_hid in n_hid_list:
        dE={}
        model_dict_mn=pd.read_pickle('{base}/loss_IG/3*3/state_dict/n_hid={n_hid}_T={T}_mn.pkl'.format(base=base, n_hid=n_hid, T=T))
        vh_mn=pd.read_pickle('{base}/loss_IG/3*3/samples/3*3_PT{n_beta}_n_hid={n_hid}_T={T}_mn.pkl'.format(base=base, n_beta=n_beta,  n_hid=n_hid, T=T))
        for i in tnrange(100):
            # Two-digit label: the ones digit (i[1]) is the outer key and the
            # tens digit (i[0]) the inner key of the nested dicts.
            i=str(i).zfill(2)
            model0=model_dict_mn[i[1]][i[0]]
            dE[str(i)]={}
            # Decode the integer-encoded samples of model i into binary tensors.
            vh_mn_v=[decimal_to_binary(vh_mn[i[1]][i[0]][0][p],n_vis)[0] for p in range(len(vh_mn[i[1]][i[0]][0]))]
            vh_mn_h=[decimal_to_binary(vh_mn[i[1]][i[0]][1][p],n_hid)[0] for p in range(len(vh_mn[i[1]][i[0]][1]))]
            for j in range(100):
                j=str(j).zfill(2)
                model1=model_dict_mn[j[1]][j[0]]
                dE[str(i)][str(j)]=float(Energy_GPU(model1, vh_mn_v, vh_mn_h))-float(Energy_GPU(model0, vh_mn_v, vh_mn_h))
        with open('{base}/loss_IG/3*3/data/3*3_PT{n_beta}_n_hid={n_hid}_T={T}_dE10000.pkl'.format(base=base, n_beta=n_beta, n_hid=n_hid, T=T), 'wb') as f:
            pkl.dump(dE, f) 

# Entropy of m

In [21]:
# Entropy of the data: S_m[m] is the entropy of the m-th group of
# configurations (Entropy, based on np.unique), S_CM the entropy of all groups
# pooled together (Entropy0, based on str() keys). Saved per T.
for T in T_list:
    fullconfigs=pd.read_pickle('{base}/loss_IG/3*3/3*3_full_T={T}.pkl'.format(base=base, T=T))
    S_m={}
    for m in range(10):
        S_m[str(m)]=Entropy(fullconfigs[m])
    S_CM=Entropy0(flatten_list(fullconfigs))
    with open('{base}/loss_IG/3*3/data/3*3_T={T}_S0.pkl'.format(base=base, T=T), 'wb') as f:
        pkl.dump({'CM_m':S_m, 'CM':S_CM}, f)

# CM, CM_m, mn의 marginalized Free energy 계산

In [28]:
# Marginalised free energy FE_V of the data under the mn, CM_m and CM models,
# saved as {'mn': {m: {n: value}}, 'CM_m': {m: value}, 'CM': value}.
for T in T_list:
    fullconfigs=pd.read_pickle('{base}/loss_IG/3*3/3*3_full_T={T}.pkl'.format(base=base, T=T))
    # NOTE(review): only n_hid=2 is processed here, unlike the other cells
    # which loop over n_hid_list - confirm whether this restriction is intended.
    for n_hid in [2]:
        dict0={}; dict1={}; dict2={}
        model_dicts_mn=pd.read_pickle('{base}/loss_IG/3*3/state_dict/n_hid={n_hid}_T={T}_mn.pkl'.format(base=base, n_hid=n_hid, T=T))
        model_dicts_CM_m=pd.read_pickle('{base}/loss_IG/3*3/state_dict/n_hid={n_hid}_T={T}_CM_m.pkl'.format(base=base, n_hid=n_hid, T=T))
        model_dicts_CM=pd.read_pickle('{base}/loss_IG/3*3/state_dict/n_hid={n_hid}_T={T}_CM.pkl'.format(base=base, n_hid=n_hid, T=T))

        for m in tnrange(10):
            dict0[str(m)]={}
            for n in range(10):
                model_dict0=model_dicts_mn[str(m)][str(n)]
                # Store the result: it used to be computed and discarded,
                # leaving the 'mn' entry of the saved file empty.
                dict0[str(m)][str(n)]=FE_V(model_dict0, fullconfigs[m])
            model_dict1=model_dicts_CM_m[str(m)]
            dict1[str(m)]=FE_V(model_dict1, fullconfigs[m])
        model_dict2=model_dicts_CM
        dict2=FE_V(model_dict2, flatten_list(fullconfigs))
        with open('{base}/loss_IG/3*3/data/3*3_n_hid={n_hid}_T={T}_FE_V.pkl'.format(base=base, n_hid=n_hid, T=T), 'wb') as f:
            pkl.dump({'mn':dict0, 'CM_m':dict1, 'CM':dict2}, f)

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

tensor([[16.5818, 14.6011]])
tensor([[-16.5818, -14.6011]])
tensor([[16.5818, 14.6011]])
tensor([[-16.5818, -14.6011]])
tensor([[16.5818, 14.6011]])
tensor([[-16.5818, -14.6011]])
tensor([[16.5818, 14.6011]])
tensor([[-16.5818, -14.6011]])
tensor([[16.5818, 14.6011]])
tensor([[-16.5818, -14.6011]])
tensor([[16.5818, 14.6011]])
tensor([[-16.5818, -14.6011]])
tensor([[4.4187, 6.7715]])
tensor([[-14.0793, -16.4321]])
tensor([[16.5818, 14.6011]])
tensor([[-16.5818, -14.6011]])
tensor([[16.5818, 14.6011]])
tensor([[-16.5818, -14.6011]])
tensor([[16.5818, 14.6011]])
tensor([[-16.5818, -14.6011]])
tensor([[11.9456,  9.3290]])
tensor([[-16.7156, -14.0990]])
tensor([[16.5818, 14.6011]])
tensor([[-16.5818, -14.6011]])
tensor([[16.5818, 14.6011]])
tensor([[-16.5818, -14.6011]])
tensor([[10.1814, 11.0163]])
tensor([[-15.0212, -15.8560]])
tensor([[16.5818, 14.6011]])
tensor([[-16.5818, -14.6011]])
tensor([[16.5818, 14.6011]])
tensor([[-16.5818, -14.6011]])
tensor([[16.5818, 14.6011]])
tensor([[-16.

KeyboardInterrupt: ignored