<a href="https://colab.research.google.com/github/physicaone/loss_IG/blob/master/%5B210516%5DGet_data_with_PT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import numpy as np
import torch
import torchvision.datasets
import torchvision.models
import torchvision.transforms
import torch.nn.functional as F
from torchvision import datasets, transforms
from torchvision.utils import make_grid
import torch.utils.data
import torch.nn as nn
from datetime import datetime
from tqdm import tqdm, tnrange
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
%matplotlib inline
# np.seterr(divide='ignore', invalid='ignore')
import itertools
import re
from time import sleep

In [10]:
class RBM(nn.Module):

    def __init__(self, n_vis, n_hid, k, use_cuda):
        """Create a RBM."""
        super(RBM, self).__init__()
        
        if use_cuda==True:
            self.v = nn.Parameter(torch.ones(1, n_vis).cuda())
            self.h = nn.Parameter(torch.zeros(1, n_hid).cuda())
            self.W = nn.Parameter(torch.randn(n_hid, n_vis).cuda())
            self.k = k
        else:
            self.v = nn.Parameter(torch.ones(1, n_vis))
            self.h = nn.Parameter(torch.zeros(1, n_hid))
            self.W = nn.Parameter(torch.randn(n_hid, n_vis))
            self.k = k     

    def visible_to_hidden(self, v, beta):
        return torch.sigmoid(F.linear(v, self.W, self.h)*beta)

    def hidden_to_visible(self, h, beta):
        return torch.sigmoid(F.linear(h, self.W.t(), self.v)*beta)

    def free_energy(self, v):
        v_term = torch.matmul(v, self.v.t())
        w_x_h = F.linear(v, self.W, self.h)
        h_term = torch.sum(F.softplus(w_x_h), dim=1)
        return torch.mean(-h_term - v_term)
    
    def energy(self, v):
        v=v.bernoulli()
        h=torch.sigmoid(F.linear(v, self.W, self.h))
        h=h.bernoulli()
        return -torch.matmul(v, self.v.t())-torch.matmul(torch.matmul(v, self.W.t()),h.t())-torch.matmul(h, self.h.t())
    
    def energy2(self, v, h):
        v=v.bernoulli()
        h=h.bernoulli()
        return -torch.matmul(v, self.v.t())-torch.matmul(torch.matmul(v, self.W.t()),h.t())-torch.matmul(h, self.h.t())

    def forward(self, v):
        h = self.visible_to_hidden(v)
        h = h.bernoulli()
        for _ in range(self.k):
            v_gibb = self.hidden_to_visible(h)
            v_gibb = v_gibb.bernoulli()
            h = self.visible_to_hidden(v_gibb)
            h = h.bernoulli()
        return v, v_gibb

In [11]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
CUDA = torch.cuda.is_available()
CUDA_DEVICE = 0

if CUDA:
    device='cuda'
else:
    device='cpu'
torch.cuda.is_available()

False

In [13]:
CUDA

False

In [14]:
# 아래 두 함수는 PT와는 아무 상관 없습니다. 10진법을 2진법으로 바꾸고 리스트 형태로 변형하는 함수입니다. 예)15->[1,1,1,1]
# 그리고 그 다음은 역함수입니다.
def decimal_to_binary(integer):
    string=bin(integer)[2:]
    list0=[float(d) for d in string]
    while len(list0)<n_hid:
        list0=[0.]+list0
    return torch.tensor([list0])

def binary_to_decimal(list0):
    value=0
    for i in range(len(list0)):
        value+=list0[-i-1]*2**(i)
    return value

# 이 함수는 PT에 사용되는 transition 확률을 계산합니다.
def swap_prob(i,j, model, list00, list11):
    v1=torch.tensor(list00[i]).view(1,n_vis)
    v2=torch.tensor(list00[j]).view(1,n_vis)
    h1=torch.tensor(list11[i]).view(1,n_hid)
    h2=torch.tensor(list11[j]).view(1,n_hid)
    beta1=beta_list[i]
    beta2=beta_list[j]
    return torch.exp((beta2-beta1)*(model.energy2(v2, h2)-model.energy2(v1, h1)))

# 이 함수는 tansition 확률에 의거, swap을 수행합니다.
def swap(list0, list1, model):
    k=np.random.randint(0, len(list0)-1)
    combination=[k, k+1]
    if swap_prob(combination[0], combination[1], model, list0, list1)>=np.random.rand():
        a=list0[combination[0]]
        b=list0[combination[1]] 
        list0[combination[0]]=b
        list0[combination[1]]=a
        
        c=list1[combination[0]]
        d=list1[combination[1]] 
        list1[combination[0]]=d
        list1[combination[1]]=c
    else: None
#     return list0, list1
#     combinations=list(itertools.combinations(list(range(len(list0))), 2))
#     for k in range(len(combinations)):
#         if swap_prob(combinations[k][1], combinations[k][0])>=np.random.rand():
#             list0[combinations[k][0]]=list0[combinations[k][1]]; list0[combinations[k][1]]=list0[combinations[k][0]]
#         else: None
#     return list0

def P_h(list0):
    config_count={}
    for i in range(len(list0)):
        config_count[str(int(binary_to_decimal(list0[i][0])))]=0
    for i in range(len(list0)):
        config_count[str(int(binary_to_decimal(list0[i][0])))]+=1
    return config_count

def prod(L):
    p=1
    for i in L:
        p= i * p
    return p

# def Estimate_Z(model0, states):
#     Z=0
#     for i in range(len(states[0])):
#         Z+=torch.exp(-model0.energy2(states[0][i], states[1][i])).detach()
#     return float(Z.detach().numpy())

def get_hist(list00, color='red'):
    bins=range(int(min(list00)-30), int(max(list00)+30), 1)
    y1,x1,_ = plt.hist(list00, bins = bins, histtype='step', color=color)
    x1 = 0.5*(x1[1:]+x1[:-1])
    return x1, y1

def Shannon_entropy(list0):
    H_s=0
    N=np.sum(list0)
    for i in range(len(list0)):
        H_s-=list0[i]*np.log(list0[i]/N)/N
    return H_s



eq step 도입하기. 수렴성 확인하기 <br>
다 0으로 vs 랜덤으로 초기조건 주고, 초기효과가 사라지는 시점 비교해보기 <br>
h간의 FE편차가 얼마나 줄어드는가 (시계열적으로) 
<!-- beta_list 간격좀 바꿔보기 -->
<!-- PT 1스텝당 스왑 몇번 할지 -->
<!-- T별로 Autocorrelation 측정해보기 -->

# $F=\ln[Q(\vec{h})\prod_i\sigma(-a_i-\sum_jW_{ij}h_j)]-\vec{b}\cdot\vec{h}$

In [15]:
def FE_mean_std(model0, p):
    FE_list=[]
    for i in range(len(p)):
        integer=list(p.keys())[i]
        n_vis=len(model0.v[0]); n_hid=len(model0.h[0])
        h=decimal_to_binary(int(integer)).to(device)
        a=model0.v.view(1,n_vis).detach()
        b=model0.h.view(1,n_hid).detach()
        W=model0.W.view(n_hid,n_vis).detach()
        
        FE=prod(torch.sigmoid(-F.linear(h, W.t(), a)).detach()[0])
        FE=FE*p[integer]
        FE=FE/sum(list(p.values()))
        FE=torch.log(FE.detach())-torch.dot(b[0],h[0].t())
        FE_list.append(FE.detach())
    return torch.mean(torch.tensor(FE_list)), torch.std(torch.tensor(FE_list))

# step별 온도별 스테이트를 모두 저장하지 않고 매 스텝의 온도별 값 1개씩만 hidden_states_now에 기록합니다.
# 물론, 가장 차가운 모델(beta=1)은 스텝별 상태도 따로 저장하여 그 결과를 샘플링 결과로써 활용합니다.
def FE_PT(n_hid, model):
    hidden_states_now=[decimal_to_binary(0)]*len(beta_list)
    visible_states_now=[0.]*len(beta_list)
    # hottest=[]
    coldest_hidden=[]
    coldest_visible=[]

    for step in range(100000):
        for i in range(len(beta_list)):
            visible_states_now[i]=model.hidden_to_visible(hidden_states_now[i], beta_list[i]).bernoulli().detach()
            hidden_states_now[i]=model.visible_to_hidden(visible_states_now[i], beta_list[i]).bernoulli().detach()
        swap(visible_states_now, hidden_states_now, model)
        coldest_hidden.append(hidden_states_now[0])
        coldest_visible.append(visible_states_now[0])
    m, s=FE_mean_std(model, P_h(coldest_hidden))
    print(n_hid, m, s)
    # with open('coldest_n_hid={n_hid}_PT.pkl'.format(n_hid=n_hid), 'wb') as f:
    #     pkl.dump(P_h(coldest_hidden), f)


In [16]:

# for test
def dE_mean_std(model0, model1, v_list, h_list):
    dE=[]
    for i in range(len(v_list)):
        E0=model0.energy2(v_list[i], h_list[i]).detach()[0]
        E1=model1.energy2(v_list[i], h_list[i]).detach()[0]
        dE.append(E1-E0)
    return torch.mean(torch.tensor(dE)), torch.std(torch.tensor(dE))

def FE_and_dE_PT(fullmodel, submodel, eq_step, n_step):
    # states for full model
    hidden_states_now0=[decimal_to_binary(2**n_hid-1)]*len(beta_list)
    visible_states_now0=[1.]*len(beta_list)
    # states for sub model
    hidden_states_now1=[decimal_to_binary(2**n_hid-1)]*len(beta_list)
    visible_states_now1=[1.]*len(beta_list)
    # lists for fullmode samples
    coldest_hidden0=[]
    coldest_visible0=[]
    # lists for submodel samples
    coldest_hidden1=[]
    coldest_visible1=[]
    # lists for FE of full model
    FE0_mean_list=[]
    FE0_std_list=[]
    # lists for Fe of sub model
    FE1_mean_list=[]
    FE1_std_list=[]
    # lists for dE
    dE_mean_list=[]
    dE_std_list=[]
    
    # equilibrium steps for full model
    for step in range(eq_step):
        for i in range(len(beta_list)):
            visible_states_now0[i]=fullmodel.hidden_to_visible(hidden_states_now0[i].to(device), beta_list[i]).bernoulli().detach()
            hidden_states_now0[i]=fullmodel.visible_to_hidden(visible_states_now0[i].to(device), beta_list[i]).bernoulli().detach()
    
    # equilibrium steps for sub model
    for step in range(eq_step):
        for i in range(len(beta_list)):
            visible_states_now1[i]=submodel.hidden_to_visible(hidden_states_now1[i].to(device), beta_list[i]).bernoulli().detach()
            hidden_states_now1[i]=submodel.visible_to_hidden(visible_states_now1[i].to(device), beta_list[i]).bernoulli().detach()

    # Tasks with PT
    for step in tnrange(n_step):
        for i in range(len(beta_list)):
            # Gibbs sampling of fu
            visible_states_now0[i]=fullmodel.hidden_to_visible(hidden_states_now0[i].to(device), beta_list[i]).bernoulli().detach()
            hidden_states_now0[i]=fullmodel.visible_to_hidden(visible_states_now0[i].to(device), beta_list[i]).bernoulli().detach()
            visible_states_now1[i]=submodel.hidden_to_visible(hidden_states_now1[i].to(device), beta_list[i]).bernoulli().detach()
            hidden_states_now1[i]=submodel.visible_to_hidden(visible_states_now1[i].to(device), beta_list[i]).bernoulli().detach()
        swap(visible_states_now0, hidden_states_now0, fullmodel)
        swap(visible_states_now1, hidden_states_now1, submodel)
        coldest_hidden0.append(hidden_states_now0[0])
        coldest_visible0.append(visible_states_now0[0])
        coldest_hidden1.append(hidden_states_now1[0])
        coldest_visible1.append(visible_states_now1[0])
        if step%(n_step/10)==0:
            Fm0, Fs0=FE_mean_std(fullmodel, P_h(coldest_hidden0))
            FE0_mean_list.append(Fm0)
            FE0_std_list.append(Fs0)
            Fm1, Fs1=FE_mean_std(submodel, P_h(coldest_hidden1))
            FE1_mean_list.append(Fm1)
            FE1_std_list.append(Fs1)
            Em, Es=dE_mean_std(fullmodel, submodel, coldest_visible0, coldest_hidden0)
            dE_mean_list.append(Em)
            dE_std_list.append(Es)

    return FE0_mean_list, FE0_std_list, FE1_mean_list, FE1_std_list, dE_mean_list, dE_std_list

In [None]:
beta_list=torch.tensor(np.linspace(1,0,40).astype(int)).to(device)
n_vis=16
T='cri'
def get_data(n_hid, T):
    por='full'
    model0=torch.load('drive/MyDrive/loss_IG/Ising/lr=0.01/state_dict/2021-05-02_n_hid={n_hid}_epoch=1000_{por}_T={T}'.format(n_hid=n_hid, por=por, T=T), map_location=device)
    fullmodel=RBM(n_vis=n_vis, n_hid=n_hid, k=1, use_cuda=CUDA)
    fullmodel.load_state_dict(model0)
    por='sub'
    model1=torch.load('drive/MyDrive/loss_IG/Ising/lr=0.01/state_dict/2021-05-02_n_hid={n_hid}_epoch=1000_{por}_T={T}'.format(n_hid=n_hid, por=por, T=T), map_location=device)
    submodel=RBM(n_vis=n_vis, n_hid=n_hid, k=1, use_cuda=CUDA)
    submodel.load_state_dict(model1)
    FE0_mean_list16, FE0_std_list16, FE1_mean_list16, FE1_std_list16, dE_mean_list16, dE_std_list16=FE_and_dE_PT(fullmodel, submodel, 10, 1000000)
    return [FE0_mean_list16, FE0_std_list16, FE1_mean_list16, FE1_std_list16, dE_mean_list16, dE_std_list16]
PT40=[]
for n_hid in [1,2,4,8,16]:
    PT40.append(get_data(n_hid, T))
with open('drive/MyDrive/loss_IG/Ising/lr=0.01/data_PT40_1M_{T}.pkl'.format(T=T), 'wb') as f:
    pkl.dump(PT40, f)

HBox(children=(FloatProgress(value=0.0, max=1000000.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1000000.0), HTML(value='')))

In [None]:
T='inf'
i=1
data0=pd.read_pickle('drive/MyDrive/loss_IG/Ising/lr=0.01/data_PT40_1M_{T}.pkl'.format(T=T))
d = {'Fmean': data0[i][0], 'Fstd': data0[i][1], 'Fp_mean':data0[i][2], 'Fp_std':data0[i][3], 'dE_mean':data0[i][4], 'dE_std':data0[i][5]}
df = pd.DataFrame(data=d, index=['0.1M', '0.2M', '0.3M' , '0.4M' , '0.5M' , '0.6M', '0.7M', '0.8M', '0.9M', '1M'])
df.style.set_table_attributes("style='display:inline'").set_caption('Caption table')
df

# n_hid=16

Unnamed: 0,Fmean,Fstd,Fp_mean,Fp_std
0.1M,-11.187125,0.0,-9.231873,0.0
0.2M,-10.913754,0.438017,-11.514229,0.041189
0.3M,-11.214238,0.232848,-11.514883,0.05925
0.4M,-10.930634,0.610621,-11.504685,0.048278
0.5M,-10.969474,0.465314,-11.490219,0.03608
0.6M,-11.039572,0.37999,-11.490772,0.023371
0.7M,-11.02245,0.393445,-11.495219,0.026775
0.8M,-11.055025,0.324285,-11.490352,0.024045
0.9M,-11.075507,0.256054,-11.49208,0.024673
1M,-11.055081,0.280483,-11.495882,0.028407


In [None]:
fullconfigs=torch.tensor(pd.read_pickle('drive/MyDrive/loss_IG/4*4_inf_full.pkl'))

In [None]:
def FE_V(model, v):
    a=model.v
    b=model.h
    W=model.W
    v_term = torch.matmul(v, a.t())
    w_x_h = F.linear(v, W, b)
    h_term = torch.sum(F.softplus(w_x_h), dim=1)
    return torch.mean(-h_term - v_term)

for n_hid in [1,2,4,8,16]:
    Fq=[]
    por='full'
    T='inf'
    model0=torch.load('Ising/lr=0.01/models/2021-05-02_n_hid={n_hid}_epoch=1000_{por}_T={T}'.format(n_hid=n_hid, por=por, T=T), map_location=device)
    for i in range(len(fullconfigs)):
        Fq.append(FE_V(model0, fullconfigs[i]).detach().numpy())
    print(n_hid, np.mean(Fq))
#     print(n_hid, first_term(model0, torch.tensor(fullconfigs)))


1 -0.11297618
2 -0.098462254
4 -0.104218565
8 -0.10864953
16 -1.5577167


In [None]:
def get_H_s(x, y):
    list100=[]
    list100kmk=[]
    for i in range(len(x)):
        list100kmk.append(x[i]*y[i])
    for i in range(len(x)):
        for j in range(list100kmk[i]):
            list100.append(x[i])
    N=len(list100)
    H_s=0
    for i in range(len(x)):
        H_s-=(x[i]*y[i]/N)*np.log2(x[i]/N)
    return H_s