## 100개의 feature을 통해서 label y를 예측하기 위한 프로세스 마련. 

In [None]:
import pandas as pd
import numpy as np

import random
import pickle
import os

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.datasets import make_moons
def get_noisy_two_moons(n_samples=1000, n_feats=100, noise_twomoon=0.1, noise_nuisance=1.0, seed_=1234):
    X, Y = make_moons(n_samples=n_samples, noise=noise_twomoon, random_state=seed_)
    np.random.seed(seed_)
    N = np.random.normal(loc=0., scale=noise_nuisance, size=[n_samples, n_feats-2])
    X = np.concatenate([X, N], axis=1)

    Y_onehot = np.zeros([n_samples, 2])
    Y_onehot[Y == 0, 0] = 1
    Y_onehot[Y == 1, 1] = 1

    return X, Y, Y_onehot

##function that arbritrally adds up the feature (there are only two important features but, in order to check for the robustness of the method,  add up all the features)
def get_blockcorr(X, block_size=10, noise_=0.5, seed_=1234): 
    '''
        noise 0.5 ~ 0.85 correlation
        noise 1.0 ~ 0.66 correlation
    '''
    for p in range(X.shape[1]):
        np.random.seed(seed_ + p)
        tmp   = X[:, [p]] + np.random.normal(loc=0., scale=noise_, size=[X.shape[0], block_size-1])

        if p == 0:
            X_new = np.concatenate([X[:, [p]], tmp], axis=1)
        else:
            X_new = np.concatenate([X_new, X[:, [p]], tmp], axis=1)    
    return X_new   

In [None]:
## L is a lower diagonal form of a covariance matrix, and in this case we assume there is no removement of index. 


In [None]:
import scipy
def mask_generation(mb_size_, pi_, L):
    ## mb_size is a size of minibatch, pi_ as a hyper parameter that controls the probability, 
    epsilon = np.random.normal(loc=0., scale=1., size=[np.shape(L)[0], mb_size_])
    g=np.matmul(L, epsilon)
    m = (1/2 * (1 + scipy.special.erf(g/np.sqrt(2)) ) < pi_).astype(float).T
    return m

In [None]:
## let me create any correlation matrix
def correlation_matrix_generator(n_feats, seed_=1234):
    np.random.seed(seed_)
    L = np.random.normal(loc=0., scale=1., size=[n_feats, n_feats])
    L = np.tril(L)
    return L
L=correlation_matrix_generator(100)
m=mask_generation(32, 0.5, L)

In [None]:
m[0]

In [None]:
def mask_generation(mb_size_, pi_):
    '''
        Phi(x; mu, sigma) = 1/2 * (1 + erf( (x-mu)/(sigma * sqrt(2)) )) 
        --> Phi(x; 0,1)   = 1/2 * (1 + erf( x/sqrt(2) )) 
    '''
    if len(remove_idx) == 0:
        epsilon = np.random.normal(loc=0., scale=1., size=[np.shape(L)[0], mb_size_])
        g       = np.matmul(L, epsilon)
    else:
        present_idx = [i for i in range(x_dim) if i not in remove_idx]
        epsilon     = np.random.normal(loc=0., scale=1., size=[np.shape(L)[0], mb_size_])
        g2      = np.random.normal(loc=0., scale=1., size=[len(remove_idx), mb_size_])
        g1      = np.matmul(L, epsilon)
        g       = np.zeros([x_dim, mb_size_])

        g[present_idx, :] = g1
        g[remove_idx, :]  = g2

    m = (1/2 * (1 + scipy.special.erf(g/np.sqrt(2)) ) < pi_).astype(float).T    
    return m


def copula_generation(mb_size_):
    if len(remove_idx) == 0:
        epsilon = np.random.normal(loc=0., scale=1., size=[np.shape(L)[0], mb_size_])
        g       = np.matmul(L, epsilon)
    else:
        present_idx = [i for i in range(x_dim) if i not in remove_idx]
        epsilon     = np.random.normal(loc=0., scale=1., size=[np.shape(L)[0], mb_size_])
        g2      = np.random.normal(loc=0., scale=1., size=[len(remove_idx), mb_size_])
        g1      = np.matmul(L, epsilon)
        g       = np.zeros([x_dim, mb_size_])

        g[present_idx, :] = g1
        g[remove_idx, :]  = g2

    return g.T

In [None]:
seed    = 1234 
p         = 10
sigma_n = 1.0
tr_X, tr_Y, tr_Y_onehot = get_noisy_two_moons(n_samples=1000, n_feats=p, noise_twomoon=0.1, noise_nuisance=sigma_n, seed_=seed)

In [None]:
## self-supervised 방식이므로, 라벨된 데이터의 개수를 10개로 지정. 
max_labeled_samples=10
idx1 = random.sample(np.where(tr_Y==1)[0].tolist(), max_labeled_samples)
idx0 = random.sample(np.where(tr_Y==0)[0].tolist(), max_labeled_samples)
idx=idx1+idx0

In [None]:
tr_X.shape

In [None]:
get_blockcorr(tr_X).shape

In [None]:
import torch.nn as nn
class SEFS(nn.Module):
    def __init__(self, input_dim, z_dim, h_dim, num_layers, dropout):
        super(SEFS, self).__init__()
        self.input_dim = input_dim
        self.z_dim = z_dim
        self.h_dim = h_dim
        self.num_layers = num_layers
        self.dropout = dropout
        self.fc = nn.Linear(input_dim   , h_dim)
        ## want to add 3 layers for the fc
        ## writh thre fully connected layer at once 
        ## want activation functiopn between each layer
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, h_dim),
            ## activation function
            nn.ReLU(),
            nn.Linear(h_dim, h_dim),
            nn.ReLU(),
            nn.Linear(h_dim, h_dim),
            nn.ReLU(),
        )

        self.xhatdecoder = nn.Sequential(
            nn.Linear( z_dim, h_dim),
            nn.ReLU(),
            nn.Linear(h_dim, input_dim),
            nn.ReLU(),
        )

        self.maskdecoder = nn.Sequential(
            nn.Linear(z_dim, h_dim),
            nn.ReLU(),
            nn.Linear(h_dim, input_dim),
            nn.ReLU(),
        )
        ## fc with outputlayer size 
        self.fc_out = nn.Linear(h_dim, z_dim)
    
    def encode(self, x):
        x=self.encoder(x)
        x=self.fc_out(x)
        return x
    def xhatdecode(self,x):
        x=self.xhatdecoder(x)
        return x

    def maskdecode(self,x):
        x=self.maskdecoder(x)
        return x
    
    def forward(self, x): 
        x=self.encode(x)
        xtilde=self.xhatdecode(x)
        mask=self.maskdecode(x)
        return x,xtilde, mask

In [None]:
import torch

model=SEFS(10000, 10, 100, 3, 0.5)



In [None]:
## torchsummary
 ## deivce selection
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from torchsummary import summary
summary(model.to(device), (10,10000)) ## input size: sameplesize*feature size

## torchsummaryX

In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import scipy
import numpy as np
def Gaussian_CDF(x):
    return 0.5 * (1. + torch.erf(x / torch.sqrt(2.)))

class FCNet(nn.Module):
    def __init__(self, in_features, out_features, num_layers=1, hidden_features=100,
                 activation=nn.ReLU, dropout=0.0):
        super(FCNet, self).__init__()

        self.layers = nn.ModuleList()
        
        for i in range(num_layers - 1):
            self.layers.append(nn.Linear(in_features, hidden_features))
            self.layers.append(activation())
            self.layers.append(nn.Dropout(dropout))

        self.layers.append(nn.Linear(hidden_features, out_features))

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return x


class SEFS_SS_Phase(nn.Module):
    def __init__(self, input_dims, network_settings):
        super(SEFS_SS_Phase, self).__init__()

        self.x_dim = input_dims['x_dim']
        self.z_dim = input_dims['z_dim']

        self.x_hat=network_settings['x_hat'] ##computed beforhand
        self.pi_ = network_settings['pi_'] ## selected beforhand
        self.LT = network_settings['LT']     ##  computed beforehand
        self.batch_size = network_settings['batch_size']  ## selected beforhand
        self.reg_scale = network_settings['reg_scale']
        self.h_dim_e = network_settings['h_dim_e']
        self.num_layers_e = network_settings['num_layers_e']
        self.h_dim_d = network_settings['h_dim_d']
        self.num_layers_d = network_settings['num_layers_d']
        self.fc_activate_fn = network_settings['fc_activate_fn']

        self.encoder = FCNet(self.x_dim, self.z_dim, self.num_layers_e, self.h_dim_e,
                             self.fc_activate_fn)
        self.decoder_x = FCNet(self.z_dim, self.x_dim, self.num_layers_d, self.h_dim_d,
                                self.fc_activate_fn)
        self.decoder_m = FCNet(self.z_dim, self.x_dim, self.num_layers_d, self.h_dim_d,
                                self.fc_activate_fn)
        
    def sample_gate_vector(self,x):
        # x: (batch_size, x_dim)
        # LT_matrix: (x_dim, x_dim) , Lower triangel of correlation matrix should be computed before the traing phase.(via choleskly decompostion) 
        # batch_size: batch_size
        # pi_: (x_dim, 1) , pi_ is a hyper parameter that controls the probability,
        ## given correlateion matrix, sample a binary vector from a multivariate Bernoulli distribution
        
        mask=self.mask_generation(self.pi_, self.LT, self.batch_size)
        mask=torch.from_numpy(mask).to(device).float()
        x_tilde=mask*x+ (1-mask)*self.x_hat

        ## 애매한건 다 네트워크 입력값에 넣는걸로 해놨음. ex batchsize, x_hat(평균값), pi_ 등등


        return x_tilde,mask


    
    def mask_generation(self, pi_, L, batch_size):
        ## mb_size is a size of minibatch, pi_ as a hyper parameter that controls the probability, 
        epsilon = np.random.normal(loc=0., scale=1., size=[np.shape(L)[0], batch_size])
        g=np.matmul(L, epsilon)
        m = (1/2 * (1 + scipy.special.erf(g/np.sqrt(2)) ) < pi_).astype(float).T
        return m
        # generate a mask matrix
        
        
    def forward(self, x):
        # sample a binary vector from 
        x_tilde,mask=self.sample_gate_vector(x) ## xtilde and 원본 마스크에 대한 정보 저장
        z = self.encoder(x_tilde)
        x_hat = self.decoder_x(z) ## xtilde로 부터 복원된 x_hat
        m_hat = self.decoder_m(z) ## mask로부터 복원된 m_hat

        loss_recon = F.mse_loss(x_hat, x, reduction='none')
        loss_cross_entropy=F.binary_cross_entropy(m_hat, mask, reduction='none')

        return loss_recon,loss_cross_entropy, x_hat, m_hat

In [None]:
!pip install torchsummary

In [None]:
np.random.rand(100,1)

In [16]:
## create dictionary named network_settings
## device

import torch.nn as nn
import numpy as np
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_dims={'x_dim':100, 'z_dim':100}
network_settings={'batch_size':100, 'reg_scale':0.5, 'h_dim_e':100, 'num_layers_e':3, 'h_dim_d':100, 'num_layers_d':3, 'fc_activate_fn':nn.ReLU, 'x_hat':torch.randn(100,1), 'pi_':np.random.rand(100,1), 'LT':np.random.rand(100,100)}
model=SEFS_SS_Phase(input_dims, network_settings).to(device)

## torchsummary
    ## deivce selection
## want to import torchsummary
from torchsummary import summary


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
summary(model, (100,100)) ## input size: sameplesize*feature size





RuntimeError: The size of tensor a (2) must match the size of tensor b (100) at non-singleton dimension 1

In [5]:
from scipy import special