In [1]:
# Execute the chapter-6 notebook in this namespace; AdamModel (the base class
# used below) and the array/activation helpers are presumably defined there.
%run ../chap06/adam_model.ipynb
# Number of columns of the extra per-sample feature block used by the
# 'conca' and 'ele_conca' layers (reshape width / number of appended columns).
go_=1

In [2]:
class CnnBasicModel(AdamModel):
    """Model whose hidden layers are dispatched by type name.

    Layer handlers (``alloc_*``, ``forward_*``, ``backprop_*``) are attached to
    this class further down in the notebook.
    """

    def __init__(self, name, dataset, hconfigs, show_maps = False):
        """Normalise ``hconfigs`` and initialise the base model.

        Args:
            name: model name, forwarded to the base class.
            dataset: dataset object, forwarded to the base class.
            hconfigs: either a list of layer configs, or a single layer config
                (a list whose first element is neither a list nor an int),
                which is wrapped into a one-element list.
            show_maps: when true, convolution kernels are collected at
                allocation time and feature maps are drawn by ``visualize``.
        """
        # The emptiness check keeps `hconfigs = []` from raising IndexError on
        # `hconfigs[0]`; an empty list is passed through unchanged.
        if isinstance(hconfigs, list) and hconfigs and \
           not isinstance(hconfigs[0], (list, int)):
            hconfigs = [hconfigs]
        self.show_maps = show_maps
        self.need_maps = False
        self.kernels = []
        # These attributes are set before the base initialiser runs because the
        # conv allocator reads show_maps / kernels.
        super(CnnBasicModel, self).__init__(name, dataset, hconfigs)
        self.use_adam = True

In [3]:
def cnn_basic_alloc_layer_param(self, input_shape, hconfig):
    """Allocate one layer's parameters via ``self.alloc_<type>_layer``.

    The layer type is obtained from ``get_layer_type(hconfig)`` and the handler
    is looked up on ``self`` by name.

    Returns:
        The ``(params, output_shape)`` pair produced by the handler.
    """
    handler_name = 'alloc_{}_layer'.format(get_layer_type(hconfig))
    allocate = getattr(self, handler_name)
    params, out_shape = allocate(input_shape, hconfig)
    return params, out_shape

CnnBasicModel.alloc_layer_param = cnn_basic_alloc_layer_param

In [4]:
def cnn_basic_forward_layer(self, x, hconfig, pm):
    """Run one layer forward via ``self.forward_<type>_layer``.

    Returns:
        The ``(y, aux)`` pair produced by the handler; ``aux`` is whatever the
        handler wants handed back to its backprop counterpart.
    """
    handler_name = 'forward_{}_layer'.format(get_layer_type(hconfig))
    run_forward = getattr(self, handler_name)
    output, aux = run_forward(x, hconfig, pm)
    return output, aux

CnnBasicModel.forward_layer = cnn_basic_forward_layer

In [5]:
def cnn_basic_backprop_layer(self, G_y, hconfig, pm, aux):
    """Back-propagate through one layer via ``self.backprop_<type>_layer``.

    Returns:
        The gradient with respect to the layer input, as returned by the handler.
    """
    handler_name = 'backprop_{}_layer'.format(get_layer_type(hconfig))
    run_backprop = getattr(self, handler_name)
    return run_backprop(G_y, hconfig, pm, aux)

CnnBasicModel.backprop_layer = cnn_basic_backprop_layer

In [6]:
def cnn_basic_alloc_conca_layer(self, input_shape, hconfig):
    """Allocate a dense layer whose input may be widened by ``go_`` extra columns.

    When ``self.jongwoo == 1`` the weight matrix gets ``go_`` additional input
    rows and the widened input count is stored in ``self.jong_input_cnt``.

    Returns:
        ``({'w': weight, 'b': bias}, [output_cnt])`` where ``output_cnt`` is the
        configured 'width' (or ``hconfig`` itself when no width is configured).
    """
    input_cnt = np_cpu.prod(input_shape)
    if self.jongwoo == 1:
        input_cnt = input_cnt + go_
        self.jong_input_cnt = input_cnt
    output_cnt = get_conf_param(hconfig, 'width', hconfig)

    params = {
        'w': np.random.normal(0, self.rand_std, [input_cnt, output_cnt], dtype = 'float32'),
        'b': np.zeros([output_cnt], dtype = 'float32'),
    }
    return params, [output_cnt]
def cnn_basic_alloc_full_layer(self, input_shape, hconfig):
    """Allocate weights and bias for a fully connected layer.

    Returns:
        ``({'w': weight, 'b': bias}, [n_out])``. ``weight`` is a float32
        ``[n_in, n_out]`` matrix drawn from N(0, self.rand_std); ``bias`` is a
        float32 zero vector of length ``n_out``.
    """
    n_in = np_cpu.prod(input_shape)                      # flattened input size
    n_out = get_conf_param(hconfig, 'width', hconfig)    # configured output size

    params = {
        'w': np.random.normal(0, self.rand_std, [n_in, n_out], dtype = 'float32'),
        'b': np.zeros([n_out], dtype = 'float32'),
    }
    return params, [n_out]
    
def cnn_basic_alloc_conv_layer(self, input_shape, hconfig):
    """Allocate kernel and bias for a convolution layer.

    ``input_shape`` must be ``[height, width, channels]``. The spatial size is
    unchanged by the layer; only the channel count becomes the configured 'chn'.

    Returns:
        ``({'k': kernel, 'b': bias}, [height, width, out_chn])``.
    """
    assert len(input_shape) == 3
    height, width, in_chn = input_shape
    kh, kw = get_conf_param_2d(hconfig, 'ksize')
    out_chn = get_conf_param(hconfig, 'chn')

    params = {
        'k': np.random.normal(0, self.rand_std, [kh, kw, in_chn, out_chn], dtype = 'float32'),
        'b': np.zeros([out_chn], dtype = 'float32'),
    }

    # Keep a reference to the kernel so visualize() can draw it later.
    if self.show_maps:
        self.kernels.append(params['k'])

    return params, [height, width, out_chn]
    
def cnn_basic_alloc_pool_layer(self, input_shape, hconfig):
    """Compute the output shape of a pooling layer (it has no parameters).

    ``input_shape`` must be ``[height, width, channels]`` and both spatial
    sizes must be exact multiples of the configured 'stride'.

    Returns:
        ``({}, [height // sh, width // sw, channels])``.
    """
    assert len(input_shape) == 3
    height, width, chn = input_shape
    sh, sw = get_conf_param_2d(hconfig, 'stride')

    out_h, rest_h = divmod(height, sh)
    assert rest_h == 0
    out_w, rest_w = divmod(width, sw)
    assert rest_w == 0

    return {}, [out_h, out_w, chn]
# Attach the allocators to the model class under the 'alloc_<type>_layer' names
# that alloc_layer_param looks up. Max and average pooling share one allocator.
CnnBasicModel.alloc_conca_layer = cnn_basic_alloc_conca_layer
CnnBasicModel.alloc_full_layer = cnn_basic_alloc_full_layer
CnnBasicModel.alloc_conv_layer = cnn_basic_alloc_conv_layer
CnnBasicModel.alloc_max_layer = cnn_basic_alloc_pool_layer
CnnBasicModel.alloc_avg_layer = cnn_basic_alloc_pool_layer

In [None]:
def cnn_basic_alloc_ele_conca_layer(self, input_shape, hconfig):
    """Allocate the weight of the element-wise gating ('ele_conca') layer.

    The only parameter is a float32 ``[go_, width]`` matrix that projects the
    extra per-sample features to ``width`` values; the layer has no bias.

    Returns:
        ``({'w': weight}, [width])``.
    """
    if self.jongwoo == 1:
        # Recorded for bookkeeping only; this function does not use it further.
        self.jong_input_cnt = np_cpu.prod(input_shape) + go_

    width = get_conf_param(hconfig, 'width', hconfig)
    gate_weight = np.random.normal(0, self.rand_std, [go_, width], dtype = 'float32')

    return {'w': gate_weight}, [width]


def cnn_basic_forward_ele_conca_layer(self, x, hconfig, pm):
    """Forward pass of the element-wise gating ('ele_conca') layer.

    The extra per-sample features are projected with ``pm['w']`` and the result
    is multiplied element-wise with the (flattened) input:
    ``y = x * (extra @ w)``.

    Args:
        x: layer input; anything that is not 2-D is flattened to ``[mb, -1]``.
        hconfig: layer config (unused here).
        pm: parameter dict holding ``'w'``; ``None`` makes the layer a no-op.

    Returns:
        ``(y, [x, y, x_org_shape, affine, extra])``; the list is the ``aux``
        consumed by the matching backprop handler.

    Raises:
        RuntimeError: if ``self.jongwoo != 1``. The layer cannot run without
            the extra features (previously this surfaced as UnboundLocalError).
    """
    if pm is None: return x, None

    x_org_shape = x.shape
    if len(x.shape) != 2:
        x = x.reshape([x.shape[0], -1])

    if self.jongwoo != 1:
        raise RuntimeError("'ele_conca' layer requires self.jongwoo == 1 "
                           "(extra features are not available)")

    if self.is_training == True:
        # Rows of the extra features that belong to the current minibatch.
        # assumes self.n is the current batch index — TODO confirm
        from_idx = self.n * self.batch_size
        to_idx = (self.n + 1) * self.batch_size
        extra = self.dataset.ext_training_xs[self.dataset.indices[from_idx:to_idx]]
    else:
        extra = self.dataset.ext_test_xs

    # The column count must equal the row count of w for the matmul below;
    # alloc_ele_conca_layer creates w with go_ rows.
    extra = extra.reshape([-1, pm['w'].shape[0]])

    affine = np.matmul(extra, pm['w'])
    y = np.multiply(x, affine)

    return y, [x, y, x_org_shape, affine, extra]

def cnn_basic_backprop_ele_conca_layer(self, G_y, hconfig, pm, aux):
    """Backward pass of the element-wise gating ('ele_conca') layer.

    Forward computed ``y = x * affine`` with ``affine = extra @ w``, so

    * ``dL/dx      = G_y * affine``
    * ``dL/daffine = G_y * x``
    * ``dL/dw      = extra^T @ (G_y * x)``

    Args:
        G_y: gradient of the loss with respect to the layer output.
        hconfig: layer config (unused here).
        pm: parameter dict holding ``'w'``; ``None`` makes the layer a no-op.
        aux: ``[x, y, x_org_shape, affine, extra]`` from the forward pass.

    Returns:
        Gradient with respect to the layer input, reshaped to ``x_org_shape``.
    """
    if pm is None: return G_y

    x, y, x_org_shape, affine, extra = aux

    # Gradient flowing down to the previous layer.
    G_input = G_y * affine

    # Gradient of the projected extra features. This must be built from G_y,
    # not from G_input, otherwise the weight gradient picks up a spurious
    # extra factor of `affine`.
    G_gate = G_y * x

    G_weight = np.matmul(extra.transpose(), G_gate)

    self.update_param(pm, 'w', G_weight)

    return G_input.reshape(x_org_shape)

# Attach the 'ele_conca' handlers to the model class so the name-based
# dispatchers (alloc_/forward_/backprop_<type>_layer) can find them.
CnnBasicModel.backprop_ele_conca_layer = cnn_basic_backprop_ele_conca_layer

CnnBasicModel.forward_ele_conca_layer = cnn_basic_forward_ele_conca_layer

CnnBasicModel.alloc_ele_conca_layer = cnn_basic_alloc_ele_conca_layer


In [7]:
def get_layer_type(hconfig):
    """Return the layer type name of a layer config.

    A list config carries its type as the first element; any other config
    (e.g. a bare width) is treated as a 'full' layer.
    """
    return hconfig[0] if isinstance(hconfig, list) else 'full'

def get_conf_param(hconfig, key, defval = None):
    """Look up ``key`` in the option dict of a layer config.

    Returns ``defval`` when ``hconfig`` is not a list, has no option element
    (``hconfig[1]``), or the option element does not contain ``key``.
    """
    has_options = isinstance(hconfig, list) and len(hconfig) > 1
    if has_options and key in hconfig[1]:
        return hconfig[1][key]
    return defval
    
def get_conf_param_2d(hconfig, key, defval = None):
    """Look up a two-component (height, width) option of a layer config.

    A scalar value ``v`` is expanded to ``[v, v]``; a list is returned as is
    and a tuple is converted to a list.

    Returns ``defval`` when ``hconfig`` is not a list/tuple, has no option
    element (``hconfig[1]``), or the option element does not contain ``key``.
    """
    # Mirror get_conf_param: a config without a length (e.g. a bare int width)
    # yields the default instead of a TypeError from len().
    if not isinstance(hconfig, (list, tuple)): return defval
    if len(hconfig) <= 1: return defval
    if key not in hconfig[1]: return defval
    val = hconfig[1][key]
    if isinstance(val, list): return val
    # A tuple such as (2, 3) is already a pair; it must not be duplicated.
    if isinstance(val, tuple): return list(val)
    return [val, val]

In [8]:
"""
def cnn_basic_forward_conca_layer(self, x, hconfig, pm):
    if pm is None: return x, None
    
    x_org_shape = x.shape
    #print(x.shape)
    if len(x.shape) != 2:
        mb_size = x.shape[0]
        x = x.reshape([mb_size, -1])
    if self.jongwoo == 1:
        if self.is_training==True:
            from_idx_ = self.n * self.batch_size
            to_idx_ = (self.n + 1) * self.batch_size
            temp_temp=self.dataset.ext_training_xs[self.dataset.indices[from_idx_:to_idx_]]
            next_temp=temp_temp.reshape([-1, go_])
            #temp_temp=temp_temp.reshape([-1, go_])
            #temp_temp=next_temp[:,-go_:]
            #print(temp_temp.shape,"temp_temp.shape")
            #
            #print(pm['s'])
            temp_temp=next_temp*pm['s']
            #print(x.shape, temp_temp.shape)
            x=np.concatenate((x,temp_temp),axis=1)
        else:
            temp_temp=self.dataset.ext_test_xs
            next_temp=temp_temp.reshape([-1, go_])
            #temp_temp=temp_temp.reshape([-1, go_])
            #next_temp=temp_temp
            temp_temp=next_temp*pm['s']
            x=np.concatenate((x,temp_temp),axis=1)
      
        #print("x.shape",x.shape)
    #-----------
    
    #------------------
    affine = np.matmul(x, pm['w']) + pm['b']
    y = self.activate(affine, hconfig)
    
    return y, [x, y, x_org_shape, next_temp]

CnnBasicModel.forward_conca_layer = cnn_basic_forward_conca_layer
"""
def cnn_basic_forward_conca_layer(self, x, hconfig, pm):
    """Forward pass of the dense layer with optional extra input columns.

    When ``self.jongwoo == 1`` the extra per-sample features (reshaped to
    ``[-1, go_]``) are appended to the flattened input before the affine
    transform; otherwise this behaves like a plain fully connected layer.

    Returns:
        ``(y, [x, y, x_org_shape])`` where ``x`` is the (possibly widened)
        2-D input actually fed to the affine transform. ``(x, None)`` when
        ``pm`` is ``None``.
    """
    if pm is None:
        return x, None

    x_org_shape = x.shape
    if len(x_org_shape) != 2:
        x = x.reshape([x_org_shape[0], -1])

    if self.jongwoo == 1:
        if self.is_training == True:
            # Rows of the extra features selected by the current slice of
            # dataset.indices. assumes self.n is the batch index — TODO confirm
            first = self.n * self.batch_size
            last = (self.n + 1) * self.batch_size
            extra = self.dataset.ext_training_xs[self.dataset.indices[first:last]]
        else:
            extra = self.dataset.ext_test_xs
        x = np.concatenate((x, extra.reshape([-1, go_])), axis=1)

    affine = np.matmul(x, pm['w']) + pm['b']
    y = self.activate(affine, hconfig)

    return y, [x, y, x_org_shape]

# Attach as the forward handler for layers of type 'conca'.
CnnBasicModel.forward_conca_layer = cnn_basic_forward_conca_layer


In [9]:
"""
def cnn_basic_backprop_conca_layer(self, G_y, hconfig, pm, aux):
    if pm is None: return G_y

    x, y, x_org_shape= aux
    
    G_affine = self.activate_derv(G_y, y, hconfig)
    
    
    g_affine_weight = x.transpose()
    g_affine_input = pm['w'].transpose()
    
    G_weight = np.matmul(g_affine_weight, G_affine) 
    G_bias = np.sum(G_affine, axis = 0)
    G_input = np.matmul(G_affine, g_affine_input)
    if self.epoch<140:
        self.inter_stop_1=1
        self.update_param(pm, 'w', G_weight)
        self.update_param(pm, 'b', G_bias)
        self.inter_stop_1=0

        
    else:
        self.inter_stop=1
        self.update_param(pm, 'w', G_weight)
        self.update_param(pm, 'b', G_bias)
        self.inter_stop=0
        self.stop=1
    #print(G_affine.shape)
    #print(g_affine_weight.shape)
    #print(g_affine_input.shape)
    #print(G_weight.shape)
    #print(G_bias.shape)
    #print(G_input.shape)
    if self.jongwoo == 1:
        
        return G_input[:,:-go_].reshape(x_org_shape)
    else:
        return G_input.reshape(x_org_shape)

CnnBasicModel.backprop_conca_layer = cnn_basic_backprop_conca_layer

def cnn_basic_backprop_conca_layer(self, G_y, hconfig, pm, aux):
    if pm is None: return G_y

    x, y, x_org_shape,next_temp = aux
    
    G_affine = self.activate_derv(G_y, y, hconfig)
    
    
    g_affine_weight = x.transpose()
    
    #g_affine_weight = g_affine_weight[-go_:,:]*pm['s']
    g_affine_input = pm['w'].transpose()
    
    G_weight = np.matmul(g_affine_weight, G_affine) 
    G_bias = np.sum(G_affine, axis = 0)
    G_input = np.matmul(G_affine, g_affine_input)
    
    G_temp=G_input[:,-go_:]
    G_temp_1=next_temp*G_temp
    G_stair = np.sum(G_temp_1,axis=0)
    
    #G_stair = np.sum(G_weight
    #G_stair = np.sum(G_input[:,-go_:]*x[:,-go_:], axis = 0) 
    #print(G_stair)
    self.update_param(pm, 'w', G_weight)
    self.update_param(pm, 'b', G_bias)
    self.learning_rate*=1
    self.update_param(pm, 's', G_stair)
    self.learning_rate/=1
    #print(G_affine.shape)
    #print(g_affine_weight.shape)
    #print(g_affine_input.shape)
    #print(G_weight.shape)
    #print(G_bias.shape)
    #print(G_input.shape)
    if self.jongwoo == 1:
        
        return G_input[:,:-go_].reshape(x_org_shape)
    else:
        return G_input.reshape(x_org_shape)

CnnBasicModel.backprop_conca_layer = cnn_basic_backprop_conca_layer
"""
def cnn_basic_backprop_conca_layer(self, G_y, hconfig, pm, aux):
    """Backward pass of the dense layer with optional extra input columns.

    Updates ``pm['w']`` and ``pm['b']`` through ``self.update_param`` and
    returns the gradient for the previous layer. When ``self.jongwoo == 1`` the
    last ``go_`` columns (the appended extra features) are dropped from that
    gradient before it is reshaped to the original input shape.
    """
    if pm is None:
        return G_y

    x, y, x_org_shape = aux

    G_affine = self.activate_derv(G_y, y, hconfig)

    # All gradients are computed before any parameter is updated.
    G_weight = np.matmul(x.transpose(), G_affine)
    G_bias = np.sum(G_affine, axis = 0)
    G_input = np.matmul(G_affine, pm['w'].transpose())

    self.update_param(pm, 'w', G_weight)
    self.update_param(pm, 'b', G_bias)

    if self.jongwoo == 1:
        G_input = G_input[:,:-go_]
    return G_input.reshape(x_org_shape)

# Attach as the backprop handler for layers of type 'conca'.
CnnBasicModel.backprop_conca_layer = cnn_basic_backprop_conca_layer


In [10]:
def cnn_basic_forward_full_layer(self, x, hconfig, pm):
    """Forward pass of a fully connected layer.

    Args:
        x: input from the layer below; non-2-D input (e.g. a convolution
            output) is flattened to ``[mb_size, -1]`` first.
        hconfig: layer config, passed on to ``self.activate``.
        pm: parameter dict with ``'w'`` and ``'b'``; ``None`` makes the layer
            a no-op.

    Returns:
        ``(y, [x_flat, y, x_org_shape])``; the list holds what backprop needs.
    """
    if pm is None:
        return x, None

    orig_shape = x.shape
    flat = x if len(orig_shape) == 2 else x.reshape([orig_shape[0], -1])

    # Standard affine transform followed by the configured activation.
    pre_activation = np.matmul(flat, pm['w']) + pm['b']
    out = self.activate(pre_activation, hconfig)

    return out, [flat, out, orig_shape]

# Attach as the forward handler for layers of type 'full'.
CnnBasicModel.forward_full_layer = cnn_basic_forward_full_layer

In [11]:
def cnn_basic_backprop_full_layer(self, G_y, hconfig, pm, aux):
    """Backward pass of a fully connected layer.

    Args:
        G_y: gradient arriving from the layer above.
        hconfig: layer config, passed on to ``self.activate_derv``.
        pm: parameter dict with ``'w'`` and ``'b'``; ``None`` makes the layer
            a no-op.
        aux: ``[x_flat, y, x_org_shape]`` saved by the forward pass.

    Returns:
        Gradient with respect to the input, reshaped back to ``x_org_shape``
        (undoing the flattening done in the forward pass).
    """
    if pm is None:
        return G_y

    flat_x, out, orig_shape = aux

    # Steps are undone in reverse order: activation first, then the affine map.
    G_pre = self.activate_derv(G_y, out, hconfig)

    grads = {
        'w': np.matmul(flat_x.transpose(), G_pre),   # x^T @ G
        'b': np.sum(G_pre, axis = 0),                # summed over the batch axis
    }
    # Computed before the weight is updated.
    G_input = np.matmul(G_pre, pm['w'].transpose())

    for key in ('w', 'b'):
        self.update_param(pm, key, grads[key])

    return G_input.reshape(orig_shape)

# Attach as the backprop handler for layers of type 'full'.
CnnBasicModel.backprop_full_layer = cnn_basic_backprop_full_layer

In [12]:
def cnn_basic_activate(self, affine, hconfig):
    """Apply the activation function configured for a layer.

    The function name is read from the 'actfunc' option and defaults to
    'relu'. ``hconfig is None`` means "no activation".

    Raises:
        AssertionError: if the configured name is not one of
            'none', 'relu', 'sigmoid', 'tanh'.
    """
    if hconfig is None: return affine

    func = get_conf_param(hconfig, 'actfunc', 'relu')

    if func == 'none':      return affine
    elif func == 'relu':    return relu(affine)
    elif func == 'sigmoid': return sigmoid(affine)
    elif func == 'tanh':    return tanh(affine)

    # Raised explicitly (rather than `assert 0`) so the check is not stripped
    # under `python -O`, where the function would silently return None.
    raise AssertionError('unknown activation function: {!r}'.format(func))
        
def cnn_basic_activate_derv(self, G_y, y, hconfig):
    """Back-propagate ``G_y`` through the activation configured for a layer.

    Uses the same 'actfunc' option (default 'relu') as ``cnn_basic_activate``;
    the derivative helpers are evaluated on the layer output ``y``.

    Raises:
        AssertionError: if the configured name is not one of
            'none', 'relu', 'sigmoid', 'tanh'.
    """
    if hconfig is None: return G_y

    func = get_conf_param(hconfig, 'actfunc', 'relu')

    if func == 'none':      return G_y
    elif func == 'relu':    return relu_derv(y) * G_y
    elif func == 'sigmoid': return sigmoid_derv(y) * G_y
    elif func == 'tanh':    return tanh_derv(y) * G_y

    # Raised explicitly (rather than `assert 0`) so the check is not stripped
    # under `python -O`, where the function would silently return None.
    raise AssertionError('unknown activation function: {!r}'.format(func))

# Attach the activation helpers; the layer handlers call them as
# self.activate(...) and self.activate_derv(...).
CnnBasicModel.activate = cnn_basic_activate
CnnBasicModel.activate_derv = cnn_basic_activate_derv

In [13]:
def forward_conv_layer_adhoc(self, x, hconfig, pm):
    """Reference convolution written as plain nested loops.

    Computes a same-size convolution of ``x`` (``[mb, h, w, in_chn]``) with
    ``pm['k']`` (``[kh, kw, in_chn, out_chn]``); kernel taps that fall outside
    the image are skipped, which is equivalent to zero padding. This function
    is not attached to the model in this notebook.

    Returns:
        ``(y, [x, y])`` with ``y = self.activate(conv + pm['b'], hconfig)``.
    """
    mb_size, xh, xw, xchn = x.shape
    kh, kw, _, ychn = pm['k'].shape
    kernel = pm['k']

    # Offset of the kernel origin relative to the output position.
    off_h, off_w = (kh-1) // 2, (kw-1) // 2

    conv = np.zeros((mb_size, xh, xw, ychn))

    for n in range(mb_size):
        for r in range(xh):
            for c in range(xw):
                for ym in range(ychn):
                    for i in range(kh):
                        rx = r + i - off_h
                        for j in range(kw):
                            cx = c + j - off_w
                            if not (0 <= rx < xh): continue
                            if not (0 <= cx < xw): continue
                            for xm in range(xchn):
                                conv[n, r, c, ym] += kernel[i, j, xm, ym] * x[n, rx, cx, xm]

    y = self.activate(conv + pm['b'], hconfig)

    return y, [x, y]

In [14]:
def forward_conv_layer_better(self, x, hconfig, pm):
    """Same-size convolution using a zero-padded input and flattened kernels.

    Each output value is the sum of the element-wise product between a
    flattened ``kh x kw x in_chn`` window of the padded input and the
    flattened kernel of one output channel. This function is not attached to
    the model in this notebook.

    Returns:
        ``(y, [x, y])`` with ``y = self.activate(conv + pm['b'], hconfig)``.
    """
    mb_size, xh, xw, xchn = x.shape
    kh, kw, _, ychn = pm['k'].shape

    # Zero-padded copy of the input, large enough for every window.
    top, left = (kh-1)//2, (kw-1)//2
    padded = np.zeros((mb_size, xh + kh - 1, xw + kw - 1, xchn))
    padded[:, top:top + xh, left:left + xw, :] = x

    # One flattened kernel per output channel: [ychn, kh*kw*xchn].
    filters = pm['k'].transpose([3, 0, 1, 2]).reshape([ychn, -1])

    conv = np.zeros((mb_size, xh, xw, ychn))
    for n in range(mb_size):
        for r in range(xh):
            for c in range(xw):
                window = padded[n, r:r + kh, c:c + kw, :].flatten()
                for ym in range(ychn):
                    conv[n, r, c, ym] = (window*filters[ym]).sum()

    y = self.activate(conv + pm['b'], hconfig)

    return y, [x, y]

In [15]:
def cnn_basic_forward_conv_layer(self, x, hconfig, pm):
    """Forward pass of a convolution layer as a single matrix product.

    The input windows are unrolled to ``[mb*h*w, kh*kw*in_chn]`` by
    ``get_ext_regions_for_conv`` and multiplied with the kernel reshaped to
    ``[kh*kw*in_chn, out_chn]``.

    Returns:
        ``(y, [x_flat, k_flat, x, y])``; the list is the ``aux`` used by the
        backprop handler. ``y`` is also appended to ``self.maps`` while
        ``self.need_maps`` is set.
    """
    kernel = pm['k']
    mb_size, xh, xw, xchn = x.shape
    kh, kw, _, ychn = kernel.shape

    x_flat = get_ext_regions_for_conv(x, kh, kw)
    k_flat = kernel.reshape([kh*kw*xchn, ychn])
    conv = np.matmul(x_flat, k_flat).reshape([mb_size, xh, xw, ychn])

    y = self.activate(conv + pm['b'], hconfig)

    if self.need_maps:
        self.maps.append(y)

    return y, [x_flat, k_flat, x, y]

# Attach as the forward handler for layers of type 'conv'.
CnnBasicModel.forward_conv_layer = cnn_basic_forward_conv_layer

In [16]:
def cnn_basic_backprop_conv_layer(self, G_y, hconfig, pm, aux):
    """Backward pass of a convolution layer.

    Works on the unrolled form saved by the forward pass: the kernel gradient
    is ``x_flat^T @ G``, the unrolled input gradient is ``G @ k_flat^T`` and is
    folded back onto the image by ``undo_ext_regions_for_conv``.

    Returns:
        Gradient with respect to the layer input.
    """
    x_flat, k_flat, x, y = aux

    kh, kw, xchn, ychn = pm['k'].shape
    mb_size, xh, xw, _ = G_y.shape

    G_conv = self.activate_derv(G_y, y, hconfig)
    G_flat = G_conv.reshape(mb_size*xh*xw, ychn)

    # All gradients are computed before any parameter is updated.
    G_kernel = np.matmul(x_flat.transpose(), G_flat).reshape([kh, kw, xchn, ychn])
    G_x_flat = np.matmul(G_flat, k_flat.transpose())
    G_bias = np.sum(G_flat, axis = 0)

    G_input = undo_ext_regions_for_conv(G_x_flat, x, kh, kw)

    self.update_param(pm, 'k', G_kernel)
    self.update_param(pm, 'b', G_bias)

    return G_input

# Attach as the backprop handler for layers of type 'conv'.
CnnBasicModel.backprop_conv_layer = cnn_basic_backprop_conv_layer

In [17]:
def get_ext_regions_for_conv(x, kh, kw):
    """Unroll every ``kh x kw`` window of ``x`` into one row.

    Returns:
        Array of shape ``[mb*h*w, kh*kw*chn]`` whose rows are ordered by
        sample, then row, then column of the window position. Windows are
        zero padded at the borders.
    """
    mb_size, xh, xw, xchn = x.shape

    # get_ext_regions yields [h, w, mb, kh, kw, chn]; move the batch axis first.
    by_sample = get_ext_regions(x, kh, kw, 0).transpose([2, 0, 1, 3, 4, 5])

    return by_sample.reshape([mb_size*xh*xw, kh*kw*xchn])

def get_ext_regions(x, kh, kw, fill):
    """Collect the ``kh x kw`` window around every pixel of ``x``.

    ``x`` (``[mb, h, w, chn]``) is embedded in an array padded with ``fill``
    so that each of the ``h*w`` positions has a complete window.

    Returns:
        float32 array of shape ``[h, w, mb, kh, kw, chn]``.
    """
    mb_size, xh, xw, xchn = x.shape

    top, left = (kh-1)//2, (kw-1)//2
    padded = np.zeros((mb_size, xh + kh - 1, xw + kw - 1, xchn), dtype = 'float32') + fill
    padded[:, top:top + xh, left:left + xw, :] = x

    regs = np.zeros((xh, xw, mb_size, kh, kw, xchn), dtype = 'float32')

    for r in range(xh):
        for c in range(xw):
            regs[r, c] = padded[:, r:r + kh, c:c + kw, :]

    return regs

In [18]:
def undo_ext_regions_for_conv(regs, x, kh, kw):
    """Fold unrolled window gradients back onto an image-shaped array.

    ``regs`` is expected to hold ``mb*h*w`` rows of ``kh*kw*chn`` values, i.e.
    the layout produced by ``get_ext_regions_for_conv``; ``x`` only supplies
    the target shape.

    Returns:
        Array shaped like ``x`` in which overlapping window contributions are
        summed.
    """
    mb_size, xh, xw, xchn = x.shape

    # Back to [h, w, mb, kh, kw, chn], the layout undo_ext_regions works on.
    windows = regs.reshape([mb_size, xh, xw, kh, kw, xchn]).transpose([1, 2, 0, 3, 4, 5])

    return undo_ext_regions(windows, kh, kw)

def undo_ext_regions(regs, kh, kw):
    """Accumulate per-position windows back into an image (inverse scatter).

    Args:
        regs: array of shape ``[h, w, mb, kh, kw, chn]``.
        kh, kw: ignored; the kernel size is taken from ``regs.shape``.

    Returns:
        float32 array of shape ``[mb, h, w, chn]`` (a slice of the padded
        accumulator) where every pixel is the sum of all window entries that
        covered it.
    """
    xh, xw, mb_size, kh, kw, xchn = regs.shape

    top, left = (kh-1)//2, (kw-1)//2
    padded_grad = np.zeros([mb_size, xh + kh - 1, xw + kw - 1, xchn], dtype = 'float32')

    for r in range(xh):
        rows = slice(r, r + kh)
        for c in range(xw):
            padded_grad[:, rows, c:c + kw, :] += regs[r, c]

    # Drop the padding border again.
    return padded_grad[:, top:top + xh, left:left + xw, :]

In [19]:
def cnn_basic_forward_avg_layer(self, x, hconfig, pm):
    """Forward pass of average pooling with non-overlapping windows.

    The input ``[mb, h, w, chn]`` is cut into ``stride_h x stride_w`` tiles and
    each tile is replaced by its mean.

    Returns:
        ``(y, None)`` with ``y`` of shape ``[mb, h//sh, w//sw, chn]``; no aux
        data is needed for backprop. ``y`` is also appended to ``self.maps``
        while ``self.need_maps`` is set.
    """
    mb_size, xh, xw, chn = x.shape
    sh, sw = get_conf_param_2d(hconfig, 'stride')
    yh, yw = xh // sh, xw // sw

    # One row per (sample, tile row, tile column, channel), holding that tile.
    tiles = (x.reshape([mb_size, yh, sh, yw, sw, chn])
              .transpose(0, 1, 3, 5, 2, 4)
              .reshape([-1, sh*sw]))

    y = np.average(tiles, 1).reshape([mb_size, yh, yw, chn])

    if self.need_maps:
        self.maps.append(y)

    return y, None

def cnn_basic_backprop_avg_layer(self, G_y, hconfig, pm, aux):
    """Backward pass of average pooling.

    Every input pixel of a tile receives an equal share
    (``1 / (stride_h * stride_w)``) of that tile's output gradient.

    Returns:
        float32 gradient of shape ``[mb, yh*sh, yw*sw, chn]``.
    """
    mb_size, yh, yw, chn = G_y.shape
    sh, sw = get_conf_param_2d(hconfig, 'stride')
    xh, xw = yh * sh, yw * sw

    share = G_y.flatten() / (sh * sw)

    # Same share in every column, i.e. for every pixel of the tile.
    spread = np.zeros([mb_size*yh*yw*chn, sh*sw], dtype = 'float32')
    spread[:, :] = share.reshape([-1, 1])

    # Inverse of the reshape/transpose used in the forward pass.
    G_input = (spread.reshape([mb_size, yh, yw, chn, sh, sw])
                     .transpose([0, 1, 4, 2, 5, 3])
                     .reshape([mb_size, xh, xw, chn]))

    return G_input

# Attach as the forward/backprop handlers for layers of type 'avg'.
CnnBasicModel.forward_avg_layer = cnn_basic_forward_avg_layer
CnnBasicModel.backprop_avg_layer = cnn_basic_backprop_avg_layer

In [20]:
def cnn_basic_forward_max_layer(self, x, hconfig, pm):
    """Forward pass of max pooling with non-overlapping windows.

    The input ``[mb, h, w, chn]`` is cut into ``stride_h x stride_w`` tiles and
    each tile is replaced by its maximum.

    Returns:
        ``(y, idxs)`` where ``idxs`` holds, per tile, the position of the
        maximum inside the flattened tile (needed by backprop). ``y`` is also
        appended to ``self.maps`` while ``self.need_maps`` is set.
    """
    mb_size, xh, xw, chn = x.shape
    sh, sw = get_conf_param_2d(hconfig, 'stride')
    yh, yw = xh // sh, xw // sw

    # One row per (sample, tile row, tile column, channel), holding that tile.
    tiles = (x.reshape([mb_size, yh, sh, yw, sw, chn])
              .transpose(0, 1, 3, 5, 2, 4)
              .reshape([-1, sh*sw]))

    idxs = np.argmax(tiles, axis = 1)
    tile_ids = np.arange(mb_size*yh*yw*chn)
    y = tiles[tile_ids, idxs].reshape([mb_size, yh, yw, chn])

    if self.need_maps:
        self.maps.append(y)

    return y, idxs

def cnn_basic_backprop_max_layer(self, G_y, hconfig, pm, aux):
    """Backward pass of max pooling.

    The gradient of each output value is routed to the input pixel that was
    the maximum of its tile (``aux`` holds those positions); every other pixel
    gets zero.

    Returns:
        float32 gradient of shape ``[mb, yh*sh, yw*sw, chn]``.
    """
    winners = aux

    mb_size, yh, yw, chn = G_y.shape
    sh, sw = get_conf_param_2d(hconfig, 'stride')
    xh, xw = yh * sh, yw * sw

    tile_cnt = mb_size*yh*yw*chn
    routed = np.zeros([tile_cnt, sh*sw], dtype = 'float32')
    routed[np.arange(tile_cnt), winners] = G_y.flatten()

    # Inverse of the reshape/transpose used in the forward pass.
    G_input = (routed.reshape([mb_size, yh, yw, chn, sh, sw])
                     .transpose([0, 1, 4, 2, 5, 3])
                     .reshape([mb_size, xh, xw, chn]))

    return G_input

# Attach as the forward/backprop handlers for layers of type 'max'.
CnnBasicModel.forward_max_layer = cnn_basic_forward_max_layer
CnnBasicModel.backprop_max_layer = cnn_basic_backprop_max_layer

In [21]:
def cnn_basic_visualize(self, num):
    """Show estimates for ``num`` samples and, optionally, kernels and maps.

    While the estimate is computed ``self.need_maps`` mirrors
    ``self.show_maps`` so that the conv/pool forward handlers record their
    outputs in ``self.maps``. With ``show_maps`` set, the first five kernel
    grids of every conv layer and channel 0 of every recorded map are drawn
    before the dataset's own visualisation.

    ``self.need_maps`` and ``self.maps`` are always reset on exit, also when
    estimation or drawing raises, so later forward passes do not keep
    collecting maps.
    """
    print('Model {} Visualization'.format(self.name))

    self.need_maps = self.show_maps
    self.maps = []

    try:
        deX, deY = self.dataset.get_visualize_data(num)
        est = self.get_estimate(deX)

        if self.show_maps:
            for kernel in self.kernels:
                kh, kw, xchn, ychn = kernel.shape
                # One kh x kw image per (input channel, output channel) pair.
                grids = kernel.reshape([kh, kw, -1]).transpose(2, 0, 1)
                draw_images_horz(grids[0:5, :, :])

            for pmap in self.maps:
                draw_images_horz(pmap[:, :, :, 0])

        self.dataset.visualize(deX, est, deY)
    finally:
        self.need_maps = False
        self.maps = None

# Attach as the model's visualize() method.
CnnBasicModel.visualize = cnn_basic_visualize

In [None]:
def cnn_basic_alloc_logic_full_layer(self, input_shape, hconfig):
    """Allocate the fixed 1x1 weight of the 'logic_full' layer.

    ``input_shape`` and ``hconfig`` are not consulted: the layer always maps
    one input to one output through the constant weight ``[[1]]``.

    Returns:
        ``({'w': weight}, [1])``.
    """
    unit_weight = np.array([[1]])

    return {'w': unit_weight}, [1]

# Attach as the allocator for layers of type 'logic_full'.
CnnBasicModel.alloc_logic_full_layer = cnn_basic_alloc_logic_full_layer

def cnn_basic_forward_logic_full_layer(self, x, hconfig, pm):
    """Forward pass of the 'logic_full' layer: ``y = x @ pm['w']``.

    There is no bias and no activation. Non-2-D input is flattened to
    ``[mb_size, -1]`` first.

    Returns:
        ``(y, [x_flat, y, x_org_shape])``, or ``(x, None)`` when ``pm`` is
        ``None``.
    """
    if pm is None:
        return x, None

    orig_shape = x.shape
    flat = x if len(orig_shape) == 2 else x.reshape([orig_shape[0], -1])

    out = np.matmul(flat, pm['w'])

    return out, [flat, out, orig_shape]

# Attach as the forward handler for layers of type 'logic_full'.
CnnBasicModel.forward_logic_full_layer = cnn_basic_forward_logic_full_layer

def cnn_basic_backprop_logic_full_layer(self, G_y, hconfig, pm, aux):
    """Backward pass of the 'logic_full' layer.

    The weight is never updated and the incoming gradient is passed through
    unchanged, only reshaped to the original input shape saved in ``aux``.
    """
    if pm is None:
        return G_y

    _, _, orig_shape = aux

    return G_y.reshape(orig_shape)

# Attach as the backprop handler for layers of type 'logic_full'.
CnnBasicModel.backprop_logic_full_layer = cnn_basic_backprop_logic_full_layer
