# Env

In [None]:
# imports
import argparse
import os
import random
import shutil
import json
import zipfile
import math
import copy
import collections
import re

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sentencepiece as spm
import tensorflow as tf
import tensorflow.keras.backend as K

from tqdm.notebook import tqdm, trange

In [None]:
# Environment configuration
# Settings live on a Namespace so later cells can read them as attributes.
args = argparse.Namespace(
    seed=1234,  # random seed value
)

print(args)

In [None]:
# Random seed setup
def set_seed(seed: int) -> None:
    """Seed the Python, NumPy and TensorFlow global random generators.

    Args:
        seed: Value passed to each library's seeding function.
    """
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)


# Apply the configured seed. Without this call the seed stored in `args` is
# never used, and the randomly initialised layers differ between runs.
set_seed(args.seed)

In [None]:
# data dir
# Root folder of the course data; presumably a mounted Google Drive folder
# (path starts with /content/drive/MyDrive) - adjust for other environments.
data_dir = '/content/drive/MyDrive/문서/강의계획서/삼성전기/삼성전기.20220228/data'
# Show the directory entries; this raises if the path does not exist.
os.listdir(data_dir)

# Vocabulary*

In [None]:
# Load the SentencePiece model stored under <data_dir>/kowiki.
vocab_model_path = os.path.join(data_dir, 'kowiki', 'kowiki_32000.model')
vocab = spm.SentencePieceProcessor()
vocab.load(vocab_model_path)

# Data & Input

In [None]:
# Input sentence: a single Korean example of space-separated words.
sentence = "나는 수학 학생 영어 선생님 과학 조교 친구 입니다"

In [None]:
# Build the training input: encode the sentence into token ids and wrap it in
# a list so the resulting NumPy array has a leading batch axis of size 1.
train_inputs = np.array([vocab.encode_as_ids(sentence)])

train_inputs

In [None]:
# Embedding table: one 4-dimensional vector for each vocabulary entry.
embedding = tf.keras.layers.Embedding(input_dim=len(vocab), output_dim=4)

In [None]:
# Word vectors: look up the embedding of every token id.
# With one sentence and an embedding size of 4 the result is (1, n_tokens, 4).
hidden = embedding(train_inputs)
hidden

# CNN

## Tutorial

In [None]:
# CNN
# A single filter over a window of 3 tokens; padding is left at the Keras
# default ('valid').
conv = tf.keras.layers.Conv1D(filters=1, kernel_size=3)
# Calling the layer builds its weights and yields one value per window position.
conv_hidden = conv(hidden)
conv_hidden

In [None]:
# Fetch the kernel and bias arrays of the layer built above and display them.
weight, bias = conv.get_weights()
weight, bias

In [None]:
# For this layer: kernel is (kernel_size=3, input_dim=4, filters=1), bias is (1,).
weight.shape, bias.shape

In [None]:
# Drop the size-1 filter axis so the kernel becomes a (3, 4) window.
t_weight = tf.squeeze(weight)
t_weight

In [None]:
# Drop the size-1 batch axis so the embeddings become (n_tokens, 4).
t_hidden = tf.squeeze(hidden)
t_hidden

In [None]:
# Reproduce the Conv1D output by hand: slide the kernel along the token axis,
# multiply element-wise and sum each window.
# The window length is read from the kernel rather than hard-coded, so the
# check stays valid if kernel_size is changed in the layer definition.
# The bias is not added; Keras initialises it to zeros, so the values match.
kernel_size = t_weight.shape[0]
for i in range(t_hidden.shape[0] - kernel_size + 1):
    sub_hidden = t_hidden[i:i + kernel_size]
    sub_fea = sub_hidden * t_weight
    sub_val = tf.reduce_sum(sub_fea)
    print(sub_val)

In [None]:
# Layer output again, for comparison with the manually computed window sums.
conv_hidden

## padding valid

In [None]:
# CNN (padding valid)
# 'valid' adds no padding, so kernel_size=3 yields n_tokens - 2 output positions.
conv = tf.keras.layers.Conv1D(filters=5, kernel_size=3, padding='valid')  # default value
conv(hidden)

In [None]:
# Kernel and bias of the 'valid' layer (5 filters).
weight, bias = conv.get_weights()
weight, bias

In [None]:
# For this layer: kernel is (kernel_size=3, input_dim=4, filters=5), bias is (5,).
weight.shape, bias.shape

## padding causal

In [None]:
# CNN (padding causal)
# 'causal' pads only on the left, so output[t] does not depend on later tokens
# and the output keeps the input length.
conv = tf.keras.layers.Conv1D(filters=5, kernel_size=3, padding='causal')
conv(hidden)

In [None]:
# Kernel and bias of the 'causal' layer (5 filters).
weight, bias = conv.get_weights()
weight, bias

In [None]:
# Padding does not change parameter shapes: kernel (3, 4, 5), bias (5,).
weight.shape, bias.shape

## padding same

In [None]:
# CNN (padding same)
# 'same' pads evenly on both sides so that, with the default stride of 1, the
# output keeps the input length.
conv = tf.keras.layers.Conv1D(filters=5, kernel_size=3, padding='same')
conv(hidden)

In [None]:
# Kernel and bias of the 'same' layer (5 filters).
weight, bias = conv.get_weights()
weight, bias

In [None]:
# Padding does not change parameter shapes: kernel (3, 4, 5), bias (5,).
weight.shape, bias.shape

# 실습
- CNN을 3개 이상 사용한 문장 분류 모델을 생성해 보세요.
- 생성된 모델의 plot을 그려보세요.