In [12]:
from google.colab import drive
import pandas as pd
import numpy as np
import tensorflow as tf
import random
import os
import matplotlib.pyplot as plt
import seaborn as sns

import os
import mxnet as mx
import multiprocessing as mp
import gluonnlp as nlp

from mxnet import gluon, nd, init
from mxnet.gluon import nn, rnn
from mxnet import autograd, gluon, nd
from d2l import try_gpu
import pandas as pd

# Use sklearn's metric functions to evaluate the results of the experiment
from sklearn.metrics import accuracy_score, f1_score

# Single seed shared by every random number generator used in this notebook.
main_seed = 21

# Seed each library's global RNG so that a fresh "Restart & Run All" is repeatable.
np.random.seed(main_seed)
random.seed(main_seed)
tf.random.set_seed(main_seed)
# MXNet keeps its own random generators, which the calls above do not touch;
# without this the Gluon side of the notebook stays unseeded.
mx.random.seed(main_seed)

ModuleNotFoundError: ignored

In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive so the dataset
# files stored there can be read with ordinary file paths.
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Directory on the mounted Drive that load_data() joins file names onto.
base_path = '/content/drive/MyDrive/project4/IMDB'

In [6]:
import pickle
import gzip

def load_data(file_name, base_dir=None):
  """Load one gzip-compressed pickle file and return the unpickled object.

  Args:
    file_name: Name of the file, relative to the data directory.
    base_dir: Directory that contains the file. When omitted (None), the
      notebook-level `base_path` is used, so existing `load_data(name)`
      calls keep working unchanged.

  Returns:
    The object that was pickled into the file.

  Raises:
    OSError: If the file cannot be opened or is not a valid gzip stream.
    pickle.UnpicklingError: If the decompressed payload is not a valid pickle.
  """
  # Reading a module-level name needs no `global` statement. The fallback is
  # resolved at call time so it always reflects the current `base_path`.
  if base_dir is None:
    base_dir = base_path
  # SECURITY: pickle.load can execute arbitrary code embedded in the file;
  # only point this at files you created or otherwise trust.
  with gzip.open(os.path.join(base_dir, file_name), 'rb') as f:
    return pickle.load(f)

In [7]:
# Files: load every pickled artifact, announcing each one before it is read so
# progress is visible in the cell output.
def _load_with_progress(message, file_name):
  """Print `message`, then return the object stored in `file_name`."""
  print(message)
  return load_data(file_name)

# Inputs for the train / validation / test splits.
X_train = _load_with_progress('X_train...', 'X_train_word.pickle')
X_val = _load_with_progress('X_val...', 'X_val_word.pickle')
X_test = _load_with_progress('X_test...', 'X_test_word.pickle')

# Labels for the same three splits.
y_train = _load_with_progress('y_train...', 'y_train.pickle')
y_val = _load_with_progress('y_val...', 'y_val.pickle')
y_test = _load_with_progress('y_test...', 'y_test.pickle')

# Embeddings and the stored maximum sequence length.
embeddings = _load_with_progress('embeddings...', 'embeddings.pickle')
max_seq_len = _load_with_progress('max_seq_len...', 'max_len.pickle')

X_train...
X_val...
X_test...
y_train...
y_val...
y_test...
embeddings...
max_seq_len...


#### 기준모델

In [8]:
import keras
import keras.backend as K
from keras.layers import Input, Embedding, LSTM, Lambda, concatenate, Dropout, Flatten, Dense, Bidirectional, GRU, Conv1D, GlobalMaxPooling1D, BatchNormalization, MaxPooling1D
from keras.models import Model
from keras import initializers
from sklearn.metrics import accuracy_score, f1_score

# Re-seed NumPy and TensorFlow with the notebook-wide seed right after the Keras imports.
# NOTE(review): presumably so this section is reproducible when re-run on its own — confirm.
np.random.seed(main_seed)
tf.random.set_seed(main_seed)

In [9]:
from scipy import stats

# Check the most frequent class in the training labels.
mode_class = stats.mode(y_train)
print(mode_class)

ModeResult(mode=array([False]), count=array([171327]))


In [10]:
# Majority-class baseline: predict the most frequent training label for every
# validation sample.
# Depending on the SciPy version, the mode comes back as a length-1 array or as
# a scalar. np.ravel(...)[0] yields a plain scalar label in both cases, so
# y_pred is a flat 1-D vector instead of a list of 1-element arrays.
majority_label = np.ravel(mode_class[0])[0]
y_pred = np.full(len(y_val), majority_label)
print('val accuracy:', accuracy_score(y_val, y_pred))
print('val f1_score:', f1_score(y_val, y_pred))

val accuracy: 0.7735757050361661
val f1_score: 0.0


In [11]:
class SelfAttention(nn.HybridBlock):
    """Custom multi-hop self-attention layer.

    Implements the operation ``softmax(W_2 * tanh(W_1 * H))`` where ``H`` is
    the word embedding of the whole sentence, of shape
    ``(num_of_word, embed_size)``, and uses the resulting weights to build a
    weighted matrix representation of the sentence.

    Parameters
    ----------
    att_unit : int
        Number of units of the tanh projection (``d_a`` in the reference paper).
    att_hops : int
        Number of attention hops, i.e. attention vectors produced per sentence.
    **kwargs
        Forwarded unchanged to ``nn.HybridBlock``.
    """

    def __init__(self, att_unit, att_hops, **kwargs):
        super().__init__(**kwargs)
        with self.name_scope():
            # tanh(W_1 * H); flatten=False keeps the sequence axis intact.
            self.ut_dense = nn.Dense(att_unit, activation='tanh', flatten=False)
            # W_2: one unnormalized score per hop for every position.
            self.et_dense = nn.Dense(att_hops, activation=None, flatten=False)

    def hybrid_forward(self, F, x):
        """Apply the attention to a batch of embedded sentences.

        Parameters
        ----------
        F : module
            Backend that implements the tensor operations.
        x : tensor
            Embedded input of shape ``[batch_size, seq_len, embedding_width]``.

        Returns
        -------
        tuple
            ``(output, att)``: ``output`` is the attention-weighted sentence
            matrix of shape ``[batch_size, att_hops, embedding_width]`` and
            ``att`` holds the attention weights of shape
            ``[batch_size, att_hops, seq_len]``.
        """
        # [batch_size, seq_len, att_unit]
        hidden = self.ut_dense(x)
        # [batch_size, seq_len, att_hops]
        hop_scores = self.et_dense(hidden)

        # Bring the hops in front of the positions: [batch_size, att_hops, seq_len].
        scores_by_hop = F.transpose(hop_scores, axes=(0, 2, 1))
        # Normalize each hop over the seq_len dimension.
        att = F.softmax(scores_by_hop, axis=-1)

        # Weighted sum of the embeddings for every hop.
        return F.batch_dot(att, x), att
    
# Quick check: build the layer with d_a = 20 attention units and 5 hops,
# then print the resulting block.
print(SelfAttention(att_unit=20, att_hops=5))

NameError: ignored