# AI 전문가 교육과정 Day 1 실습

***
### NLP응용: 소셜미디어 상의 감성분석
Applied Natrual Language Processing: Sentiment Analysis in Social Media

강사: 차미영 교수 (카이스트 전산학부)    
조교: 정현규, 신민기 (카이스트 전산학부)

# 1. Basic Setting
## 1.1 Import Python Libraries

In [1]:
pip install transformers

Collecting transformers
  Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m30.1 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m84.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m58.4 MB/s[0m eta [36m0:00:0

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
%cd drive/My\ Drive/data

/content/drive/My Drive/data


In [4]:
import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('omw-1.4')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [5]:
import pandas as pd
import re
import numpy as np
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from tqdm.notebook import tqdm
from sklearn.metrics import classification_report

## 1.2. Data preprocessing and visualization

IMDb data: This dataset is a collection of movie review.

More detailed information: https://www.imdb.com/interfaces/

In [6]:
# Load Dataset
train_df = pd.read_csv('./reduced_train.csv')
test_df = pd.read_csv('./reduced_test.csv')

# Change labels from (1, 2) to (0, 1)
train_df['label'] = train_df['label'] - 1
test_df['label'] = test_df['label'] - 1

In [7]:
train_df.head(5)

Unnamed: 0,label,review
0,0,"Unfortunately, the frustration of being Dr. Go..."
1,1,Been going to Dr. Goldberg for over 10 years. ...
2,0,I don't know what Dr. Goldberg was like before...
3,0,I'm writing this review to give you a heads up...
4,1,All the food is great here. But the best thing...


In [8]:
train_df.groupby('label').count()

Unnamed: 0_level_0,review
label,Unnamed: 1_level_1
0,30233
1,25768


In [9]:
train_df['review'][0]

"Unfortunately, the frustration of being Dr. Goldberg's patient is a repeat of the experience I've had with so many other doctors in NYC -- good doctor, terrible staff.  It seems that his staff simply never answers the phone.  It usually takes 2 hours of repeated calling to get an answer.  Who has time for that or wants to deal with it?  I have run into this problem with many other doctors and I just don't get it.  You have office workers, you have patients with medical needs, why isn't anyone answering the phone?  It's incomprehensible and not work the aggravation.  It's with regret that I feel that I have to give Dr. Goldberg 2 stars."

## 1.3. Text preprocessing
1. Remove non-alphabetic sequences  
2. Remove stop words   
3. Lemmatize   
4. Remove very small words  

In [10]:
def clean_text(sentence):
    # remove non alphabetic sequences
    pattern = re.compile(r'[^a-z]+')
    sentence = sentence.lower()
    sentence = pattern.sub(' ', sentence).strip()

    # Tokenize
    word_list = word_tokenize(sentence)

    # stop words
    stopwords_list = set(stopwords.words('english'))

    # remove stop words
    word_list = [word for word in word_list if word not in stopwords_list]

    # remove very small words, length < 3
    # they don't contribute any useful information
    word_list = [word for word in word_list if len(word) > 2]

    # lemmatize
    lemma = WordNetLemmatizer()
    word_list = [lemma.lemmatize(word) for word in word_list]

    # list to sentence
    sentence = ' '.join(word_list)

    return sentence

In [11]:
tqdm.pandas()
train_df['review'] = train_df['review'].progress_apply(lambda x: clean_text(str(x)))
test_df['review'] = test_df['review'].progress_apply(lambda x: clean_text(str(x)))

  0%|          | 0/56001 [00:00<?, ?it/s]

  0%|          | 0/3801 [00:00<?, ?it/s]

# 2. Sentiment Analysis - 1: Vader

### 2.1. Download pre-designed vader model

In [12]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [13]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

### 2.2. Calculate the sentiment scores using vader

In [14]:
sia = SentimentIntensityAnalyzer()

print(train_df['review'][0])
print(sia.polarity_scores(train_df['review'][0]))

unfortunately frustration goldberg patient repeat experience many doctor nyc good doctor terrible staff seems staff simply never answer phone usually take hour repeated calling get answer time want deal run problem many doctor get office worker patient medical need anyone answering phone incomprehensible work aggravation regret feel give goldberg star
{'neg': 0.23, 'neu': 0.701, 'pos': 0.069, 'compound': -0.872}


In [15]:
def get_answer(score):
    if score['neg'] > score['pos']:
        return 0
    return 1

vader_polarity_scores = [
    get_answer(sia.polarity_scores(review))
    for review in tqdm(test_df['review'])
]

  0%|          | 0/3801 [00:00<?, ?it/s]

In [16]:
print(classification_report(test_df['label'], vader_polarity_scores))

              precision    recall  f1-score   support

           0       0.96      0.34      0.50      2082
           1       0.55      0.98      0.71      1719

    accuracy                           0.63      3801
   macro avg       0.76      0.66      0.60      3801
weighted avg       0.78      0.63      0.59      3801



# 3. Sentiment Analysis - 2: Word2Vec

In [17]:
from sklearn.linear_model import LogisticRegression
from gensim.models import Word2Vec

### 3.1. Token to numbers
Use tokenizer on NLTK library

In [18]:
train_tokens = [word_tokenize(review) for review in tqdm(train_df['review'])]
test_tokens = [word_tokenize(review) for review in tqdm(test_df['review'])]
len(train_tokens)

  0%|          | 0/56001 [00:00<?, ?it/s]

  0%|          | 0/3801 [00:00<?, ?it/s]

56001

### 3.2. Define Word2Vec Model

In [19]:
word_vector_size = 300
try:
    word2vec_model = Word2Vec(
        sentences=train_tokens,
        vector_size=word_vector_size, # Dimension of word vector
        window=5, # Context window size
        min_count=5, # Minimum frequency for each word (smoothing)
        workers=8, # Number of workers for training
        sg=0 # Type of architecture 0: CBOW, 1: Skip-gram
    )
except:
    word2vec_model = Word2Vec(
        sentences=train_tokens,
        size=word_vector_size, # Dimension of word vector
        window=5, # Context window size
        min_count=5, # Minimum frequency for each word (smoothing)
        workers=8, # Number of workers for training
        sg=0 # Type of architecture 0: CBOW, 1: Skip-gram
    )

In [20]:
word2vec_model.wv.most_similar("expensive")

[('pricey', 0.74525386095047),
 ('overpriced', 0.699744462966919),
 ('cheaper', 0.695587158203125),
 ('steep', 0.6852782368659973),
 ('pricy', 0.6594589948654175),
 ('premium', 0.6557731628417969),
 ('comparable', 0.6151935458183289),
 ('average', 0.6139956116676331),
 ('cheap', 0.6112958192825317),
 ('cheapest', 0.610413670539856)]

### 3.3. Get Word Embedding
Get average word embeddings of each reviews.

In [21]:
wv_dict = word2vec_model.wv

def get_review_embeddings(tokens):
    review_embeddings = []
    for tokens in tqdm(tokens):
        word_embeddings = [wv_dict[t] for t in tokens if t in wv_dict]
        word_embeddings.append(np.zeros(word_vector_size))
        word_embeddings = np.array(word_embeddings)
        sentence_embedding = np.mean(word_embeddings, axis=0)
        review_embeddings.append(sentence_embedding)
    review_embeddings = np.array(review_embeddings)
    return review_embeddings

train_review_embeddings = get_review_embeddings(train_tokens)
test_review_embeddings = get_review_embeddings(test_tokens)
assert(len(train_review_embeddings) == len(train_df['label']))
assert(len(test_review_embeddings) == len(test_df['label']))

  0%|          | 0/56001 [00:00<?, ?it/s]

  0%|          | 0/3801 [00:00<?, ?it/s]

In [22]:
train_review_embeddings.shape

(56001, 300)

### 3.4. Train Machine Learning Model
Use logistic regression using Scikit-learn library.

In [23]:
regression_model = LogisticRegression(max_iter=500)
regression_model.fit(train_review_embeddings, train_df['label'])


print(regression_model.score(train_review_embeddings, train_df['label']))
print(regression_model.score(test_review_embeddings, test_df['label']))

print(classification_report(test_df['label'], regression_model.predict(test_review_embeddings)))

0.8920019285369905
0.8950276243093923
              precision    recall  f1-score   support

           0       0.90      0.91      0.90      2082
           1       0.89      0.88      0.88      1719

    accuracy                           0.90      3801
   macro avg       0.89      0.89      0.89      3801
weighted avg       0.89      0.90      0.89      3801



# 4. Sentiment Analysis - 3: RNN (LSTM)

In [24]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from transformers import BertTokenizer

### 4.1. Token to numbers
To use natural language dataset in gpu, we makea a numeric mapping for words.

In [25]:
class MyTokenizer():
    def __init__(self, total_tokens):
        total_tokens.insert(0, '[UNK]')
        total_tokens.insert(0, '[PAD]')

        self.token2id_dict = {}
        self.id2token_dict = {}
        for i, t in enumerate(tqdm(total_tokens)):
            self.token2id_dict[t] = i
            self.id2token_dict[i] = t

    def get_vocab_size(self):
        return len(self.token2id_dict)

    def tokenize(self, token):
        if token in self.token2id_dict:
            return self.token2id_dict[token]
        else:
            return self.token2id_dict['[UNK]']

    def get_token(self, idx):
        if idx in self.id2token_dict:
            return self.id2token_dict[idx]
        else:
            assert(0)

### For convenience, we will user BertTokenizer!!

In [26]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
vocab_size = tokenizer.vocab_size

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

### 4.2. Pytorch Dataset

In [28]:
class MovieReviewDataset(Dataset):
    def __init__(self, data):
        # We will get dataframe for input data
        self.reviews = data['review']
        self.label = data['label']

    def __len__(self):
        return len(self.reviews)

    def __getitem__(self, idx):
        review = self.reviews[idx]
        encoding = tokenizer(
            review,
            max_length=512,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        label = self.label[idx]

        return encoding, label

In [29]:
train_dataset = MovieReviewDataset(train_df)
test_dataset = MovieReviewDataset(test_df)
encoding, label = train_dataset[0]
print(encoding['input_ids'].shape)

torch.Size([1, 512])


### 4.3. Create Dataloader

In [30]:
train_dataloader = DataLoader(
    train_dataset, batch_size=16, shuffle=True, num_workers=9
)

test_dataloader = DataLoader(
    test_dataset, batch_size=16, shuffle=False, num_workers=9
)



In [31]:
for d in train_dataloader:
    encoding, label = d
    print(encoding['input_ids'].shape)
    print(label.shape)
    break

torch.Size([16, 1, 512])
torch.Size([16])


### 4.4. Model Definition

In [32]:
class MyLSTMModel(nn.Module):
    def __init__(
        self,
        vocab_size,
    ):
        super().__init__()
        embedding_size = 256
        hidden_size = 256

        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_size, padding_idx=0)
        self.lstm = nn.LSTM(input_size=embedding_size, hidden_size=hidden_size, batch_first=True, num_layers=2, dropout=0.1)
        self.fc = nn.Linear(hidden_size, 1)

    def mean_pooling(self, token_embeddings, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()

        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

    def forward(
        self,
        input_ids,
        attention_mask
    ):
        embedding = self.embedding(input_ids)
        output, _ = self.lstm(embedding)
        output = self.mean_pooling(output, attention_mask)
        logit = self.fc(output).squeeze(dim=1)

        return logit

In [33]:
criterion = nn.BCEWithLogitsLoss()

device = 'cuda' if torch.cuda.is_available() else 'cpu'
my_lstm_model = MyLSTMModel(vocab_size)
#my_lstm_model = nn.DataParallel(my_model)
my_lstm_model = my_lstm_model.to(device)

optimizer = optim.AdamW(my_lstm_model.parameters(), 0.001)

### 4.5. Training

In [34]:
def train(
    model=None,
    optimizer=None,
    train_dataloader=None,
):
    optimizer.zero_grad()

    train_loss_list = []
    for idx, d in enumerate(tqdm(train_dataloader)):
        encoding, label = d
        input_ids = encoding['input_ids'].to(device).squeeze(dim=1)
        attention_mask = encoding['attention_mask'].to(device).squeeze(dim=1)
        label = label.to(device).float()

        logit = model(input_ids, attention_mask)
        loss = criterion(logit, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss_list.append(loss.item())

        if idx % 100 == 0:
            print(
                f'Train Total Loss: ' +
                f'{round(sum(train_loss_list)/len(train_loss_list), 4)}')
            train_loss_list = []

    return model

In [35]:
my_lstm_model.train()
for epoch in range(1):
    print(f'Start Epoch {epoch} Training')
    my_lstm_model = train(
        my_lstm_model,
        optimizer,
        train_dataloader,
    )

Start Epoch 0 Training


  0%|          | 0/3501 [00:00<?, ?it/s]

Train Total Loss: 0.6938
Train Total Loss: 0.5521
Train Total Loss: 0.401
Train Total Loss: 0.3924
Train Total Loss: 0.3722
Train Total Loss: 0.3085
Train Total Loss: 0.3306
Train Total Loss: 0.3111
Train Total Loss: 0.2864
Train Total Loss: 0.2997
Train Total Loss: 0.2856
Train Total Loss: 0.3179
Train Total Loss: 0.3008
Train Total Loss: 0.2816
Train Total Loss: 0.2895
Train Total Loss: 0.2735
Train Total Loss: 0.2775
Train Total Loss: 0.2457
Train Total Loss: 0.2408
Train Total Loss: 0.2574
Train Total Loss: 0.2644
Train Total Loss: 0.2513
Train Total Loss: 0.2536
Train Total Loss: 0.2417
Train Total Loss: 0.2276
Train Total Loss: 0.224
Train Total Loss: 0.2536
Train Total Loss: 0.2151
Train Total Loss: 0.2339
Train Total Loss: 0.241
Train Total Loss: 0.2307
Train Total Loss: 0.2287
Train Total Loss: 0.1975
Train Total Loss: 0.2642
Train Total Loss: 0.2553
Train Total Loss: 0.2303


### 4.6. Save Model

In [36]:
#torch.save(my_model.module.state_dict(), './lstm_model.pt')
torch.save(my_lstm_model.state_dict(), './lstm_model.pt')

### 4.7. Load Model

In [37]:
my_lstm_model = MyLSTMModel(vocab_size)
my_lstm_model.load_state_dict(torch.load('./lstm_model.pt'))
my_lstm_model = nn.DataParallel(my_lstm_model)
my_lstm_model = my_lstm_model.cuda()

### 4.8. Evaluation

In [38]:
@torch.no_grad()
def evaluate(
    model=None,
    test_dataloader=None,
):
    preds = []
    answer = []
    for idx, d in enumerate(tqdm(test_dataloader)):
        encoding, label = d
        input_ids = encoding['input_ids'].cuda().squeeze(dim=1)
        attention_mask = encoding['attention_mask'].cuda().squeeze(dim=1)

        logit = model(input_ids, attention_mask)
        cur_preds = torch.where(logit > 0.5, 1, 0).cpu().tolist()
        cur_answer = label.tolist()

        preds += cur_preds
        answer += cur_answer

    return preds, answer

In [39]:
my_lstm_model.eval()
preds, answer = evaluate(my_lstm_model, test_dataloader)
print(classification_report(answer, preds))

  0%|          | 0/238 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           0       0.90      0.94      0.92      2082
           1       0.93      0.87      0.90      1719

    accuracy                           0.91      3801
   macro avg       0.91      0.91      0.91      3801
weighted avg       0.91      0.91      0.91      3801



# 5. Sentiment Analysis - 4: BERT

### 5.1. Load BERT tokenizer and BERT model

In [40]:
from transformers import BertTokenizer, BertModel, BertConfig

In [41]:
bert_model = BertModel.from_pretrained('bert-base-uncased')
bert_config = BertConfig.from_pretrained('bert-base-uncased')
#tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
vocab_size = tokenizer.vocab_size

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [42]:
bert_config

BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.30.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

### 5.2. Create Dataloader

In [43]:
train_dataloader = DataLoader(
    train_dataset, batch_size=4, shuffle=True, num_workers=9
)

test_dataloader = DataLoader(
    test_dataset, batch_size=4, shuffle=False, num_workers=9
)



In [44]:
for d in train_dataloader:
    encoding, label = d
    print(encoding['input_ids'].shape)
    print(label.shape)
    break

torch.Size([4, 1, 512])
torch.Size([4])


### 5.3. Model Definition

In [45]:
class MyBertModel(nn.Module):
    def __init__(
        self,
    ):
        super().__init__()
        self.hidden_size = bert_config.hidden_size

        self.bert = bert_model
        self.dropout = nn.Dropout(0.1)
        self.fc = nn.Linear(self.hidden_size, 1)

    def forward(
        self,
        input_ids,
        attention_mask
    ):
        output = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        embedding = output['pooler_output'] # CLS token of the sentence
        embedding = self.dropout(embedding)
        logit = self.fc(embedding).squeeze(dim=1)

        return logit

In [46]:
criterion = nn.BCEWithLogitsLoss()

device = 'cuda' if torch.cuda.is_available() else 'cpu'
my_bert_model = MyBertModel()
#my_bert_model = nn.DataParallel(my_model)
my_bert_model = my_bert_model.to(device)

optimizer = optim.AdamW(my_bert_model.parameters(), 1e-5)

### 5.4. Training

In [47]:
def train(
    model=None,
    optimizer=None,
    train_dataloader=None,
    device=None,
):
    optimizer.zero_grad()

    train_loss_list = []
    for idx, d in enumerate(tqdm(train_dataloader)):
        encoding, label = d
        input_ids = encoding['input_ids'].cuda().squeeze(dim=1)
        attention_mask = encoding['attention_mask'].cuda().squeeze(dim=1)
        label = label.to(device).float()

        logit = model(input_ids, attention_mask)
        loss = criterion(logit, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss_list.append(loss.item())

        if idx % 100 == 0:
            print(
                f'Train Total Loss: ' +
                f'{round(sum(train_loss_list)/len(train_loss_list), 4)}')
            train_loss_list = []

    return model

In [48]:
my_bert_model.train()
for epoch in range(1):
    print(f'Start Epoch {epoch} Training')
    my_bert_model = train(
        my_bert_model,
        optimizer,
        train_dataloader,
        device
    )

Start Epoch 0 Training


  0%|          | 0/14001 [00:00<?, ?it/s]

Train Total Loss: 0.7242
Train Total Loss: 0.6102
Train Total Loss: 0.3572
Train Total Loss: 0.3719
Train Total Loss: 0.3361
Train Total Loss: 0.3266
Train Total Loss: 0.2866
Train Total Loss: 0.2568
Train Total Loss: 0.2691
Train Total Loss: 0.2821
Train Total Loss: 0.3189
Train Total Loss: 0.2824
Train Total Loss: 0.2577
Train Total Loss: 0.2186
Train Total Loss: 0.2408
Train Total Loss: 0.3005
Train Total Loss: 0.2518
Train Total Loss: 0.1941
Train Total Loss: 0.2398
Train Total Loss: 0.2648
Train Total Loss: 0.2936
Train Total Loss: 0.2283
Train Total Loss: 0.2746
Train Total Loss: 0.2533
Train Total Loss: 0.2001
Train Total Loss: 0.2501
Train Total Loss: 0.2755
Train Total Loss: 0.2273
Train Total Loss: 0.1925
Train Total Loss: 0.2401
Train Total Loss: 0.2643
Train Total Loss: 0.2373
Train Total Loss: 0.1964
Train Total Loss: 0.2315
Train Total Loss: 0.2183
Train Total Loss: 0.2423
Train Total Loss: 0.2256
Train Total Loss: 0.2542
Train Total Loss: 0.2165
Train Total Loss: 0.2146


KeyboardInterrupt: ignored

### 5.5. Save Model

In [49]:
#torch.save(my_model.module.state_dict(), './bert_model.pt')
torch.save(my_bert_model.state_dict(), './bert_model.pt')

### 5.6. Load Model

In [50]:
my_bert_model = MyBertModel()
my_bert_model.load_state_dict(torch.load('./bert_model.pt'))
#my_bert_model = nn.DataParallel(my_bert_model)
my_bert_model = my_bert_model.cuda()

### 5.7. Evaluation

In [51]:
@torch.no_grad()
def evaluate(
    model=None,
    test_dataloader=None,
):
    preds = []
    answer = []
    for idx, d in enumerate(tqdm(test_dataloader)):
        encoding, label = d
        input_ids = encoding['input_ids'].cuda().squeeze(dim=1)
        attention_mask = encoding['attention_mask'].cuda().squeeze(dim=1)

        logit = model(input_ids, attention_mask)
        cur_preds = torch.where(logit > 0.5, 1, 0).cpu().tolist()
        cur_answer = label.tolist()

        preds += cur_preds
        answer += cur_answer

    return preds, answer

In [52]:
my_bert_model.eval()
preds, answer = evaluate(my_bert_model, test_dataloader)
print(classification_report(answer, preds))

  0%|          | 0/951 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           0       0.93      0.95      0.94      2082
           1       0.94      0.91      0.93      1719

    accuracy                           0.93      3801
   macro avg       0.93      0.93      0.93      3801
weighted avg       0.93      0.93      0.93      3801

