In [1]:
# [라] 라이브러리
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [2]:
# [모] 모델 정의
class myRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(myRNN, self).__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), self.hidden_size)
        out = self.rnn(x, h0)[0]
        out = self.fc(out[:,-1,:])
        return out

In [3]:
# [데] 데이터 로딩
data = pd.read_csv("../data/IMDB_reviews_binary_suicide_squad.csv", encoding="cp949")

reviews = data['review'].tolist()
ratings = data['rating'].tolist()

print(len(reviews), len(ratings))

vectorizer = CountVectorizer()
X = vectorizer.fit_transform(reviews).toarray()

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(ratings)

X_train, X_test, y_train, y_test = train_test_split(X, y)


class ReviewsDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.int64)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = ReviewsDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)

2471 2471


In [4]:
# [초] 초기화
input_size = X.shape[1]
hidden_size = 16
output_size = 2
learning_rate = 0.01
num_epochs = 100

model = myRNN(input_size, hidden_size, output_size)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [5]:
# [훈] 훈련
for epoch in range(num_epochs):
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs.unsqueeze(1))

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    if (epoch+1)%2 == 0:
        print(f'Epoch[{epoch+1}], Loss:{loss.item()}')
    if (loss.item() < 0.00001):
        break

Epoch[2], Loss:0.15765772759914398
Epoch[4], Loss:0.002539031207561493
Epoch[6], Loss:0.0005474975914694369
Epoch[8], Loss:0.00194193993229419


In [6]:
# [실] 실행
# (test 에 대한 실행)
test_dataset = ReviewsDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        outputs = model(inputs.unsqueeze(1))
        predicted = torch.max(outputs.data, 1)[1]
        total += 1
        correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f'Test accuracy: {accuracy*100:.2f}')

Test accuracy: 70.06


In [7]:
# [실] 실행
# (random review 에 대한 실행)

def predict_review(review):
    review_vector = vectorizer.transform([review]).toarray()
    inputs = torch.tensor(review_vector, dtype=torch.float32)
    with torch.no_grad(): # 미분 안 하겠다는 뜻
        output = model(inputs.unsqueeze(0))
        predicted = torch.max(output.data, 1)[1]
        predicted_rating = label_encoder.classes_[predicted.item()]
        return predicted_rating

my_reviews = []
my_reviews.append("This product exceeded my expectations!")
my_reviews.append("This is a masterpiece of my life")
my_reviews.append("A bull-shit!")
my_reviews.append("I curse all the guys who discourages this movie")
my_reviews.append("I enjoyed a deep sleep during watching this movie.")
my_reviews.append("A sleeping drug.")
my_reviews.append("A perfect remedy for an insomnia.")
my_reviews.append("I will recommend this movie to my worst friends.")

for review in my_reviews:
    predicted_rating = predict_review(review)
    print(f'{review}: {predicted_rating}')

This product exceeded my expectations!: 0
This is a masterpiece of my life: 1
A bull-shit!: 0
I curse all the guys who discourages this movie: 1
I enjoyed a deep sleep during watching this movie.: 1
A sleeping drug.: 0
A perfect remedy for an insomnia.: 0
I will recommend this movie to my worst friends.: 0
