<a href="https://colab.research.google.com/github/raphaelfontenelle/PLN/blob/main/Classificador_de_Sentimentos.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Construindo um Classificador de Sentimentos com PyTorch

In [1]:
import torch
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import sklearn
from torch.utils.data import DataLoader, Dataset
from sklearn.feature_extraction.text import CountVectorizer
from tqdm.notebook import tqdm, tqdm_notebook

In [2]:
# Select the GPU when CUDA is available, otherwise fall back to the CPU.
# NOTE(review): no later cell shown here moves the model or the batches to
# this device, so training and inference appear to run on the CPU — confirm.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [4]:
# Names given to the two columns of the reviews file.
nomes_colunas = ['Review', 'Sentimento']
# Read the tab-separated reviews file. Because `names` is passed explicitly,
# pandas treats every line of the file as data (no header row is consumed).
dados_filmes = pd.read_csv('imdb_reviews.csv', sep = '\t', names = nomes_colunas)

In [5]:
# Preview the first rows of the loaded frame.
dados_filmes.head()

Unnamed: 0,Review,Sentimento
0,"A very, very, very slow-moving, aimless movie ...",0
1,Not sure who was more lost - the flat characte...,0
2,Attempting artiness with black & white and cle...,0
3,Very little music or anything to speak of.,0
4,The best scene in the movie was when Gerardo i...,1


In [6]:
# (rows, columns) of the loaded frame.
dados_filmes.shape

(748, 2)

In [7]:
# Class balance: number of reviews per sentiment label.
dados_filmes['Sentimento'].value_counts()

1    386
0    362
Name: Sentimento, dtype: int64

In [8]:
# Bag-of-words vectorizer: drops English stop words and ignores terms found in
# more than 99% or in fewer than 0.5% of the documents.
# NOTE(review): the Sequences class fits its own CountVectorizer with the same
# settings; this module-level instance is not referenced by the later cells
# shown here.
vectorizer = CountVectorizer(stop_words = 'english', max_df = 0.99, min_df = 0.005)
vectorizer

CountVectorizer(max_df=0.99, min_df=0.005, stop_words='english')

In [9]:
# Learn the vocabulary from the reviews and encode them as a sparse
# document-term count matrix (one row per review, one column per term).
sequences = vectorizer.fit_transform(dados_filmes.Review.tolist())
sequences

<748x320 sparse matrix of type '<class 'numpy.int64'>'
	with 2931 stored elements in Compressed Sparse Row format>

In [10]:
# Sentiment labels as a plain Python list, in the same row order as the reviews.
labels = dados_filmes.Sentimento.tolist()

In [11]:
# Mapping from each vocabulary term to its column index in the count matrix.
token2idx = vectorizer.vocabulary_

In [12]:
# Inverse mapping: column index -> vocabulary term.
idx2token = {idx: token for token, idx in token2idx.items()}

In [13]:
class Sequences(Dataset):
    """Map-style dataset pairing bag-of-words count vectors with labels.

    A CountVectorizer (English stop words removed, max_df=0.99, min_df=0.005)
    is fitted on the review texts at construction time and kept on the
    instance so the same vocabulary can be reused to encode new text.

    Args:
        reviews: Iterable of review strings. When both `reviews` and `labels`
            are omitted, the notebook's global `dados_filmes` frame is used
            (its `Review` and `Sentimento` columns), which keeps the original
            `Sequences()` call working.
        labels: Iterable of labels, one per review.

    Raises:
        ValueError: If only one of `reviews` / `labels` is given, or if their
            lengths differ.

    Attributes (documented here rather than inline):
        vectorizer: The fitted CountVectorizer.
        sequences: Sparse document-term count matrix, one row per review.
        labels: List of labels aligned with the rows of `sequences`.
        token2idx: Vocabulary term -> column index.
        idx2token: Column index -> vocabulary term.
    """

    def __init__(self, reviews=None, labels=None):
        if reviews is None and labels is None:
            # Backward-compatible default: read from the notebook-level frame.
            reviews = dados_filmes.Review.tolist()
            labels = dados_filmes.Sentimento.tolist()
        elif reviews is None or labels is None:
            raise ValueError('reviews and labels must be provided together')

        reviews = list(reviews)
        labels = list(labels)
        if len(reviews) != len(labels):
            raise ValueError(
                f'reviews and labels differ in length: {len(reviews)} != {len(labels)}'
            )

        self.vectorizer = CountVectorizer(stop_words = 'english', max_df = 0.99, min_df = 0.005)
        self.sequences = self.vectorizer.fit_transform(reviews)
        self.labels = labels
        self.token2idx = self.vectorizer.vocabulary_
        self.idx2token = {idx: token for token, idx in self.token2idx.items()}

    def __getitem__(self, i):
        """Return `(counts, label)` for row `i`.

        `counts` is the dense form of a single sparse row, so it keeps a
        leading axis of length 1 (shape `(1, n_terms)`).
        """
        return self.sequences[i, :].toarray(), self.labels[i]

    def __len__(self):
        """Number of encoded reviews (rows of the count matrix)."""
        return self.sequences.shape[0]

In [14]:
# Build the dataset: constructing Sequences fits its vectorizer and encodes every review.
dados_frases = Sequences()

In [15]:
# Batch the dataset for training. batch_size=4096 is larger than the 748
# reviews loaded above, so each epoch is a single full-batch step; samples are
# not shuffled (DataLoader's default).
train_loader = DataLoader(dados_frases, batch_size = 4096)
train_loader

<torch.utils.data.dataloader.DataLoader at 0x7f1e51c6db90>

In [16]:
class BagOfWordsClassifier(nn.Module):
    """Feed-forward classifier over bag-of-words count vectors.

    Two ReLU-activated hidden layers followed by a linear layer that emits a
    single raw logit per example (no sigmoid is applied here).

    Args:
        vocab_size: Width of the input count vector.
        hidden1: Number of units in the first hidden layer.
        hidden2: Number of units in the second hidden layer.
    """

    def __init__(self, vocab_size, hidden1, hidden2):
        super().__init__()
        # Layers are declared in input-to-output order.
        self.fc1 = nn.Linear(vocab_size, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, 1)

    def forward(self, inputs):
        """Forward pass: return one logit per input row.

        Axis 1 of `inputs` is squeezed away when it has length 1, and the
        values are cast to float before entering the first linear layer.
        """
        features = inputs.squeeze(1).float()
        hidden = F.relu(self.fc1(features))
        hidden = F.relu(self.fc2(hidden))
        logits = self.fc3(hidden)
        return logits

In [17]:
# Input width = size of the dataset's vocabulary; hidden layers of 128 and 64 units.
modelo = BagOfWordsClassifier(len(dados_frases.token2idx), 128, 64)

In [18]:
# Display the model's layer structure.
modelo

BagOfWordsClassifier(
  (fc1): Linear(in_features=320, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=1, bias=True)
)

In [19]:
# Binary cross-entropy computed on raw logits (the sigmoid is applied inside
# the loss), matching the model's single un-activated output.
criterion = nn.BCEWithLogitsLoss()

In [20]:
# Adam over the model's trainable parameters, learning rate 1e-3.
optimizer = optim.Adam([p for p in modelo.parameters() if p.requires_grad], lr = 0.001)

In [21]:
# Train the classifier for a fixed number of epochs, recording the mean
# batch loss of every epoch in `train_losses`.
modelo.train()  # switch the module to training mode

train_losses = []  # one mean loss value per epoch

epochs = 30

for epoch in range(epochs):

    progress_bar = tqdm_notebook(train_loader, leave = False)

    losses = []  # per-batch losses of the current epoch

    for inputs, target in progress_bar:

        # Clear gradients accumulated by the previous step.
        modelo.zero_grad()

        output = modelo(inputs)

        # Drop only the trailing logit axis: (batch, 1) -> (batch,).
        # A bare .squeeze() would also remove the batch axis whenever a batch
        # holds a single example, leaving a 0-d tensor that BCEWithLogitsLoss
        # rejects against the (1,)-shaped target.
        loss = criterion(output.squeeze(-1), target.float())

        loss.backward()

        # Cap the total gradient norm at 3 before the update.
        nn.utils.clip_grad_norm_(modelo.parameters(), 3)

        optimizer.step()

        batch_loss = loss.item()

        progress_bar.set_description(f'\nErro do Modelo: {batch_loss:.3f}')

        losses.append(batch_loss)

    # Mean of the batch losses seen in this epoch.
    epoch_loss = sum(losses) / len(losses)

    train_losses.append(epoch_loss)

    tqdm.write(f'Epoch #{epoch + 1}\tErro em Treinamento: {epoch_loss:.3f}')

  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #1	Erro em Treinamento: 0.693


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #2	Erro em Treinamento: 0.692


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #3	Erro em Treinamento: 0.691


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #4	Erro em Treinamento: 0.689


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #5	Erro em Treinamento: 0.688


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #6	Erro em Treinamento: 0.687


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #7	Erro em Treinamento: 0.685


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #8	Erro em Treinamento: 0.683


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #9	Erro em Treinamento: 0.681


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #10	Erro em Treinamento: 0.679


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #11	Erro em Treinamento: 0.677


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #12	Erro em Treinamento: 0.674


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #13	Erro em Treinamento: 0.671


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #14	Erro em Treinamento: 0.667


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #15	Erro em Treinamento: 0.663


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #16	Erro em Treinamento: 0.659


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #17	Erro em Treinamento: 0.654


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #18	Erro em Treinamento: 0.649


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #19	Erro em Treinamento: 0.643


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #20	Erro em Treinamento: 0.637


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #21	Erro em Treinamento: 0.630


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #22	Erro em Treinamento: 0.623


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #23	Erro em Treinamento: 0.615


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #24	Erro em Treinamento: 0.607


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #25	Erro em Treinamento: 0.598


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #26	Erro em Treinamento: 0.588


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #27	Erro em Treinamento: 0.578


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #28	Erro em Treinamento: 0.568


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #29	Erro em Treinamento: 0.557


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch #30	Erro em Treinamento: 0.545


In [22]:
def predict_sentiment(text):
    """Print the model's sentiment score and label for a piece of text.

    The text is encoded with the vectorizer held by the global `dados_frases`,
    passed through the global `modelo`, and the sigmoid of the resulting logit
    is printed together with "Positive" (score >= 0.5) or "Negative".

    Args:
        text: The review text to score.

    Returns:
        None. The result is printed, not returned.

    Side effects:
        Leaves `modelo` in evaluation mode.
    """
    modelo.eval()

    # No gradients are needed for a single inference pass.
    with torch.no_grad():
        counts = dados_frases.vectorizer.transform([text]).toarray()
        logits = modelo(torch.LongTensor(counts))
        prediction = torch.sigmoid(logits).item()

    is_positive = prediction >= 0.5
    message = f'{prediction:0.3}: Positive' if is_positive else f'{prediction:0.3}: Negative'
    print(message)

In [23]:
test_text = """
Cool Cat Saves The Kids is a symbolic masterpiece directed by Derek Savage that
is not only satirical in the way it makes fun of the media and politics, but in
the way in questions as how we humans live life and how society tells us to
live life.

Before I get into those details, I wanna talk about the special effects in this
film. They are ASTONISHING, and it shocks me that Cool Cat Saves The Kids got
snubbed by the Oscars for Best Special Effects. This film makes 2001 look like
garbage, and the directing in this film makes Stanley Kubrick look like the
worst director ever. You know what other film did that? Birdemic: Shock and
Terror. Both of these films are masterpieces, but if I had to choose my
favorite out of the 2, I would have to go with Cool Cat Saves The Kids. It is
now my 10th favorite film of all time.

Now, lets get into the symbolism: So you might be asking yourself, Why is Cool
Cat Orange? Well, I can easily explain. Orange is a color. Orange is also a
fruit, and its a very good fruit. You know what else is good? Good behavior.
What behavior does Cool Cat have? He has good behavior. This cannot be a
coincidence, since cool cat has good behavior in the film.

Now, why is Butch The Bully fat? Well, fat means your wide. You wanna know who
was wide? Hitler. Nuff said this cannot be a coincidence.

Why does Erik Estrada suspect Butch The Bully to be a bully? Well look at it
this way. What color of a shirt was Butchy wearing when he walks into the area?
I don't know, its looks like dark purple/dark blue. Why rhymes with dark? Mark.
Mark is that guy from the Room. The Room is the best movie of all time. What is
the opposite of best? Worst. This is how Erik knew Butch was a bully.

and finally, how come Vivica A. Fox isn't having a successful career after
making Kill Bill.

I actually can't answer that question.

Well thanks for reading my review.
"""

# Score the review above; prints the sigmoid probability and the predicted label.
predict_sentiment(test_text)

0.977: Positive


In [25]:
test_text = """
What the heck is this ? There is not one redeeming quality about this terrible
and very poorly done "movie". I can't even say that it's a "so bad it's good
movie".It is undeniably pointless to address all the things wrong here but
unfortunately even the "life lessons" about bullies and stuff like this are so
wrong and terrible that no kid should hear them.The costume is also horrible
and the acting...just unbelievable.No effort whatsoever was put into this thing
and it clearly shows,I have no idea what were they thinking or who was it even
meant for. I feel violated after watching this trash and I deeply recommend you
stay as far away as possible.This is certainly one of the worst pieces of c***
I have ever seen.
"""

# Score the review above; prints the sigmoid probability and the predicted label.
predict_sentiment(test_text)

0.000231: Negative


In [24]:
test_text = """
Don't let any bullies out there try and shape your judgment on this gem of a
title.

Some people really don't have anything better to do, except trash a great movie
with annoying 1-star votes and spread lies on the Internet about how "dumb"
Cool Cat is.

I wouldn't be surprised to learn if much of the unwarranted negativity hurled
at this movie is coming from people who haven't even watched this movie for
themselves in the first place. Those people are no worse than the Butch the
Bully, the film's repulsive antagonist.

As it just so happens, one of the main points of "Cool Cat Saves the Kids" is
in addressing the attitudes of mean naysayers who try to demean others who
strive to bring good attitudes and fun vibes into people's lives. The message
to be learned here is that if one is friendly and good to others, the world is
friendly and good to one in return, and that is cool. Conversely, if one is
miserable and leaving 1-star votes on IMDb, one is alone and doesn't have any
friends at all. Ain't that the truth?

The world has uncovered a great, new, young filmmaking talent in "Cool Cat"
creator Derek Savage, and I sure hope that this is only the first of many
amazing films and stories that the world has yet to appreciate.

If you are a cool person who likes to have lots of fun, I guarantee that this
is a movie with charm that will uplift your spirits and reaffirm your positive
attitudes towards life.
"""

# Score the review above; prints the sigmoid probability and the predicted label.
predict_sentiment(test_text)

0.915: Positive


Referência: Data Science Academy