In [5]:
import pandas as pd
import numpy as np
import spacy
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report
from spacy.cli.download import download

In [6]:
# Cache of already-loaded spaCy pipelines, keyed by model name, so repeated
# calls in the same kernel do not reload the (large) model from disk.
_MODELOS_SPACY = {}


def _carregar_modelo(nome_modelo):
    """Return the spaCy pipeline `nome_modelo`, downloading it if it is missing."""
    if nome_modelo not in _MODELOS_SPACY:
        try:
            nlp = spacy.load(nome_modelo)
        except (IOError, OSError):
            # The model package is not installed yet: fetch it and retry once.
            download(nome_modelo)
            nlp = spacy.load(nome_modelo)
        _MODELOS_SPACY[nome_modelo] = nlp
    return _MODELOS_SPACY[nome_modelo]


def criar_embeddings(df_tweets):
    """Convert every text in ``df_tweets.Texto`` into a document vector.

    Parameters
    ----------
    df_tweets : object exposing a ``Texto`` attribute
        Iterable of texts to embed (the callers in this notebook pass a
        pandas DataFrame with a ``Texto`` column).

    Returns
    -------
    numpy.ndarray
        Array built from the ``.vector`` of each processed text, in the same
        order as ``df_tweets.Texto``. An empty ``Texto`` yields an empty
        1-D array.

    Side effects
    ------------
    Prints a progress message, and downloads the ``pt_core_news_lg`` model
    the first time it is needed if it is not installed.
    """
    nlp = _carregar_modelo('pt_core_news_lg')
    # Switch off every pipeline component while embedding, since only the
    # document vectors are used. The names must be passed explicitly:
    # calling `disable_pipes()` with no arguments disables nothing, so the
    # whole pipeline would still run on every text.
    with nlp.disable_pipes(*nlp.pipe_names):
        print('Fazendo os word embeddings')
        # `nlp.pipe` streams the texts through the pipeline instead of
        # invoking it once per text.
        vetores = np.array([doc.vector for doc in nlp.pipe(df_tweets.Texto)])

    return vetores

In [7]:
path_datasets = '../resources/datasets'

# Data loading. The training file keeps only the text and label columns, and
# rows with a missing value in either of them are discarded.
df_treinamento = pd.read_csv(
    f'{path_datasets}/tweets_ekman.csv',
    usecols=['Texto', 'Sentimento'],
)
df_treinamento = df_treinamento.dropna()
df_alvo = pd.read_csv(f'{path_datasets}/tweets_pandemia.csv')

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible. The whole frame is passed as X because criar_embeddings reads
# its `Texto` column.
x_train, x_test, y_train, y_test = train_test_split(
    df_treinamento, df_treinamento['Sentimento'], test_size=0.2, random_state=42
)

# Turn the training and test texts into embedding matrices (in that order).
embeddings, embeddings_teste = map(criar_embeddings, (x_train, x_test))

# Build and train each classifier; `fit` returns the estimator itself, so the
# fitted model is bound directly to its name.
svc = LinearSVC(C=100, random_state=0, dual=True, max_iter=10000).fit(
    embeddings, y_train
)
tree = DecisionTreeClassifier(random_state=0).fit(embeddings, y_train)
forest = RandomForestClassifier(random_state=0).fit(embeddings, y_train)

# Predictions of every trained model on the held-out embeddings.
previsoes_svc, previsoes_tree, previsoes_forest = (
    modelo.predict(embeddings_teste) for modelo in (svc, tree, forest)
)

Fazendo os word embeddings
Fazendo os word embeddings




In [8]:
# Evaluate performance: per-class precision/recall/F1 of the Linear SVC
# predictions against the held-out labels.
print(
    'Relatório Linear SVC',
    classification_report(y_test, previsoes_svc),
    sep='\n',
)

Relatório Linear SVC
              precision    recall  f1-score   support

      Neutro       0.96      0.94      0.95       495
       feliz       0.74      0.45      0.56      1931
        medo       0.35      0.83      0.50      2320
        nojo       0.50      0.24      0.32      1648
       raiva       0.13      0.18      0.16       562
      triste       0.45      0.08      0.14      2097

    accuracy                           0.43      9053
   macro avg       0.52      0.45      0.44      9053
weighted avg       0.50      0.43      0.40      9053



In [9]:
# Per-class precision/recall/F1 of the Decision Tree predictions against the
# held-out labels.
print(
    'Relatório Decision Tree',
    classification_report(y_test, previsoes_tree),
    sep='\n',
)

Relatório Decision Tree
              precision    recall  f1-score   support

      Neutro       0.78      0.80      0.79       495
       feliz       0.48      0.47      0.48      1931
        medo       0.37      0.37      0.37      2320
        nojo       0.32      0.31      0.32      1648
       raiva       0.11      0.12      0.12       562
      triste       0.34      0.35      0.34      2097

    accuracy                           0.38      9053
   macro avg       0.40      0.40      0.40      9053
weighted avg       0.39      0.38      0.39      9053



In [11]:
# Per-class precision/recall/F1 of the Random Forest predictions against the
# held-out labels.
print(
    'Relatório Random Forest',
    classification_report(y_test, previsoes_forest),
    sep='\n',
)

Relatório Random Forest
              precision    recall  f1-score   support

      Neutro       0.95      0.87      0.91       495
       feliz       0.63      0.66      0.64      1931
        medo       0.45      0.56      0.50      2320
        nojo       0.48      0.43      0.46      1648
       raiva       0.96      0.05      0.09       562
      triste       0.45      0.47      0.46      2097

    accuracy                           0.52      9053
   macro avg       0.66      0.51      0.51      9053
weighted avg       0.55      0.52      0.51      9053

