In [1]:
from google.colab import drive

drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [2]:
%cd gdrive/MyDrive/IC_Leticia/Dataset

/content/gdrive/.shortcut-targets-by-id/16f9nXoTeh8KK8EltZTEP4Q38e0gcr6ne/IC_Leticia/Dataset


In [3]:
!ls

Aquisição  GoEmotions  GoEmotions_pred.csv  seed_tweets.csv


In [None]:
!pip install -r "GoEmotions/requirements.txt"

In [5]:
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
from pprint import pprint
import pandas as pd

#Folder path containing the fine-tuned model files
model_path = 'GoEmotions/fine_tuned_model'

model = BertForSequenceClassification.from_pretrained(model_path)
tokenizer = BertTokenizer.from_pretrained(model_path)

classifier = pipeline('text-classification', model=model, tokenizer=tokenizer, top_k=None)

threshold = 0.3

In [6]:
df = pd.read_csv("seed_tweets.csv",index_col=0)
inputs = df["seedtweet"].values.tolist()

In [7]:
df.head()

Unnamed: 0_level_0,seedtweet
index,Unnamed: 1_level_1
0,"O Presidente @ArthurLira_ ,após ouvir a maior..."
1,"Dentre outros fatores, amanhã votarei pela li..."
2,STF decide manter a prisão do Deputado @danie...
3,O Conselho de Ética da Câmara vai começar a t...
4,Viúvas da ditadura se arvoram contra o STF ap...


In [8]:
output = classifier(inputs)
predictions = []

for prediction in output:
	predictions.append(list(x for x in prediction if x['score']>= threshold))

In [9]:
pprint(predictions)

[[{'label': 'aprovação', 'score': 0.5587501525878906}],
 [{'label': 'neutro', 'score': 0.6186824440956116}],
 [{'label': 'neutro', 'score': 0.9494044184684753}],
 [{'label': 'curiosidade', 'score': 0.3775072693824768}],
 [{'label': 'neutro', 'score': 0.36077290773391724}],
 [{'label': 'curiosidade', 'score': 0.5730711817741394}],
 [{'label': 'neutro', 'score': 0.9068360328674316}],
 [{'label': 'neutro', 'score': 0.6243780851364136}],
 [{'label': 'constrangimento', 'score': 0.5402638912200928}],
 [{'label': 'neutro', 'score': 0.6096152067184448}],
 [{'label': 'curiosidade', 'score': 0.5807187557220459}],
 [{'label': 'neutro', 'score': 0.6631458401679993}],
 [{'label': 'neutro', 'score': 0.8577451109886169}],
 [{'label': 'neutro', 'score': 0.9119518995285034}],
 [{'label': 'decepção', 'score': 0.46035119891166687}],
 [{'label': 'neutro', 'score': 0.7890118360519409}],
 [{'label': 'admiração', 'score': 0.37314921617507935}],
 [{'label': 'neutro', 'score': 0.6008589267730713}],
 [{'label':

In [10]:
emotions = []
scores = []
for row in predictions:
  if len(row)>0:
    first_result = row[0]
    print("Label: ",first_result.get("label")," - Scores:",first_result.get("score"))
    emotions.append(first_result.get("label"))
    scores.append(first_result.get("score"))
  else:
    print("Nan")
    emotions.append("NaN")
    scores.append(0)
  

Label:  aprovação  - Scores: 0.5587501525878906
Label:  neutro  - Scores: 0.6186824440956116
Label:  neutro  - Scores: 0.9494044184684753
Label:  curiosidade  - Scores: 0.3775072693824768
Label:  neutro  - Scores: 0.36077290773391724
Label:  curiosidade  - Scores: 0.5730711817741394
Label:  neutro  - Scores: 0.9068360328674316
Label:  neutro  - Scores: 0.6243780851364136
Label:  constrangimento  - Scores: 0.5402638912200928
Label:  neutro  - Scores: 0.6096152067184448
Label:  curiosidade  - Scores: 0.5807187557220459
Label:  neutro  - Scores: 0.6631458401679993
Label:  neutro  - Scores: 0.8577451109886169
Label:  neutro  - Scores: 0.9119518995285034
Label:  decepção  - Scores: 0.46035119891166687
Label:  neutro  - Scores: 0.7890118360519409
Label:  admiração  - Scores: 0.37314921617507935
Label:  neutro  - Scores: 0.6008589267730713
Label:  admiração  - Scores: 0.3365543484687805
Nan
Label:  desaprovação  - Scores: 0.6816696524620056
Label:  otimismo  - Scores: 0.4443388283252716
Nan
L

In [11]:
df["emotion"] = emotions
df["score"] = scores

In [12]:
df.head()

Unnamed: 0_level_0,seedtweet,emotion,score
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,"O Presidente @ArthurLira_ ,após ouvir a maior...",aprovação,0.55875
1,"Dentre outros fatores, amanhã votarei pela li...",neutro,0.618682
2,STF decide manter a prisão do Deputado @danie...,neutro,0.949404
3,O Conselho de Ética da Câmara vai começar a t...,curiosidade,0.377507
4,Viúvas da ditadura se arvoram contra o STF ap...,neutro,0.360773


In [13]:
#df = pd.DataFrame(df, reset_index=True, columns=["predicoes"])
df.to_csv("GoEmotions_pred.csv")