In [1]:
import re
import requests
import pandas as pd
from bs4 import BeautifulSoup, SoupStrainer
import urllib
import os
from argparse import Namespace
from urllib.request import urlopen
import time
import matplotlib.pyplot as plt
import seaborn as sns

# Use matplotlib's 'ggplot' style sheet for plots drawn in this notebook.
plt.style.use('ggplot')

import nltk
import torch
from nltk.tokenize import sent_tokenize,word_tokenize
# Fetch the NLTK "punkt" resource; nltk reports "already up-to-date" when it
# is present locally, so re-running this cell is harmless.
nltk.download("punkt")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\m1273747\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [2]:
# Read the pipe-delimited horoscope file (malformed lines are skipped), keep
# only rows that have a COMMENT, and renumber them under a fresh 'ID' index.
df = (
    pd.read_csv(
        'horoscopes.csv',
        sep='|',
        on_bad_lines='skip',
        names=["ID", "COMMENT", "DATE", "SIGN"],
    )
    .dropna(subset=['COMMENT'])
    .reset_index(drop=True)    # new 0..n-1 row labels after the dropna
    .drop(columns=['ID'])      # the file's ID column is superseded by the index
    .rename_axis('ID')
    .head(50)                  # only the first 50 rows are analysed below
)
print(df.shape)
print(df.head(3))

(50, 3)
                                              COMMENT        DATE   SIGN
ID                                                                      
0   You’re not the sort to play safe and even if y...  12-01-2013  aries
1   There is no such thing as something for nothin...  12-02-2013  aries
2   As the new moon falls in one of the more adven...  12-03-2013  aries


In [3]:
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from scipy.special import softmax

In [4]:
from nltk.sentiment import SentimentIntensityAnalyzer
from tqdm.notebook import tqdm

# SentimentIntensityAnalyzer loads the 'vader_lexicon' resource when it is
# constructed. The notebook only downloads 'punkt' earlier, so fetch the
# lexicon here; otherwise a fresh environment fails with a LookupError.
nltk.download("vader_lexicon", quiet=True)

sia = SentimentIntensityAnalyzer()

In [5]:
# Checkpoint name passed to both from_pretrained calls, so the tokenizer and
# the classifier always come from the same model.
# (Plain string: the original f-prefix had no placeholders to format.)
MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

In [6]:
def polarity_scores_roberta(text):
    """Score ``text`` with the RoBERTa classifier as one signed number.

    The first three logits of the model output are converted to
    probabilities with softmax and collapsed into
    ``(p[2] - p[0]) / (p[0] + p[1] + p[2])``.
    NOTE(review): this treats class 0 as "negative" and class 2 as
    "positive"; confirm against the checkpoint's label mapping.

    Args:
        text: The string to score.

    Returns:
        dict: ``{'roberta': score}``. Higher values mean more weight on
        class 2, lower values more weight on class 0.
    """
    # Truncate explicitly: inputs longer than the model's position limit would
    # otherwise make the forward pass fail. 512 is the usual RoBERTa-base
    # limit -- TODO confirm for this checkpoint.
    encoded_text = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=512,
    )

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        output = model(**encoded_text)

    # output[0] holds the logits; [0] selects the single sequence in the batch.
    scores = softmax(output[0][0].numpy())
    negative, neutral, positive = scores[0], scores[1], scores[2]

    scores_dict = {
        'roberta': (positive - negative) / (negative + neutral + positive)
    }
    return scores_dict

In [7]:
# Per-row sentiment results, keyed by the row's label in df's index.
res_vaders = {}   # label -> dict returned by sia.polarity_scores
res_roberta = {}  # label -> dict returned by polarity_scores_roberta

# Series.items() yields (index label, value) pairs directly, which avoids the
# chained df['COMMENT'][i] lookup and always hands over one scalar comment.
for myid, comment in df['COMMENT'].items():
    res_vaders[myid] = sia.polarity_scores(comment)
    res_roberta[myid] = polarity_scores_roberta(comment)

In [9]:
# Build the VADER result table: one row per ID, keeping only the compound
# score (renamed to 'vaders') and joining the original COMMENT/DATE/SIGN.
# Written as one chain without inplace=True so re-running the cell always
# rebuilds the same frame from res_vaders and df.
vaders = (
    pd.DataFrame(res_vaders).T
    .rename_axis('ID')
    .drop(columns=['neg', 'neu', 'pos'])
    .rename(columns={'compound': 'vaders'})
    .merge(df, how='inner', on=['ID'], validate='one_to_one')
)
vaders

Unnamed: 0_level_0,vaders,COMMENT,DATE,SIGN
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.7102,You’re not the sort to play safe and even if y...,12-01-2013,aries
1,0.34,There is no such thing as something for nothin...,12-02-2013,aries
2,0.4939,As the new moon falls in one of the more adven...,12-03-2013,aries
3,0.9581,You will hear something amazing today but can ...,12-04-2013,aries
4,0.8268,A friend or colleague you have not seen for a ...,12-05-2013,aries
5,0.1154,You may be a nice guy by nature but most likel...,12-06-2013,aries
6,-0.25,Someone will give you a tough time this weeken...,12-07-2013,aries
7,0.3071,"You will be restless this coming week, without...",12-08-2013,aries
8,0.9153,You may be open and honest by nature but is le...,12-09-2013,aries
9,-0.4767,You must avoid wishful thinking today. Cosmic ...,12-10-2013,aries


In [10]:
# Rows whose VADER score is at or below -0.8.
vaders.loc[vaders['vaders'].le(-0.8)]

Unnamed: 0_level_0,vaders,COMMENT,DATE,SIGN
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
15,-0.8464,Family and financial problems can easily be re...,12-16-2013,aries
16,-0.9081,There are times when it is right to back down ...,12-17-2013,aries


In [13]:
# The recorded output of this cell lists only rows scoring 0.8 or more, so the
# filter is applied here explicitly instead of relying on `vaders` having been
# narrowed by some earlier, no-longer-present cell.
# NOTE(review): the 0.8 cut-off is inferred from that output (it mirrors the
# -0.8 threshold used for the negative rows) -- confirm.
vaders_positive = vaders[vaders['vaders'] >= 0.8]
print(vaders_positive['vaders'], vaders_positive['COMMENT'])

ID
3     0.9581
4     0.8268
8     0.9153
10    0.8225
17    0.8163
19    0.8442
21    0.9666
22    0.9307
31    0.8988
34    0.8207
37    0.8428
46    0.8990
47    0.8591
48    0.8805
Name: vaders, dtype: float64 ID
3     You will hear something amazing today but can ...
4     A friend or colleague you have not seen for a ...
8     You may be open and honest by nature but is le...
10    If you have not made as much progress as you w...
17    You will get exactly what you deserve today: n...
19    A clash of opposing viewpoints is likely on th...
21    Go out of your way to be pleasant to everyone ...
22    If you are happy with the way your career is g...
31    If you need to ask a favor of someone in a pos...
34    What you gain over the coming week will more t...
37    You will gain more by doing less today. Either...
46    Don’t rush into anything over the next few day...
47    Whatever you happen to be working on now you w...
48    Even if you prefer to walk alone it will pay y...

In [34]:
# Build the RoBERTa result table: one row per ID with the 'roberta' score,
# joined to the original COMMENT/DATE/SIGN columns.
roberta = (
    pd.DataFrame(res_roberta).T
    .rename_axis('ID')
    .merge(df, how='inner', on=['ID'], validate='one_to_one')
)

# Spot-check a few rows: print the score next to the text it was computed from.
# .loc[row, column] replaces the chained roberta['roberta'][47] style lookup.
for row_id in (47, 48, 49):
    print(roberta.loc[row_id, 'roberta'], " : ", roberta.loc[row_id, 'COMMENT'])

0.7539331475272775  :  Whatever you happen to be working on now you will get it done quicker and better if you get other people involved. But they must be involved as equals. It may be your vision, your baby, but don’t let your ego get in the way of success.
0.9516182392835617  :  Even if you prefer to walk alone it will pay you to get more involved in group activities over the next few days. Friends and social contacts will bring you luck and make life more enjoyable, so get out there and have some fun.
-0.08593526482582092  :  You may not agree with what you hear today but you must let others have their say. Free speech is worth fighting for, so if someone tries to shout down a dissenting opinion make it your business to defend their right to be different.


In [14]:
# Put both scores side by side, one row per (DATE, SIGN).
# Both inputs carry the same COMMENT text; merging them as-is would produce
# duplicated COMMENT_x / COMMENT_y columns, so it is dropped from each side.
merged = pd.merge(
    roberta.drop(columns=['COMMENT']),
    vaders.drop(columns=['COMMENT']),
    how='inner',
    on=['DATE', 'SIGN'],
    validate='one_to_one',
)
merged.head(3)

Unnamed: 0,roberta_com,DATE,SIGN,vaders
0,-0.22426,12-01-2013,aries,0.7102
1,-0.250313,12-02-2013,aries,0.34
2,0.716438,12-03-2013,aries,0.4939
