# C-More

In [1]:
import json
import os
import time

import pandas as pd
import textacy.resources
import tweepy
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from textblob import TextBlob

In [2]:
# Twitter API v2

# Credentials must never be stored in the notebook: the bearer token is read
# from the TWITTER_BEARER_TOKEN environment variable. An unset variable falls
# back to the empty string, which matches the previous placeholder value.
bearer_token = os.environ.get('TWITTER_BEARER_TOKEN', '')

client = tweepy.Client(bearer_token=bearer_token)

### 1. Telco (Vodafone)

#### 1.1. Get tweets

In [3]:
query = "Vodafone 5G -is:retweet lang:en"

# matches tweets containing both "Vodafone" and "5G", excluding retweets,
# restricted to tweets tagged as English (lang:en)

In [4]:
# Number of tweets matching `query`, bucketed per day.
response = client.get_recent_tweets_count(query, granularity="day") # count of tweets for the last 7 days
# granularity can be minute, hour or day

In [5]:
# Show one count bucket (start, end, tweet_count) per line.
for day_bucket in response.data:
    print(day_bucket)

{'end': '2022-07-14T00:00:00.000Z', 'start': '2022-07-13T09:34:59.000Z', 'tweet_count': 30}
{'end': '2022-07-15T00:00:00.000Z', 'start': '2022-07-14T00:00:00.000Z', 'tweet_count': 28}
{'end': '2022-07-16T00:00:00.000Z', 'start': '2022-07-15T00:00:00.000Z', 'tweet_count': 29}
{'end': '2022-07-17T00:00:00.000Z', 'start': '2022-07-16T00:00:00.000Z', 'tweet_count': 16}
{'end': '2022-07-18T00:00:00.000Z', 'start': '2022-07-17T00:00:00.000Z', 'tweet_count': 21}
{'end': '2022-07-19T00:00:00.000Z', 'start': '2022-07-18T00:00:00.000Z', 'tweet_count': 65}
{'end': '2022-07-20T00:00:00.000Z', 'start': '2022-07-19T00:00:00.000Z', 'tweet_count': 46}
{'end': '2022-07-20T09:34:59.000Z', 'start': '2022-07-20T00:00:00.000Z', 'tweet_count': 8}


In [6]:
# Fetch the tweets themselves (a single request, so at most 100 tweets; no pagination).
# `tweet_fields` asks for the extra attributes used later: language, timestamp and
# the engagement counters.
# NOTE(review): `end_time` is a fixed timestamp; since the counts above only span
# the last 7 days, this value presumably has to be updated before re-running - confirm.
response = client.search_recent_tweets(query, 
                                       end_time="2022-07-18T00:00:00Z", 
                                       max_results=100, 
                                       tweet_fields=['lang', 'created_at', 'public_metrics'])

In [7]:
# Quick visual check of what came back: id, text, language and creation time.
for tweet in response.data:
    print(f"{tweet.id} {tweet.text} {tweet.lang} {tweet.created_at}")

1548818090401714177 Towns and smaller cities set to lose out on 5G | Vodafone report - Vodafone UK News Centre #5G  CC:@UK_CIF https://t.co/OiBvjPV35T en 2022-07-17 23:53:03+00:00
1548817148147032068 Forbes deep-dives into how Vodafone is revamping its operations with @Oracle to reduce costs and deliver the next generation of #5G and #IoT services. https://t.co/ozIpyLeXRx https://t.co/r1qa1GAt68 en 2022-07-17 23:49:18+00:00
1548815541837500416 @SamsungAU Any reason 4g and 5g is almost un usable on @VodafoneAU using S22 Ultra. Vodafone are saying it's a Samsung problem? en 2022-07-17 23:42:55+00:00
1548796630748602368 Forbes deep-dives into how Vodafone is revamping its operations with @Oracle to reduce costs and deliver the next generation of #5G and #IoT services. https://t.co/Rp6wyB9Juk https://t.co/OlFgzoxqbm en 2022-07-17 22:27:46+00:00
1548760366829690887 #5gradiation #swindon #northswindon #wiltshire #youth #fearless #swindondeservesbetter #Vodafone #EMF2022 #emf #colchester #knu

#### 1.2. Store tweets in data frame

In [8]:
# Flatten each tweet into one row; the engagement counters live in `public_metrics`.
# `response.data` is None (not an empty list) when the search matched nothing, so
# fall back to an empty list: the frame is then empty but keeps the expected columns.
tweets = response.data or []

data = [[tweet.id,
         tweet.text,
         tweet.lang,
         tweet.public_metrics['retweet_count'],
         tweet.public_metrics['reply_count'],
         tweet.public_metrics['like_count'],
         tweet.public_metrics['quote_count'],
         tweet.created_at
        ] for tweet in tweets]

df_vodafone = pd.DataFrame(data, columns = ['id', 'text', 'lang', 'retweets', 'replies', 'likes', 'quotes', 'created_at'])

In [9]:
df_vodafone.head()

Unnamed: 0,id,text,lang,retweets,replies,likes,quotes,created_at
0,1548818090401714177,Towns and smaller cities set to lose out on 5G...,en,0,0,0,0,2022-07-17 23:53:03+00:00
1,1548817148147032068,Forbes deep-dives into how Vodafone is revampi...,en,0,0,0,0,2022-07-17 23:49:18+00:00
2,1548815541837500416,@SamsungAU Any reason 4g and 5g is almost un u...,en,0,2,0,0,2022-07-17 23:42:55+00:00
3,1548796630748602368,Forbes deep-dives into how Vodafone is revampi...,en,0,0,0,0,2022-07-17 22:27:46+00:00
4,1548760366829690887,#5gradiation #swindon #northswindon #wiltshire...,en,2,0,1,0,2022-07-17 20:03:40+00:00


In [10]:
df_vodafone.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype              
---  ------      --------------  -----              
 0   id          100 non-null    int64              
 1   text        100 non-null    object             
 2   lang        100 non-null    object             
 3   retweets    100 non-null    int64              
 4   replies     100 non-null    int64              
 5   likes       100 non-null    int64              
 6   quotes      100 non-null    int64              
 7   created_at  100 non-null    datetime64[ns, UTC]
dtypes: datetime64[ns, UTC](1), int64(5), object(2)
memory usage: 6.4+ KB


In [11]:
df_vodafone['lang'].value_counts()

en    100
Name: lang, dtype: int64

All our tweets are in English, so we can drop this column.

In [12]:
# Every row has lang == 'en', so the column carries no information.
df_vodafone.drop(columns=['lang'], inplace=True)

In [13]:
df_vodafone.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype              
---  ------      --------------  -----              
 0   id          100 non-null    int64              
 1   text        100 non-null    object             
 2   retweets    100 non-null    int64              
 3   replies     100 non-null    int64              
 4   likes       100 non-null    int64              
 5   quotes      100 non-null    int64              
 6   created_at  100 non-null    datetime64[ns, UTC]
dtypes: datetime64[ns, UTC](1), int64(5), object(1)
memory usage: 5.6+ KB


#### 1.3. Sentiment Analysis with VADER

In [14]:
# Single VADER analyzer instance; the VADER cells of both sections reuse it.
sid = SentimentIntensityAnalyzer()

In [15]:
# Full VADER output per tweet (dict that includes the 'compound' score).
df_vodafone['scores'] = df_vodafone['text'].map(sid.polarity_scores)

# Keep only the compound score as the numeric sentiment.
df_vodafone['vader_score'] = df_vodafone['scores'].map(lambda scores: scores['compound'])

# Label: <= -0.05 is negative, >= 0.05 is positive, anything in between is neutral.
df_vodafone['vader_label'] = df_vodafone['vader_score'].map(
    lambda compound: 'neg' if compound <= -0.05 else ('pos' if compound >= 0.05 else 'neu')
)

In [16]:
df_vodafone

Unnamed: 0,id,text,retweets,replies,likes,quotes,created_at,scores,vader_score,vader_label
0,1548818090401714177,Towns and smaller cities set to lose out on 5G...,0,0,0,0,2022-07-17 23:53:03+00:00,"{'neg': 0.13, 'neu': 0.87, 'pos': 0.0, 'compou...",-0.4019,neg
1,1548817148147032068,Forbes deep-dives into how Vodafone is revampi...,0,0,0,0,2022-07-17 23:49:18+00:00,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neu
2,1548815541837500416,@SamsungAU Any reason 4g and 5g is almost un u...,0,2,0,0,2022-07-17 23:42:55+00:00,"{'neg': 0.119, 'neu': 0.881, 'pos': 0.0, 'comp...",-0.4019,neg
3,1548796630748602368,Forbes deep-dives into how Vodafone is revampi...,0,0,0,0,2022-07-17 22:27:46+00:00,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neu
4,1548760366829690887,#5gradiation #swindon #northswindon #wiltshire...,2,0,1,0,2022-07-17 20:03:40+00:00,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neu
...,...,...,...,...,...,...,...,...,...,...
95,1547273112328146945,"The Adani group, through unit Adani Data Netwo...",0,0,0,0,2022-07-13 17:33:51+00:00,"{'neg': 0.0, 'neu': 0.858, 'pos': 0.142, 'comp...",0.5106,pos
96,1547258380950990848,Vodafone and VMO2 will deploy 4G and 5G on Lon...,0,0,0,0,2022-07-13 16:35:19+00:00,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neu
97,1547252964360097792,Adani’s entry in 5G may heat up bidding race\n...,0,1,1,0,2022-07-13 16:13:47+00:00,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neu
98,1547252953337442305,Adani group to participate in 5G spectrum auct...,0,1,1,0,2022-07-13 16:13:45+00:00,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neu


In [17]:
df_vodafone['vader_label'].value_counts()

neu    47
pos    44
neg     9
Name: vader_label, dtype: int64

In [18]:
# Texts of the tweets VADER labelled as positive.
print(df_vodafone.loc[df_vodafone['vader_label'] == 'pos', 'text'].values)

['Amazing 5G throughout at super crowded location with Vodafone and Ericsson. @vodafone_de, @Ericsson_GmbH #5G #duesseldorf #sommerkirmis https://t.co/w7xzZGeb5M'
 'What an incredible initiative from @VodafoneUK. With the use of #5G and haptic suits, deaf and hard-of hearing fans will now feel every beat through vibration at festivals. https://t.co/Uw0oBO7FGo https://t.co/LhURU7PMz5'
 'The latest Telecom Pulse - Vignesh Iyer Daily! https://t.co/k4TC6ZWXkO Thanks to @NpoReports @hornbilltv #5g #vodafone'
 'You can now capture and share every moment of your adventure through Vodafone’s Data Roaming. Enjoy free additional roaming data and roaming minutes on all Unlimited 5G plans. Check out our 5G Unlimited plans and roam worry-free with Vodafone! T&amp;Cs apply.\nhttps://t.co/crqgRDxqqn https://t.co/Dv2p7tAYsV'
 'Google Cloud has arguably been at the forefront of telco/hyperscaler engagements in 2022, having also announced key partnerships with the likes of Vodafone, BT, Telefónica Deuts

In [19]:
# Texts of the tweets VADER labelled as negative.
print(df_vodafone.loc[df_vodafone['vader_label'] == 'neg', 'text'].values)

['Towns and smaller cities set to lose out on 5G | Vodafone report - Vodafone UK News Centre #5G  CC:@UK_CIF https://t.co/OiBvjPV35T'
 "@SamsungAU Any reason 4g and 5g is almost un usable on @VodafoneAU using S22 Ultra. Vodafone are saying it's a Samsung problem?"
 'Fake claim in circulation that people are dying due to 5G trials and not COVID-19 - Oneindia News\n\nhttps://t.co/VYK3KeawR5\n\n #AIRTEL #Jio #vodafone'
 "Thank You @ViCustomerCare!!\n#Vodafone #Vi #vodafoneidea #CustomerCare\nEvery mobile operator right now: We are bringing 5G\nVodafoneIdea : Why don't you see how 2G was?\nDon't even want to waste my time with customer service now. It's the 5th time I am raising this issue. https://t.co/Ic1VZgz1Pg"
 '@eatsruns Sadly not - I was with John Lewis before, and just switched to Vodafone, but not 5G.'
 "What if Bankruptcy of Vodafone-Idea followed by Bailout by govt of India and Adani's bid for 5G spectrum are related?\n\n#5Gauction"
 "What an useless Govt it is. Playing dirty ma

Nota: para a análise de sentimento, pode ser importante excluir as replies - o tweet original pode ter um determinado sentimento associado e uma dada resposta um sentimento oposto porque estamos a ignorar o contexto do tweet original.

Exemplo: um tweet em que alguém se queixa de um problema com a Vodafone (sentimento negativo), a que outra pessoa responde com uma solução, referindo que a operadora X é muito melhor (sentimento positivo, mas que acaba por ser negativo para a Vodafone).

In [20]:
# The raw score dicts are no longer needed once score and label are extracted.
df_vodafone.drop(columns=['scores'], inplace=True)

In [21]:
df_vodafone.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype              
---  ------       --------------  -----              
 0   id           100 non-null    int64              
 1   text         100 non-null    object             
 2   retweets     100 non-null    int64              
 3   replies      100 non-null    int64              
 4   likes        100 non-null    int64              
 5   quotes       100 non-null    int64              
 6   created_at   100 non-null    datetime64[ns, UTC]
 7   vader_score  100 non-null    float64            
 8   vader_label  100 non-null    object             
dtypes: datetime64[ns, UTC](1), float64(1), int64(5), object(2)
memory usage: 7.2+ KB


#### 1.4. Sentiment Analysis with TextBlob

In [22]:
# Build a textacy Corpus from the tweet texts using the "en_core_web_sm" language model.
# NOTE: the name `corpus` is reassigned later for the Santander tweets.
corpus = textacy.Corpus("en_core_web_sm", df_vodafone['text'])

In [23]:
print(corpus)

Corpus(100 docs, 3473 tokens)


In [24]:
# TextBlob polarity of every document, in corpus order, one dict per tweet.
pol = [{'polarity': TextBlob(doc.text).sentiment.polarity} for doc in corpus]

In [25]:
# Assign the polarity values positionally. Assigning a whole DataFrame to a single
# column relies on index alignment (and on pandas accepting a one-column frame),
# which would silently yield NaN if the frame's index were not 0..n-1.
df_vodafone['tblob_score'] = [entry['polarity'] for entry in pol]

In [26]:
df_vodafone.head()

Unnamed: 0,id,text,retweets,replies,likes,quotes,created_at,vader_score,vader_label,tblob_score
0,1548818090401714177,Towns and smaller cities set to lose out on 5G...,0,0,0,0,2022-07-17 23:53:03+00:00,-0.4019,neg,0.0
1,1548817148147032068,Forbes deep-dives into how Vodafone is revampi...,0,0,0,0,2022-07-17 23:49:18+00:00,0.0,neu,0.0
2,1548815541837500416,@SamsungAU Any reason 4g and 5g is almost un u...,0,2,0,0,2022-07-17 23:42:55+00:00,-0.4019,neg,0.0
3,1548796630748602368,Forbes deep-dives into how Vodafone is revampi...,0,0,0,0,2022-07-17 22:27:46+00:00,0.0,neu,0.0
4,1548760366829690887,#5gradiation #swindon #northswindon #wiltshire...,2,0,1,0,2022-07-17 20:03:40+00:00,0.0,neu,0.25


In [27]:
# Same cut-offs as the VADER labels: <= -0.05 negative, >= 0.05 positive, else neutral.
df_vodafone['tblob_label'] = df_vodafone['tblob_score'].map(
    lambda polarity: 'neg' if polarity <= -0.05 else ('pos' if polarity >= 0.05 else 'neu')
)

In [28]:
df_vodafone.head()

Unnamed: 0,id,text,retweets,replies,likes,quotes,created_at,vader_score,vader_label,tblob_score,tblob_label
0,1548818090401714177,Towns and smaller cities set to lose out on 5G...,0,0,0,0,2022-07-17 23:53:03+00:00,-0.4019,neg,0.0,neu
1,1548817148147032068,Forbes deep-dives into how Vodafone is revampi...,0,0,0,0,2022-07-17 23:49:18+00:00,0.0,neu,0.0,neu
2,1548815541837500416,@SamsungAU Any reason 4g and 5g is almost un u...,0,2,0,0,2022-07-17 23:42:55+00:00,-0.4019,neg,0.0,neu
3,1548796630748602368,Forbes deep-dives into how Vodafone is revampi...,0,0,0,0,2022-07-17 22:27:46+00:00,0.0,neu,0.0,neu
4,1548760366829690887,#5gradiation #swindon #northswindon #wiltshire...,2,0,1,0,2022-07-17 20:03:40+00:00,0.0,neu,0.25,pos


In [29]:
df_vodafone['tblob_label'].value_counts()

neu    51
pos    41
neg     8
Name: tblob_label, dtype: int64

In [30]:
# Number of tweets on which VADER and TextBlob assign the same label.
int((df_vodafone['vader_label'] == df_vodafone['tblob_label']).sum())

76

VADER and TextBlob results are in agreement in 76 of the 100 tweets.

#### 1.5. Emotion Analysis with DepecheMood

In [31]:
rs = textacy.resources.DepecheMood(lang="en", word_rep="lemma", min_freq=2)

# For every document, rank the (emotion, weight) pairs from strongest to weakest.
moods = [
    {'mood': sorted(rs.get_emotional_valence(doc).items(),
                    key=lambda pair: pair[1],
                    reverse=True)}
    for doc in corpus
]

In [32]:
# Assign the ranked emotion lists positionally. Assigning a whole DataFrame to a
# single column relies on index alignment (and on pandas accepting a one-column
# frame), which would silently yield NaN if the frame's index were not 0..n-1.
df_vodafone['mood'] = [entry['mood'] for entry in moods]

In [33]:
df_vodafone

Unnamed: 0,id,text,retweets,replies,likes,quotes,created_at,vader_score,vader_label,tblob_score,tblob_label,mood
0,1548818090401714177,Towns and smaller cities set to lose out on 5G...,0,0,0,0,2022-07-17 23:53:03+00:00,-0.4019,neg,0.000000,neu,"[(INSPIRED, 0.14742941371713533), (SAD, 0.1375..."
1,1548817148147032068,Forbes deep-dives into how Vodafone is revampi...,0,0,0,0,2022-07-17 23:49:18+00:00,0.0000,neu,0.000000,neu,"[(INSPIRED, 0.1535394550512804), (ANGRY, 0.133..."
2,1548815541837500416,@SamsungAU Any reason 4g and 5g is almost un u...,0,2,0,0,2022-07-17 23:42:55+00:00,-0.4019,neg,0.000000,neu,"[(ANGRY, 0.16900189733218576), (ANNOYED, 0.139..."
3,1548796630748602368,Forbes deep-dives into how Vodafone is revampi...,0,0,0,0,2022-07-17 22:27:46+00:00,0.0000,neu,0.000000,neu,"[(INSPIRED, 0.1535394550512804), (ANGRY, 0.133..."
4,1548760366829690887,#5gradiation #swindon #northswindon #wiltshire...,2,0,1,0,2022-07-17 20:03:40+00:00,0.0000,neu,0.250000,pos,"[(AMUSED, 0.1854112818637807), (INSPIRED, 0.17..."
...,...,...,...,...,...,...,...,...,...,...,...,...
95,1547273112328146945,"The Adani group, through unit Adani Data Netwo...",0,0,0,0,2022-07-13 17:33:51+00:00,0.5106,pos,0.150000,pos,"[(DONT_CARE, 0.18849327811381925), (AMUSED, 0...."
96,1547258380950990848,Vodafone and VMO2 will deploy 4G and 5G on Lon...,0,0,0,0,2022-07-13 16:35:19+00:00,0.0000,neu,0.000000,neu,"[(DONT_CARE, 0.15036639676231625), (INSPIRED, ..."
97,1547252964360097792,Adani’s entry in 5G may heat up bidding race\n...,0,1,1,0,2022-07-13 16:13:47+00:00,0.0000,neu,0.000000,neu,"[(AMUSED, 0.1547223483169548), (DONT_CARE, 0.1..."
98,1547252953337442305,Adani group to participate in 5G spectrum auct...,0,1,1,0,2022-07-13 16:13:45+00:00,0.0000,neu,0.125000,pos,"[(INSPIRED, 0.15171280796571587), (AMUSED, 0.1..."


In [34]:
# How many emotions were scored per tweet (0 means no emotion could be derived).
df_vodafone['mood'].map(len).value_counts()

8    98
0     2
Name: mood, dtype: int64

It was not possible to analyse the emotion of 2 tweets.

In [35]:
# Ranked emotions for the tweet at index label 2.
df_vodafone.loc[2, 'mood']

[('ANGRY', 0.16900189733218576),
 ('ANNOYED', 0.13953618221387204),
 ('INSPIRED', 0.1393530094666046),
 ('AMUSED', 0.12893849653333483),
 ('DONT_CARE', 0.11759330102919252),
 ('HAPPY', 0.11060971045893975),
 ('SAD', 0.1068556063931119),
 ('AFRAID', 0.0881117965727586)]

In [36]:
# Text of the same tweet, to compare against its emotions.
df_vodafone.loc[2, 'text']

"@SamsungAU Any reason 4g and 5g is almost un usable on @VodafoneAU using S22 Ultra. Vodafone are saying it's a Samsung problem?"

The results make sense for this particular tweet.

In [37]:
# Extract the names of the three strongest emotions into top1_mood..top3_mood.
# Each position is guarded by its own length check: the previous code tested
# `len(x) > 0` for all three, which raises IndexError on a ranking that has
# one or two entries. Missing positions become None.
for rank in range(3):
    df_vodafone[f'top{rank + 1}_mood'] = df_vodafone['mood'].map(
        lambda ranked, rank=rank: ranked[rank][0] if len(ranked) > rank else None
    )

In [38]:
# The tweet id and the full emotion ranking are not part of the exported result.
df_vodafone.drop(columns=['id', 'mood'], inplace=True)

In [39]:
df_vodafone

Unnamed: 0,text,retweets,replies,likes,quotes,created_at,vader_score,vader_label,tblob_score,tblob_label,top1_mood,top2_mood,top3_mood
0,Towns and smaller cities set to lose out on 5G...,0,0,0,0,2022-07-17 23:53:03+00:00,-0.4019,neg,0.000000,neu,INSPIRED,SAD,ANGRY
1,Forbes deep-dives into how Vodafone is revampi...,0,0,0,0,2022-07-17 23:49:18+00:00,0.0000,neu,0.000000,neu,INSPIRED,ANGRY,ANNOYED
2,@SamsungAU Any reason 4g and 5g is almost un u...,0,2,0,0,2022-07-17 23:42:55+00:00,-0.4019,neg,0.000000,neu,ANGRY,ANNOYED,INSPIRED
3,Forbes deep-dives into how Vodafone is revampi...,0,0,0,0,2022-07-17 22:27:46+00:00,0.0000,neu,0.000000,neu,INSPIRED,ANGRY,ANNOYED
4,#5gradiation #swindon #northswindon #wiltshire...,2,0,1,0,2022-07-17 20:03:40+00:00,0.0000,neu,0.250000,pos,AMUSED,INSPIRED,DONT_CARE
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,"The Adani group, through unit Adani Data Netwo...",0,0,0,0,2022-07-13 17:33:51+00:00,0.5106,pos,0.150000,pos,DONT_CARE,AMUSED,ANNOYED
96,Vodafone and VMO2 will deploy 4G and 5G on Lon...,0,0,0,0,2022-07-13 16:35:19+00:00,0.0000,neu,0.000000,neu,DONT_CARE,INSPIRED,AFRAID
97,Adani’s entry in 5G may heat up bidding race\n...,0,1,1,0,2022-07-13 16:13:47+00:00,0.0000,neu,0.000000,neu,AMUSED,DONT_CARE,INSPIRED
98,Adani group to participate in 5G spectrum auct...,0,1,1,0,2022-07-13 16:13:45+00:00,0.0000,neu,0.125000,pos,INSPIRED,AMUSED,ANNOYED


In [40]:
# export the results to .csv (without the index column)

df_vodafone.to_csv('vodafone.csv', index=False)

### 2. Finance (Santander Bank)

#### 2.1. Get tweets

In [44]:
query = "Santander (card OR account OR loan OR banking) -is:retweet lang:en"

# matches tweets containing "Santander" and at least one of the following words:
# card, account, loan, banking,
# excluding retweets, restricted to tweets tagged as English (lang:en)

In [45]:
# Number of tweets matching the Santander `query`, bucketed per day.
response = client.get_recent_tweets_count(query, granularity="day") # count of tweets for the last 7 days
# granularity can be minute, hour or day

In [46]:
# Show one count bucket (start, end, tweet_count) per line.
for day_bucket in response.data:
    print(day_bucket)

{'end': '2022-07-14T00:00:00.000Z', 'start': '2022-07-13T09:38:39.000Z', 'tweet_count': 18}
{'end': '2022-07-15T00:00:00.000Z', 'start': '2022-07-14T00:00:00.000Z', 'tweet_count': 34}
{'end': '2022-07-16T00:00:00.000Z', 'start': '2022-07-15T00:00:00.000Z', 'tweet_count': 20}
{'end': '2022-07-17T00:00:00.000Z', 'start': '2022-07-16T00:00:00.000Z', 'tweet_count': 12}
{'end': '2022-07-18T00:00:00.000Z', 'start': '2022-07-17T00:00:00.000Z', 'tweet_count': 28}
{'end': '2022-07-19T00:00:00.000Z', 'start': '2022-07-18T00:00:00.000Z', 'tweet_count': 31}
{'end': '2022-07-20T00:00:00.000Z', 'start': '2022-07-19T00:00:00.000Z', 'tweet_count': 29}
{'end': '2022-07-20T09:38:39.000Z', 'start': '2022-07-20T00:00:00.000Z', 'tweet_count': 10}


In [47]:
# Fetch the Santander tweets (a single request, so at most 100 tweets; no pagination).
# `tweet_fields` asks for the extra attributes used later: language, timestamp and
# the engagement counters.
# NOTE(review): `end_time` is a fixed timestamp; since the counts above only span
# the last 7 days, this value presumably has to be updated before re-running - confirm.
response = client.search_recent_tweets(query, 
                                       end_time="2022-07-18T00:00:00Z", 
                                       max_results=100, 
                                       tweet_fields=['lang', 'created_at', 'public_metrics'])

In [48]:
# Quick visual check of what came back: id, text, language and creation time.
for tweet in response.data:
    print(f"{tweet.id} {tweet.text} {tweet.lang} {tweet.created_at}")

1548805127930187776 Now Hiring: Policy and Governance Analyst at Santander Private Banking International (Dallas, TX) https://t.co/Ig8PdxkuuP #risk #compliance #compliancejobs en 2022-07-17 23:01:32+00:00
1548784317530259456 @GAMETrowbridge - think I left my credit card in store today? Were any left on the counter? A santander one? en 2022-07-17 21:38:50+00:00
1548782268377858052 @StuartG58493143 @r954ie Nice one. Nothing over in my basic account with Santander yet. en 2022-07-17 21:30:42+00:00
1548759534293991426 @NathanHeadPhoto Had the same with Santander.... called them up only to have someone who clearly didn't even know what crypto was read me a script about how risky it is 🤦‍♂️ then when they finally unblocked my account it happened again immediately..... en 2022-07-17 20:00:22+00:00
1548749996140351488 'It looks so legit!' Santander scam email includes full name and account number

Fake documents included her full name and last four digits of her account number. One of the hoax

#### 2.2. Store tweets in data frame

In [217]:
# Flatten each tweet into one row; the engagement counters live in `public_metrics`.
# `response.data` is None (not an empty list) when the search matched nothing, so
# fall back to an empty list: the frame is then empty but keeps the expected columns.
tweets = response.data or []

data = [[tweet.id,
         tweet.text,
         tweet.lang,
         tweet.public_metrics['retweet_count'],
         tweet.public_metrics['reply_count'],
         tweet.public_metrics['like_count'],
         tweet.public_metrics['quote_count'],
         tweet.created_at
        ] for tweet in tweets]

df_santander = pd.DataFrame(data, columns = ['id', 'text', 'lang', 'retweets', 'replies', 'likes', 'quotes', 'created_at'])

In [50]:
df_santander.head()

Unnamed: 0,id,text,lang,retweets,replies,likes,quotes,created_at
0,1548805127930187776,Now Hiring: Policy and Governance Analyst at S...,en,0,0,0,0,2022-07-17 23:01:32+00:00
1,1548784317530259456,@GAMETrowbridge - think I left my credit card ...,en,0,0,0,0,2022-07-17 21:38:50+00:00
2,1548782268377858052,@StuartG58493143 @r954ie Nice one. Nothing ove...,en,0,1,0,0,2022-07-17 21:30:42+00:00
3,1548759534293991426,@NathanHeadPhoto Had the same with Santander.....,en,0,0,1,0,2022-07-17 20:00:22+00:00
4,1548749996140351488,'It looks so legit!' Santander scam email incl...,en,0,0,0,0,2022-07-17 19:22:28+00:00


In [51]:
df_santander.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype              
---  ------      --------------  -----              
 0   id          100 non-null    int64              
 1   text        100 non-null    object             
 2   lang        100 non-null    object             
 3   retweets    100 non-null    int64              
 4   replies     100 non-null    int64              
 5   likes       100 non-null    int64              
 6   quotes      100 non-null    int64              
 7   created_at  100 non-null    datetime64[ns, UTC]
dtypes: datetime64[ns, UTC](1), int64(5), object(2)
memory usage: 6.4+ KB


In [52]:
df_santander['lang'].value_counts()

en    100
Name: lang, dtype: int64

All our tweets are in English, so we can drop this column.

In [53]:
# Every row has lang == 'en', so the column carries no information.
df_santander.drop(columns=['lang'], inplace=True)

In [54]:
df_santander.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype              
---  ------      --------------  -----              
 0   id          100 non-null    int64              
 1   text        100 non-null    object             
 2   retweets    100 non-null    int64              
 3   replies     100 non-null    int64              
 4   likes       100 non-null    int64              
 5   quotes      100 non-null    int64              
 6   created_at  100 non-null    datetime64[ns, UTC]
dtypes: datetime64[ns, UTC](1), int64(5), object(1)
memory usage: 5.6+ KB


#### 2.3. Sentiment Analysis with VADER

In [55]:
# Full VADER output per tweet (dict that includes the 'compound' score).
df_santander['scores'] = df_santander['text'].map(sid.polarity_scores)

# Keep only the compound score as the numeric sentiment.
df_santander['vader_score'] = df_santander['scores'].map(lambda scores: scores['compound'])

# Label: <= -0.05 is negative, >= 0.05 is positive, anything in between is neutral.
df_santander['vader_label'] = df_santander['vader_score'].map(
    lambda compound: 'neg' if compound <= -0.05 else ('pos' if compound >= 0.05 else 'neu')
)

In [56]:
df_santander

Unnamed: 0,id,text,retweets,replies,likes,quotes,created_at,scores,vader_score,vader_label
0,1548805127930187776,Now Hiring: Policy and Governance Analyst at S...,0,0,0,0,2022-07-17 23:01:32+00:00,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neu
1,1548784317530259456,@GAMETrowbridge - think I left my credit card ...,0,0,0,0,2022-07-17 21:38:50+00:00,"{'neg': 0.0, 'neu': 0.836, 'pos': 0.164, 'comp...",0.4836,pos
2,1548782268377858052,@StuartG58493143 @r954ie Nice one. Nothing ove...,0,1,0,0,2022-07-17 21:30:42+00:00,"{'neg': 0.0, 'neu': 0.811, 'pos': 0.189, 'comp...",0.4215,pos
3,1548759534293991426,@NathanHeadPhoto Had the same with Santander.....,0,0,1,0,2022-07-17 20:00:22+00:00,"{'neg': 0.041, 'neu': 0.897, 'pos': 0.062, 'co...",0.2263,pos
4,1548749996140351488,'It looks so legit!' Santander scam email incl...,0,0,0,0,2022-07-17 19:22:28+00:00,"{'neg': 0.227, 'neu': 0.682, 'pos': 0.092, 'co...",-0.8383,neg
...,...,...,...,...,...,...,...,...,...,...
95,1547335160869388289,@CitadelBanking in Pa. has hired a business ba...,0,0,0,0,2022-07-13 21:40:25+00:00,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neu
96,1547304181924495364,Just been on to @santanderukhelp for 2hrs 45 m...,0,1,0,0,2022-07-13 19:37:19+00:00,"{'neg': 0.105, 'neu': 0.895, 'pos': 0.0, 'comp...",-0.6463,neg
97,1547272714074849281,"@santanderuk @santanderukhelp For some reason,...",0,1,0,0,2022-07-13 17:32:16+00:00,"{'neg': 0.216, 'neu': 0.72, 'pos': 0.065, 'com...",-0.8011,neg
98,1547266401546784768,Santander for iOS will help you manage your ac...,0,0,0,0,2022-07-13 17:07:11+00:00,"{'neg': 0.0, 'neu': 0.769, 'pos': 0.231, 'comp...",0.4019,pos


In [57]:
df_santander['vader_label'].value_counts()

pos    48
neg    29
neu    23
Name: vader_label, dtype: int64

In [58]:
# Texts of the tweets VADER labelled as positive.
print(df_santander.loc[df_santander['vader_label'] == 'pos', 'text'].values)

['@GAMETrowbridge - think I left my credit card in store today? Were any left on the counter? A santander one?'
 '@StuartG58493143 @r954ie Nice one. Nothing over in my basic account with Santander yet.'
 "@NathanHeadPhoto Had the same with Santander.... called them up only to have someone who clearly didn't even know what crypto was read me a script about how risky it is 🤦\u200d♂️ then when they finally unblocked my account it happened again immediately....."
 '@IamObroniBa @thewordsmith0 On Tamale Airport, what Mahama submitted to Parliament in October 2016 was an Export Credit Facility from Banco Santander with Export Credit Guarantee from the UK Export Finance.However, Banco Santander was unable to put together the required loan syndication for the project…1/2'
 'On Tamale Airport, what Mahama submitted to Parliament in October 2016 was an Export Credit Facility from Banco Santander with Export Credit Guarantee from the UK Export Finance.However, Banco Santander was unable to put to

In [59]:
# Texts of the tweets VADER labelled as negative.
print(df_santander.loc[df_santander['vader_label'] == 'neg', 'text'].values)

["'It looks so legit!' Santander scam email includes full name and account number\n\nFake documents included her full name and last four digits of her account number. One of the hoax emails was entitled 'Payment(s) refused due to la...\n#Santander #Twitter\nhttps://t.co/QlZ6cld1hR"
 '@santanderukhelp I’ve had a text message purporting to be Santander (I have an account) to say a payment has been set up. Looks like a scam?'
 'Now Hiring: VP, Sr. Associate, Risk Modeling at Santander Private Banking International (Boston, MA) https://t.co/1zb0C9H2jE #risk #compliance #compliancejobs'
 '#Finance ‘It looks so legit!’ Santander scam email includes full name and account number https://t.co/gIGni9ML1u'
 "Santander scam: phishing email includes customer's full name and account number: Scammers called victim pretending to be from NatWest Fraud Unit. Get pensions\xa0... https://t.co/pCI8vdjfus #419fraud #fraud"
 '‘It looks so legit!’ Santander scam email includes full name and account number htt

Nota: neste caso concreto, o contacto com o banco está muitas vezes relacionado com problemas que os clientes enfrentam; embora o sentimento possa não ser necessariamente negativo, na grande maioria das vezes não é certamente positivo. Podemos, por isso, aumentar o threshold para que um tweet seja considerado positivo, tendo em conta os resultados obtidos.

In [65]:
# positive if score >= 0.6

# Scores are recomputed from the text so this cell does not depend on the
# 'scores' column still being present.
df_santander['scores'] = df_santander['text'].map(sid.polarity_scores)

df_santander['vader_score'] = df_santander['scores'].map(lambda scores: scores['compound'])

# Stricter positive cut-off (0.6); the negative cut-off stays at -0.05.
df_santander['vader_label'] = df_santander['vader_score'].map(
    lambda compound: 'neg' if compound <= -0.05 else ('pos' if compound >= 0.6 else 'neu')
)

In [66]:
df_santander['vader_label'].value_counts()

neu    46
neg    29
pos    25
Name: vader_label, dtype: int64

In [67]:
# Texts still labelled positive after raising the cut-off.
print(df_santander.loc[df_santander['vader_label'] == 'pos', 'text'].values)

['@IamObroniBa @thewordsmith0 On Tamale Airport, what Mahama submitted to Parliament in October 2016 was an Export Credit Facility from Banco Santander with Export Credit Guarantee from the UK Export Finance.However, Banco Santander was unable to put together the required loan syndication for the project…1/2'
 'On Tamale Airport, what Mahama submitted to Parliament in October 2016 was an Export Credit Facility from Banco Santander with Export Credit Guarantee from the UK Export Finance.However, Banco Santander was unable to put together the required loan syndication for the project…1/2 https://t.co/oTMdjNVxVZ https://t.co/W1vQFaZPFW'
 '@mustardd874 Sorry to hear that you are having an issue. Please call the number on the back of your Santander card and an advisor will be happy to help. ^JM'
 '@Deepdiver47 Hi, sorry to hear you are having an issue. If you call the number on the back of your Santander card, an advisor will be happy to help. ^JM'
 '@santanderukhelp happy Sunday! I’m hopin

Nota: os resultados parecem agora fazer mais sentido, embora continuem a ser afectados pelo tom mais profissional usado nestes contactos, ou seja, embora possamos estar a falar de problemas que os clientes têm com o banco, como o tom usado por parte do banco é habitualmente cordial e atencioso, os tweets acabam por ser classificados como tendo um sentimento positivo.

É importante termos em consideração que a **análise de sentimento está muito relacionada com o estilo e tom dos tweets e não necessariamente com o seu conteúdo**, sobretudo quando falamos de problemas que os clientes enfrentam.

Futuramente, podemos até **analisar separadamente o sentimento dos tweets escritos por contas oficiais do banco e o sentimento dos tweets escritos pelos clientes**.

In [68]:
# The raw score dicts are no longer needed once score and label are extracted.
df_santander.drop(columns=['scores'], inplace=True)

In [69]:
df_santander.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype              
---  ------       --------------  -----              
 0   id           100 non-null    int64              
 1   text         100 non-null    object             
 2   retweets     100 non-null    int64              
 3   replies      100 non-null    int64              
 4   likes        100 non-null    int64              
 5   quotes       100 non-null    int64              
 6   created_at   100 non-null    datetime64[ns, UTC]
 7   vader_score  100 non-null    float64            
 8   vader_label  100 non-null    object             
dtypes: datetime64[ns, UTC](1), float64(1), int64(5), object(2)
memory usage: 7.2+ KB


#### 2.4. Sentiment Analysis with TextBlob

In [70]:
# Build a textacy Corpus from the Santander tweet texts using the "en_core_web_sm" language model.
# NOTE: this replaces the `corpus` built earlier from the Vodafone tweets.
corpus = textacy.Corpus("en_core_web_sm", df_santander['text'])

In [71]:
print(corpus)

Corpus(100 docs, 3651 tokens)


In [72]:
# TextBlob polarity of every document, in corpus order, one dict per tweet.
pol = [{'polarity': TextBlob(doc.text).sentiment.polarity} for doc in corpus]

In [73]:
# Assign the polarity values positionally. Assigning a whole DataFrame to a single
# column relies on index alignment (and on pandas accepting a one-column frame),
# which would silently yield NaN if the frame's index were not 0..n-1.
df_santander['tblob_score'] = [entry['polarity'] for entry in pol]

In [74]:
df_santander.head()

Unnamed: 0,id,text,retweets,replies,likes,quotes,created_at,vader_score,vader_label,tblob_score
0,1548805127930187776,Now Hiring: Policy and Governance Analyst at S...,0,0,0,0,2022-07-17 23:01:32+00:00,0.0,neu,0.0
1,1548784317530259456,@GAMETrowbridge - think I left my credit card ...,0,0,0,0,2022-07-17 21:38:50+00:00,0.4836,neu,0.0
2,1548782268377858052,@StuartG58493143 @r954ie Nice one. Nothing ove...,0,1,0,0,2022-07-17 21:30:42+00:00,0.4215,neu,0.3
3,1548759534293991426,@NathanHeadPhoto Had the same with Santander.....,0,0,1,0,2022-07-17 20:00:22+00:00,0.2263,neu,0.025
4,1548749996140351488,'It looks so legit!' Santander scam email incl...,0,0,0,0,2022-07-17 19:22:28+00:00,-0.8383,neg,0.015


In [75]:
# Label each polarity: 'neg' at or below -0.05, 'pos' at or above 0.05, otherwise 'neu'.
df_santander['tblob_label'] = df_santander['tblob_score'].map(
    lambda score: 'neg' if score <= -0.05 else ('pos' if score >= 0.05 else 'neu')
)

In [76]:
df_santander.head()

Unnamed: 0,id,text,retweets,replies,likes,quotes,created_at,vader_score,vader_label,tblob_score,tblob_label
0,1548805127930187776,Now Hiring: Policy and Governance Analyst at S...,0,0,0,0,2022-07-17 23:01:32+00:00,0.0,neu,0.0,neu
1,1548784317530259456,@GAMETrowbridge - think I left my credit card ...,0,0,0,0,2022-07-17 21:38:50+00:00,0.4836,neu,0.0,neu
2,1548782268377858052,@StuartG58493143 @r954ie Nice one. Nothing ove...,0,1,0,0,2022-07-17 21:30:42+00:00,0.4215,neu,0.3,pos
3,1548759534293991426,@NathanHeadPhoto Had the same with Santander.....,0,0,1,0,2022-07-17 20:00:22+00:00,0.2263,neu,0.025,neu
4,1548749996140351488,'It looks so legit!' Santander scam email incl...,0,0,0,0,2022-07-17 19:22:28+00:00,-0.8383,neg,0.015,neu


In [77]:
df_santander['tblob_label'].value_counts()

pos    47
neu    36
neg    17
Name: tblob_label, dtype: int64

Nota: Mais uma vez, temos uma grande predominância de tweets positivos. Podemos alterar também o threshold neste caso.

In [107]:
# Stricter positive cut-off: 'pos' only when score >= 0.2.
# The negative cut-off stays at -0.05; everything in between is 'neu'.

df_santander['tblob_label'] = df_santander['tblob_score'].map(
    lambda score: 'neg' if score <= -0.05 else ('pos' if score >= 0.2 else 'neu')
)

In [108]:
df_santander['tblob_label'].value_counts()

neu    57
pos    26
neg    17
Name: tblob_label, dtype: int64

In [109]:
# Number of tweets where the VADER and TextBlob labels coincide.
int((df_santander['vader_label'] == df_santander['tblob_label']).sum())

60

VADER and TextBlob results are in agreement in 60 of the 100 tweets.

In [110]:
# Show the full text of every tweet TextBlob labelled as positive.
print(df_santander.loc[df_santander['tblob_label'] == 'pos', 'text'].values)

['@StuartG58493143 @r954ie Nice one. Nothing over in my basic account with Santander yet.'
 '@santanderukhelp happy Sunday! I’m hoping to transfer an AMEX card over to my Santander Credit Card (via balance transfer) - how do I do this with AMEX having less digits than a standard Visa or Mastercard? The website insists on 16 digits whereas AMEX on have 15 digits'
 '#Finance ‘It looks so legit!’ Santander scam email includes full name and account number https://t.co/gIGni9ML1u'
 '@SanCyclesMK hi! I have been using London Santander Cycles this week and my account has been suspended for some reason? Any way I can get help through Twitter or a virtual chat? :)'
 '‘It looks so legit!’ Santander scam email includes full name and account number https://t.co/4tezxPCqda'
 "'It looks so legit!’ Santander scam email includes full name and account number. https://t.co/vwiDApSqJC"
 "'It looks so legit!’ Santander scam email includes full name and account number https://t.co/bnGdh1wpxI"
 '@Michell904

In [111]:
# Show the full text of every tweet TextBlob labelled as negative.
print(df_santander.loc[df_santander['tblob_label'] == 'neg', 'text'].values)

['@IamObroniBa @thewordsmith0 On Tamale Airport, what Mahama submitted to Parliament in October 2016 was an Export Credit Facility from Banco Santander with Export Credit Guarantee from the UK Export Finance.However, Banco Santander was unable to put together the required loan syndication for the project…1/2'
 'On Tamale Airport, what Mahama submitted to Parliament in October 2016 was an Export Credit Facility from Banco Santander with Export Credit Guarantee from the UK Export Finance.However, Banco Santander was unable to put together the required loan syndication for the project…1/2 https://t.co/oTMdjNVxVZ https://t.co/W1vQFaZPFW'
 '@santanderukhelp my card is locked after 3 wrong pin attempts. Can i unblock my card from any ATM or does it have to have to be a Santander one?'
 '@ChrisBarraclou3 Sorry we can only apologise again for your experience this evening Chris. You can also log a complaint via online banking if you have access. This is detailed here too: https://t.co/LulsTNkqk

#### 2.5. Emotion Analysis with DepecheMood

In [112]:
# DepecheMood emotion lexicon: English, lemma word representation, min_freq=2.
rs = textacy.resources.DepecheMood(lang="en", word_rep="lemma", min_freq=2)

# For every document, rank its (emotion, score) pairs from highest to lowest score
# and store the ranked list under the 'mood' key.
moods = [
    {
        'mood': sorted(
            rs.get_emotional_valence(doc).items(),
            key=lambda pair: pair[1],
            reverse=True,
        )
    }
    for doc in corpus
]

In [113]:
# Attach the ranked emotion list as a column. Select the 'mood' Series explicitly
# instead of assigning a whole DataFrame to a single column (which only works
# through pandas' implicit one-column coercion).
df_santander['mood'] = pd.DataFrame(moods)['mood']

In [114]:
df_santander

Unnamed: 0,id,text,retweets,replies,likes,quotes,created_at,vader_score,vader_label,tblob_score,tblob_label,mood
0,1548805127930187776,Now Hiring: Policy and Governance Analyst at S...,0,0,0,0,2022-07-17 23:01:32+00:00,0.0000,neu,0.0000,neu,"[(SAD, 0.22348422135352278), (ANGRY, 0.1454915..."
1,1548784317530259456,@GAMETrowbridge - think I left my credit card ...,0,0,0,0,2022-07-17 21:38:50+00:00,0.4836,neu,0.0000,neu,"[(AMUSED, 0.15753847098962553), (INSPIRED, 0.1..."
2,1548782268377858052,@StuartG58493143 @r954ie Nice one. Nothing ove...,0,1,0,0,2022-07-17 21:30:42+00:00,0.4215,neu,0.3000,pos,"[(INSPIRED, 0.17976528969563574), (ANNOYED, 0...."
3,1548759534293991426,@NathanHeadPhoto Had the same with Santander.....,0,0,1,0,2022-07-17 20:00:22+00:00,0.2263,neu,0.0250,neu,"[(AMUSED, 0.15407307196625109), (DONT_CARE, 0...."
4,1548749996140351488,'It looks so legit!' Santander scam email incl...,0,0,0,0,2022-07-17 19:22:28+00:00,-0.8383,neg,0.0150,neu,"[(AMUSED, 0.15624914957121513), (ANGRY, 0.1501..."
...,...,...,...,...,...,...,...,...,...,...,...,...
95,1547335160869388289,@CitadelBanking in Pa. has hired a business ba...,0,0,0,0,2022-07-13 21:40:25+00:00,0.0000,neu,0.0000,neu,"[(INSPIRED, 0.16757707608389716), (DONT_CARE, ..."
96,1547304181924495364,Just been on to @santanderukhelp for 2hrs 45 m...,0,1,0,0,2022-07-13 19:37:19+00:00,-0.6463,neg,-0.3125,neg,"[(AMUSED, 0.1665948163879102), (INSPIRED, 0.14..."
97,1547272714074849281,"@santanderuk @santanderukhelp For some reason,...",0,1,0,0,2022-07-13 17:32:16+00:00,-0.8011,neg,-0.1250,neg,"[(AMUSED, 0.16205295489232163), (INSPIRED, 0.1..."
98,1547266401546784768,Santander for iOS will help you manage your ac...,0,0,0,0,2022-07-13 17:07:11+00:00,0.4019,neu,0.0000,neu,"[(AMUSED, 0.15850567830887197), (SAD, 0.157220..."


In [115]:
# Sanity check: how many (emotion, score) pairs were returned per tweet.
df_santander['mood'].map(len).value_counts()

8    100
Name: mood, dtype: int64

DepecheMood returned scores for all 8 emotions for each of the 100 tweets, so the emotion of every tweet could be analysed.

In [154]:
# 'mood' holds (emotion, score) pairs sorted by descending score, so positions
# 0, 1 and 2 are the three strongest emotions; keep only their names.
for rank in range(3):
    df_santander[f'top{rank + 1}_mood'] = df_santander['mood'].map(
        lambda ranked, rank=rank: ranked[rank][0]
    )

In [155]:
# Drop the tweet id and the full ranked mood list; the top-3 mood columns remain.
df_santander.drop(columns=['id', 'mood'], inplace=True)

In [156]:
df_santander

Unnamed: 0,text,retweets,replies,likes,quotes,created_at,vader_score,vader_label,tblob_score,tblob_label,top1_mood,top2_mood,top3_mood
0,Now Hiring: Policy and Governance Analyst at S...,0,0,0,0,2022-07-17 23:01:32+00:00,0.0000,neu,0.0000,neu,SAD,ANGRY,INSPIRED
1,@GAMETrowbridge - think I left my credit card ...,0,0,0,0,2022-07-17 21:38:50+00:00,0.4836,neu,0.0000,neu,AMUSED,INSPIRED,SAD
2,@StuartG58493143 @r954ie Nice one. Nothing ove...,0,1,0,0,2022-07-17 21:30:42+00:00,0.4215,neu,0.3000,pos,INSPIRED,ANNOYED,DONT_CARE
3,@NathanHeadPhoto Had the same with Santander.....,0,0,1,0,2022-07-17 20:00:22+00:00,0.2263,neu,0.0250,neu,AMUSED,DONT_CARE,INSPIRED
4,'It looks so legit!' Santander scam email incl...,0,0,0,0,2022-07-17 19:22:28+00:00,-0.8383,neg,0.0150,neu,AMUSED,ANGRY,INSPIRED
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,@CitadelBanking in Pa. has hired a business ba...,0,0,0,0,2022-07-13 21:40:25+00:00,0.0000,neu,0.0000,neu,INSPIRED,DONT_CARE,AMUSED
96,Just been on to @santanderukhelp for 2hrs 45 m...,0,1,0,0,2022-07-13 19:37:19+00:00,-0.6463,neg,-0.3125,neg,AMUSED,INSPIRED,ANNOYED
97,"@santanderuk @santanderukhelp For some reason,...",0,1,0,0,2022-07-13 17:32:16+00:00,-0.8011,neg,-0.1250,neg,AMUSED,INSPIRED,ANGRY
98,Santander for iOS will help you manage your ac...,0,0,0,0,2022-07-13 17:07:11+00:00,0.4019,neu,0.0000,neu,AMUSED,SAD,INSPIRED


In [158]:
df_santander['top1_mood'].value_counts()

INSPIRED     50
AMUSED       32
ANGRY        10
SAD           4
ANNOYED       3
DONT_CARE     1
Name: top1_mood, dtype: int64

In [163]:
# Show the full text of every tweet whose strongest emotion is ANGRY.
print(df_santander.loc[df_santander['top1_mood'] == 'ANGRY', 'text'].values)

['@IamObroniBa @thewordsmith0 On Tamale Airport, what Mahama submitted to Parliament in October 2016 was an Export Credit Facility from Banco Santander with Export Credit Guarantee from the UK Export Finance.However, Banco Santander was unable to put together the required loan syndication for the project…1/2'
 '@santanderukhelp I’ve had a text message purporting to be Santander (I have an account) to say a payment has been set up. Looks like a scam?'
 'On Tamale Airport, what Mahama submitted to Parliament in October 2016 was an Export Credit Facility from Banco Santander with Export Credit Guarantee from the UK Export Finance.However, Banco Santander was unable to put together the required loan syndication for the project…1/2 https://t.co/oTMdjNVxVZ https://t.co/W1vQFaZPFW'
 "Santander scam: phishing email includes customer's full name and account number: Scammers called victim pretending to be from NatWest Fraud Unit. Get pensions\xa0... https://t.co/pCI8vdjfus #419fraud #fraud"
 'Sa

Nota: os resultados da análise de sentimento/emoção para este caso parecem não ser tão coerentes como no caso anterior.

Para além do contexto ser bastante diferente, a query utilizada (com vários termos de interesse distintos) também poderá ter influenciado os resultados.

In [172]:
# Export the results to .csv (the DataFrame index is not written).

df_santander.to_csv(path_or_buf='santander.csv', index=False)

### 3. Oil (BP)

#### 3.1. Get tweets

In [208]:
# Matches tweets containing "BP" together with at least one of the competitors
# Shell, Repsol, Galp or Prio, excluding retweets and restricted to English (lang:en).
query = "BP ({}) -is:retweet lang:en".format(
    " OR ".join(["Shell", "Repsol", "Galp", "Prio"])
)

In [209]:
# Count of matching tweets for the last 7 days, bucketed per day
# (granularity can be "minute", "hour" or "day").
response = client.get_recent_tweets_count(query=query, granularity="day")

In [210]:
# Print one line per time bucket (start, end and tweet_count).
for bucket in response.data:
    print(bucket)

{'end': '2022-07-14T00:00:00.000Z', 'start': '2022-07-13T11:41:56.000Z', 'tweet_count': 61}
{'end': '2022-07-15T00:00:00.000Z', 'start': '2022-07-14T00:00:00.000Z', 'tweet_count': 126}
{'end': '2022-07-16T00:00:00.000Z', 'start': '2022-07-15T00:00:00.000Z', 'tweet_count': 96}
{'end': '2022-07-17T00:00:00.000Z', 'start': '2022-07-16T00:00:00.000Z', 'tweet_count': 88}
{'end': '2022-07-18T00:00:00.000Z', 'start': '2022-07-17T00:00:00.000Z', 'tweet_count': 68}
{'end': '2022-07-19T00:00:00.000Z', 'start': '2022-07-18T00:00:00.000Z', 'tweet_count': 128}
{'end': '2022-07-20T00:00:00.000Z', 'start': '2022-07-19T00:00:00.000Z', 'tweet_count': 154}
{'end': '2022-07-20T11:41:56.000Z', 'start': '2022-07-20T00:00:00.000Z', 'tweet_count': 63}


In [212]:
# Fetch up to 100 matching tweets created before the fixed end_time, requesting
# language, creation time and public engagement metrics for each tweet.
response = client.search_recent_tweets(
    query=query,
    tweet_fields=['lang', 'created_at', 'public_metrics'],
    max_results=100,
    end_time="2022-07-18T00:00:00Z",
)

In [213]:
# Print id, text, language and creation time of every returned tweet.
# response.data is None (not an empty list) when the search matches nothing,
# so fall back to an empty list instead of failing with a TypeError.
for tweet in response.data or []:
    print(tweet.id, tweet.text, tweet.lang, tweet.created_at)

1548808881756745728 When so-called markets are rigged by cartels, monopoly, and a handful of insiders on Wall St.... they aren't markets, it's organized THEFT. 

Right Jeffrey?  It's called Econ 101.

#GreenNewDeal 
#PriceControls

@JeffBezos

@APIenergy

@exxonmobil

@bp_America

@Shell

@Chevron https://t.co/YaztlZEuNZ en 2022-07-17 23:16:27+00:00
1548802842621788161 @bp_plc What’s gets me with BP and Shell etc is the sheer bullsh*t. I mean BP must think we are stupid. They lobby hard for more of the sabe whilst pretending they have some kind of green policy. Just be honest. Tell us you want more carbon fuels! en 2022-07-17 22:52:27+00:00
1548793871781576705 THANK YOU, ExxonMobil, Shell, Chevron, BP, etc. for encouraging all of us to transition away from the use of oil, via your price gouging. It's working. en 2022-07-17 22:16:48+00:00
1548784670342631427 @johnredwood Brexit and the global pandemic is the reason we have inflation. Greedy industries i.e. Esso, Shell, BP etc turned it 

#### 3.2. Store tweets in data frame

In [216]:
# One row per tweet: id, text, language, the four public engagement counters
# and the creation timestamp.
# response.data is None (not an empty list) when the search matches nothing,
# so fall back to an empty list; the result is then an empty frame with the
# expected columns instead of a TypeError.
data = [
    [
        tweet.id,
        tweet.text,
        tweet.lang,
        tweet.public_metrics['retweet_count'],
        tweet.public_metrics['reply_count'],
        tweet.public_metrics['like_count'],
        tweet.public_metrics['quote_count'],
        tweet.created_at,
    ]
    for tweet in (response.data or [])
]

df_bp = pd.DataFrame(
    data,
    columns=['id', 'text', 'lang', 'retweets', 'replies', 'likes', 'quotes', 'created_at'],
)

In [218]:
df_bp.head()

Unnamed: 0,id,text,lang,retweets,replies,likes,quotes,created_at
0,1548808881756745728,"When so-called markets are rigged by cartels, ...",en,0,0,0,0,2022-07-17 23:16:27+00:00
1,1548802842621788161,@bp_plc What’s gets me with BP and Shell etc i...,en,0,0,0,0,2022-07-17 22:52:27+00:00
2,1548793871781576705,"THANK YOU, ExxonMobil, Shell, Chevron, BP, etc...",en,0,0,0,0,2022-07-17 22:16:48+00:00
3,1548784670342631427,@johnredwood Brexit and the global pandemic is...,en,0,0,0,0,2022-07-17 21:40:15+00:00
4,1548779691158519814,These BP and Shell adverts are a massive pisst...,en,0,0,0,0,2022-07-17 21:20:27+00:00


In [219]:
df_bp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype              
---  ------      --------------  -----              
 0   id          100 non-null    int64              
 1   text        100 non-null    object             
 2   lang        100 non-null    object             
 3   retweets    100 non-null    int64              
 4   replies     100 non-null    int64              
 5   likes       100 non-null    int64              
 6   quotes      100 non-null    int64              
 7   created_at  100 non-null    datetime64[ns, UTC]
dtypes: datetime64[ns, UTC](1), int64(5), object(2)
memory usage: 6.4+ KB


In [220]:
df_bp['lang'].value_counts()

en    100
Name: lang, dtype: int64

All our tweets are in English, so we can drop this column.

In [221]:
# Every tweet is in English, so the language column carries no information.
df_bp.drop(columns='lang', inplace=True)

In [222]:
df_bp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype              
---  ------      --------------  -----              
 0   id          100 non-null    int64              
 1   text        100 non-null    object             
 2   retweets    100 non-null    int64              
 3   replies     100 non-null    int64              
 4   likes       100 non-null    int64              
 5   quotes      100 non-null    int64              
 6   created_at  100 non-null    datetime64[ns, UTC]
dtypes: datetime64[ns, UTC](1), int64(5), object(1)
memory usage: 5.6+ KB


#### 3.3. Sentiment Analysis with VADER

In [223]:
# Full VADER score dict per tweet.
# NOTE(review): `sid` is defined outside this section; presumably an NLTK
# SentimentIntensityAnalyzer instance - confirm.
df_bp['scores'] = df_bp['text'].map(sid.polarity_scores)

# Keep the 'compound' value of each score dict as the single sentiment score.
df_bp['vader_score'] = df_bp['scores'].map(lambda scores: scores['compound'])

# Label: 'neg' at or below -0.05, 'pos' at or above 0.05, otherwise 'neu'.
df_bp['vader_label'] = df_bp['vader_score'].map(
    lambda compound: 'neg' if compound <= -0.05 else ('pos' if compound >= 0.05 else 'neu')
)

In [224]:
df_bp

Unnamed: 0,id,text,retweets,replies,likes,quotes,created_at,scores,vader_score,vader_label
0,1548808881756745728,"When so-called markets are rigged by cartels, ...",0,0,0,0,2022-07-17 23:16:27+00:00,"{'neg': 0.067, 'neu': 0.933, 'pos': 0.0, 'comp...",-0.3612,neg
1,1548802842621788161,@bp_plc What’s gets me with BP and Shell etc i...,0,0,0,0,2022-07-17 22:52:27+00:00,"{'neg': 0.09, 'neu': 0.771, 'pos': 0.139, 'com...",0.1511,pos
2,1548793871781576705,"THANK YOU, ExxonMobil, Shell, Chevron, BP, etc...",0,0,0,0,2022-07-17 22:16:48+00:00,"{'neg': 0.0, 'neu': 0.783, 'pos': 0.217, 'comp...",0.7672,pos
3,1548784670342631427,@johnredwood Brexit and the global pandemic is...,0,0,0,0,2022-07-17 21:40:15+00:00,"{'neg': 0.073, 'neu': 0.927, 'pos': 0.0, 'comp...",-0.3182,neg
4,1548779691158519814,These BP and Shell adverts are a massive pisst...,0,0,0,0,2022-07-17 21:20:27+00:00,"{'neg': 0.167, 'neu': 0.833, 'pos': 0.0, 'comp...",-0.5574,neg
...,...,...,...,...,...,...,...,...,...,...
95,1548372177413541888,@StansaidAirport @AuthorKimberley Pfffft I don...,0,0,1,0,2022-07-16 18:21:09+00:00,"{'neg': 0.176, 'neu': 0.736, 'pos': 0.088, 'co...",-0.3653,neg
96,1548364807996223489,@SteveDavis21069 @mikepompeo Hundreds of permi...,0,1,1,0,2022-07-16 17:51:52+00:00,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neu
97,1548358056601260036,Saving on fuel right now is a huge issue My lo...,0,0,0,0,2022-07-16 17:25:02+00:00,"{'neg': 0.09, 'neu': 0.829, 'pos': 0.081, 'com...",-0.1263,neg
98,1548351914416295938,"Each of the UK's 4 biggest oil companies (BP, ...",0,0,2,1,2022-07-16 17:00:38+00:00,"{'neg': 0.04, 'neu': 0.932, 'pos': 0.029, 'com...",-0.1027,neg


In [225]:
df_bp['vader_label'].value_counts()

pos    43
neg    41
neu    16
Name: vader_label, dtype: int64

In [226]:
# Show the full text of every tweet VADER labelled as positive.
print(df_bp.loc[df_bp['vader_label'] == 'pos', 'text'].values)

['@bp_plc What’s gets me with BP and Shell etc is the sheer bullsh*t. I mean BP must think we are stupid. They lobby hard for more of the sabe whilst pretending they have some kind of green policy. Just be honest. Tell us you want more carbon fuels!'
 "THANK YOU, ExxonMobil, Shell, Chevron, BP, etc. for encouraging all of us to transition away from the use of oil, via your price gouging. It's working."
 '@darrengrimes_ Even if climate change is a hoax (which it isn’t) - why would you not want fresher air, cleaner water, less rubbish, green spaces and thriving wildlife.  We can do better in all of those areas.  Shell and BP might not like it but there is a better way.'
 'Its funny how lurpack, shell,bp and the others are advertising all the time when its the ones ripping us off'
 'Stack up your savings from .40 Cents per gallon to over $1 by attaching the following promo codes to your profile as well. 1)THOUGHTS8272   2)Uber35  3) Snap37  4) 7Centbonus 5)comeback6    #shell #BP #GasCris

In [227]:
# Show the full text of every tweet VADER labelled as negative.
print(df_bp.loc[df_bp['vader_label'] == 'neg', 'text'].values)

["When so-called markets are rigged by cartels, monopoly, and a handful of insiders on Wall St.... they aren't markets, it's organized THEFT. \n\nRight Jeffrey?  It's called Econ 101.\n\n#GreenNewDeal \n#PriceControls\n\n@JeffBezos\n\n@APIenergy\n\n@exxonmobil\n\n@bp_America\n\n@Shell\n\n@Chevron https://t.co/YaztlZEuNZ"
 '@johnredwood Brexit and the global pandemic is the reason we have inflation. Greedy industries i.e. Esso, Shell, BP etc turned it into an avalanche. You are a lobbyist, you should know.'
 'These BP and Shell adverts are a massive pisstake. Knock the price down we don’t give a shit about your sandwiches'
 "OIL companies have no shame, they are raking in record profits while #gasprices GOUGING in front of your eyes! \nAnd you want the president to change that?\nHe's the USA leader not the chairman of EXXON SHELL BP CHEVRON MARATHON\n#Congress needs to do something beside take their $$$ https://t.co/OabGBXfHtB https://t.co/dGf4gGFf8D"
 '@GaryLineker @WarwickHunt4 It’s B

Some of the tweets classified as positive seem misclassified. We can try to increase the threshold.

In [248]:
# Stricter positive cut-off: 'pos' only when the compound score is >= 0.6.
# The negative cut-off stays at -0.05; everything in between is 'neu'.

# Recompute the full VADER score dict per tweet so the cell is self-contained.
df_bp['scores'] = df_bp['text'].map(sid.polarity_scores)

df_bp['vader_score'] = df_bp['scores'].map(lambda scores: scores['compound'])

df_bp['vader_label'] = df_bp['vader_score'].map(
    lambda compound: 'neg' if compound <= -0.05 else ('pos' if compound >= 0.6 else 'neu')
)

In [249]:
df_bp['vader_label'].value_counts()

neg    41
neu    33
pos    26
Name: vader_label, dtype: int64

In [250]:
# Show the tweets still labelled positive after raising the threshold.
print(df_bp.loc[df_bp['vader_label'] == 'pos', 'text'].values)

["THANK YOU, ExxonMobil, Shell, Chevron, BP, etc. for encouraging all of us to transition away from the use of oil, via your price gouging. It's working."
 '@darrengrimes_ Even if climate change is a hoax (which it isn’t) - why would you not want fresher air, cleaner water, less rubbish, green spaces and thriving wildlife.  We can do better in all of those areas.  Shell and BP might not like it but there is a better way.'
 '@RedRobko @Tesco @ShellOil @asda #Shell, #bp &amp; the likes are making hay why the sun shines - with a dying biz model &amp; massive impact on profits during covid, they are unified in squeezing the market &amp; are the biggest contributor to inflation &amp; the pending recession. God help us as they enter energy retailing.'
 '@JDCocchiarella There are *5* reasons: "The top five oil companies alone—Shell, ExxonMobil, BP, Chevron, and ConocoPhillips—brought in more than 200 percent more in profits than in the first quarter of 2021. That is a total of more than $35 b

In [251]:
# The raw VADER score dicts are no longer needed once score and label columns exist.
df_bp.drop(columns='scores', inplace=True)

In [252]:
df_bp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype              
---  ------       --------------  -----              
 0   id           100 non-null    int64              
 1   text         100 non-null    object             
 2   retweets     100 non-null    int64              
 3   replies      100 non-null    int64              
 4   likes        100 non-null    int64              
 5   quotes       100 non-null    int64              
 6   created_at   100 non-null    datetime64[ns, UTC]
 7   vader_score  100 non-null    float64            
 8   vader_label  100 non-null    object             
dtypes: datetime64[ns, UTC](1), float64(1), int64(5), object(2)
memory usage: 7.2+ KB


#### 3.4. Sentiment Analysis with TextBlob

In [253]:
# Build a textacy corpus from the tweet texts using the "en_core_web_sm" pipeline.
corpus = textacy.Corpus(lang="en_core_web_sm", data=df_bp['text'])

In [254]:
print(corpus)

Corpus(100 docs, 4495 tokens)


In [255]:
# One record per corpus document, holding the TextBlob polarity of its text
# under the 'polarity' key.
pol = [{'polarity': TextBlob(doc.text).sentiment.polarity} for doc in corpus]

In [256]:
# Attach the TextBlob polarity as a column. Select the 'polarity' Series explicitly
# instead of assigning a whole DataFrame to a single column (which only works
# through pandas' implicit one-column coercion).
df_bp['tblob_score'] = pd.DataFrame(pol)['polarity']

In [257]:
df_bp.head()

Unnamed: 0,id,text,retweets,replies,likes,quotes,created_at,vader_score,vader_label,tblob_score
0,1548808881756745728,"When so-called markets are rigged by cartels, ...",0,0,0,0,2022-07-17 23:16:27+00:00,-0.3612,neg,0.285714
1,1548802842621788161,@bp_plc What’s gets me with BP and Shell etc i...,0,0,0,0,2022-07-17 22:52:27+00:00,0.1511,neu,0.080093
2,1548793871781576705,"THANK YOU, ExxonMobil, Shell, Chevron, BP, etc...",0,0,0,0,2022-07-17 22:16:48+00:00,0.7672,pos,0.0
3,1548784670342631427,@johnredwood Brexit and the global pandemic is...,0,0,0,0,2022-07-17 21:40:15+00:00,-0.3182,neg,0.0
4,1548779691158519814,These BP and Shell adverts are a massive pisst...,0,0,0,0,2022-07-17 21:20:27+00:00,-0.5574,neg,-0.118519


In [258]:
# Label each polarity: 'neg' at or below -0.05, 'pos' at or above 0.05, otherwise 'neu'.
df_bp['tblob_label'] = df_bp['tblob_score'].map(
    lambda score: 'neg' if score <= -0.05 else ('pos' if score >= 0.05 else 'neu')
)

In [259]:
df_bp.head()

Unnamed: 0,id,text,retweets,replies,likes,quotes,created_at,vader_score,vader_label,tblob_score,tblob_label
0,1548808881756745728,"When so-called markets are rigged by cartels, ...",0,0,0,0,2022-07-17 23:16:27+00:00,-0.3612,neg,0.285714,pos
1,1548802842621788161,@bp_plc What’s gets me with BP and Shell etc i...,0,0,0,0,2022-07-17 22:52:27+00:00,0.1511,neu,0.080093,pos
2,1548793871781576705,"THANK YOU, ExxonMobil, Shell, Chevron, BP, etc...",0,0,0,0,2022-07-17 22:16:48+00:00,0.7672,pos,0.0,neu
3,1548784670342631427,@johnredwood Brexit and the global pandemic is...,0,0,0,0,2022-07-17 21:40:15+00:00,-0.3182,neg,0.0,neu
4,1548779691158519814,These BP and Shell adverts are a massive pisst...,0,0,0,0,2022-07-17 21:20:27+00:00,-0.5574,neg,-0.118519,neg


In [260]:
df_bp['tblob_label'].value_counts()

pos    49
neu    30
neg    21
Name: tblob_label, dtype: int64

Nota: Mais uma vez, temos uma grande predominância de tweets positivos. Podemos alterar também o threshold neste caso.

In [306]:
# Stricter positive cut-off: 'pos' only when score >= 0.2.
# The negative cut-off stays at -0.05; everything in between is 'neu'.

df_bp['tblob_label'] = df_bp['tblob_score'].map(
    lambda score: 'neg' if score <= -0.05 else ('pos' if score >= 0.2 else 'neu')
)

In [307]:
df_bp['tblob_label'].value_counts()

neu    47
pos    32
neg    21
Name: tblob_label, dtype: int64

In [308]:
# Number of tweets where the VADER and TextBlob labels coincide.
int((df_bp['vader_label'] == df_bp['tblob_label']).sum())

48

VADER and TextBlob results are in agreement in only 48 of the 100 tweets.

In [309]:
# Show the full text of every tweet TextBlob labelled as positive.
print(df_bp.loc[df_bp['tblob_label'] == 'pos', 'text'].values)

["When so-called markets are rigged by cartels, monopoly, and a handful of insiders on Wall St.... they aren't markets, it's organized THEFT. \n\nRight Jeffrey?  It's called Econ 101.\n\n#GreenNewDeal \n#PriceControls\n\n@JeffBezos\n\n@APIenergy\n\n@exxonmobil\n\n@bp_America\n\n@Shell\n\n@Chevron https://t.co/YaztlZEuNZ"
 'Its funny how lurpack, shell,bp and the others are advertising all the time when its the ones ripping us off'
 'This episode will get you up-to-date with the latest H2 news from Shell’s renewable hydrogen plant to bp and thyssenkrupp’s plan to decarbonize steel production.\n\nListen Here: https://t.co/uPYcJznGvJ\n#hydrogen #renewables\n#infrastructure https://t.co/sDNQrpcVl5'
 '@AhkiraEssien lol i go to BP, Shell, or Exxon'
 '@bp_UK @Shell \nThe real #Villans #enemies of the #planet #carbonfootprint my #BrownCandyass \nhttps://t.co/KmPdj1nMJZ'
 '@Dean02600306 @steve8221 @FoxNews Shell; ExxonMobil; BP control how much profit they want to make; they are price gouging t

In [310]:
# Show the full text of every tweet TextBlob labelled as negative.
print(df_bp.loc[df_bp['tblob_label'] == 'neg', 'text'].values)

['These BP and Shell adverts are a massive pisstake. Knock the price down we don’t give a shit about your sandwiches'
 "South Africa has only one operational oil refinery. It ran out of crude oil this weekend hence shut down.\n\nBP and Shell halted their operations indefinitely at Sapref in February.\n\nAstron Energy's Cape Town facility, shut its doors in 2020.\nhttps://t.co/7eSkTnPBen"
 'But business as usual for the climate crisis deniers &amp; green washers \u2066@Shell_UKLtd\u2069 \u2066@bp_UK\u2069 \u2066@GWPF_org\u2069 \nForest fires rage across Europe as heatwave sends temperatures soaring https://t.co/JULcGS68kE'
 "UK's oil/gas company windfall tax mirage.\n\nImpact on BP and Shell - less than 2% of  Earnings Before Interest Tax Depreciation and Amortisation\n\nBP paid corp tax once in the last six years.\nShell paid no tax on its North Sea oil/gas operations for 4 yrs.\n https://t.co/iVI3yjETzV"
 '@SsemuHassan Within 10/15 years Shell, BP, etc have a dead business model as we

In [311]:
# Number of tweets that both VADER and TextBlob label as positive.

int(((df_bp['vader_label'] == 'pos') & (df_bp['tblob_label'] == 'pos')).sum())

15

In [312]:
# Texts of the tweets that both VADER and TextBlob label as positive.

print(df_bp.loc[(df_bp['vader_label'] == 'pos') & (df_bp['tblob_label'] == 'pos'), 'text'].values)

['@JDCocchiarella There are *5* reasons: "The top five oil companies alone—Shell, ExxonMobil, BP, Chevron, and ConocoPhillips—brought in more than 200 percent more in profits than in the first quarter of 2021. That is a total of more than $35 billion in profits in just three months."  5-17-22'
 'I love the Just Transition stuff from ScotGov, I particularly love how they lie to Scots about their care in tackling fuel poverty but sold off our ability to manage that. Derrrrrrp.\nScottish government in line for near-£700m payday after windfarm auction https://t.co/c2cuG7dO3H'
 "@phil_woods Ha Ha, David Bellamy, is that the best you've got !! 'gwapple me gwapenuts'. You must work for Exxon BP or Shell."
 '@timburchett @POTUS Great question for @Shell, @exxonmobil, @bp_America, @Chevron, @conocophillips: Why was US oil production so much higher in late 2019? Clearly the capacity is there.'
 '@GaryLineker Brilliant news, extra few degrees in the UK is lovely! Keep up the good work Shell and B

In [313]:
# Number of tweets that both VADER and TextBlob label as negative.

int(((df_bp['vader_label'] == 'neg') & (df_bp['tblob_label'] == 'neg')).sum())

15

In [314]:
# Texts of the tweets that both VADER and TextBlob label as negative.

print(df_bp.loc[(df_bp['vader_label'] == 'neg') & (df_bp['tblob_label'] == 'neg'), 'text'].values)

['These BP and Shell adverts are a massive pisstake. Knock the price down we don’t give a shit about your sandwiches'
 "South Africa has only one operational oil refinery. It ran out of crude oil this weekend hence shut down.\n\nBP and Shell halted their operations indefinitely at Sapref in February.\n\nAstron Energy's Cape Town facility, shut its doors in 2020.\nhttps://t.co/7eSkTnPBen"
 'But business as usual for the climate crisis deniers &amp; green washers \u2066@Shell_UKLtd\u2069 \u2066@bp_UK\u2069 \u2066@GWPF_org\u2069 \nForest fires rage across Europe as heatwave sends temperatures soaring https://t.co/JULcGS68kE'
 '@SsemuHassan Within 10/15 years Shell, BP, etc have a dead business model as we go electric vehicles and green energy production. Combine this with their losses in COVID, drives their ‘unified backroom handshake’ born from greed. There is no justification to raise pump prices - its artificial'
 "Don't cruise by today, we've got cold beer &amp; carbs a plenty for you

#### 3.5. Emotion Analysis with DepecheMood

In [315]:
# DepecheMood emotion lexicon: English, lemma word representation, min_freq=2.
rs = textacy.resources.DepecheMood(lang="en", word_rep="lemma", min_freq=2)

# For every document, rank its (emotion, score) pairs from highest to lowest score
# and store the ranked list under the 'mood' key.
moods = [
    {
        'mood': sorted(
            rs.get_emotional_valence(doc).items(),
            key=lambda pair: pair[1],
            reverse=True,
        )
    }
    for doc in corpus
]

In [316]:
# Attach the ranked emotion list as a column. Select the 'mood' Series explicitly
# instead of assigning a whole DataFrame to a single column (which only works
# through pandas' implicit one-column coercion).
df_bp['mood'] = pd.DataFrame(moods)['mood']

In [317]:
df_bp

Unnamed: 0,id,text,retweets,replies,likes,quotes,created_at,vader_score,vader_label,tblob_score,tblob_label,mood
0,1548808881756745728,"When so-called markets are rigged by cartels, ...",0,0,0,0,2022-07-17 23:16:27+00:00,-0.3612,neg,0.285714,pos,"[(ANGRY, 0.16903192740123762), (AMUSED, 0.1457..."
1,1548802842621788161,@bp_plc What’s gets me with BP and Shell etc i...,0,0,0,0,2022-07-17 22:52:27+00:00,0.1511,neu,0.080093,neu,"[(INSPIRED, 0.17990644696104294), (AMUSED, 0.1..."
2,1548793871781576705,"THANK YOU, ExxonMobil, Shell, Chevron, BP, etc...",0,0,0,0,2022-07-17 22:16:48+00:00,0.7672,pos,0.000000,neu,"[(INSPIRED, 0.18376356818290385), (AMUSED, 0.1..."
3,1548784670342631427,@johnredwood Brexit and the global pandemic is...,0,0,0,0,2022-07-17 21:40:15+00:00,-0.3182,neg,0.000000,neu,"[(INSPIRED, 0.16555429056590398), (ANNOYED, 0...."
4,1548779691158519814,These BP and Shell adverts are a massive pisst...,0,0,0,0,2022-07-17 21:20:27+00:00,-0.5574,neg,-0.118519,neg,"[(AMUSED, 0.1680677858344841), (DONT_CARE, 0.1..."
...,...,...,...,...,...,...,...,...,...,...,...,...
95,1548372177413541888,@StansaidAirport @AuthorKimberley Pfffft I don...,0,0,1,0,2022-07-16 18:21:09+00:00,-0.3653,neg,-0.333333,neg,"[(INSPIRED, 0.1676397316954369), (AMUSED, 0.14..."
96,1548364807996223489,@SteveDavis21069 @mikepompeo Hundreds of permi...,0,1,1,0,2022-07-16 17:51:52+00:00,0.0000,neu,0.000000,neu,"[(AMUSED, 0.15846664742654343), (INSPIRED, 0.1..."
97,1548358056601260036,Saving on fuel right now is a huge issue My lo...,0,0,0,0,2022-07-16 17:25:02+00:00,-0.1263,neg,-0.102381,neg,"[(AMUSED, 0.16326635464345143), (INSPIRED, 0.1..."
98,1548351914416295938,"Each of the UK's 4 biggest oil companies (BP, ...",0,0,2,1,2022-07-16 17:00:38+00:00,-0.1027,neg,0.155556,neu,"[(INSPIRED, 0.15131485798598468), (ANGRY, 0.14..."


In [318]:
# Sanity check: tally how many entries each tweet's 'mood' list contains.
df_bp['mood'].map(len).value_counts()

8    100
Name: mood, dtype: int64

All 100 tweets received a score for each of the 8 emotions, so the emotion analysis succeeded for every tweet in the sample.

In [319]:
# Each 'mood' value is a list of (emotion, score) pairs; keep only the emotion
# label found at positions 0, 1 and 2 of that list, one column per position.
for position, column in enumerate(('top1_mood', 'top2_mood', 'top3_mood')):
    # Bind `position` as a default so each lambda keeps its own index.
    df_bp[column] = df_bp['mood'].map(
        lambda ranked, position=position: ranked[position][0]
    )

In [320]:
# Remove the tweet id and the raw 'mood' list now that the top-3 emotion
# labels exist as their own columns.
# Rebind instead of using inplace=True, and tolerate already-missing columns,
# so that re-running this cell is a no-op rather than a KeyError.
df_bp = df_bp.drop(columns=['id', 'mood'], errors='ignore')

In [321]:
# Display the frame after dropping 'id'/'mood' and adding the top1/top2/top3 mood columns.
df_bp

Unnamed: 0,text,retweets,replies,likes,quotes,created_at,vader_score,vader_label,tblob_score,tblob_label,top1_mood,top2_mood,top3_mood
0,"When so-called markets are rigged by cartels, ...",0,0,0,0,2022-07-17 23:16:27+00:00,-0.3612,neg,0.285714,pos,ANGRY,AMUSED,ANNOYED
1,@bp_plc What’s gets me with BP and Shell etc i...,0,0,0,0,2022-07-17 22:52:27+00:00,0.1511,neu,0.080093,neu,INSPIRED,AMUSED,ANGRY
2,"THANK YOU, ExxonMobil, Shell, Chevron, BP, etc...",0,0,0,0,2022-07-17 22:16:48+00:00,0.7672,pos,0.000000,neu,INSPIRED,AMUSED,ANNOYED
3,@johnredwood Brexit and the global pandemic is...,0,0,0,0,2022-07-17 21:40:15+00:00,-0.3182,neg,0.000000,neu,INSPIRED,ANNOYED,AMUSED
4,These BP and Shell adverts are a massive pisst...,0,0,0,0,2022-07-17 21:20:27+00:00,-0.5574,neg,-0.118519,neg,AMUSED,DONT_CARE,INSPIRED
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,@StansaidAirport @AuthorKimberley Pfffft I don...,0,0,1,0,2022-07-16 18:21:09+00:00,-0.3653,neg,-0.333333,neg,INSPIRED,AMUSED,ANNOYED
96,@SteveDavis21069 @mikepompeo Hundreds of permi...,0,1,1,0,2022-07-16 17:51:52+00:00,0.0000,neu,0.000000,neu,AMUSED,INSPIRED,ANNOYED
97,Saving on fuel right now is a huge issue My lo...,0,0,0,0,2022-07-16 17:25:02+00:00,-0.1263,neg,-0.102381,neg,AMUSED,INSPIRED,ANNOYED
98,"Each of the UK's 4 biggest oil companies (BP, ...",0,0,2,1,2022-07-16 17:00:38+00:00,-0.1027,neg,0.155556,neu,INSPIRED,ANGRY,AMUSED


In [322]:
# Frequency of each emotion as the highest-ranked mood across the tweets.
df_bp.loc[:, 'top1_mood'].value_counts()

INSPIRED     61
AMUSED       21
ANGRY        10
SAD           5
DONT_CARE     1
ANNOYED       1
AFRAID        1
Name: top1_mood, dtype: int64

In [323]:
# Print the full text of every tweet whose top-ranked mood is ANGRY;
# a single .loc call selects rows and column together (no chained indexing).
print(df_bp.loc[df_bp['top1_mood'] == 'ANGRY', 'text'].values)

["When so-called markets are rigged by cartels, monopoly, and a handful of insiders on Wall St.... they aren't markets, it's organized THEFT. \n\nRight Jeffrey?  It's called Econ 101.\n\n#GreenNewDeal \n#PriceControls\n\n@JeffBezos\n\n@APIenergy\n\n@exxonmobil\n\n@bp_America\n\n@Shell\n\n@Chevron https://t.co/YaztlZEuNZ"
 "UK's oil/gas company windfall tax mirage.\n\nImpact on BP and Shell - less than 2% of  Earnings Before Interest Tax Depreciation and Amortisation\n\nBP paid corp tax once in the last six years.\nShell paid no tax on its North Sea oil/gas operations for 4 yrs.\n https://t.co/iVI3yjETzV"
 '@financialjuice Stop blaming the oil producers and blame oil companies  Shell  BP etc with record profits !!'
 '@afowler06 @GaryLineker Less than Shell or BP would pay him to shill for them.'
 'These fires 🔥 are not caused by the #heatwave - they are caused by the insane Greed of the Fossil Fuel Industry and their lawyer, PR &amp; government enablers @shell @bp_plc @exxonmobil @Exxon

Note: the sentiment/emotion analysis results for this case also seem less coherent than in the first case.

One factor behind this is that many of the tweets are ironic/sarcastic: read literally they appear positive when they are actually negative.

On the other hand, since DepecheMood scores 8 emotions (5 negative and 3 positive), we can also use this assessment to "validate" the label assigned by Vader and by TextBlob.

In [325]:
# Export the results to a .csv file (row index omitted).

df_bp.to_csv(path_or_buf='bp.csv', index=False)