In [1]:
# Mount Google Drive into the Colab runtime (interactive: prompts for an authorization code).
from google.colab import drive
drive.mount('/content/drive')

# Switch the working directory to the project folder on Drive so that the
# relative paths used later in the notebook ('models/', 'data/', 'figures/')
# resolve inside it.
import os
os.chdir('/content/drive/My Drive/Colab Notebooks/tcc')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [2]:
import tensorflow as tf
import pickle
import pandas as pd

from preprocessing import *

✔ Download and installation successful
You can now load the model via spacy.load('en_core_web_lg')


In [None]:
class EmotionDetect:
    """Scores a text against six emotions using one Keras model per emotion.

    Every model is built from the same architecture file (``bi-gru300.json``)
    and then loaded with its own ``<emotion>_weights.h5`` file. Tokens are
    mapped to integer indices through the vocabulary of a pickled embedding
    model before being fed to the networks.
    """

    def __init__(self, vectors_name, w_folder):
        """Load the embedding vectors, create the tokenizer and load all models.

        Args:
            vectors_name: Path of the pickled embedding model to load
                (e.g. ``'models/ft_300.pkl'``).
            w_folder: Folder that holds ``bi-gru300.json`` and the
                ``<emotion>_weights.h5`` files (e.g. ``'models'``).
        """
        self.w_folder = w_folder
        self.size_word2vec = 300
        self.max_sequences = 35
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # vector files that come from a trusted source.
        with open(vectors_name, 'rb') as vectors_file:
            self.vectors = pickle.load(vectors_file)
        self.emotions = ['anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise']
        # Tokenizer is not defined in this notebook; presumably it comes from
        # `from preprocessing import *` -- TODO confirm.
        self.tokenizer = Tokenizer()
        self.loadModels()

    def model_gru(self, embedding_matrix, embed_size):
        """Build an (unweighted) Keras model from the stored JSON architecture.

        Args:
            embedding_matrix: Unused; kept for interface compatibility.
            embed_size: Unused; kept for interface compatibility.

        Returns:
            The model returned by ``tf.keras.models.model_from_json``.
        """
        architecture_path = os.path.join(self.w_folder, 'bi-gru300.json')
        # The context manager closes the file even if reading fails.
        with open(architecture_path, 'r') as json_file:
            loaded_model_json = json_file.read()
        return tf.keras.models.model_from_json(loaded_model_json)

    def loadModels(self):
        """Create one model per emotion and load its weights into ``self.models``."""
        self.models = dict()
        for s in self.emotions:
            self.models[s] = self.model_gru(self.vectors.wv.vectors, self.size_word2vec)
            self.models[s].load_weights(os.path.join(self.w_folder, s + '_weights.h5'))

    def getIndex(self, t):
        """Return the vocabulary index of token ``t``, or 0 if it is unknown.

        Only a missing vocabulary entry is mapped to 0; any other error
        (for instance an embedding object without ``wv.vocab``) propagates
        instead of silently turning every token into index 0.
        """
        try:
            return self.vectors.wv.vocab[t].index
        except KeyError:
            return 0

    def predict(self, text):
        """Score ``text`` with every emotion model.

        Args:
            text: Raw text to classify.

        Returns:
            dict mapping each emotion name to the ``[0][0]`` element of that
            model's ``predict`` output for the padded index sequence.
        """
        # cleanText is expected to return an iterable of tokens -- TODO confirm.
        x = self.tokenizer.cleanText(text)
        x = [self.getIndex(t) for t in x]
        x = tf.keras.preprocessing.sequence.pad_sequences([x], maxlen=self.max_sequences)
        return {s: self.models[s].predict(x)[0][0] for s in self.emotions}

In [None]:
# Build the shared classifier once; loading six models is expensive.
classifier = EmotionDetect(vectors_name='models/ft_300.pkl', w_folder='models')

In [5]:
from datetime import timedelta, date
!pip install GetOldTweets3
import GetOldTweets3 as got

def daterange(start_date, end_date):
    """Yield consecutive days from ``start_date`` up to, but excluding, ``end_date``.

    Nothing is yielded when ``end_date`` is on or before ``start_date``.
    """
    span_days = int((end_date - start_date).days)
    offset = 0
    while offset < span_days:
        yield start_date + timedelta(offset)
        offset += 1

def get_data(word, year=2020, month=1):
    """Fetch and emotion-score the top tweets matching ``word`` for one month.

    The month is queried one day at a time (up to 1000 English top tweets per
    day). Each tweet is scored with the notebook-level ``classifier`` and
    stored as a dict holding the tweet metadata, a 0/1 flag per emotion and
    the raw score per emotion (``<emotion>_pred``). After every day the
    records collected so far are written to ``data/data_autosave.csv``.

    Relies on the notebook-level names ``tqdm``, ``got``, ``classifier``,
    ``pd`` and ``daterange``.

    Args:
        word: Search query passed to GetOldTweets3.
        year: Calendar year of the month to fetch.
        month: Calendar month (1-12) to fetch.

    Returns:
        list of dict, one record per tweet.
    """
    start_date = date(year, month, 1)
    # First day of the following month. December must roll over into January
    # of the next year, because date(year, 13, 1) raises ValueError.
    if month == 12:
        end_date = date(year + 1, 1, 1)
    else:
        end_date = date(year, month + 1, 1)

    # Score above which an emotion is flagged as present.
    # NOTE(review): anger uses 0.11 while every other emotion uses 0.1 --
    # confirm this is intentional.
    thresholds = {
        'anger': 0.11,
        'disgust': 0.1,
        'fear': 0.1,
        'joy': 0.1,
        'sadness': 0.1,
        'surprise': 0.1,
    }

    tweets = []
    total_days = (end_date - start_date).days
    for single_date in tqdm(daterange(start_date, end_date), total=total_days):
        day = single_date.strftime("%Y-%m-%d")
        dayPlus = (single_date + timedelta(days=1)).strftime("%Y-%m-%d")

        tweetCriteria = (got.manager.TweetCriteria()
                         .setQuerySearch(word)
                         .setSince(day)
                         .setUntil(dayPlus)
                         .setLang('en')
                         .setTopTweets(True)
                         .setMaxTweets(1000))
        day_tweets = got.manager.TweetManager.getTweets(tweetCriteria)

        for t in day_tweets:
            emo = classifier.predict(t.text)
            record = {
                'date': t.date,
                'id': t.id,
                'text': t.text,
                'favorites': t.favorites,
                'retweets': t.retweets,
                'replies': t.replies,
            }
            # Insertion order fixes the DataFrame column order:
            # all 0/1 flags first, then all raw scores.
            for emotion, cutoff in thresholds.items():
                record[emotion] = int(emo[emotion] > cutoff)
            for emotion in thresholds:
                record[emotion + '_pred'] = emo[emotion]
            tweets.append(record)

        # Checkpoint after each day so a crash mid-month does not lose everything.
        # NOTE: the file is a pickle despite its '.csv' suffix (read it back
        # with pd.read_pickle).
        aux = pd.DataFrame(tweets)
        aux.to_pickle('data/data_autosave.csv')
    return tweets


Collecting GetOldTweets3
  Downloading https://files.pythonhosted.org/packages/ed/f4/a00c2a7c90801abc875325bb5416ce9090ac86d06a00cc887131bd73ba45/GetOldTweets3-0.0.11-py3-none-any.whl
Collecting pyquery>=1.2.10
  Downloading https://files.pythonhosted.org/packages/78/43/95d42e386c61cb639d1a0b94f0c0b9f0b7d6b981ad3c043a836c8b5bc68b/pyquery-1.4.1-py2.py3-none-any.whl
Collecting cssselect>0.7.9
  Downloading https://files.pythonhosted.org/packages/3b/d4/3b5c17f00cce85b9a1e6f91096e1cc8e8ede2e1be8e96b87ce1ed09e92c5/cssselect-1.1.0-py2.py3-none-any.whl
Installing collected packages: cssselect, pyquery, GetOldTweets3
Successfully installed GetOldTweets3-0.0.11 cssselect-1.1.0 pyquery-1.4.1


In [None]:
# For each (year, month) produced by month_year_iter: fetch and score tweets,
# persist them, then chart the daily count of flagged emotions.
search = 'trump'
for y, m in month_year_iter(9, 2019, 4, 2020):
    name = f"{y}{m}"
    tweets = get_data(search, year=y, month=m)

    # Persist the raw records (pickle format, despite the '.csv' suffix).
    df = pd.DataFrame(tweets)
    df.to_pickle(f"data/tweets_{search}_{name}.csv")

    # Drop the time-of-day so that tweets aggregate per calendar day.
    df['date'] = pd.to_datetime(df['date']).dt.normalize()
    emotion_columns = ['anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise']
    daily_flags = df.set_index('date')[emotion_columns]
    result = daily_flags.resample("D").sum()

    # One stacked bar per day, one colour per emotion.
    plot = result.plot.bar(stacked=True, figsize=(20,10))
    plot.figure.savefig(f"figures/sent_{search}_{name}.jpg")

 30%|███       | 9/30 [37:38<1:29:09, 254.75s/it]

In [None]:
# Display the daily emotion counts left over from the last month processed by the loop above.
result

In [None]:
# Save the figure of the most recently drawn plot (last loop iteration) to 'test.jpg' in the working directory.
plot.figure.savefig('test.jpg')