In [0]:
#imports
import os
import sys
from pathlib import Path
import pickle

In [2]:
!pip install GetOldTweets3

Collecting GetOldTweets3
  Downloading https://files.pythonhosted.org/packages/ed/f4/a00c2a7c90801abc875325bb5416ce9090ac86d06a00cc887131bd73ba45/GetOldTweets3-0.0.11-py3-none-any.whl
Collecting pyquery>=1.2.10
  Downloading https://files.pythonhosted.org/packages/78/43/95d42e386c61cb639d1a0b94f0c0b9f0b7d6b981ad3c043a836c8b5bc68b/pyquery-1.4.1-py2.py3-none-any.whl
Collecting cssselect>0.7.9
  Downloading https://files.pythonhosted.org/packages/3b/d4/3b5c17f00cce85b9a1e6f91096e1cc8e8ede2e1be8e96b87ce1ed09e92c5/cssselect-1.1.0-py2.py3-none-any.whl
Installing collected packages: cssselect, pyquery, GetOldTweets3
Successfully installed GetOldTweets3-0.0.11 cssselect-1.1.0 pyquery-1.4.1


In [3]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

import re
import nltk
nltk.download('stopwords')
from time import time
!pip install emoji
from emoji import demojize

#Preprocessing functions. Again the preprocessing functions in old_dataset_data_and_models can also be used
def preprocess(texts, quiet=False):
  start = time()
  # Lowercasing
  texts = texts.str.lower()

  # Remove special chars
  texts = texts.str.replace(r"(http|@)\S+", "")
  texts = texts.apply(demojize)
  texts = texts.str.replace(r"::", ": :")
  texts = texts.str.replace(r"’", "'")
  texts = texts.str.replace(r"[^a-z\':_]", " ")

  # Remove repetitions
  pattern = re.compile(r"(.)\1{2,}", re.DOTALL)
  texts = texts.str.replace(pattern, r"\1")

  # Transform short negation form
  texts = texts.str.replace(r"(can't|cannot)", 'can not')
  texts = texts.str.replace(r"n't", ' not')

  # Remove stop words
  stopwords = nltk.corpus.stopwords.words('english')
  stopwords.remove('not')
  stopwords.remove('nor')
  stopwords.remove('no')
  texts = texts.apply(
    lambda x: ' '.join([word for word in x.split() if word not in stopwords])
  )

  if not quiet:
    print("Time to clean up: {:.2f} sec".format(time() - start))

  return texts

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
Collecting emoji
[?25l  Downloading https://files.pythonhosted.org/packages/40/8d/521be7f0091fe0f2ae690cc044faf43e3445e0ff33c574eae752dd7e39fa/emoji-0.5.4.tar.gz (43kB)
[K     |████████████████████████████████| 51kB 1.7MB/s 
[?25hBuilding wheels for collected packages: emoji
  Building wheel for emoji (setup.py) ... [?25l[?25hdone
  Created wheel for emoji: filename=emoji-0.5.4-cp36-none-any.whl size=42176 sha256=e33ca33a86c00aa09fae3e78169eebcaeb177a8bbb9bb76fc9686d1286f76e12
  Stored in directory: /root/.cache/pip/wheels/2a/a9/0a/4f8e8cce8074232aba240caca3fade315bb49fac68808d1a9c
Successfully built emoji
Installing collected packages: emoji
Successfully installed emoji-0.5.4


In [0]:
import GetOldTweets3 as got
import pandas as pd
import numpy as np
from tensorflow.keras.layers import Input, Embedding, SpatialDropout1D, LSTM
from tensorflow.keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D
from tensorflow.keras.layers import Bidirectional, Conv1D, Dense, concatenate
from tensorflow.keras.models import Model

#function for prediction of emotion of tweets of specific student
def predict_emotion_for_student(username,date,write="true"):
  tweetCriteria = got.manager.TweetCriteria().setUsername(username).setSince(date).setEmoji("unicode")
  tweets = got.manager.TweetManager.getTweets(tweetCriteria)

  if len(tweets)>0:

    data = []
    for tweet in tweets:
        data.append([tweet.id, tweet.username, tweet.text])
    df = pd.DataFrame(data=data, columns=['id', 'user', 'text'])
    #print(str(len(data)) + ' tweets')

    tokenizer_path = Path('tokenizer.pickle').resolve()
    with tokenizer_path.open('rb') as file:
      tokenizer = pickle.load(file)
    
    input_dim = min(tokenizer.num_words, len(tokenizer.word_index) + 1)
    num_classes = 4
    embedding_dim = 100
    input_length = 142
    lstm_units = 128
    lstm_dropout = 0.1
    recurrent_dropout = 0.1
    spatial_dropout=0.2
    filters=64
    kernel_size=4
    #print(input_dim)
    #print(len(tokenizer.word_index))

    input_layer = Input(shape=(input_length,))
    output_layer = Embedding(
    input_dim=input_dim,
    output_dim=50,
    input_shape=(input_length,)
    )(input_layer)

    output_layer = SpatialDropout1D(spatial_dropout)(output_layer)

    output_layer = Bidirectional(
    LSTM(128, return_sequences=True,
        dropout=lstm_dropout, recurrent_dropout=recurrent_dropout)
    )(output_layer)
    output_layer = Conv1D(filters, kernel_size=kernel_size, padding='valid',
                        kernel_initializer='glorot_uniform')(output_layer)

    avg_pool = GlobalAveragePooling1D()(output_layer)
    max_pool = GlobalMaxPooling1D()(output_layer)
    output_layer = concatenate([avg_pool, max_pool])

    output_layer = Dense(num_classes, activation='softmax')(output_layer)

    model = Model(input_layer, output_layer)
    model_weights_path = Path('model_weights88.h5').resolve()
    model.load_weights(model_weights_path.as_posix())

    encoder_path = Path('encoder.pickle').resolve()
    with encoder_path.open('rb') as file:
      encoder = pickle.load(file)

    cleaned_data = preprocess(df.text)
    sequences = [text.split() for text in cleaned_data]
    list_tokenized = tokenizer.texts_to_sequences(sequences)
    x_data = pad_sequences(list_tokenized, maxlen=100)

    y_pred = model.predict(x_data)

    #print(df.text)
    print(encoder.inverse_transform(y_pred[0:]))
    predictions = encoder.inverse_transform(y_pred[0:])

    rs_df = pd.DataFrame({"text":df.text, "label":predictions})
    if write:
      print(rs_df)
    return predictions
  
  lis = []
  return lis


In [5]:
pr = predict_emotion_for_student("mansi_sampat","2020-05-18")

Time to clean up: 0.18 sec
['joy' 'joy' 'joy' 'joy' 'anger' 'fear' 'anger' 'fear' 'joy']
                                                text  label
0  This is the same story everywhere. We SVNIT st...    joy
1               #twinglish #grammarhelp #writebetter    joy
2  Another attempt to improve some vocal skills🤞😅...    joy
3  I have been working on improving my verbal ski...    joy
4  The is irksome to witness the proclivity of th...  anger
5  Since our insti is so adamant about conducting...   fear
6  Additionally, our syllabus which would've take...  anger
7  Sir, I agree to the fact that students build u...   fear
8  We demand considering our point of view before...    joy


In [0]:
#Fetching of SVNIT student twitter handles
emotions = []
users = []
file = open('twitter_handles', 'r')
for line in file:
  users.append(line[:len(line)-1])

for user in users:
  print(user)
  em = predict_emotion_for_student(user,"2020-05-20",False)
  emotions.extend(em)

Maulikchevli
AdityaHirapara
Time to clean up: 0.00 sec
['joy']
KShah2207
therohitramesh
Time to clean up: 0.01 sec
['fear' 'joy' 'joy' 'joy' 'joy' 'sadness' 'sadness' 'joy' 'fear' 'joy'
 'anger']
DhavalR33750364
yashdabhade19
Aaditya27662146
ThampiAtheesh
shameel_zeshan
18raj98
Time to clean up: 0.01 sec
['sadness' 'joy' 'joy' 'joy' 'fear' 'joy' 'joy' 'joy' 'joy' 'joy' 'joy'
 'fear' 'joy' 'joy' 'joy' 'joy' 'joy' 'anger' 'joy']
MitanshuThakore
vaibhavgeek
Time to clean up: 0.01 sec
['joy' 'anger' 'anger' 'joy' 'fear' 'joy' 'joy' 'joy']
AniketSur4
saneJokerrr
Time to clean up: 0.01 sec
['joy' 'fear' 'joy' 'joy' 'joy' 'joy' 'joy' 'joy' 'joy' 'joy' 'joy' 'joy'
 'joy' 'anger' 'anger' 'anger' 'fear']
Sourab98
ShyamSB21598
vin_sonkar
Time to clean up: 0.00 sec
['joy' 'joy']
Yashpatels2
neetashpataria
yash109916
Rogink13
Time to clean up: 0.00 sec
['fear']
anasabbas
Time to clean up: 0.01 sec
['joy' 'joy' 'joy' 'joy' 'fear' 'joy' 'joy' 'fear']
ajayns_
Time to clean up: 0.01 sec
['fear' 'joy' '

In [0]:
number_joy = 0
number_sadness = 0
number_anger = 0
number_fear = 0
for emotion in emotions:
  if emotion == "joy":
    number_joy = number_joy + 1
  elif emotion == "sadness":
    number_sadness = number_sadness + 1
  elif emotion == "anger":
    number_anger = number_anger + 1
  elif emotion == "fear":
    number_fear = number_fear + 1 

In [0]:
#Percentage of emotions of SVNIT students
print("Anger = " + str(number_anger/len(emotions)))
print("Fear = " + str(number_fear/len(emotions)))
print("Joy = " + str(number_joy/len(emotions)))
print("Sadness = " + str(number_sadness/len(emotions)))

Anger = 0.1341772151898734
Fear = 0.15443037974683543
Joy = 0.6658227848101266
Sadness = 0.04556962025316456
