In [0]:
#imports of libraries required
import os
import re
import json
import pandas as pd
from pathlib import Path

In [0]:
#Loading of query_relations.json file. This file contains the hashtags and emojis associated with each emotion
relations_path = Path('query_relations.json')
with relations_path.open('r') as file:
    relations = json.load(file)

In [0]:
#If on colab, execute for installing GetOldTweets library
!pip install GetOldTweets3



In [0]:
emotion = 'sadness'
queries = [key for key, value in relations.items() if value == emotion]
print(queries)

[':crying_face:', '#sad', '#depressed', ':pensive_face:', ':loudly_crying_face:', ':broken_heart:', '#depression']


In [0]:
#Authentication tokens
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

In [0]:
#Tweepy related imports
from tweepy import OAuthHandler, API, TweepError

In [0]:
#Authentication for tweepy
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = API(auth)
print('Successfully connected to the Twitter API.')

Successfully connected to the Twitter API.


In [0]:
#query for fetching tweets (hashtag or emoji)
query = '#depression'
max_requests = 90

In [0]:
#If on colab, execute for installing emoji library
!pip install emoji
from emoji import emojize



In [0]:
#Fetching tweets related to the hashtag query for each year in the years list. Execute when query is hashtag
import GetOldTweets3 as got
years = ["2015","2013","2017","2018","2019"]
tweets = []
for year in years:
  print(year)
  tweetCriteria = got.manager.TweetCriteria().setQuerySearch(query + " lang:en").setSince(year + "-01-01").setUntil(year+"-12-31").setEmoji("unicode").setMaxTweets(2000)
  tweets_year = got.manager.TweetManager.getTweets(tweetCriteria)
  tweets.extend(tweets_year)

2015
2013
2017
2018
2019


In [0]:
#Fetching tweets related to the emoji query. Execute when query is an emoji
q = emojize(query) + ' -filter:retweets'
searched_tweets = []
last_id = -1
request_count = 0
while request_count < max_requests:
    print(request_count)
    try:
        new_tweets = api.search(q=q,
                                lang='en',
                                count=100,
                                max_id=str(last_id - 1),
                                tweet_mode='extended')
        if not new_tweets:
            break
        searched_tweets.extend(new_tweets)
        last_id = new_tweets[-1].id
        request_count += 1
    except TweepError as e:
        print(e)
        break

0
1


In [0]:
#Converted into dataframe
data = []
for tweet in tweets:
    data.append([tweet.id, tweet.username, tweet.text])
df = pd.DataFrame(data=data, columns=['id', 'user', 'text'])
print(str(len(data)) + ' ' + query + ' tweets')

10000 #depression tweets


In [0]:
df.head()

Unnamed: 0,id,user,text
0,682350216482258944,giantrat,2015: when music destroyed #mentalhealth stigm...
1,682350216436137985,AlignTherapyUK,2015: when music destroyed #mentalhealth stigm...
2,682349307853406208,singforgood,Be happy in 2016. Enjoy a special #HealthyMeSu...
3,682349295396323328,CreativityAust,Be happy in 2016. Enjoy a special #HealthyMeSu...
4,682348727126876160,facebookguide2,RT screwdepblog Tell me which cover looks bett...


In [0]:
#Save to tweepy folder
PATH = Path('/tweepy').resolve()
filename = query + '.csv'
df.to_csv(os.path.join(PATH, filename), index=None)
print('Saved under: "' + PATH.as_posix() + '"')