# Fetch Tweets

Download and save tweets that match a **query** value.

In [2]:
from dotenv import load_dotenv
from pathlib import Path

# Load environment variables from the `.env` file that sits one directory
# above this notebook. The call is kept as the cell's last expression so its
# return value is displayed as the cell output.
env_path = Path('../.env').resolve()
load_dotenv(env_path)

True

## API access

First, we connect to the Twitter API using the credentials loaded from the `.env` file.

In [3]:
import os

In [4]:
# Read the four Twitter API credentials from the process environment in one
# pass. os.getenv yields None for any variable that is not set.
consumer_key, consumer_secret, access_token, access_token_secret = (
    os.getenv(name)
    for name in (
        "CONSUMER_KEY",
        "CONSUMER_SECRET",
        "ACCESS_TOKEN",
        "ACCESS_TOKEN_SECRET",
    )
)

In [5]:
from tweepy import OAuthHandler, API, TweepError

In [6]:
# Build the tweepy client from the credentials read from the environment:
# the consumer key/secret go to the OAuth handler, the access token pair is
# attached to it, and the handler is then passed to the API wrapper.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = API(auth)
# NOTE(review): nothing in this cell visibly sends a request, so this message
# presumably only confirms that the client object was built - confirm whether
# invalid credentials are detected here or only on the first API call.
print('Successfully connected to the Twitter API.')

Successfully connected to the Twitter API.


## Search Tweets

Now we can define our query and search for the tweets containing it.

- **query**: *hashtag* or *emoji* that will be used to fetch the tweets
- **max_requests**: Maximum number of requests to the API.
    - Restriction: 180 requests / 15 min window

In [7]:
# Search term: a hashtag or an emoji alias (aliases are expanded with emojize
# before the search is issued).
query: str = '#witty'
# Cap on the number of search requests; the API allows 180 per 15-minute window.
max_requests: int = 180

In [8]:
# Converts aliases to the real emoji representation (e.g. :thumbs_up: => 👍)

from emoji import emojize

In [9]:
# Page backwards through the search results (newest first), collecting up to
# 100 tweets per request until the API returns an empty page, an error occurs,
# or `max_requests` successful requests have been made.
#
# Results are accumulated in `searched_tweets`; on a TweepError the error is
# printed and whatever was collected so far is kept.
q = emojize(query) + ' -filter:retweets'  # expand emoji aliases, drop retweets
searched_tweets = []
last_id = -1  # id of the oldest tweet fetched so far; -1 means "no page yet"
request_count = 0
while request_count < max_requests:
    search_kwargs = dict(q=q, lang='en', count=100, tweet_mode='extended')
    if last_id != -1:
        # max_id is inclusive, so subtract 1 to avoid re-fetching the oldest
        # tweet of the previous page. It is deliberately omitted on the first
        # request instead of sending a meaningless negative id.
        search_kwargs['max_id'] = str(last_id - 1)
    try:
        new_tweets = api.search(**search_kwargs)
    except TweepError as e:
        # Typically a rate-limit or authentication failure: report and stop.
        print(e)
        break
    if not new_tweets:
        # Empty page: no older tweets match the query.
        break
    searched_tweets.extend(new_tweets)
    last_id = new_tweets[-1].id
    request_count += 1

## Format and save

Format the API data into the desired structure and save it as a `.csv` file.

In [10]:
import pandas as pd

In [11]:
# Flatten every fetched tweet into an [id, date, user, text] row, build the
# DataFrame from those rows, and report how many tweets were collected.
data = [
    [status.id, status.created_at, status.user.screen_name, status.full_text]
    for status in searched_tweets
]
df = pd.DataFrame(data=data, columns=['id', 'date', 'user', 'text'])
print(f'{len(data)} {query} tweets')

216 #witty tweets


In [12]:
# Preview the first five rows to sanity-check the columns before saving.
df.head(n=5)

Unnamed: 0,id,date,user,text
0,1338550821454295055,2020-12-14 18:25:59,BlackCloud1966,You FAILED on this vaccine hype.\n#Witty\nSo w...
1,1338526755410284545,2020-12-14 16:50:21,paraelwhatsapp,ornaments via /r/funny https://t.co/Y1HiXq2gd9...
2,1338526747130814467,2020-12-14 16:50:19,paraelwhatsapp,Between Two Ferns bloobers are hilarious and w...
3,1338511666745630730,2020-12-14 15:50:24,paraelwhatsapp,Husky (metal) Breakdown via /r/funny https://t...
4,1338511660412231683,2020-12-14 15:50:22,paraelwhatsapp,"Once you see Cookie Monster, you can’t unsee i..."


In [13]:
# Write the collected tweets to `<query>.csv` inside ../datasets/tweepy
# (resolved to an absolute path) and report the target directory.
PATH = Path('../datasets/tweepy').resolve()
# to_csv does not create missing folders, so make sure the target exists.
PATH.mkdir(parents=True, exist_ok=True)
# NOTE(review): the file name is the raw query; queries containing characters
# that are not valid in file names on the host OS (e.g. ':' in emoji aliases
# on Windows) may fail here - confirm whether sanitising is wanted.
filename = query + '.csv'
# index=False keeps the DataFrame's positional index out of the file.
df.to_csv(PATH / filename, index=False)
print('Saved under: "' + PATH.as_posix() + '"')

Saved under: "C:/Users/leena/Desktop/Python Projects/emotion-from-tweet/datasets/tweepy"
