In [1]:
import os
import time

import pandas as pd
import tweepy

In [10]:
from dotenv import dotenv_values

# Load the Twitter API credentials from the local .env file so that no
# secret is hard-coded in the notebook.
env_values = dotenv_values(".env")

# Fail early with a readable message. Without this check a missing .env
# file or an absent variable only shows up as a bare KeyError, and a key
# that is declared without a value would be handed to tweepy as None.
missing_credentials = [
    name
    for name in ("API_KEY", "API_SECRET", "ACCESS_TOKEN", "ACCESS_SECRET")
    if not env_values.get(name)
]
if missing_credentials:
    raise KeyError(
        "Missing Twitter credentials in .env: {}".format(", ".join(missing_credentials))
    )

# Twitter API credentials
API_KEY = env_values["API_KEY"]
API_SECRET = env_values["API_SECRET"]
ACCESS_TOKEN = env_values["ACCESS_TOKEN"]
ACCESS_SECRET = env_values["ACCESS_SECRET"]

# Authorize against Twitter and build the tweepy client; wait_on_rate_limit
# is enabled so the client waits out rate limits instead of raising.
auth = tweepy.OAuthHandler(API_KEY, API_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
api = tweepy.API(auth, wait_on_rate_limit=True)

In [3]:
# Module-level placeholder kept from the original cell; the function below
# works on its own local variables and never reads or writes this list.
tweets = []

def username_tweets_to_csv(username, count):
    """Scrape a user's most recent tweets and save them as a CSV file.

    The tweets are written to '<username>-tweets.csv' in the current working
    directory with the columns 'Datetime', 'Tweet Id' and 'Text'.

    Args:
        username: Twitter screen name of the account to scrape.
        count: Maximum number of most-recent tweets to fetch.

    Returns:
        None. If anything goes wrong the error is printed, the function
        pauses for 3 seconds and returns without raising.
    """
    try:
        # Page through the user's timeline. screen_name is passed explicitly
        # because the generic `id` argument is ambiguous when a valid user ID
        # is also a valid screen name.
        timeline = tweepy.Cursor(api.user_timeline, screen_name=username).items(count)

        # Pull the fields of interest out of every tweet
        rows = [[tweet.created_at, tweet.id, tweet.text] for tweet in timeline]

        # Add or remove columns here when changing the fields collected above
        tweets_df = pd.DataFrame(rows, columns=['Datetime', 'Tweet Id', 'Text'])

        # Persist the result as CSV
        tweets_df.to_csv('{}-tweets.csv'.format(username), sep=',', index=False)

    except Exception as e:
        # Exception (not BaseException) so that interrupting the kernel still
        # stops the cell instead of being swallowed here.
        print('failed on_status,', str(e))
        time.sleep(3)

In [4]:
# Account to scrape; the same name is used to build the output CSV file name
username = 'jack'
# Upper bound on how many of the account's most recent tweets are pulled
count = 150

# Export up to `count` of that user's latest tweets to a CSV file
username_tweets_to_csv(username=username, count=count)

## Query by Text Search
The function below runs a text-search query and then writes the results of that query to a CSV file using pandas.

In [5]:
# Module-level placeholder kept from the original cell; the function below
# works on its own local variables and never reads or writes this list.
tweets = []

def text_query_to_csv(text_query, count):
    """Search Twitter for a text query and save the matching tweets as CSV.

    Tweets whose text has already been seen in the same result set are
    skipped, so the file holds one row per distinct tweet text. The output is
    written to 'data/<text_query>-tweets.csv' with the columns 'Datetime',
    'Tweet Id' and 'Text'; the 'data' directory is created when missing.

    Args:
        text_query: Search string passed to the API as the `q` parameter;
            it is also used verbatim in the output file name.
        count: Maximum number of tweets to request from the search.

    Returns:
        None. If anything goes wrong the error is printed, the function
        pauses for 5 seconds and returns without raising.
    """
    try:
        # NOTE(review): Tweepy 4.x renamed API.search to API.search_tweets;
        # adjust this call if the installed tweepy version is upgraded.
        results = tweepy.Cursor(api.search, q=text_query).items(count)

        seen_texts = set()
        tweets_list = []
        for tweet in results:
            text = tweet.text
            if text in seen_texts:
                # Duplicate text (e.g. the same retweet again): keep the first only
                continue
            seen_texts.add(text)
            tweets_list.append([tweet.created_at, tweet.id, text])

        # Add or remove columns here when changing the fields collected above
        tweets_df = pd.DataFrame(tweets_list, columns=['Datetime', 'Tweet Id', 'Text'])

        # Make sure the target directory exists; otherwise to_csv fails and
        # every export would be silently reduced to a printed error message.
        os.makedirs('data', exist_ok=True)
        tweets_df.to_csv('data/{}-tweets.csv'.format(text_query), sep=',', index=False)

    except Exception as e:
        # Exception (not BaseException) so that interrupting the kernel still
        # stops the cell instead of being swallowed here.
        print('failed on status,', str(e))
        time.sleep(5)

In [6]:

# Topics to search for; each one is exported to its own CSV file
search_texts = [
    'Reddit',
    'Twitter',
    'Soccer',
    'Basketball',
    'Football',
    'USA Election 2020',
    'Americans',
    'Music',
]

# Maximum number of tweets requested per topic
count = 150

for search_text in search_texts:
    text_query_to_csv(text_query=search_text, count=count)

In [9]:
# Reload one of the exported query results to sanity-check the CSV contents
df = pd.read_csv(
    'data/Soccer-tweets.csv',
    encoding='utf8',
)
df.head()

Unnamed: 0,Datetime,Tweet Id,Text
0,2021-11-05 07:33:25,1456525042104213509,RT @Pokemon_cojp: 『ポケモン ブリリアントダイヤモンド・シャイニングパール...
1,2021-11-05 07:33:23,1456525032461635586,RT @SportswaveAndre: PHOTO ~ 4 November 2021 p...
2,2021-11-05 07:33:21,1456525025264214022,@bellasoccer_ Huge W KCP Bella soccer
3,2021-11-05 07:33:14,1456524995719467011,RT @SportswaveAndre: PHOTO ~ #Soccer ~ Wins fo...
4,2021-11-05 07:33:11,1456524981546934274,RT @SportswaveAndre: PHOTO ~ #Soccer ~ Draws f...
