# This file extracts tweets about a stock for a given period.
# It then collects the details needed for the sentiment analysis
# and saves them to the file tweet_details.csv.
# It also gets the corresponding data for the same stock and period from a financial institution such as Yahoo Finance / Nasdaq
# and saves it to a file.

In [137]:
import tweepy as twitter
import os
import dotenv

import pandas as pd

from dotenv import load_dotenv

In [None]:
# Get the API access keys from the .env file.

load_dotenv()  # take environment variables from .env.
config = dotenv.dotenv_values(".env")   # config = {"USER": "foo", "EMAIL": "foo@example.org"}

# Report only WHICH settings were found. Printing `config` itself would write
# the API keys and tokens into the notebook output / terminal scrollback.
print("Loaded settings:", sorted(config))

# keys and tokens from the Twitter Dev Console
# (config.get returns None for any name missing from .env)
consumer_key = config.get('consumer_key')
consumer_secret = config.get('consumer_secret')
access_token = config.get('access_token')
access_token_secret = config.get('access_token_secret')
bearer_token = config.get('bearer_token')


Set up access info for Twitter


In [139]:
# Auth handler built from the consumer key/secret, then given the
# access token pair.
auth = twitter.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# NOTE(review): this assigns the access token again right after
# set_access_token(); it looks redundant -- confirm before removing.
auth.access_token = access_token
# Handle used by getTweets() and main() for API.search_tweets.
API = twitter.API(auth)

# Handle used by getTweetDetails() for client.search_recent_tweets;
# it is constructed from the bearer token only.
client = twitter.Client(bearer_token)
print(client)


<tweepy.client.Client object at 0x7fa44aacf4c0>


In [140]:
# function to get tweet details
def getTweetDetails(query='TSLA',
                    start_time='2022-04-11T00:00:00Z',
                    end_time='2022-04-17T00:00:00Z',
                    filename='tweet_details.csv',
                    max_results=100):
    """Search recent tweets for a query and save per-tweet details to CSV.

    Calls ``client.search_recent_tweets`` with the ``author_id`` expansion,
    pairs every returned tweet with its author record, and writes one row
    per tweet to ``filename``.

    Args:
        query: Search query passed to the API.
        start_time: Start of the search window, as a timestamp string in
            the same format as the default.
        end_time: End of the search window, same format as ``start_time``.
        filename: Path of the CSV file to write.
        max_results: Passed through as ``max_results`` to the search call.

    Returns:
        None. The result is the CSV file written to ``filename``.

    NOTE(review): per the Twitter API documentation the recent-search
    endpoint only covers roughly the last seven days, so the default dates
    will need replacing with a current window -- confirm against the docs.
    """
    tweets = client.search_recent_tweets(
        query=query,
        tweet_fields=['context_annotations', 'author_id',
                      'created_at', 'entities', 'public_metrics'],
        user_fields=['id', 'username', 'public_metrics', 'verified'],
        expansions='author_id',
        start_time=start_time,
        end_time=end_time,
        max_results=max_results)

    # The expanded author records arrive separately from the tweets; index
    # them by id so each tweet can look up its author. Tolerate a response
    # that carries no "users" entry at all.
    includes = tweets.includes or {}
    users = {u["id"]: u for u in includes.get('users', [])}

    # The first id column holds the id reported on the tweet, the second
    # the id reported on the matched user record. Column labels must be
    # unique, so they get distinct names.
    columns = ['tweet_created_at',
               'tweet_author_id',
               'tweet_entities',
               'tweet_public_metrics',
               'tweet_text',
               'tweet_annotations',
               'tweet_user_id',
               'tweet_user_name',
               'tweet_user_metrics',
               'tweet_user_verified']

    rows = []
    # tweets.data is empty/None when the search matched nothing.
    for tweet in tweets.data or []:
        # Every field is computed fresh per tweet so that a tweet without
        # annotations, or without a matching author record, never inherits
        # values from the previous row.
        user = users.get(tweet.author_id)
        has_user = user is not None
        rows.append([
            tweet.created_at,
            tweet.author_id,
            tweet.entities,
            tweet.public_metrics,
            tweet.text,
            tweet.context_annotations or [],
            user.id if has_user else None,
            user.name if has_user else None,
            user.public_metrics if has_user else None,
            user.verified if has_user else None,
        ])

    # Build the frame in one go instead of growing it row by row.
    db = pd.DataFrame(rows, columns=columns)

    # we will save our database as a CSV file.
    db.to_csv(filename)


In [141]:

# function to perform data extraction
def getTweets(words, date_since, numtweet):
    """Search tweets matching ``words`` and save user/tweet details to CSV.

    Each tweet found is printed with ``printtweetdata`` and stored as one
    row of 'tweet__cursor_details.csv'.

    Args:
        words: Search terms.
        date_since: Earliest date wanted, as "yyyy-mm-dd". When empty, no
            date restriction is added to the query.
        numtweet: Maximum number of tweets to fetch.

    Returns:
        None. The result is the CSV file and the printed output.
    """
    columns = ['username',
               'description',
               'location',
               'following',
               'followers',
               'totaltweets',
               'retweetcount',
               'text',
               'hashtags']

    # `since_id` expects a tweet ID, not a date, so a date passed there does
    # not restrict the search by time. The date limit belongs in the query
    # itself as a `since:` search operator.
    query = f"{words} since:{date_since}" if date_since else words

    # Cursor pages through the search results; .items() caps how many
    # tweets are yielded in total.
    tweets = twitter.Cursor(API.search_tweets,
                            query, lang="en",
                            tweet_mode='extended').items(numtweet)

    rows = []
    for i, tweet in enumerate(tweets, start=1):
        user = tweet.user

        # Only retweets carry a retweeted_status attribute; for those take
        # the text of the original tweet, otherwise the tweet's own text.
        try:
            text = tweet.retweeted_status.full_text
        except AttributeError:
            text = tweet.full_text

        hashtext = [tag['text'] for tag in tweet.entities['hashtags']]

        ith_tweet = [user.screen_name, user.description,
                     user.location, user.friends_count,
                     user.followers_count, user.statuses_count,
                     tweet.retweet_count, text, hashtext]
        rows.append(ith_tweet)

        # Function call to print tweet data on screen
        printtweetdata(i, ith_tweet)

    # Build the frame in one go instead of growing it row by row.
    db = pd.DataFrame(rows, columns=columns)
    filename = 'tweet__cursor_details.csv'

    # we will save our database as a CSV file.
    db.to_csv(filename)



In [142]:
def main():
    """Run both extractions and dump a raw $AAPL search to a text file.

    Writes the CSV files produced by getTweetDetails() and getTweets(),
    then writes one line per $AAPL search result to
    ./datasets/twitter_api_search_query_results.txt.
    """
    getTweetDetails()

    aapl_tweets = API.search_tweets(q = '$AAPL')

    # Enter Twitter Stock to search for
    words = "TSLA"
    # Enter the date since which tweets are required, as "yyyy-mm-dd"
    date_since = "2022-03-01"

    # number of tweets you want to extract in one run
    numtweet = 100
    getTweets(words, date_since, numtweet)

    # SaveData
    # what is current directory?
    cwd = os.getcwd()
    print("Current directory = " + cwd)

    # save it to a file in the datasets directory, creating the directory
    # first so a fresh checkout does not fail on open()
    out_path = r"./datasets/twitter_api_search_query_results.txt"
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    # The context manager closes the file even if a write fails; the explicit
    # UTF-8 encoding keeps emoji and other non-ASCII tweet text from raising
    # on platforms whose default encoding is not UTF-8.
    with open(out_path, "w", encoding="utf-8") as file2:
        # write all tweets to the file
        for tweet in aapl_tweets:
            file2.write("{}\n".format(tweet))
    
# function to display data of each tweet
def printtweetdata(n, ith_tweet):
    """Print one tweet record as labelled lines, preceded by a blank line.

    ``n`` is the number shown in the "Tweet n:" heading. ``ith_tweet`` is
    read at positions 0 through 8, in the order of the labels below.
    """
    labels = (
        "Username",
        "Description",
        "Location",
        "Following Count",
        "Follower Count",
        "Total Tweets",
        "Retweet Count",
        "Tweet Text",
        "Hashtags Used",
    )

    print()
    print(f"Tweet {n}:")
    # Index explicitly (rather than zip) so a record that is too short
    # still fails at the first missing position.
    for position, label in enumerate(labels):
        print(f"{label}:{ith_tweet[position]}")
        

# Run the extraction only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    # calling main function
    main()



Tweet 1:
Username:sandyorgn
Description:retired veteran/day and swing trader#livingthedream #TTGlifetime
Location:Sandy, OR
Following Count:255
Follower Count:72
Total Tweets:1557
Retweet Count:2
Tweet Text:*HUGE PROBLEM* 

THIS IS NOT 2020 or 2021's STOCK MARKET!
 
Traders Need To Be Aware of Top 10 Trading Mistakes to Avoid w/ This Week's Stock Market &amp; Trading Predictions LIVE @ 8:30pm ET 4/17/22 on YouTube!
 
Watch Live: https://t.co/Vu33l7GXrK

#ElonMuskBuyTwitter $twtr $tsla https://t.co/jWmxubgDQV
Hashtags Used:[]

Tweet 2:
Username:frozenlovedmeso
Description:
Location:
Following Count:774
Follower Count:94
Total Tweets:25521
Retweet Count:160
Tweet Text:Between April 1 and April 5, Elon Musk lost an important motion in federal court—in secret. As you can see, Document 387 is missing from the docket. As of today, we know it's a court order from Judge Chen that was lodged under seal. $TSLA https://t.co/MyBxnrpNl8
Hashtags Used:[]

Tweet 3:
Username:jtaylor668
Description:
L