# This file holds the code for accessing Twitter, Reddit, and Yahoo Finance to build datasets.
#
# Twitter access code - get tweets for a single stock

In [104]:
import tweepy as twitter
import os
import dotenv

import pandas as pd

from dotenv import load_dotenv

In [None]:
# Load the Twitter API credentials from the local .env file.

load_dotenv()  # take environment variables from .env.
config = dotenv.dotenv_values(".env")   # config = {"USER": "foo", "EMAIL": "foo@example.org"}

# Show only WHICH settings were found. The values are API secrets and must
# never be echoed into notebook output, logs, or version control.
print(sorted(config))

# keys and tokens from the Twitter Dev Console
# (.get() yields None for any entry missing from .env)
consumer_key = config.get('consumer_key')
consumer_secret = config.get('consumer_secret')
access_token = config.get('access_token')
access_token_secret = config.get('access_token_secret')
bearer_token = config.get('bearer_token')


Set up access info for Twitter


In [106]:
# OAuth 1.0a handler built from the consumer key/secret; it backs the `API`
# object that getTweets() and main() use for search_tweets.
auth = twitter.OAuthHandler(consumer_key, consumer_secret)
# set_access_token stores both the token and its secret on the handler, so a
# separate assignment to auth.access_token afterwards is redundant.
auth.set_access_token(access_token, access_token_secret)
API = twitter.API(auth)

# Bearer-token client; getTweetDetails() uses it for search_recent_tweets.
client = twitter.Client(bearer_token=bearer_token)
print(client)


<tweepy.client.Client object at 0x7fa44b02ee90>


In [110]:
# function to get tweet details
def getTweetDetails(query='TSLA',
                    start_time='2022-04-11T00:00:00Z',
                    end_time='2022-04-17T00:00:00Z'):
    """Search recent tweets and print each tweet together with its author.

    Uses the module-level ``client`` to call ``search_recent_tweets`` with the
    ``author_id`` expansion, then prints the tweet fields followed by the
    matching user's details.

    Args:
        query: Search query sent to the endpoint.
        start_time: Lower bound of the search window (timestamp string).
        end_time: Upper bound of the search window (timestamp string).

    Returns:
        None. All output goes to stdout.

    Note:
        The defaults reproduce the original hard-coded window. The recent
        search endpoint only covers roughly the last seven days, so callers
        will normally need to pass their own ``start_time``/``end_time``.
    """
    tweets = client.search_recent_tweets(
        query=query,
        tweet_fields=['context_annotations', 'author_id',
                      'created_at', 'entities', 'public_metrics'],
        user_fields=['id', 'username', 'public_metrics', 'verified'],
        expansions='author_id',
        start_time=start_time,
        end_time=end_time,
        max_results=100,
    )

    # With no matches the response carries no data (and no 'users' include),
    # so bail out instead of failing on the lookups below.
    if not tweets.data:
        print("No tweets found for query:", query)
        return

    # Index the expanded user objects by id so each tweet can find its author.
    includes = tweets.includes or {}
    users = {user.id: user for user in includes.get('users', [])}

    for tweet in tweets.data:
        print("CREATED AT ====> ", tweet.created_at)
        print("AUTHOR Id ====> ", tweet.author_id)
        print("ENTITIES ====> ", tweet.entities)
        print("PUBLIC METRICS ====> ", tweet.public_metrics)
        print("TWEET TEXT ====> ", tweet.text)
        if tweet.context_annotations:
            print("TWEET ANNOTATIONS ====> ", tweet.context_annotations)
        print("======================USER DETAILS===================================")
        # .get() keeps a tweet whose author is missing from the expansion
        # from aborting the whole listing with a KeyError.
        user = users.get(tweet.author_id)
        if user is not None:
            print("USER ID ====> ", user.id)
            print("USER name ====> ", user.name)
            print("USER PUBLIC METRICS ====> ", user.public_metrics)
            print("USER VERIFIED ====> ", user.verified)
        print("================================NEXT TWEET====================================")






In [111]:

# function to perform data extraction
def getTweets(words, date_since, numtweet):
    """Search tweets matching ``words`` and save their details to a CSV file.

    Each tweet found is printed via ``printtweetdata`` and collected into a
    DataFrame that is written to ``tweet_details.csv`` in the current
    directory.

    Args:
        words: Search terms (e.g. a ticker symbol).
        date_since: Earliest date to include, as ``yyyy-mm-dd``. A falsy value
            disables the date filter.
        numtweet: Maximum number of tweets to fetch.

    Returns:
        The pandas DataFrame that was written to the CSV file.
    """
    columns = ['username',
               'description',
               'location',
               'following',
               'followers',
               'totaltweets',
               'retweetcount',
               'text',
               'hashtags']

    # `since_id` expects a numeric tweet id, not a date, so a date passed
    # there does not filter by time. The standard search API takes a lower
    # date bound through the `since:` query operator instead.
    query = f"{words} since:{date_since}" if date_since else words

    # Cursor handles pagination; .items() caps the total number of tweets.
    tweets = twitter.Cursor(API.search_tweets,
                            q=query, lang="en",
                            tweet_mode='extended').items(numtweet)

    rows = []
    for n, tweet in enumerate(tweets, start=1):
        row = _tweet_row(tweet)
        rows.append(row)
        # Function call to print tweet data on screen
        printtweetdata(n, row)

    # Build the frame once from all rows rather than growing it row by row.
    db = pd.DataFrame(rows, columns=columns)

    # we will save our database as a CSV file.
    db.to_csv('tweet_details.csv')
    return db


def _tweet_row(tweet):
    """Flatten one tweet object into the list of values stored per CSV row."""
    user = tweet.user

    # Retweets can be distinguished by a retweeted_status attribute; when it
    # is absent the tweet's own full_text is used.
    try:
        text = tweet.retweeted_status.full_text
    except AttributeError:
        text = tweet.full_text

    hashtext = [tag['text'] for tag in tweet.entities['hashtags']]

    return [user.screen_name, user.description,
            user.location, user.friends_count,
            user.followers_count, user.statuses_count,
            tweet.retweet_count, text, hashtext]



In [112]:
def main():
    """Run the sample collection flow.

    Prints details for the default getTweetDetails() search, saves TSLA tweets
    to CSV via getTweets(), and writes the raw ``$AAPL`` search results to
    ``./datasets/twitter_api_search_query_results.txt``.
    """
    getTweetDetails()

    aapl_tweets = API.search_tweets(q='$AAPL')

    # Twitter stock symbol to search for
    words = "TSLA"
    # Date since which the tweets are required, in "yyyy-mm-dd" format
    date_since = "2022-03-01"

    # number of tweets you want to extract in one run
    numtweet = 100
    getTweets(words, date_since, numtweet)

    # SaveData
    # what is current directory?
    cwd = os.getcwd()
    print("Current directory = " + cwd)

    # save it to a file in the datasets directory, creating the directory on
    # first run so open() does not fail with FileNotFoundError
    out_path = r"./datasets/twitter_api_search_query_results.txt"
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    # `with` closes the handle even if a write fails; explicit UTF-8 because
    # tweet text routinely contains non-ASCII characters.
    with open(out_path, "w", encoding="utf-8") as out_file:
        # write all tweets to the file, one per line
        out_file.writelines("{}\n".format(tweet) for tweet in aapl_tweets)
    
# function to display data of each tweet
def printtweetdata(n, ith_tweet):
    """Print one tweet's extracted fields, one labelled line per field.

    Args:
        n: Sequence number shown in the "Tweet n:" heading.
        ith_tweet: Indexable holding, in order: username, description,
            location, following count, follower count, total tweets,
            retweet count, tweet text and hashtags.
    """
    field_labels = (
        "Username",
        "Description",
        "Location",
        "Following Count",
        "Follower Count",
        "Total Tweets",
        "Retweet Count",
        "Tweet Text",
        "Hashtags Used",
    )

    # Blank separator line, then the heading for this tweet.
    print()
    print(f"Tweet {n}:")

    # Index explicitly (rather than zip) so a too-short record still raises
    # IndexError at the first missing field.
    for position, label in enumerate(field_labels):
        print(f"{label}:{ith_tweet[position]}")
        

if __name__ == "__main__":
    # Run the collection flow only when executed as a script, not on import.
    main()
    

CREATED AT ====>  2022-04-16 23:59:51+00:00
AUTHOR Id ====>  213115518
ENTITIES ====>  {'annotations': [{'start': 89, 'end': 91, 'probability': 0.4939, 'type': 'Organization', 'normalized_text': 'CCP'}, {'start': 104, 'end': 108, 'probability': 0.622, 'type': 'Organization', 'normalized_text': 'Tesla'}, {'start': 113, 'end': 120, 'probability': 0.9217, 'type': 'Place', 'normalized_text': 'Shanghai'}], 'mentions': [{'start': 3, 'end': 16, 'username': 'passthebeano', 'id': '2693713662'}, {'start': 18, 'end': 28, 'username': 'wolfejosh', 'id': '15370019'}]}
PUBLIC METRICS ====>  {'retweet_count': 1, 'reply_count': 0, 'like_count': 0, 'quote_count': 0}
TWEET TEXT ====>  RT @passthebeano: @wolfejosh Few believe this will happen. That said, in my opinion, the CCP only wooed Tesla to Shanghai to sample the sec…
TWEET ANNOTATIONS ====>  [{'domain': {'id': '45', 'name': 'Brand Vertical', 'description': 'Top level entities that describe a Brands industry'}, 'entity': {'id': '781972125171060736',