In [None]:
from twitter_config import *
import tweepy as tw
import datetime
import pandas as pd
from tqdm import tqdm, notebook

#### Setting up API 

In [None]:
# Build the OAuth handler from the four credential values; these names are not
# defined in this notebook (presumably supplied by the star-import of
# twitter_config -- verify).
auth= tw.OAuthHandler(consumer_key,consumer_secret)
auth.set_access_token(access_token,access_token_secret)

# Shared API client used by the later cells. wait_on_rate_limit=True is meant to
# make the client pause instead of failing when a rate limit is hit
# (see the tweepy docs to confirm the exact behaviour).
api = tw.API(auth, wait_on_rate_limit=True)

In [None]:
# Date window for the search query: a fixed lower bound and today's date as the
# upper bound. `yesterday` is only shown for reference in the cell output.
# NOTE(review): the standard search endpoint may only reach back a few days, in
# which case such an early start date has no practical effect -- confirm.
start_date = '2021-01-21'
today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1)
(today, yesterday)

In [None]:
# Search for English #nft tweets between start_date (since:) and today (until:).
# Only verified accounts are kept to limit bot traffic, and retweets are
# filtered out so repeated content does not inflate the sentiment.
search_query = f"#nft -filter:retweets filter:verified since:{start_date} until:{today}"
tweets_list = tw.Cursor(
    api.search_tweets,
    q=search_query,
    tweet_mode='extended',
    lang='en',
).items()

In [None]:
# Drain the cursor into a plain list (with a progress bar) so the statuses can
# be re-processed later without querying the API again. The list is created
# first and filled incrementally, so anything fetched before an error is kept.
tweets_copy = []
tweets_copy.extend(tqdm(tweets_list))

In [None]:
# Flatten every collected status into one record and build the DataFrame once.
# Appending to a DataFrame inside the loop is quadratic, and `DataFrame.append`
# no longer exists in pandas >= 2.0.
TWEET_COLUMNS = [
    'user_name', 'user_location', 'user_description', 'user_created',
    'user_followers', 'user_friends', 'user_favourites', 'user_verified',
    'date', 'text', 'hashtags', 'source', 'is_retweet',
]

tweet_records = []
for tweet in tqdm(tweets_copy):
    # Hashtag texts attached to the status (empty list when there are none).
    entities = getattr(tweet, "entities", None) or {}
    hashtags = [hashtag["text"] for hashtag in entities.get("hashtags", [])]

    # The search was issued with tweet_mode='extended', so the status normally
    # already carries `full_text`; only call the API again when it is missing.
    text = getattr(tweet, "full_text", None)
    if text is None:
        try:
            text = api.get_status(id=tweet.id, tweet_mode='extended').full_text
        except tw.TweepyException as exc:
            # Keep the row with a missing text instead of silently reusing the
            # text of the previous tweet (or failing on an undefined name).
            print(f"could not fetch full text for tweet {tweet.id}: {exc}")

    tweet_records.append({
        'user_name': tweet.user.name,
        'user_location': tweet.user.location,
        'user_description': tweet.user.description,
        'user_created': tweet.user.created_at,
        'user_followers': tweet.user.followers_count,
        'user_friends': tweet.user.friends_count,
        'user_favourites': tweet.user.favourites_count,
        'user_verified': tweet.user.verified,
        'date': tweet.created_at,
        'text': text,
        'hashtags': hashtags or None,
        'source': tweet.source,
        # NOTE(review): `retweeted` may describe whether the authenticating
        # account retweeted the status rather than whether the status is itself
        # a retweet -- confirm before relying on this column.
        'is_retweet': tweet.retweeted,
    })

# Passing `columns` keeps the schema stable even when no tweets were collected.
tweets_df = pd.DataFrame(tweet_records, columns=TWEET_COLUMNS)

In [None]:
# Show the freshly built frame as the cell output for a quick visual check.
tweets_df

In [None]:
# Time span covered by the newly collected tweets. Series.min()/max() are used
# instead of the builtins: they are vectorised, skip missing timestamps, and do
# not raise on an empty column.
print('earliest datetime: ',tweets_df["date"].min(), '\n', 'latest datetime: ', tweets_df["date"].max())

In [None]:
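## initial save
# One-off bootstrap: uncomment and run only when the CSV does not exist yet;
# the incremental-load cells below read from and overwrite this same file.
# tweets_df.to_csv("./dataset/nft_tweets_20220527.csv", index=False)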

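#### Incremental load

Merge the newly collected tweets into the previously saved CSV, drop duplicates, and write the result back.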

In [None]:
# Load the tweets saved by earlier runs; `date` is parsed as a datetime so it
# can be compared with the newly collected tweets.
tweets_old_df = pd.read_csv("./dataset/nft_tweets_20220527.csv", parse_dates=['date'])
print(f"past tweets: {tweets_old_df.shape}")
# Series.min()/max() instead of the builtins: rows whose date failed to parse
# (NaT) would otherwise make the result depend on row order.
print('earliest datetime: ',tweets_old_df["date"].min(), '\n', 'latest datetime: ', tweets_old_df["date"].max())

In [None]:
# Stack the previously saved tweets and the new batch row-wise, then report
# how many rows each part contributes.
tweets_all_df = pd.concat([tweets_old_df, tweets_df])
print(f"new tweets: {len(tweets_df)} past tweets: {len(tweets_old_df)} all tweets: {len(tweets_all_df)}")

In [None]:
# Rows with the same author, timestamp and text are treated as the same tweet;
# only the first occurrence is kept. The frame is modified in place.
tweets_all_df.drop_duplicates(
    subset=["user_name", "date", "text"],
    keep="first",
    inplace=True,
)
print(f"all tweets: {tweets_all_df.shape}")

In [None]:
# Write the merged, de-duplicated tweets back to the CSV that the incremental
# load reads from (overwrites the file; the index is not saved).
tweets_all_df.to_csv(
    path_or_buf="./dataset/nft_tweets_20220527.csv",
    index=False,
)

In [None]:
# Show the merged frame as the cell output for a final visual check.
tweets_all_df

In [None]:
# Time span covered by the merged dataset. Series.min()/max() are used instead
# of the builtins: they are vectorised, skip missing timestamps (NaT), and do
# not raise on an empty column.
print('earliest datetime: ',tweets_all_df["date"].min(), '\n', 'latest datetime: ', tweets_all_df["date"].max())