# In [None]:
import tweepy           
import pandas as pd     
import numpy as np
import time
from datetime import datetime


def twitter_setup():
    """
    Build and return an authenticated Tweepy API client.

    The four credential strings below are empty placeholders and have to be
    filled in before the client can be used.

    Returns:
        The ``tweepy.API`` object created from the OAuth handler, configured
        with ``wait_on_rate_limit``, ``wait_on_rate_limit_notify`` and
        ``compression`` all switched on.
    """
    # Tweepy access codes (consumer pair, then access pair)
    consumer_key, consumer_secret = '', ''
    access_token, access_secret = '', ''

    # OAuth handler carrying both the application and the user credentials
    handler = tweepy.OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_secret)

    # options forwarded to the API client
    api_options = {
        'wait_on_rate_limit': True,
        'wait_on_rate_limit_notify': True,
        'compression': True,
    }
    return tweepy.API(handler, **api_options)


# creates an extractor object
extractor = twitter_setup()

# hashtag to get tweets
hashtag = "#8M"

# final time of extraction
stop_time = datetime(2019, 5, 9, 10, 55, 0)

# column order of the final dataframe / .csv file
TWEET_COLUMNS = [
    'tweets',
    'tweet_long',
    'id',
    'created_date',
    'source',
    'likes',
    'RTs',
    'language',
    'place',
    'user_id',
    'user_name',
    'user_description',
    'followers',
    'followings',
    'user_lists_member',
    'user_total_favourites_count',
    'user_statuses_count',
    'user_created_account',
    'user_location',
    'user_lang',
]


def _tweets_to_frame(tweets):
    """
    Convert one batch of search results into a dataframe.

    Each element of ``tweets`` is read through the attributes used below
    (``text``, ``id``, ``created_at``, ..., and the nested ``user`` object).
    The returned dataframe has one row per tweet and its columns follow
    ``TWEET_COLUMNS``; an empty batch yields an empty dataframe with the
    same columns.
    """
    rows = [
        {
            'tweets': tweet.text,
            'tweet_long': len(tweet.text),
            'id': tweet.id,
            'created_date': tweet.created_at,
            'source': tweet.source,
            'likes': tweet.favorite_count,
            'RTs': tweet.retweet_count,
            'language': tweet.lang,
            'place': tweet.place,
            'user_id': tweet.user.id,
            'user_name': tweet.user.name,
            'user_description': tweet.user.description,
            'followers': tweet.user.followers_count,
            'followings': tweet.user.friends_count,
            'user_lists_member': tweet.user.listed_count,
            'user_total_favourites_count': tweet.user.favourites_count,
            'user_statuses_count': tweet.user.statuses_count,
            'user_created_account': tweet.user.created_at,
            'user_location': tweet.user.location,
            'user_lang': tweet.user.lang,
        }
        for tweet in tweets
    ]
    return pd.DataFrame(rows, columns=TWEET_COLUMNS)


# some variables to initialize
tweets_replies = []   # raw result set of every search, in request order
df_list_final = []    # one dataframe per non-empty search result
i = 0                 # number of searches completed so far
count = 0             # number of tweets collected so far
since_id = None       # highest tweet id seen; None until the first non-empty result

while datetime.now() < stop_time:

    # parameters of the search: keywords, language, retweets flag, etc.
    # NOTE(review): include_rts / exclude_replies are forwarded unchanged; they
    # do not appear among the documented search parameters - confirm they have
    # any effect.
    search_kwargs = dict(q=hashtag, result_type="recent", lang="es",
                         include_rts=False, exclude_replies=True,
                         include_entities=True)
    if since_id is not None:
        # only ask for tweets newer than the ones already stored, otherwise
        # every poll returns (and stores again) the same recent tweets
        search_kwargs['since_id'] = since_id

    tweets = extractor.search(**search_kwargs)
    tweets_replies.append(tweets)

    # empty results are skipped so that they cannot disturb the column dtypes
    # when all batches are concatenated
    if tweets:
        since_id = max(tweet.id for tweet in tweets)

        # builds all columns of this batch in a single pass over the tweets
        data_iter = _tweets_to_frame(tweets)

        # updates the number of extracted tweets
        count += data_iter.shape[0]

        # queues the batch for the final concatenation
        df_list_final.append(data_iter)

    # counted after the search, so the progress message reports the number of
    # searches that have actually been performed
    i += 1
    if i % 5 == 0:
        print("Iteration number %d, %d tweets" % (i, count))
    time.sleep(30)

print("----- End of proccess -----")

# gets the final dataframe with all columns and extracted tweets;
# pd.concat raises ValueError on an empty list (e.g. when stop_time is already
# in the past), so fall back to an empty dataframe with the expected columns
if df_list_final:
    # ignore_index gives the rows a unique 0..n-1 index instead of repeating
    # each batch's own index
    final_df = pd.concat(df_list_final, axis=0, join="outer", ignore_index=True)
else:
    final_df = pd.DataFrame(columns=TWEET_COLUMNS)

# saves dataframe to .csv file
final_df.to_csv('raw_tweets.csv', sep='|', header=True, index=False)