# Part 1 - Scraping Tweets

## 1. Importing Libraries
### 1.1 Libraries

In [2]:
pip install tweepy

Collecting tweepy
  Downloading tweepy-4.14.0-py3-none-any.whl (98 kB)
Collecting requests-oauthlib<2,>=1.2.0
  Downloading requests_oauthlib-1.3.1-py2.py3-none-any.whl (23 kB)
Collecting oauthlib<4,>=3.2.0
  Downloading oauthlib-3.2.2-py3-none-any.whl (151 kB)
Installing collected packages: oauthlib, requests-oauthlib, tweepy
Successfully installed oauthlib-3.2.2 requests-oauthlib-1.3.1 tweepy-4.14.0
Note: you may need to restart the kernel to use updated packages.


In [1]:
# Library to access tweets using twitter API
import tweepy

# Library to provide data structures(dataframes)
import pandas as pd

# Package for time taken
from time import time 

#Libraries for console with widgets/progress bar
import ipywidgets as widgets
from tqdm.notebook import tqdm

#pretty printing
from pprint import pprint

### 1.2 Configurations

In [3]:
# Importing Credentials from config file
# (`configs` is indexed later with the "twitter_credentials" key to build the API client)
from configs import configs

# Path of the CSV file (not a directory) that the scraped tweets are written to
tweets_df_path = "../Data/tweets_data.csv"

## 2. Getting Tweets

### 2.1 Setting up Twitter Credentials 

In [4]:
# Authenticate against Twitter with the consumer key/secret and the access token/secret
# stored in the config file. OAuth1UserHandler is the current name of the OAuth 1.0a
# user-context handler; OAuthHandler is only kept by tweepy as a deprecated alias.
auth = tweepy.OAuth1UserHandler(
    configs["twitter_credentials"]["consumer_key"],
    configs["twitter_credentials"]["consumer_secret"],
    access_token=configs["twitter_credentials"]["access_token"],
    access_token_secret=configs["twitter_credentials"]["access_token_secret"]
)

# Tweepy client used to extract tweets; wait_on_rate_limit makes it pause
# instead of failing when the rate limit is reached.
api = tweepy.API(
    auth,
    wait_on_rate_limit=True
)

### 2.2 Scanning Tweets

In [5]:
# Shared scan settings: save_parameters writes the "tweet_count" and "query_string"
# keys into this dictionary, and pull_tweets reads them when a scan is started.
tweet_parameters = {}

# Fetching tweets
def pull_tweets(_):

    """
    Scan for recent English tweets matching the saved query and export them to a CSV file.

    The search keyword and the number of tweets to fetch are read from the module-level
    ``tweet_parameters`` dictionary (keys ``"query_string"`` and ``"tweet_count"``).
    Retweets are excluded from the search. When at least one tweet is collected, the
    data is written to ``tweets_df_path``; otherwise no file is written.

    Parameters:
    _: Ignored. Accepts the argument supplied by ``button.on_click`` when used as a click handler.

    Output:
    None. The number of tweets, the elapsed time and the export status are printed.
    """

    # Local import: only needed to make sure the output folder exists before writing.
    from pathlib import Path

    # Column order of the exported file.
    # NOTE(review): "coordinates" is declared but never filled in by this function, so it
    # is exported empty - confirm whether it should be populated from the tweet.
    columns = [
        "tweet",
        "date",
        "author",
        "hashtags",
        "followers_count",
        "friends_count",
        "coordinates",
        "retweet_count",
        "favorite_count"
    ]

    query_string = tweet_parameters["query_string"]
    tweet_count = tweet_parameters["tweet_count"]

    # time taken
    start_time = time()

    # Let the cursor enforce the limit: a requested count of 0 then fetches nothing
    # instead of paging through every available result.
    matching_tweets = tweepy.Cursor(
        api.search_tweets,
        q=query_string + ' -filter:retweets',
        result_type='recent',
        lang='en',
        tweet_mode='extended'
    ).items(tweet_count)

    # Collect plain records and build the dataframe once at the end. Growing a dataframe
    # row by row is quadratic, and DataFrame.append no longer exists in pandas 2.x.
    records = [
        {
            "tweet": tweet.full_text,
            "date": tweet.created_at,
            "author": tweet.user.screen_name,
            "hashtags": [hashtag['text'] for hashtag in tweet._json["entities"]["hashtags"]],
            "followers_count": tweet.user.followers_count,
            "friends_count": tweet.user.friends_count,
            "retweet_count": tweet.retweet_count,
            "favorite_count": tweet.favorite_count
        }
        for tweet in tqdm(matching_tweets, total=tweet_count)
    ]
    counter = len(records)

    # Keys missing from the records (currently "coordinates") become empty columns.
    tweet_df = pd.DataFrame(records, columns=columns)

    #Calculating time
    total_time = time() - start_time
    print(f"{counter} tweets scraped in {total_time} seconds.")

    # condition to check df empty or not - write to csv
    if not tweet_df.empty:
        # Create the target folder if needed so a finished scrape is not lost on write.
        Path(tweets_df_path).parent.mkdir(parents=True, exist_ok=True)
        tweet_df.to_csv(tweets_df_path, index=False)
        print("Tweet Dataframe Exported with text.")

    else:
        print("Dataframe emtpy. No data to save!")
               
     
    
def save_parameters(tweet_count, query_string):
    """
    Record the tweet-extraction settings in the shared ``tweet_parameters`` dictionary.

    Parameters:
    tweet_count: The number of tweets to fetch.
    query_string: The keyword to search for.
    """

    # The dictionary is updated in place, so every reader of it sees the new values.
    tweet_parameters.update(
        tweet_count=tweet_count,
        query_string=query_string
    )

### 2.3 Console / Controller

In [6]:
# Text box holding the keyword to search for
query_box = widgets.Text(
    value='bitcoin',  # Default Value.
    description="Query String"
)

# Slider selecting how many tweets to fetch
count_slider = widgets.IntSlider(
    min=0,  # Minimum value for the slider.
    max=25000,  # Maximum value for the slider.
    step=50,  # The slider moves in increments of 50.
    value=10,  # Default Value.
    description="Tweet Count"
)

# Console: links both controls to save_parameters
console = widgets.interactive(
    save_parameters,
    query_string=query_box,
    tweet_count=count_slider
)

# Button that starts the scan
button = widgets.Button(description='SCAN', tooltip='Scan Tweets')

#Process & display
button.on_click(pull_tweets)  # Links the button to the function
display(console)  # Displays the console.
display(button)  # Displaying the button.

interactive(children=(IntSlider(value=10, description='Tweet Count', max=25000, step=50), Text(value='bitcoin'…

Button(description='SCAN', style=ButtonStyle(), tooltip='Scan Tweets')