# IMPORTS

In [1]:
import re 
import os
import tweepy 
import pandas as pd
from tweepy import OAuthHandler 
from textblob import TextBlob

# Get Credentials

    Credentials are required for the tweepy API, which allows us to extract tweets from Twitter.
    In order to extract tweets we need a Twitter developer account, which provides us with four keys.
    These four keys are unique for each user, and I have saved my keys in a text file 'Credentials.txt'.
    
    If you don't want to save your access keys in a file, just paste them in as strings
    (never share or commit a notebook that contains real keys).
    Example:
    consumer_key = 'XXXXXXXXXXXXXXXXXXX'
    consumer_secret = 'XXXXXXXXXXXXXXX'
    access_key = 'XXXXXXXXXXXX'
    access_secret = 'XXXXXXXXXXX'
    
    Note:
    Python also reads the newline character at the end of each line of a text file.
    To exclude the newline characters from the keys we use ".rstrip()".

In [2]:
# Read the four Twitter API keys from 'Credentials.txt', one key per line, in
# this order: consumer key, consumer secret, access key, access secret.
# The `with` block closes the file even if one of the reads raises.
with open('Credentials.txt', 'r') as keyFile:
    # rstrip() drops the trailing newline that readline() keeps on each line.
    consumer_key = keyFile.readline().rstrip()
    consumer_secret = keyFile.readline().rstrip()
    access_key = keyFile.readline().rstrip()
    access_secret = keyFile.readline().rstrip()

# Authentication

    connecting with the tweepy Api

In [3]:
# Build the tweepy client (`api`) from the four keys read from the credentials file.
try:
    # create OAuthHandler object from the consumer key pair
    auth = OAuthHandler(consumer_key, consumer_secret)
    # set access token and secret
    auth.set_access_token(access_key, access_secret)
    # create tweepy API object to fetch tweets
    # NOTE(review): the wait_on_rate_limit* flags presumably make tweepy pause
    # (and report it) when the rate limit is hit - confirm against tweepy docs.
    api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
except Exception as error:
    # `except Exception` instead of a bare `except:` so that KeyboardInterrupt
    # and SystemExit still propagate; the cause is printed instead of hidden.
    print("Error: Authentication Failed")
    print("Reason:", repr(error))

# Extract Tweets Using a Keyword

# Function For cleaning Tweets

    Removes special characters, hyperlinks, user names etc. (letters and digits are kept)
    using simple regular expression statements.
    This is needed because the cleaned tweet is fed to the TextBlob model, which gives us a sentiment based on sentiment polarity.
    With this method we create a data set of tweets and their respective sentiments (as labels).
    This dataset is then used to train our classification algorithm.

In [4]:
def clean_tweet(tweet):
    """Strip handles, links and punctuation from a tweet and collapse whitespace.

    Each of the following is replaced by a space:
      * ``@mentions`` (letters, digits and underscores after the ``@``),
      * every character that is not an ASCII letter, digit, space or tab
        (this drops the ``#`` of a hashtag but keeps the hashtag's word),
      * URLs of the form ``scheme://...`` up to the next whitespace.

    Args:
        tweet: Raw tweet text (str).

    Returns:
        The cleaned text with tokens separated by single spaces and no
        leading or trailing whitespace; ``""`` for an empty tweet.
    """
    # Raw string so the regex escapes (\w, \S, \t) reach `re` untouched instead
    # of being treated as (invalid) Python string escapes.
    # "_" is part of the handle class so "@user_name" is removed whole rather
    # than leaving "name" behind.
    # The former "#..." alternative was unreachable: "#" is always consumed by
    # the "not letter/digit/space/tab" alternative that precedes it.
    pattern = r"(@[A-Za-z0-9_]+)|([^0-9A-Za-z \t])|(\w+://\S+)"
    return ' '.join(re.sub(pattern, " ", tweet).split())

# Function to get Tweet Sentiment

    the function classifies sentiment of passed tweet using textblob's sentiment method 

In [5]:
def get_tweet_sentiment(tweet):
    """Label a tweet as 'positive', 'neutral' or 'negative'.

    The tweet is passed through clean_tweet, wrapped in a TextBlob, and the
    label is chosen from the blob's sentiment polarity: above zero is
    'positive', exactly zero is 'neutral', anything else is 'negative'.
    """
    # Score the cleaned text rather than the raw tweet.
    blob = TextBlob(clean_tweet(tweet))
    polarity = blob.sentiment.polarity

    if polarity > 0:
        return 'positive'
    if polarity == 0:
        return 'neutral'
    return 'negative'

# Get the Tweets And append in list along with its attributes

In [6]:
# Accumulator for the collected tweets; starts empty.
tweets_list = []

# Topics we want related tweets for. Every search query is the topic followed
# by the " -filter:retweets" search operator.
search_topics = [
    "Donald Trump",
    "Joe Biden",
    "Bitcoin",
    "Barack Obama",
    "Election 2020",
    "Corona Virus",
    "Black lives matter",
    "IPL 2020",
    "Arnab Goswami",
    "Champions league",
    "first presidential debate",
]
text_query = [topic + " -filter:retweets" for topic in search_topics]

In [7]:
# For every search query, request one page of English tweets (count=100) and
# store each tweet's id, timestamp, author name, full text and sentiment label.
for key in text_query:
    # tweet_mode='extended' gives us the entire tweet; by default tweets are truncated
    for tweet in api.search(q=key, count=100, tweet_mode='extended', lang="en"):
        sentiment = get_tweet_sentiment(tweet.full_text)
        tweets_list.append((tweet.id, tweet.created_at, tweet.user.name, tweet.full_text, sentiment))

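# api.search returns 15 tweets per search query by default
    
    default : 15 tweets ; max : 100 (set with the `count` argument).
    The loop above passes count=100, so the 11 queries give 11 x 100 = 1100 tweets.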

In [8]:
len(tweets_list)

1100

In [9]:
# Turn the collected tweet tuples into a table, one named column per tuple field.
tweet_columns = ['ID', 'Date- Time', 'User_Name', 'Tweet', 'Sentiment']
df = pd.DataFrame(tweets_list, columns=tweet_columns)

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1100 entries, 0 to 1099
Data columns (total 5 columns):
ID            1100 non-null int64
Date- Time    1100 non-null datetime64[ns]
User_Name     1100 non-null object
Tweet         1100 non-null object
Sentiment     1100 non-null object
dtypes: datetime64[ns](1), int64(1), object(3)
memory usage: 43.0+ KB


In [12]:
# Setting this so we can see the full content of cells (no truncation of long tweets).
# None is the supported "no limit" value; -1 was deprecated in pandas 1.0 and
# is rejected by later versions. Older pandas only accepts an int here and
# raises ValueError for None, so fall back to the legacy -1 in that case.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

In [13]:
df.head(50)

Unnamed: 0,ID,Date- Time,User_Name,Tweet,Sentiment
0,1324044296375443456,2020-11-04 17:42:14,ACAB BLM,"He “promised” to avoid claiming victory until everything was said and done.... he is currently claiming victory \nDonald Trump is a liar and always has been \nHe lied his way through his 2016 campaign, for the past 4 years, and will continue to do so as long as he has a platform",negative
1,1324044294584455171,2020-11-04 17:42:14,USA Decides🇱🇷,#USAElections2020 | @JoeBiden takes narrow lead in key states as Donald Trump suggests fraud https://t.co/Gj2XxFxdex,negative
2,1324044294416592896,2020-11-04 17:42:13,David Foster Evans,Suspend Donald J. Trump's account for fraud. @Jack,neutral
3,1324044293963575301,2020-11-04 17:42:13,Stephen S,"@BBCANRyan @KevinRobMartin would make sense that a heart would be the nail in the coffin for Donald Trump... you know, because he doesn't have one?",neutral
4,1324044292827041792,2020-11-04 17:42:13,9News Adelaide,"WHAT YOU NEED TO KNOW - Follow the #9News #LIVE blog of the #USElection2020 for interactive results as they come in, livestream special coverage, breaking news and key developments as President Donald Trump and Joe Biden battle it out. #Election2020 #9News\nhttps://t.co/DGA0KotgEI",positive
5,1324044290121699329,2020-11-04 17:42:12,#Giste ‘wa’ Kabiro,Donald Trump will be sworn in as the People’s President early in January,positive
6,1324044288657969158,2020-11-04 17:42:12,Alain,"US Election: Tense wait as winner remains unclear Donald Trump and his rival Joe Biden are neck-and-neck in key swing-states. The final result may not emerge for days. Also, Ethiopia's PM orders a military response to 'attack' in Tigray state, and why … https://t.co/AjOd3la2pg",negative
7,1324044286770499585,2020-11-04 17:42:12,Hunter Webb,"I don’t care which side you’re on, the fact that Donald Trump said to stop counting all the mail in votes that haven’t been processed yet, gives me less hope of unity after if either side wins",positive
8,1324044286246166528,2020-11-04 17:42:12,gay erik (ho ho HO)🎄🎅🎄,"What if an ancient Greek titan is taking over the country in the form of Donald Trump, but we just don't know because of a Percy Jackson style Mist.",neutral
9,1324044285034070018,2020-11-04 17:42:11,the Guy,"I said it from day one;\nThe 2020 election was going to be Donald trump vs coronavirus.\n\nEveryone knew Biden never stood a fighting chance, fair and square. This was fantastically well orchestrated plot but God pass them",positive


# Save the content of the dataframe in a csv file

In [14]:
# if file does not exist write header 
# Append this run's tweets to 'Tweetlabelled.csv'. The header row is written
# only when the file does not exist yet, so repeated runs keep one header.
csv_already_exists = os.path.isfile('Tweetlabelled.csv')
# `header` expects a bool (or a list of names); the earlier string value only
# worked because a non-empty string is truthy. mode='a' creates the file when
# it is missing, so a single call covers both cases.
df.to_csv('Tweetlabelled.csv', mode='a', header=not csv_already_exists, index=False)

In [15]:
# Display the csv file and check its contents

In [16]:
# Load the saved CSV back from disk so its full contents can be inspected;
# because the save cell appends, this can hold rows from earlier runs too.
View_tweets = pd.read_csv("Tweetlabelled.csv")

In [17]:
View_tweets.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30528 entries, 0 to 30527
Data columns (total 5 columns):
ID            30528 non-null int64
Date- Time    30528 non-null object
User_Name     30525 non-null object
Tweet         30528 non-null object
Sentiment     30528 non-null object
dtypes: int64(1), object(4)
memory usage: 1.2+ MB


In [18]:
View_tweets.tail(50)

Unnamed: 0,ID,Date- Time,User_Name,Tweet,Sentiment
30478,1323852328282152960,2020-11-04 04:59:25,give the gift of Yourself,"@coolp1np Joe Biden: \n- ""He’s elected to the next election.""\n\nFirst Presidential Debate 2020",positive
30479,1323850142118825984,2020-11-04 04:50:44,Destiny White,"Today, was the first time ever voring. Waiting for the results from this presidential debate debate. #Election2020 #HarlowF20",positive
30480,1323848158544351233,2020-11-04 04:42:51,Darren Jackson,@Drizzy__Drae @JoeBiden I had to block some of these Trump supporters after the first presidential debate and then I got into a fight today at the polls with this lady that had a Trump hat and told me to vote for him. I told her that I vote for whoever I want to vote,positive
30481,1323847143589892098,2020-11-04 04:38:49,✨,"This is Too on the nose, but my first political memory is watching a presidential debate at age 5 or 6, sitting on the foot of my parents' bed, and saying that I wanted Jesse Jackson to be president because I thought it would be good for America to have a Black president",positive
30482,1323846034506895361,2020-11-04 04:34:25,Timberflake,I can't believe yall really votes for trump after the first presidential debate,positive
30483,1323842940276510722,2020-11-04 04:22:07,Connor | annus,"@satboi10 @Mavis87Williams @NotSuperIsTaken @WorshipLordElmo during the first presidential debate when he said ""Proud Boys stand back and stand by"" when asked to denounce white supremacy. Don't argue if you're obviously not educated.",positive
30484,1323842247243292672,2020-11-04 04:19:22,Tammie 💚,"""DEBATE NIGHT 2020!"" — A Bad Lip Reading of the First Presidential Debat... https://t.co/QUjStnOhSK via @YouTube",negative
30485,1323841913305337857,2020-11-04 04:18:02,𝕕𝕠𝕞𝕚𝕟𝕚𝕜 🧸🎈,during the first presidential debate.. you already know biden wanted to pimp slap trump for talking bad bout his son. shiiii i woulda done the same thing. i feel for biden. \n\n#BidenHarris2020 💙,negative
30486,1323837509105844224,2020-11-04 04:00:32,Olivia Paschal,my PBS app just started replaying the screaming match from the first presidential debate for no apparent reason and that has caused my blood pressure to spike like just about nothing else tonight,positive
30487,1323837154791989253,2020-11-04 03:59:08,Jane,PBS just said “first presidential debate” and my history loving ass thought they were gonna show Nixon and Kennedy but they just showed trump and biden interrupting each other,positive


# how to collect tweets :

        Run the notebook multiple times with different keywords or search queries.
        The final csv file will then contain all the tweets that we extracted for the different keywords; however,
        it is possible that two different queries give us the same tweet.
        For example, "Virat Kohli" and "cricket" may give us the same tweets,
        so the csv file will contain duplicates and we need to delete those duplicate entries from our csv file or dataset.