In [25]:
#General imports of libraries
import tweepy
import pandas as pd
import numpy as np
from IPython.display import display

In [None]:
#Application Setup
#Make a file called credentials.py
# Twitter App access keys for @user 
#Create an account and application; To be fetched from https://apps.twitter.com/
# Consume:
CONSUMER_KEY    = ''
CONSUMER_SECRET = ''

# Access:
ACCESS_TOKEN  = ''
ACCESS_SECRET = ''

In [26]:
#Import Access Keys
from credentials import *   # This will allow us to use the keys as variables

In [27]:
# API's setup:
def twitter_setup():
    """
    Utility function to setup the Twitter's API
    with our access keys provided.
    """
    # Authentication and access using keys:
    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)

    # Return API with authentication:
    api = tweepy.API(auth)
    return api

In [17]:
#Tweets Extraction
# We create an extractor object:
extractor = twitter_setup()

# We create a tweet list as follows:
tweets = extractor.user_timeline(screen_name="lakshyas90", count=200)
#tweets = extractor.search('#GandhiJayanti -filter:retweets',rpp=10)
#tweets = extractor.search('#IndiaBuildsActions')

print("Number of tweets extracted: {}.\n".format(len(tweets)))

# We print the most recent 5 tweets:
print("5 recent tweets:\n")
for tweet in tweets[:5]:
    print(tweet.text)
    print()

TweepError: Failed to send request: HTTPSConnectionPool(host='api.twitter.com', port=443): Read timed out.

In [28]:
#Creating a (pandas) DataFrame
# We create a pandas dataframe as follows:
data = pd.DataFrame(data=[tweet.text for tweet in tweets], columns=['Tweets'])

# We display the first 10 elements of the dataframe:
display(data.head(10))

Unnamed: 0,Tweets
0,Just spoke to President-Elect Andres Manuel Lo...
1,I see it each time I go out to Rallies in orde...
2,"Mexico, Canada and the United States are a gre..."
3,"Blowout numbers on New Jobs and, separately, S..."
4,Thank you to Congressman Tom Reed of New York ...
5,Thank you Governor Phil Bryant - it was my gre...
6,https://t.co/w04sqlMIYm
7,The Stock Market just reached an All-Time High...
8,The Failing New York Times did something I hav...
9,Congressman @PeteSessions of Texas is a true f...


In [None]:
# Internal methods of a single tweet object:
print(dir(tweets[0]))

In [14]:
# We print info from the first tweet:
print(tweets[0].id)
print(tweets[0].created_at)
print(tweets[0].source)
print(tweets[0].favorite_count)
print(tweets[0].retweet_count)
print(tweets[0].geo)
print(tweets[0].coordinates)
print(tweets[0].entities)

1047495435760427008
2018-10-03 14:35:48
Twitter for iPhone
27015
5874
None
None
{u'symbols': [], u'user_mentions': [], u'hashtags': [], u'urls': []}


In [None]:
# We add relevant data:
data['len']  = np.array([len(tweet.text) for tweet in tweets])
data['ID']   = np.array([tweet.id for tweet in tweets])
data['Date'] = np.array([tweet.created_at for tweet in tweets])
data['Source'] = np.array([tweet.source for tweet in tweets])
data['Likes']  = np.array([tweet.favorite_count for tweet in tweets])
data['RTs']    = np.array([tweet.retweet_count for tweet in tweets])

In [16]:
# Display of first 10 elements from dataframe:
display(data.head(10))

Unnamed: 0,Tweets,len,ID,Date,Source,Likes,RTs
0,Just spoke to President-Elect Andres Manuel Lo...,108,1047495435760427008,2018-10-03 14:35:48,Twitter for iPhone,27015,5874
1,I see it each time I go out to Rallies in orde...,139,1047493831426498561,2018-10-03 14:29:25,Twitter for iPhone,41290,11341
2,"Mexico, Canada and the United States are a gre...",140,1047489875614883841,2018-10-03 14:13:42,Twitter for iPhone,29710,6791
3,"Blowout numbers on New Jobs and, separately, S...",65,1047487803746074624,2018-10-03 14:05:28,Twitter for iPhone,27837,6399
4,Thank you to Congressman Tom Reed of New York ...,140,1047486972976095233,2018-10-03 14:02:10,Twitter for iPhone,21703,4903
5,Thank you Governor Phil Bryant - it was my gre...,97,1047477260624773121,2018-10-03 13:23:34,Twitter for iPhone,24366,5452
6,https://t.co/w04sqlMIYm,23,1047475390573371395,2018-10-03 13:16:08,Twitter for iPhone,21742,5835
7,The Stock Market just reached an All-Time High...,139,1047472342354604033,2018-10-03 13:04:02,Twitter for iPhone,47111,11509
8,The Failing New York Times did something I hav...,140,1047469711938736128,2018-10-03 12:53:35,Twitter for iPhone,42552,10542
9,Congressman @PeteSessions of Texas is a true f...,140,1047465256023445504,2018-10-03 12:35:52,Twitter for iPhone,31620,7870


In [20]:
#Sentiment analysis
#Importing textblob
from textblob import TextBlob
import re

def clean_tweet(tweet):
    '''
    Utility function to clean the text in a tweet by removing 
    links and special characters using regex.
    '''
    return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split())

def analyze_sentiment(tweet):
    '''
    Utility function to classify the polarity of a tweet
    using textblob.
    '''
    analysis = TextBlob(clean_tweet(tweet))
    if analysis.sentiment.polarity > 0:
        return 1
    elif analysis.sentiment.polarity == 0:
        return 0
    else:
        return -1

In [21]:
# We create a column with the result of the analysis:
data['SA'] = np.array([ analyze_sentiment(tweet) for tweet in data['Tweets'] ])

# We display the updated dataframe with the new column:
display(data.head(10))

Unnamed: 0,Tweets,len,ID,Date,Source,Likes,RTs,SA
0,Just spoke to President-Elect Andres Manuel Lo...,108,1047495435760427008,2018-10-03 14:35:48,Twitter for iPhone,27015,5874,1
1,I see it each time I go out to Rallies in orde...,139,1047493831426498561,2018-10-03 14:29:25,Twitter for iPhone,41290,11341,1
2,"Mexico, Canada and the United States are a gre...",140,1047489875614883841,2018-10-03 14:13:42,Twitter for iPhone,29710,6791,1
3,"Blowout numbers on New Jobs and, separately, S...",65,1047487803746074624,2018-10-03 14:05:28,Twitter for iPhone,27837,6399,1
4,Thank you to Congressman Tom Reed of New York ...,140,1047486972976095233,2018-10-03 14:02:10,Twitter for iPhone,21703,4903,1
5,Thank you Governor Phil Bryant - it was my gre...,97,1047477260624773121,2018-10-03 13:23:34,Twitter for iPhone,24366,5452,1
6,https://t.co/w04sqlMIYm,23,1047475390573371395,2018-10-03 13:16:08,Twitter for iPhone,21742,5835,0
7,The Stock Market just reached an All-Time High...,139,1047472342354604033,2018-10-03 13:04:02,Twitter for iPhone,47111,11509,1
8,The Failing New York Times did something I hav...,140,1047469711938736128,2018-10-03 12:53:35,Twitter for iPhone,42552,10542,1
9,Congressman @PeteSessions of Texas is a true f...,140,1047465256023445504,2018-10-03 12:35:52,Twitter for iPhone,31620,7870,1


In [22]:
# Analyzing the results
# We construct lists with classified tweets:

pos_tweets = [ tweet for index, tweet in enumerate(data['Tweets']) if data['SA'][index] > 0]
neu_tweets = [ tweet for index, tweet in enumerate(data['Tweets']) if data['SA'][index] == 0]
neg_tweets = [ tweet for index, tweet in enumerate(data['Tweets']) if data['SA'][index] < 0]

In [23]:
# We print percentages:

print("Percentage of positive tweets: {}%".format(len(pos_tweets)*100/len(data['Tweets'])))
print("Percentage of neutral tweets: {}%".format(len(neu_tweets)*100/len(data['Tweets'])))
print("Percentage de negative tweets: {}%".format(len(neg_tweets)*100/len(data['Tweets'])))

Percentage of positive tweets: 59%
Percentage of neutral tweets: 32%
Percentage de negative tweets: 8%
