In [1]:
# Importing necessary libraries for Twitter Sentiment Analysis

import numpy as np
import pandas as pd
import json
import warnings
warnings.filterwarnings('ignore')
import tweepy
import openpyxl
from tweepy import OAuthHandler
from textblob import TextBlob
import nltk
from nltk.tokenize import wordpunct_tokenize

In [12]:
'''For getting access to Twitter data, Developer Account is required.
   Since credentials cannot be revealed, 
   I have saved the credentials in .csv file and have imported the .csv file as pandas DataFrame'''

cred = pd.read_csv('cred.csv')

In [4]:
'''From pandas DataFrame, I have extracted the credentials needed for accessing the Twitter data'''

consumer_key=cred.iloc[0][2]

consumer_secret=cred.iloc[1][2]

bearer_token=cred.iloc[2][2]

access_token=cred.iloc[3][2]

access_token_secret=cred.iloc[4][2]

In [5]:
# Call Twitter API
auth = OAuthHandler(consumer_key, consumer_secret)

In [6]:
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

## api.search will only give first 100 tweets. 
## Hence, use tweepy.Cursor() which will help us extract as many tweets as we want.

### Generating the blank Dataframe

In [7]:
df = pd.DataFrame(columns=['Tweets', 'User','User_statuses_count', 'User_followers', 'User_location','User_verified',
                           'rt_count','tweet_date'])
df

Unnamed: 0,Tweets,User,User_statuses_count,User_followers,User_location,User_verified,rt_count,tweet_date


#### Defining the functions to get data from Twitter, clean the tweets and analyze the tweets

In [8]:
# For getting the data from Twitter and generating dataframe

def stream(data,num):
    i = 0
    for tweet in tweepy.Cursor(api.search, q=data, count = 100, lang='en').items():
        print(i, end='\r')
        df.loc[i, 'Tweets'] = tweet.text
        df.loc[i, 'User'] = tweet.user.name
        df.loc[i, 'User_statuses_count'] = tweet.user.statuses_count
        df.loc[i, 'User_followers'] = tweet.user.followers_count
        df.loc[i, 'User_location'] = tweet.user.location
        df.loc[i, 'User_verified'] = tweet.user.verified
        df.loc[i, 'fav_count'] = tweet.favorite_count
        df.loc[i, 'rt_count'] = tweet.retweet_count
        df.loc[i, 'tweet_date'] = tweet.created_at        
        i = i+1
        if i == num:
            break
        else:
            pass

        
# Process for cleaning the text data

stopwords = nltk.corpus.stopwords.words('english')
import string
pun = string.punctuation.replace('.','')

for i in range(0,len(pun)):
    stopwords.append(pun[i])
stopwords.append('RT')    #as RT is re-tweet in Twitter

def remove(arg):
    a = wordpunct_tokenize(arg)
    b = []
    for i in a:
        if i not in stopwords:
            b.append(i)
    return ' '.join(b)        
        
    
# Defining the Subjectivity and Polarity
        
def subjectivity(arg):
    blob = TextBlob(arg)
    if blob.subjectivity > 0.66:
        return 'high subjectivity'
    elif blob.subjectivity > 0.33:
        return 'medium subjectivity'
    else:
        return 'low subjectivity'
    
def polarity(arg):
    blob = TextBlob(arg)
    if blob.polarity > 0:
        return 'positive sentiment'
    elif blob.polarity < 0:
        return 'negative sentiment'
    else:
        return 'neutral sentiment'

## Aligning the processes for getting the analysis

In [9]:
def process():
    query = str(input('Please enter what you want to research about: '))
    print()
    num = int(input('Please enter the total number of tweets you want analyze: '))
    print()
    stream([query],num)
    df['clean'] = df['Tweets'].apply(lambda x: remove(x))
    df['Subjectivity'] = df['clean'].apply(lambda x: subjectivity(x))
    df['Sentiment'] = df['clean'].apply(lambda x: polarity(x))
    df.to_excel('tweet analysis of {}.xlsx'.format(query))
    positive = len(df[df['Sentiment'] == 'positive sentiment'])
    negative = len(df[df['Sentiment'] == 'negative sentiment'])
    neutral = len(df[df['Sentiment'] == 'neutral sentiment'])
    print()
    return print('Process Completed. \n\n Total Positive Sentiments: {}/{}, \n Total Negative Sentiments: {}/{}, \n Total Neutral Sentiments : {}/{} '.format(positive,num, negative,num, neutral,num))

In [10]:
# Example 1
process()

Please enter what you want to research about: Bengal Elections

Please enter the total number of tweets you want analyze: 500

499
Process Completed. 

 Total Positive Sentiments: 185/500, 
 Total Negative Sentiments: 121/500, 
 Total Neutral Sentiments : 194/500 


In [11]:
# Example 2
process()

Please enter what you want to research about: rising covid cases

Please enter the total number of tweets you want analyze: 300

299
Process Completed. 

 Total Positive Sentiments: 183/300, 
 Total Negative Sentiments: 104/300, 
 Total Neutral Sentiments : 213/300 
