Python Code to Perform Sentiment Analysis

In [3]:
##import required libraries and define functions

import tweepy
import pandas as pd
import config
import re
from datetime import datetime
from datetime import timedelta
import snscrape.modules.twitter as snstwitter

from textblob import TextBlob
import matplotlib.pyplot as plt


##tqdm was used without being imported, which raises NameError on a fresh kernel
from tqdm import tqdm

##enable tqdm's pandas integration
tqdm.pandas()

##function for cleaning tweets
def cleanTweet(text):
    """Strip Twitter-specific noise from a tweet and return the remaining text.

    Removes, in order: @mentions, '#' characters (the tag word itself is kept),
    retweet markers ("RT" followed by whitespace), http/https links, and
    newline characters (each replaced by a single space).

    Surrounding whitespace left behind by removed tokens is not trimmed.

    Parameters
    ----------
    text : str
        Raw tweet text.

    Returns
    -------
    str
        The cleaned text.
    """
    text = re.sub(r'@[A-Za-z0-9_]+', '', text)  ## remove @mentions
    text = re.sub(r'\#', '', text)  ## remove hash tag
    ## \b anchors "RT" at a word start so words that merely end in "RT"
    ## (e.g. "START now") are no longer mangled
    text = re.sub(r'\bRT[\s]+', '', text)  ## remove RT
    text = re.sub(r'https?:\/\/\S+', '', text)  ## remove hyperlink
    text = re.sub(r'\n', ' ', text)  ## remove next line character
    return text

##functions to get subjectivity and Polarity
def getSubjectivity(text):
    """Return the TextBlob subjectivity score of ``text``.

    The input is converted with ``str`` before it is handed to TextBlob.
    """
    blob = TextBlob(str(text))
    sentiment = blob.sentiment
    return sentiment.subjectivity

def getPolarity(text):
    """Return the TextBlob polarity score of ``text``.

    The input is converted with ``str`` before it is handed to TextBlob.
    """
    blob = TextBlob(str(text))
    sentiment = blob.sentiment
    return sentiment.polarity


##function to get analysis based on score
def getAnalysis(score):
    """Label a polarity score as 'Negative', 'Neutral' or 'Positive'.

    Parameters
    ----------
    score : float or anything ``float()`` accepts
        Polarity score; values below zero are negative, exactly zero is
        neutral, everything else is positive.

    Returns
    -------
    str
        'Negative', 'Neutral' or 'Positive'.

    Raises
    ------
    TypeError
        If ``score`` is None ("Null Object Passed").
    """
    ## Check for None BEFORE converting: float(None) raises by itself, so the
    ## previous `float(score) == None` comparison could never be true.
    if score is None:
        raise TypeError("Null Object Passed")

    ## convert once instead of once per comparison
    value = float(score)

    if value < 0:
        return 'Negative'
    elif value == 0:
        return 'Neutral'
    else:
        return 'Positive'

##client = tweepy.Client(bearer_token=config.twitter_bearertoken)


In [4]:
##scrape data from twitter: collect at most max_tweets results for the query
query = "Joe Biden until:2021-01-01 since:2020-01-01"
max_tweets = 50000
tweet_content = []

scraper = snstwitter.TwitterSearchScraper(query)
for i, tweets in enumerate(scraper.get_items()):
    ##stop as soon as the cap is reached
    if i == max_tweets:
        break

    ##one row per tweet; the last field records the UTC time the row was built
    row = (
        str(tweets.id),
        tweets.date,
        str(tweets.content),
        str(tweets.lang),
        str(tweets.sourceLabel),
        str(query),
        datetime.utcnow(),
    )
    tweet_content.append(row)

column_names = [
    'tweet_id',
    'tweet_created_at',
    'tweet_text',
    'tweet_lang',
    'tweet_source',
    'tweets_search_query',
    'tweets_scrapped',
]
df = pd.DataFrame(tweet_content, columns=column_names)

##release the reference to the raw row list now that the rows are in df
tweet_content = None


In [4]:
##clean the raw tweet text first; every score below is computed from the cleaned text
cleaned_text = df['tweet_text'].apply(cleanTweet)
df['tweet_clean_text'] = cleaned_text

##apply functions to get subjectivity and polarity
subjectivity_scores = cleaned_text.apply(getSubjectivity)
polarity_scores = cleaned_text.apply(getPolarity)
df['tweet_Subjectivity'] = subjectivity_scores
df['tweet_Polarity'] = polarity_scores

##label each tweet from its polarity score
df['tweet_Analysis'] = polarity_scores.apply(getAnalysis)


In [5]:
##display the DataFrame (last expression of the cell, so it is rendered as the cell output)
df

Unnamed: 0,tweet_id,tweet_created_at,tweet_text,tweet_lang,tweet_source,tweets_search_query,tweets_scrapped,tweet_clean_text,tweet_Subjectivity,tweet_Polarity,tweet_Analysis
0,1381034237550268420,2021-04-10 23:59:55,Joe Biden is such a manipulative asshole bro. ...,en,Twitter for iPhone,Joe Biden until:2021-04-11 since:2021-01-01,2022-08-28 20:09:32.302,Joe Biden is such a manipulative asshole bro. ...,0.518333,-0.14500,Negative
1,1381034234870046722,2021-04-10 23:59:54,@POTUS Jack ass!!!,en,Twitter for iPhone,Joe Biden until:2021-04-11 since:2021-01-01,2022-08-28 20:09:32.302,Jack ass!!!,0.000000,0.00000,Neutral
2,1381034225894256647,2021-04-10 23:59:52,"@DaniiellePa Harris. After she dropped out, my...",en,Twitter Web App,Joe Biden until:2021-04-11 since:2021-01-01,2022-08-28 20:09:32.302,"Harris. After she dropped out, my favorite ad...",1.000000,0.25000,Positive
3,1381034222777991180,2021-04-10 23:59:51,@POTUS What???????? https://t.co/BqaMYGt421,en,Twitter for Android,Joe Biden until:2021-04-11 since:2021-01-01,2022-08-28 20:09:32.302,What????????,0.000000,0.00000,Neutral
4,1381034221163143171,2021-04-10 23:59:51,@POTUS https://t.co/FTT8tfMd27,qme,Twitter for iPhone,Joe Biden until:2021-04-11 since:2021-01-01,2022-08-28 20:09:32.302,,0.000000,0.00000,Neutral
...,...,...,...,...,...,...,...,...,...,...,...
99995,1344516329676677122,2020-12-31 05:30:47,@JoeBiden “Let me wave my magic wand! Ignore t...,en,Twitter for iPhone,Joe Biden until:2021-01-01 since:2020-01-01,2022-08-28 21:48:52.887,“Let me wave my magic wand! Ignore the fact t...,0.850000,0.50625,Positive
99996,1344516324341460994,2020-12-31 05:30:46,@dawndaledoxidoc @JoeBiden Yup,und,Twitter for iPhone,Joe Biden until:2021-01-01 since:2020-01-01,2022-08-28 21:48:52.887,Yup,0.000000,0.00000,Neutral
99997,1344516315927687168,2020-12-31 05:30:44,@JoeBiden @realDonaldTrump\n\nB6lJQub5TXciIIfg...,in,Twitter for Android,Joe Biden until:2021-01-01 since:2020-01-01,2022-08-28 21:48:52.887,B6lJQub5TXciIIfg6HbxfKC/1pv+PehDz0WuV8w/N5k=,0.000000,0.00000,Neutral
99998,1344516308503846914,2020-12-31 05:30:42,@Exclusiv1chat @JoeBiden @BidenInaugural Well ...,en,Twitter Web App,Joe Biden until:2021-01-01 since:2020-01-01,2022-08-28 21:48:53.507,"Well put, thank you.",0.000000,0.00000,Neutral


Code to Dump Data into MongoDB

In [5]:
###Dump collected data into mongodb

from pymongo import MongoClient
import config

##connect with the settings from config and select the target collection
client = MongoClient(config.mdb_string)
mydatabase = client[config.mdb_db]
collection = mydatabase["JoeBiden"]

##dump_df is a second name for df (no copy is made); each row becomes one document
dump_df = df
tweet_records = dump_df.to_dict(orient="records")
collection.insert_many(tweet_records)


<pymongo.results.InsertManyResult at 0x1fa61126fd0>

In [1]:
##load data from mongodb to pd
from pymongo import MongoClient
import pandas as pd
import config

##connect with the settings from config and select the source collection
client = MongoClient(config.mdb_string)
mydatabase = client[config.mdb_db]
collection = mydatabase["JoeBiden"]

##query every document; this cursor is now the one that gets consumed below
##(previously it was left unused and a second, identical find() was issued)
all_record = collection.find()

##build the frame and drop Mongo's '_id' column; errors='ignore' keeps an
##empty collection (which yields a frame without '_id') from raising KeyError
df = pd.DataFrame(list(all_record)).drop(columns=['_id'], errors='ignore')
df


Unnamed: 0,tweet_id,tweet_created_at,tweet_text,tweet_lang,tweet_source,tweets_search_query,tweets_scrapped
0,1381034237550268420,2021-04-10 23:59:55,Joe Biden is such a manipulative asshole bro. ...,en,Twitter for iPhone,Joe Biden until:2021-04-11 since:2021-01-01,2022-08-28 20:09:32.302
1,1381034234870046722,2021-04-10 23:59:54,@POTUS Jack ass!!!,en,Twitter for iPhone,Joe Biden until:2021-04-11 since:2021-01-01,2022-08-28 20:09:32.302
2,1381034225894256647,2021-04-10 23:59:52,"@DaniiellePa Harris. After she dropped out, my...",en,Twitter Web App,Joe Biden until:2021-04-11 since:2021-01-01,2022-08-28 20:09:32.302
3,1381034222777991180,2021-04-10 23:59:51,@POTUS What???????? https://t.co/BqaMYGt421,en,Twitter for Android,Joe Biden until:2021-04-11 since:2021-01-01,2022-08-28 20:09:32.302
4,1381034221163143171,2021-04-10 23:59:51,@POTUS https://t.co/FTT8tfMd27,qme,Twitter for iPhone,Joe Biden until:2021-04-11 since:2021-01-01,2022-08-28 20:09:32.302
...,...,...,...,...,...,...,...
99995,1344516329676677122,2020-12-31 05:30:47,@JoeBiden “Let me wave my magic wand! Ignore t...,en,Twitter for iPhone,Joe Biden until:2021-01-01 since:2020-01-01,2022-08-28 21:48:52.887
99996,1344516324341460994,2020-12-31 05:30:46,@dawndaledoxidoc @JoeBiden Yup,und,Twitter for iPhone,Joe Biden until:2021-01-01 since:2020-01-01,2022-08-28 21:48:52.887
99997,1344516315927687168,2020-12-31 05:30:44,@JoeBiden @realDonaldTrump\n\nB6lJQub5TXciIIfg...,in,Twitter for Android,Joe Biden until:2021-01-01 since:2020-01-01,2022-08-28 21:48:52.887
99998,1344516308503846914,2020-12-31 05:30:42,@Exclusiv1chat @JoeBiden @BidenInaugural Well ...,en,Twitter Web App,Joe Biden until:2021-01-01 since:2020-01-01,2022-08-28 21:48:53.507


In [17]:
from time import strftime


##format each creation timestamp as "yymm" and list the distinct values
year_month_codes = df['tweet_created_at'].apply(
    lambda created_at: created_at.strftime("%y%m")
)
year_month_codes.unique()


array(['2104', '2112', '2208', '2012'], dtype=object)

In [23]:
##the module is named nltk; "ntlk" was a typo that fails with ModuleNotFoundError
import nltk

In [21]:
from datetime import timedelta

##build a one-day duration and show it as the cell output
one_day = timedelta(days=1)
one_day

datetime.timedelta(days=1)