# Twitter Hashtag Analysis
### #NG30DaysofLearning

## Data Gathering

In [50]:
# installing snscrape
!pip install --upgrade snscrape



In [51]:
# importing needed libraries
import pandas as pd
import snscrape.modules.twitter as sntwitter
from textblob import TextBlob

In [None]:
# scrape using snscrape
query = "(#30DaysOfLearning OR #NG30DaysOfLearning) until:2022-07-01 since:2022-05-01"
tweets = []
limit = 30000


for tweet in sntwitter.TwitterHashtagScraper(query).get_items():
    
    if len(tweets) == limit:
        break
    else:
        tweets.append([tweet.date, tweet.url, tweet.user.username, tweet.sourceLabel, tweet.user.location, tweet.content, 
                       tweet.likeCount, tweet.retweetCount,  tweet.quoteCount, tweet.replyCount])
        
df = pd.DataFrame(tweets, columns=['Date', 'TweetURL','User', 'Source', 'Location', 'Tweet', 'Likes_Count','Retweet_Count', 
                                   'Quote_Count', 'Reply_Count'])

# read dataframe to csv
df.to_csv('30DLTweets.csv')

## Data Assessment

### Visual Assessment

In [54]:
# read the csv file into a pandas dataframe
data = pd.read_csv('30DLTweets.csv')

# first five hashtag tweets
data

Unnamed: 0.1,Unnamed: 0,Date,TweetURL,User,Source,Location,Tweet,Likes_Count,Retweet_Count,Quote_Count,Reply_Count
0,0,2022-06-30 23:41:31+00:00,https://twitter.com/Oloko_Burn_Fire/status/154...,Oloko_Burn_Fire,Twitter for Android,school of Life,**********July 2022**********\n- Tailwind CSS‼...,5,2,1,2
1,1,2022-06-30 23:02:00+00:00,https://twitter.com/bossking_uee/status/154264...,bossking_uee,Twitter for iPhone,"Lagos, Nigeria","You quit, you lose. You keep going, you you ma...",0,0,0,0
2,2,2022-06-30 22:49:03+00:00,https://twitter.com/bossking_uee/status/154264...,bossking_uee,Twitter for iPhone,"Lagos, Nigeria",Data Analysis\n#30daysoflearning,0,0,0,0
3,3,2022-06-30 22:44:02+00:00,https://twitter.com/Fabulousnani01/status/1542...,Fabulousnani01,Twitter for Android,,Tomorrow's Session on #30daysoflearning is on ...,2,0,0,0
4,4,2022-06-30 21:49:16+00:00,https://twitter.com/debunmiT/status/1542626348...,debunmiT,Twitter for Android,,To everyone involved in this I want to say a b...,136,8,0,15
...,...,...,...,...,...,...,...,...,...,...,...
785,785,2022-05-10 17:30:02+00:00,https://twitter.com/TheOyinbooke/status/152407...,TheOyinbooke,Twitter for Android,Global,I love playing football but you know what is m...,27,9,0,1
786,786,2022-05-10 08:26:54+00:00,https://twitter.com/LibraryImpactNG/status/152...,LibraryImpactNG,Twitter for Android,"Lagos, Nigeria.",Why not join the #NG30daysofLearning while wai...,0,0,0,0
787,787,2022-05-09 17:11:34+00:00,https://twitter.com/TheOyinbooke/status/152371...,TheOyinbooke,Twitter for Android,Global,"Since I read the ASUU Strike announcement, I h...",137,40,1,9
788,788,2022-05-09 16:00:25+00:00,https://twitter.com/TheOyinbooke/status/152369...,TheOyinbooke,Twitter for Android,Global,"April 2022, @DavidAbu_, @japhletnwamu, @Cinnam...",88,47,3,6


### Programmatic Assessment

In [55]:
data.shape

(790, 11)

In [56]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 790 entries, 0 to 789
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Unnamed: 0     790 non-null    int64 
 1   Date           790 non-null    object
 2   TweetURL       790 non-null    object
 3   User           790 non-null    object
 4   Source         790 non-null    object
 5   Location       627 non-null    object
 6   Tweet          790 non-null    object
 7   Likes_Count    790 non-null    int64 
 8   Retweet_Count  790 non-null    int64 
 9   Quote_Count    790 non-null    int64 
 10  Reply_Count    790 non-null    int64 
dtypes: int64(5), object(6)
memory usage: 68.0+ KB


In [57]:
data.nunique()

Unnamed: 0       790
Date             789
TweetURL         790
User             222
Source             6
Location          93
Tweet            787
Likes_Count      108
Retweet_Count     44
Quote_Count        8
Reply_Count       20
dtype: int64

In [58]:
# statistical summary
data.describe()

Unnamed: 0.1,Unnamed: 0,Likes_Count,Retweet_Count,Quote_Count,Reply_Count
count,790.0,790.0,790.0,790.0,790.0
mean,394.5,17.55443,3.924051,0.192405,1.264557
std,228.197648,48.8594,12.994756,0.750795,2.999727
min,0.0,0.0,0.0,0.0,0.0
25%,197.25,1.0,0.0,0.0,0.0
50%,394.5,2.0,0.0,0.0,0.0
75%,591.75,10.0,2.0,0.0,1.0
max,789.0,604.0,272.0,10.0,38.0


In [59]:
data.isnull().sum()

Unnamed: 0         0
Date               0
TweetURL           0
User               0
Source             0
Location         163
Tweet              0
Likes_Count        0
Retweet_Count      0
Quote_Count        0
Reply_Count        0
dtype: int64

In [60]:
data.duplicated().sum()

0

## Data Preprocessing

In [61]:
# define a function to perform sentiment analysis on a single tweet
def analyze(tweet):
    blob = TextBlob(tweet)
    return blob.sentiment.polarity

# apply the analyze function to each tweet in the dataframe and store the results in a new column
data['sentiment_score'] = data['Tweet'].apply(analyze)

In [62]:
# output the results
data.sample(5)

Unnamed: 0.1,Unnamed: 0,Date,TweetURL,User,Source,Location,Tweet,Likes_Count,Retweet_Count,Quote_Count,Reply_Count,sentiment_score
569,569,2022-06-12 19:21:00+00:00,https://twitter.com/_JosiahAdesola/status/1536...,_JosiahAdesola,Twitter Web App,"Lagos, Nigeria","Access to Microsoft 365 full package, freebies...",8,0,0,0,0.2875
109,109,2022-06-26 21:19:03+00:00,https://twitter.com/TheOyinbooke/status/154116...,TheOyinbooke,Twitter for Android,Global,"I know you are not sleeping so, take 5secs to ...",45,13,0,4,0.5125
721,721,2022-06-08 10:53:31+00:00,https://twitter.com/d_adetunji/status/15344887...,d_adetunji,Twitter Web App,"Ibadan, Nigeria",It was a great session yesterday with @TheOyin...,6,0,0,0,0.8
381,381,2022-06-17 09:15:17+00:00,https://twitter.com/Phatergh/status/1537725560...,Phatergh,Twitter for Android,Metaverse,#30daysoflearning \n#DataAnalytics \nRecap of ...,6,1,0,1,0.30625
404,404,2022-06-16 11:13:50+00:00,https://twitter.com/ZionDoz/status/15373930039...,ZionDoz,Twitter for Android,🌏,Day 1 of #NG30DaysOfLearning\n-Start with Valu...,12,3,0,1,0.0


In [63]:
# define a function to group score based sentiment
def grade(score):
    if score > 0:
        return 'Positive'
    elif score < 0:
        return 'Negative'
    else:
        return 'Neutral'

# apply the grade function to the sentiment_score column and store the results in a new column
data['sentiment'] = data['sentiment_score'].apply(grade)

In [64]:
# output the results
data.sample(5)

Unnamed: 0.1,Unnamed: 0,Date,TweetURL,User,Source,Location,Tweet,Likes_Count,Retweet_Count,Quote_Count,Reply_Count,sentiment_score,sentiment
212,212,2022-06-22 20:44:44+00:00,https://twitter.com/ladun331/status/1539711002...,ladun331,Twitter for iPhone,"Lagos, Nigeria",I created this dashboard in powerBI to show th...,6,2,0,3,0.0,Neutral
34,34,2022-06-29 15:57:34+00:00,https://twitter.com/unstoppable_tp/status/1542...,unstoppable_tp,Twitter for Android,"Ekiti, Nigeria",@TheOyinbooke #NG30DaysOfLearning \n#30daysofl...,0,0,0,0,0.0,Neutral
12,12,2022-06-30 17:06:02+00:00,https://twitter.com/ruc_thakkar/status/1542555...,ruc_thakkar,Twitter Web App,"Ahmadabad City, India",Day 12 of #30daysofcode\n\nCompleted remaining...,5,6,0,1,0.0,Neutral
35,35,2022-06-29 14:49:02+00:00,https://twitter.com/TheOyinbooke/status/154215...,TheOyinbooke,Twitter for Android,Global,Unsupervised Machine Learning in Action💻 🔥\nCl...,24,2,0,1,-0.4,Negative
263,263,2022-06-21 13:21:49+00:00,https://twitter.com/KingOlisa/status/153923715...,KingOlisa,Twitter for Android,"Lagos, Nigeria",Week 2 Built a dashboard on Excel analysing CO...,1,1,0,0,0.0,Neutral


In [65]:
data.to_csv('30DLTweets.csv')