### Tweets Mining and Analysis notebook
Author: Shahbaz Khan

### library imports

In [1]:
import tweepy
# !pip install textblob
from textblob import TextBlob
# !pip install git+https://github.com/JustAnotherArchivist/snscrape.git
import snscrape.modules.twitter as sntwitter
import pandas as pd
import re
import numpy as np

###### tweeter app api key & access token for tweepy

In [18]:
# enter your own consumer & access token keys pair.
consumer_key = "your_api_key"
consumer_secret = "your_api_key_secret"
access_token = "your_access_token_key"
access_token_secret = "your_access_token_secret"

In [9]:
# OAuthHandler object for authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)

# Sets access token and token secret
auth.set_access_token(access_token, access_token_secret)

# Creates API object by passing auth information
api = tweepy.API(auth)

#### Task a: latest trending topic for India. (#tag and no of tweets for the particular #tag)

In [10]:
# twitter india trends by woeid
india_woeid = 2282863
twitter_trends = api.trends_place(id=india_woeid)

trends_list = []

for trend in twitter_trends[0]['trends']:
    trends_list.append([trend['name'], trend['tweet_volume']])

In [12]:
trends_df = pd.DataFrame(trends_list, columns=['#tags', 'Tweets Count'])
trends_df.to_csv('saves/indian_trends.csv', encoding='utf-8', index=True)
trends_df.head(10)

Unnamed: 0,#tags,Tweets Count
0,#FreedomForTibet,48956.0
1,#WorldRadioDay,
2,#KissDay,
3,Rohit,20936.0
4,siraj,
5,Nightingale of India,
6,HYDERABAD INSPIRED BY SSR,21329.0
7,Bumrah,
8,Olly Stone,
9,Kuldeep,


#### Task b:  Extract first 100 tweets for #JoeBiden

In [13]:
query = '#JoeBiden'
count = 100

tweets_list = []

# querry twitter through sntwitter.TwitterSearchScraper and append each tweet to tweets_list array
for i,tweet in enumerate(sntwitter.TwitterSearchScraper(query).get_items()):
    if i > count:
        break
    tweets_list.append([tweet.date, tweet.id, tweet.content, tweet.user.username])

In [14]:
# adds tweets_list to data frame
tweets_df = pd.DataFrame(tweets_list, columns=['Date', 'Tweet Id', 'Text', 'Username'])

# saves tweets in csv file
tweets_df.to_csv('saves/100_biden_tweets.csv', encoding='utf-8', index=True)
tweets_df.head(9)

Unnamed: 0,Date,Tweet Id,Text,Username
0,2021-02-13 05:26:41+00:00,1360460365398740994,@CrossroadsToday He was and is an illegal alie...,WSpiesC
1,2021-02-13 05:26:33+00:00,1360460328354471939,#Colombia #EstadosUnidos #IvánDuque #JoeBiden ...,FlaShBloGLive
2,2021-02-13 05:18:47+00:00,1360458375251644418,Amerika Serikat Akan Membuka Sekolah di Tengah...,BacaDiBaBe
3,2021-02-13 05:17:56+00:00,1360458163590365184,Remember when you could say Trump is not my pr...,down6with6the6
4,2021-02-13 05:17:47+00:00,1360458122163273730,Can someone explain to me how @potus is going ...,shadowreporting
5,2021-02-13 05:16:31+00:00,1360457805979877379,When is #JoeBiden going to fire asshole #TJDuc...,changeillinois
6,2021-02-13 05:16:24+00:00,1360457774002556928,"I want my Fauci ouchie! Vaccinate educators, p...",Amyk5251
7,2021-02-13 05:15:33+00:00,1360457560298557442,#TJDucklo is just one of the many assholes #Jo...,changeillinois
8,2021-02-13 05:14:14+00:00,1360457231330906112,#JoeBiden is released for his ten minute exerc...,chilternbear1


#### Task c: Sentiment Analysis for #JoeBiden (Is it positive/negative, what is the perception)

In [15]:
# Cleans tweet text by removing links, emails, etc
def clean_tweet_text(tweet):
    return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z\t])|(\w+:\/\/\S+)", " ", tweet).split())

tweets_df['Text'] = np.array([clean_tweet_text(tweet) for tweet in tweets_df['Text']])

# Analyse tweet & score 1 for positive, 0 for neutral & -1 for negative sentiment
def sentiment_analyzer(tweet):
    analysis = TextBlob(tweet)
    
    if analysis.sentiment.polarity > 0:
        return 1
    elif analysis.sentiment.polarity == 0:
        return 0
    else: 
        return -1
    
tweets_df['sentiment'] = np.array([sentiment_analyzer(tweet) for tweet in tweets_df['Text']])

# saves tweets df with sentiment column to csv file
tweets_df.to_csv('saves/biden_tweets_sentiments.csv', encoding='utf-8', index=True)

tweets_df.head(10)

Unnamed: 0,Date,Tweet Id,Text,Username,sentiment
0,2021-02-13 05:26:41+00:00,1360460365398740994,He was and is an illegal alien Shouldn t have ...,WSpiesC,-1
1,2021-02-13 05:26:33+00:00,1360460328354471939,Colombia EstadosUnidos Iv nDuque JoeBiden L de...,FlaShBloGLive,0
2,2021-02-13 05:18:47+00:00,1360458375251644418,Amerika Serikat Akan Membuka Sekolah di Tengah...,BacaDiBaBe,0
3,2021-02-13 05:17:56+00:00,1360458163590365184,Remember when you could say Trump is not my pr...,down6with6the6,1
4,2021-02-13 05:17:47+00:00,1360458122163273730,Can someone explain to me how is going to stan...,shadowreporting,-1
5,2021-02-13 05:16:31+00:00,1360457805979877379,When is JoeBiden going to fire asshole TJDucklo,changeillinois,0
6,2021-02-13 05:16:24+00:00,1360457774002556928,I want my Fauci ouchie Vaccinate educators ple...,Amyk5251,0
7,2021-02-13 05:15:33+00:00,1360457560298557442,TJDucklo is just one of the many assholes JoeB...,changeillinois,1
8,2021-02-13 05:14:14+00:00,1360457231330906112,JoeBiden is released for his ten minute exerci...,chilternbear1,0
9,2021-02-13 05:14:04+00:00,1360457188507086848,JoeBiden can kiss the crack of myblackass unti...,Thotful_Musings,-1


In [17]:
tweets_df['sentiment'].mean()

0.2079207920792079

###### overall sentiments are positive