##### FETCHING TWEETS USING TWEEPY

In [1]:
import tweepy
import pandas as pd
import numpy as np
# Load the Twitter API credentials from a local CSV file so they are not
# written into the notebook itself.
keys = pd.read_csv('twitter_api_keys.csv')

In [2]:
# Credentials used to access the Twitter API, read from the 'API Keys' column.
# NOTE(review): the row order (access token, access secret, consumer key,
# consumer secret) is taken from the assignment order here - confirm it
# matches the layout of the CSV file.
api_keys = keys['API Keys']
access_token, access_secret, consumer_key, consumer_secret = (
    api_keys[row] for row in range(4)
)

In [3]:
# Authentication handler built from the consumer (application) credentials ...
authenticate = tweepy.OAuthHandler(consumer_key, consumer_secret)
# ... with the access token / access token secret attached to it.
authenticate.set_access_token(access_token, access_secret)

# API client bound to the handler. wait_on_rate_limit=True asks tweepy to
# wait for the rate limit to replenish instead of failing the request.
api = tweepy.API(authenticate, wait_on_rate_limit=True)

In [4]:
# Tweets from a specific user: the most recent statuses on the @nytimes timeline.
# The account is passed as the screen_name keyword because tweepy 4.x made the
# arguments of API.user_timeline keyword-only; the keyword form also works on
# tweepy 3.x. count=200 requests up to 200 statuses in one call.
ny_tweets = api.user_timeline(screen_name='nytimes', count=200)
for tweet in ny_tweets:
    print(tweet.text)

South Korean officials questioned the accuracy of news reports that Kim Jong-un was recovering from heart surgery.… https://t.co/c41Zr6GER2
Morning briefing: Here's what you need to know to start your day https://t.co/XrCwNBRAH5
For today, April 21, here’s how to follow the latest on the coronavirus outbreak. We’ll update this every day. 

🌎… https://t.co/NdVFfWC0Z8
Educators in the U.S. say they want to teach from home during the coronavirus pandemic, but are fighting to limit w… https://t.co/r1auvzyrQ7
At least 26,000 more people died during the coronavirus pandemic over the last month than official death counts rep… https://t.co/8sez8zHiCS
President Trump portrayed suspending immigration as a bid to save Americans’ jobs amid the coronavirus pandemic. Im… https://t.co/l9mfD5L8LJ
In January, at a restaurant in Guangzhou, China, an infected diner who was not yet feeling sick appeared to have sp… https://t.co/03vm00t6K3
A letter to the editors of four British tabloids said that Harry an

In [5]:
# function to extract data from tweet objects
def extract_tweet(tweet_object):
    """Flatten an iterable of tweet status objects into a DataFrame.

    Parameters
    ----------
    tweet_object : iterable
        Status-like objects exposing ``id``, ``text`` (or ``full_text``),
        ``created_at``, ``source``, ``retweet_count`` and ``favorite_count``.

    Returns
    -------
    pandas.DataFrame
        One row per tweet with the columns ``tweet_id``, ``text``, ``time``,
        ``source``, ``retweets`` and ``favorites``. An empty input yields an
        empty frame that still carries these columns.

    Raises
    ------
    AttributeError
        If an element lacks one of the attributes listed above.
    """
    columns = ['tweet_id', 'text', 'time', 'source', 'retweets', 'favorites']
    rows = []
    for tweet in tweet_object:
        # Statuses fetched with tweet_mode='extended' carry the untruncated
        # body in ``full_text`` and have no ``text`` attribute; otherwise
        # fall back to the (possibly truncated) ``text``.
        text = tweet.full_text if hasattr(tweet, 'full_text') else tweet.text
        rows.append({
            'tweet_id': tweet.id,                 # unique integer identifier for tweet
            'text': text,                         # text of tweet
            'time': tweet.created_at,             # time tweet was created
            'source': tweet.source,               # utility used to post tweet
            'retweets': tweet.retweet_count,      # number of times this tweet was retweeted
            'favorites': tweet.favorite_count,    # number of times this tweet was liked
        })
    # Passing the column list explicitly fixes the column order and keeps
    # the columns present when there are no rows.
    return pd.DataFrame(rows, columns=columns)


# Build the working DataFrame (one row per status) from the fetched timeline.
df = extract_tweet(ny_tweets)

In [6]:
# Show the extracted tweets as a table.
df

Unnamed: 0,tweet_id,text,time,source,retweets,favorites
0,1252567790713810944,South Korean officials questioned the accuracy...,2020-04-21 12:00:07,SocialFlow,59,108
1,1252562739966480384,Morning briefing: Here's what you need to know...,2020-04-21 11:40:03,SocialFlow,53,142
2,1252559439976071169,"For today, April 21, here’s how to follow the ...",2020-04-21 11:26:56,Twitter Web App,27,47
3,1252557721741778946,Educators in the U.S. say they want to teach f...,2020-04-21 11:20:06,SocialFlow,100,303
4,1252552683954847745,"At least 26,000 more people died during the co...",2020-04-21 11:00:05,SocialFlow,274,381
...,...,...,...,...,...,...
195,1251938334093475844,The FDA eased its rules and allowed antibody t...,2020-04-19 18:18:53,Twitter Web App,17,21
196,1251938332034191371,Many scientists and political leaders say the ...,2020-04-19 18:18:52,Twitter Web App,23,46
197,1251938330318692352,Our reporters spoke to dozens of people for th...,2020-04-19 18:18:52,Twitter Web App,20,44
198,1251938328548704257,Blood tests for coronavirus antibodies are see...,2020-04-19 18:18:52,Twitter Web App,193,405


In [7]:
# Raw tweet text column, before any cleaning is applied.
df['text']

0      South Korean officials questioned the accuracy...
1      Morning briefing: Here's what you need to know...
2      For today, April 21, here’s how to follow the ...
3      Educators in the U.S. say they want to teach f...
4      At least 26,000 more people died during the co...
                             ...                        
195    The FDA eased its rules and allowed antibody t...
196    Many scientists and political leaders say the ...
197    Our reporters spoke to dozens of people for th...
198    Blood tests for coronavirus antibodies are see...
199    North Korea denied President Trump’s claim tha...
Name: text, Length: 200, dtype: object

###### REMOVING THE USER HANDLES

In [8]:
# Removing user handles, then the standalone "RT" retweet marker.
# Raw strings keep \w and \b as regex escapes rather than Python string
# escapes. The \b word boundaries restrict the match to "RT" as a whole word,
# so the letters are not cut out of other upper-case words (e.g. "ALERT").
df['tidy_tweet'] = df['text'].replace(to_replace=r'@\w+', value='', regex=True)
df['tidy_tweet'] = df['tidy_tweet'].replace(to_replace=r'\bRT\b', value='', regex=True)
df['tidy_tweet']

0      South Korean officials questioned the accuracy...
1      Morning briefing: Here's what you need to know...
2      For today, April 21, here’s how to follow the ...
3      Educators in the U.S. say they want to teach f...
4      At least 26,000 more people died during the co...
                             ...                        
195    The FDA eased its rules and allowed antibody t...
196    Many scientists and political leaders say the ...
197    Our reporters spoke to dozens of people for th...
198    Blood tests for coronavirus antibodies are see...
199    North Korea denied President Trump’s claim tha...
Name: tidy_tweet, Length: 200, dtype: object

##### REMOVE THE LINKS FROM THE TWEETS

In [9]:
# Remove any links from the tweet: links are not required for performing sentiment analysis.
# - A link is "www." or "http(s)://" followed by a run of non-whitespace (\S+).
# - The replacement is the empty string; note that '\0' in an ordinary string
#   literal is a NUL character, not "nothing".
# - The pattern is a raw string so the backslashes reach the regex engine intact.
df['tidy_tweet'] = df['tidy_tweet'].str.replace(r'www\.\S+|https?://\S+', '', regex=True)
df['tidy_tweet']

0      South Korean officials questioned the accuracy...
1      Morning briefing: Here's what you need to know...
2      For today, April 21, here’s how to follow the ...
3      Educators in the U.S. say they want to teach f...
4      At least 26,000 more people died during the co...
                             ...                        
195    The FDA eased its rules and allowed antibody t...
196    Many scientists and political leaders say the ...
197    Our reporters spoke to dozens of people for th...
198    Blood tests for coronavirus antibodies are see...
199    North Korea denied President Trump’s claim tha...
Name: tidy_tweet, Length: 200, dtype: object

##### REMOVE SPECIAL CHARACTERS, PUNCTUATION, NUMBERS

In [10]:
# Remove special characters, numbers and punctuation: none of them would add any value to the sentiment score.
# Every run of non-letter characters collapses to a single space.
# regex=True is passed explicitly: since pandas 2.0 Series.str.replace defaults
# to regex=False, which would treat the pattern as literal text and remove nothing.
df['tidy_tweet'] = df['tidy_tweet'].str.replace(r"[^a-zA-Z]+", " ", regex=True)

In [11]:
# Inspect the text after everything except ASCII letters was replaced by spaces.
df['tidy_tweet']

0      South Korean officials questioned the accuracy...
1      Morning briefing Here s what you need to know ...
2      For today April here s how to follow the lates...
3      Educators in the U S say they want to teach fr...
4      At least more people died during the coronavir...
                             ...                        
195    The FDA eased its rules and allowed antibody t...
196    Many scientists and political leaders say the ...
197    Our reporters spoke to dozens of people for th...
198    Blood tests for coronavirus antibodies are see...
199    North Korea denied President Trump s claim tha...
Name: tidy_tweet, Length: 200, dtype: object

##### TOKENIZING AND REMOVING THE STOP WORDS

In [12]:
from nltk.corpus import stopwords

# Lower-case every tweet, then split it on whitespace into a list of tokens.
df["tidy_tweet"] = df["tidy_tweet"].str.lower().str.split()

# English stop-word list from the NLTK stopwords corpus.
stop = stopwords.words('english')

In [13]:
# Drop stop words from each token list.
# Membership is tested against a set built once, rather than scanning the
# stop-word list again for every token; the filtered result is the same.
stop_words = set(stop)
df['tidy_tweet'] = df['tidy_tweet'].apply(
    lambda tokens: [token for token in tokens if token not in stop_words]
)
df['tidy_tweet']

0      [south, korean, officials, questioned, accurac...
1            [morning, briefing, need, know, start, day]
2      [today, april, follow, latest, coronavirus, ou...
3      [educators, u, say, want, teach, home, coronav...
4      [least, people, died, coronavirus, pandemic, l...
                             ...                        
195    [fda, eased, rules, allowed, antibody, tests, ...
196    [many, scientists, political, leaders, say, co...
197    [reporters, spoke, dozens, people, article, in...
198    [blood, tests, coronavirus, antibodies, seen, ...
199    [north, korea, denied, president, trump, claim...
Name: tidy_tweet, Length: 200, dtype: object

In [14]:
def rejoin_words(row):
    """Return the tokens stored under ``row['tidy_tweet']`` as one space-separated string."""
    return " ".join(row['tidy_tweet'])

# Join each token list back into a single space-separated string.
# The vectorised .str.join works on the column directly, so there is no need
# for a row-wise apply over the whole frame.
df['tidy_tweet'] = df['tidy_tweet'].str.join(' ')


In [15]:
# Stop-word-free tokens, re-joined into one string per tweet.
df['tidy_tweet']

0      south korean officials questioned accuracy new...
1                   morning briefing need know start day
2      today april follow latest coronavirus outbreak...
3      educators u say want teach home coronavirus pa...
4      least people died coronavirus pandemic last mo...
                             ...                        
195    fda eased rules allowed antibody tests many ma...
196    many scientists political leaders say country ...
197    reporters spoke dozens people article includin...
198    blood tests coronavirus antibodies seen crucia...
199    north korea denied president trump claim leade...
Name: tidy_tweet, Length: 200, dtype: object

##### LEMMATIZATION

In [16]:
import nltk

# WordNet-based lemmatizer used to reduce each token to its lemma.
lemmatizer = nltk.stem.WordNetLemmatizer()
# Tokenizer that splits a string on whitespace.
w_tokenizer = nltk.tokenize.WhitespaceTokenizer()

def lemmatize_text(text):
    """Split ``text`` on whitespace and return the list of lemmatized tokens.

    Each token is passed to ``lemmatizer.lemmatize`` without a part-of-speech
    argument.
    """
    return list(map(lemmatizer.lemmatize, w_tokenizer.tokenize(text)))

# Lemmatize every cleaned tweet; each row becomes a list of lemmas.
df['tidy_tweet'] = df['tidy_tweet'].map(lemmatize_text)

In [17]:
# Each row now holds the list of lemmatized tokens.
df['tidy_tweet']

0      [south, korean, official, questioned, accuracy...
1            [morning, briefing, need, know, start, day]
2      [today, april, follow, latest, coronavirus, ou...
3      [educator, u, say, want, teach, home, coronavi...
4      [least, people, died, coronavirus, pandemic, l...
                             ...                        
195    [fda, eased, rule, allowed, antibody, test, ma...
196    [many, scientist, political, leader, say, coun...
197    [reporter, spoke, dozen, people, article, incl...
198    [blood, test, coronavirus, antibody, seen, cru...
199    [north, korea, denied, president, trump, claim...
Name: tidy_tweet, Length: 200, dtype: object

In [18]:
# Join the lemmatized tokens back into one space-separated string per tweet.
# rejoin_words is already defined earlier in the notebook with an identical
# body, so it is not defined a second time here; the vectorised .str.join
# performs the same join directly on the column.
df['tidy_tweet'] = df['tidy_tweet'].str.join(' ')

In [19]:
# Lemmatized tokens joined back into one string per tweet.
df['tidy_tweet']

0      south korean official questioned accuracy news...
1                   morning briefing need know start day
2      today april follow latest coronavirus outbreak...
3      educator u say want teach home coronavirus pan...
4      least people died coronavirus pandemic last mo...
                             ...                        
195    fda eased rule allowed antibody test many made...
196    many scientist political leader say country no...
197    reporter spoke dozen people article including ...
198    blood test coronavirus antibody seen crucial a...
199    north korea denied president trump claim leade...
Name: tidy_tweet, Length: 200, dtype: object

In [20]:
# Count missing values per column to check that no nulls are present after cleaning.
df.isnull().sum()

tweet_id      0
text          0
time          0
source        0
retweets      0
favorites     0
tidy_tweet    0
dtype: int64

##### CHECK THE SENTIMENT USING TEXTBLOB 

In [21]:
from textblob import TextBlob

# Score every cleaned tweet once with TextBlob and store the two fields of the
# result (polarity, subjectivity) as separate columns. The scores are gathered
# in a plain list first, which avoids building a pandas Series for every row.
sentiments = [TextBlob(text).sentiment for text in df['tidy_tweet']]
df['polarity'] = [score.polarity for score in sentiments]
df['subjectivity'] = [score.subjectivity for score in sentiments]

In [22]:
# Original tweet text next to the TextBlob polarity and subjectivity scores.
df[['text','polarity', 'subjectivity']]

Unnamed: 0,text,polarity,subjectivity
0,South Korean officials questioned the accuracy...,0.000000,0.000000
1,Morning briefing: Here's what you need to know...,0.000000,0.000000
2,"For today, April 21, here’s how to follow the ...",0.500000,0.900000
3,Educators in the U.S. say they want to teach f...,0.000000,0.000000
4,"At least 26,000 more people died during the co...",-0.150000,0.233333
...,...,...,...
195,The FDA eased its rules and allowed antibody t...,0.500000,0.500000
196,Many scientists and political leaders say the ...,0.166667,0.366667
197,Our reporters spoke to dozens of people for th...,0.000000,0.000000
198,Blood tests for coronavirus antibodies are see...,0.000000,1.000000


In [23]:
# Pick one tweet (row label 100): the raw text for display and its cleaned
# counterpart for the classifier.
single_tweet = df.loc[100, 'text']
tidy_tweet1 = df.loc[100, 'tidy_tweet']

In [25]:
from textblob.sentiments import NaiveBayesAnalyzer

# Classify the cleaned tweet with TextBlob's Naive Bayes analyzer and print
# the verdict underneath the original tweet text.
analysis = TextBlob(tidy_tweet1, analyzer=NaiveBayesAnalyzer())
print(single_tweet)
# The first field of the analyzer's result is the class label.
sentiment = analysis.sentiment.classification
# Any label other than 'pos' is reported as negative.
print('Positive Sentiment' if sentiment == 'pos' else 'Negative Sentiment')


Do environmental rules matter? “As a reporter in Washington for more than 20 years, I’ve had a front-row seat to th… https://t.co/mOu6rdx6W6
Negative Sentiment
