# Intro

This notebook pulls the latest tweets from Twitter relating to "apples", "#apples", or "@apples" (excluding retweets) from the last 7 days, applies sentiment analysis using Vader, and updates a running list in Google Sheets.

You could extend it to other types of fruit!

Note that the free tier of the Twitter API only allows access to the last 7 days of data, so we suggest running this notebook at least once every 7 days to avoid gaps in the running list.

# Code

To run all: Runtime/Cell > Run all

In [None]:
!pip install --upgrade --quiet gspread
!pip install gspread-dataframe
!pip install jsonpickle
!pip install tweepy
!pip install vaderSentiment

In [None]:
import pandas as pd
import tweepy as tw
import json
import jsonpickle

In [None]:
# Step 1 - Authenticate
# Replace the four placeholders below with the credentials of your Twitter app.
consumer_key = 'YOUR_KEY'
consumer_secret = 'YOUR_SECRET'
access_token = 'YOUR_TOKEN'
access_token_secret = 'YOUR_TOKEN_SECRET'

# Build the OAuth handler from the app keys, attach the user token to it,
# then create the API client that the later steps use.
auth = tw.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tw.API(auth)

In [None]:
#@title
# Optional debugging aid (to see the fields available on a tweet):
# uncomment the lines below to fetch a single tweet and pretty-print its JSON.
# NOTE: tweepy 4.x renamed `api.search` to `api.search_tweets`; adjust the
# call below to match the installed tweepy version.
# temp = tw.Cursor(api.search,q="apples",lang='en').items(1)
# for tweet in temp:
#     j = jsonpickle.encode(tweet._json, unpicklable=False)
#     parsed = json.loads(j)
#     print(json.dumps(parsed, indent=4, sort_keys=True))

In [None]:
#@title
# Functions to get/save/load tweets
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from datetime import datetime
import re

def get_save_tweets(filepath, api, queries, max_tweets=1000, lang='en'):
    """Run each search query and save the matching tweets, one JSON object per line.

    Args:
        filepath: Path of the output file. It is opened in 'w' mode, so any
            existing content is overwritten.
        api: Authenticated tweepy API client.
        queries: Iterable of Twitter search query strings.
        max_tweets: Maximum number of tweets to fetch *per query* (not in total).
        lang: Language code passed to the search endpoint.

    Returns:
        None. The number of tweets written is printed.
    """
    # tweepy 4.x renamed API.search to API.search_tweets; support both so the
    # notebook works with whichever tweepy version pip installed.
    search = getattr(api, 'search_tweets', None)
    if search is None:
        search = api.search

    tweet_count = 0

    # Open file and save tweets as JSON (one tweet per line)
    with open(filepath, 'w') as f:
        for query in queries:
            for tweet in tw.Cursor(search, q=query, lang=lang).items(max_tweets):
                f.write(jsonpickle.encode(tweet._json, unpicklable=False) + '\n')
                tweet_count += 1

    print("Downloaded {0} tweets".format(tweet_count))
        
def tweets_to_df(path):
    """Load saved tweets from ``path`` and flatten them into a DataFrame.

    The file is expected to hold one JSON-encoded tweet per line (the format
    written by ``get_save_tweets``). Blank lines are skipped.

    Args:
        path: Path of the tweets file to read.

    Returns:
        pandas.DataFrame with one row per kept tweet and the columns
        id, text, sentiment, date, hour, weekday, has_hashtag, has_url,
        fav_count, is_reply, retweet_count, followers, following, user,
        screen_name (in that order). ``sentiment`` is the VADER compound score
        of the text with URLs removed; the ``has_*`` / ``is_reply`` columns are
        0/1 flags. An empty file yields an empty frame with the same columns.
    """
    columns = ['id', 'text', 'sentiment', 'date', 'hour', 'weekday',
               'has_hashtag', 'has_url', 'fav_count', 'is_reply',
               'retweet_count', 'followers', 'following', 'user',
               'screen_name']
    analyser = SentimentIntensityAnalyzer()
    records = []

    # The context manager guarantees the file handle is closed.
    with open(path, 'rt') as f:
        for line in f:
            if not line.strip():
                continue

            # NOTE(review): jsonpickle.decode can rebuild Python objects from
            # tagged input; only feed it files this notebook wrote itself.
            t = jsonpickle.decode(line)
            author = t['user']

            # Ignore if from a Verizon account
            # NOTE(review): unclear why Verizon is excluded here - confirm.
            if 'Verizon' in author['name']:
                continue

            # Remove urls before scoring so they do not affect the sentiment
            trim_text = re.sub(r"http\S+", "", t['text'])

            # created_at is parsed with a literal '+0000' offset, so the
            # derived date/hour/weekday are in UTC.
            created = datetime.strptime(t['created_at'],
                                        '%a %b %d %H:%M:%S +0000 %Y')

            records.append({
                'id': t['id'],
                'text': t['text'],
                'sentiment': analyser.polarity_scores(trim_text)['compound'],
                'date': created.strftime("%Y-%m-%d"),
                'hour': created.strftime("%H"),
                'weekday': created.strftime("%a"),
                'has_hashtag': 1 if t['entities']['hashtags'] else 0,
                'has_url': 1 if t['entities']['urls'] else 0,
                'fav_count': t['favorite_count'],
                'is_reply': 0 if t['in_reply_to_status_id'] is None else 1,
                'retweet_count': t['retweet_count'],
                'followers': author['followers_count'],
                'following': author['friends_count'],
                'user': author['name'],
                'screen_name': author['screen_name'],
            })

    # Passing columns explicitly keeps the schema even when no tweets were kept.
    return pd.DataFrame(records, columns=columns)

In [None]:
# Step 2 - Get apple-related tweets
# One search per way of referring to apples (mention, phrase, hashtag);
# each query filters out retweets.
queries = [
    '@apples -filter:retweets',
    '"apples" -filter:retweets',
    '#apples -filter:retweets',
]
get_save_tweets(filepath='tweets.json', api=api, queries=queries)

In [None]:
# Step 3 - Load tweets into data frame
# Parse the file saved in Step 2 and preview the first row as a sanity check.
tweets_df = tweets_to_df(path='tweets.json')
tweets_df.head(n=1)

In [None]:
# Step 4 - Open Google Sheet and get existing tweets
# NOTE: this import rebinds the name `auth`, which Step 1 used for the tweepy
# OAuth handler. `api` has already been built by then, so it is unaffected.
from google.colab import auth
auth.authenticate_user()

import gspread
from oauth2client.client import GoogleCredentials

SHEET_NAME = "YOUR_SHEET_NAME"
WORKSHEET_NAME = "YOUR_WORKSHEET_NAME"

gc = gspread.authorize(GoogleCredentials.get_application_default())

worksheet = gc.open(SHEET_NAME).worksheet(WORKSHEET_NAME)

# The first row of the sheet is the header; the remaining rows are the tweets
# stored by previous runs.
rows = worksheet.get_all_values()
if rows and rows[0]:
    existing_df = pd.DataFrame.from_records(rows[1:], columns=rows[0])
else:
    # Blank worksheet (no header row yet, e.g. on the very first run): start
    # from an empty frame that still has the `id` column Step 5 looks up.
    existing_df = pd.DataFrame(columns=['id'])
existing_df.head(1)

In [None]:
# Step 5 - Identify new tweets (not in existing)
# The three search queries overlap, so the same tweet can be downloaded more
# than once; keep a single row per tweet id.
unique_tweets_df = tweets_df.drop_duplicates(subset='id')

# Values read back from the sheet are strings while ids from the API are
# integers, so compare both sides as strings; otherwise nothing ever matches
# and every run re-adds all tweets.
# NOTE(review): assumes the sheet returns ids in full rather than in a
# rounded/scientific display format - confirm the column formatting.
already_stored = unique_tweets_df['id'].astype(str).isin(existing_df['id'].astype(str))
new_tweets_df = unique_tweets_df[~already_stored]
new_tweets_df

In [None]:
# Step 6 - Write all tweets to the sheet
from gspread_dataframe import set_with_dataframe

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
# Newest tweets first: sort by date, then hour, both descending.
all_tweets = (
    pd.concat([existing_df, new_tweets_df], sort=False)
    .sort_values(['date', 'hour'], ascending=[False, False])
)
set_with_dataframe(worksheet, all_tweets)
print("Done: added %d new tweets to sheet" % len(new_tweets_df))