## Understanding Tweepy

In [1]:
# to interact with Twitter API
import tweepy
# for reading authentication keys from the environment
from os import getenv

In [2]:
# Read the Twitter API credentials from the process environment.
# getenv yields None for any variable that is not set.
TWITTER_API_KEY, TWITTER_API_KEY_SECRET = (
    getenv(var_name)
    for var_name in ('TWITTER_API_KEY', 'TWITTER_API_KEY_SECRET')
)

In [3]:
# Build an API client authenticated with the API key and secret loaded above.
auth = tweepy.OAuthHandler(TWITTER_API_KEY, TWITTER_API_KEY_SECRET)
TWITTER = tweepy.API(auth)
# Look the account up explicitly by screen name. A positional argument is
# treated as a generic `id` by Tweepy 3.x and is rejected by Tweepy 4+,
# whereas the `screen_name` keyword works on both.
elon = TWITTER.get_user(screen_name='elonmusk')

In [4]:
# List every attribute and method exposed by the fetched user object
dir(elon)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_api',
 '_json',
 'contributors_enabled',
 'created_at',
 'default_profile',
 'default_profile_image',
 'description',
 'entities',
 'favourites_count',
 'follow',
 'follow_request_sent',
 'followers',
 'followers_count',
 'followers_ids',
 'following',
 'friends',
 'friends_count',
 'geo_enabled',
 'has_extended_profile',
 'id',
 'id_str',
 'is_translation_enabled',
 'is_translator',
 'lang',
 'listed_count',
 'lists',
 'lists_memberships',
 'lists_subscriptions',
 'location',
 'name',
 'notifications',
 'parse',
 'parse_list',
 'profile_background_color',
 'profile_background_image_url',
 'profile_back

In [5]:
# List the attributes of the status object attached to the user
# (presumably the user's most recent tweet - confirm against the API docs)
dir(elon.status)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_api',
 '_json',
 'contributors',
 'coordinates',
 'created_at',
 'destroy',
 'entities',
 'favorite',
 'favorite_count',
 'favorited',
 'geo',
 'id',
 'id_str',
 'in_reply_to_screen_name',
 'in_reply_to_status_id',
 'in_reply_to_status_id_str',
 'in_reply_to_user_id',
 'in_reply_to_user_id_str',
 'is_quote_status',
 'lang',
 'parse',
 'parse_list',
 'place',
 'retweet',
 'retweet_count',
 'retweeted',
 'retweeted_status',
 'retweets',
 'source',
 'source_url',
 'text',
 'truncated']

In [6]:
# Text of the first status returned by the user's timeline (default options)
elon.timeline()[0].text

"RT @Tesla: There's no place like home..especially if it has a Solar Roof.\n\nThis Kansas ranch got theirs installed in 4 days."

In [7]:
# Screen name this status replies to; the cell displays nothing, i.e. the
# value is None for this status
elon.timeline()[0].in_reply_to_screen_name

In [8]:
# False even though the text shown earlier starts with "RT @Tesla".
# NOTE(review): `retweeted` presumably reports whether the authenticated
# account retweeted this status, not whether the status is itself a
# retweet - confirm against the Twitter API docs.
elon.timeline()[0].retweeted

False

In [9]:
# Creation timestamp of the first timeline status, returned as a datetime
elon.timeline()[0].created_at

datetime.datetime(2020, 10, 17, 6, 20, 21)

In [10]:
# How to get the text of a tweet: request the timeline in extended mode,
# leaving out replies and retweets, and read `full_text` from the first
# status that comes back.
(
    elon.timeline(
        tweet_mode='extended',
        since_id=None,
        count=200,
        exclude_replies=True,
        include_rts=False,
    )[0]
    .full_text
)

'The gauntlet has been thrown down! \n\nThe prophecy will be fulfilled. \n\nModel S price changes to $69,420 tonight!'

## Understanding the Analysis

In [11]:
"""SQLAlchemy models and utility functions for TwitOff."""
from flask_sqlalchemy import SQLAlchemy

# Shared Flask-SQLAlchemy handle; the model classes below derive from DB.Model
DB = SQLAlchemy()


class User(DB.Model):
    """Twitter user whose tweets are stored in the Tweet table.

    Columns:
        id: primary key (BigInteger); add_or_update_user fills it with the
            Twitter account id.
        name: required string of at most 15 characters; add_or_update_user
            fills it with the username it was called with.
        newest_tweet_id: id of the most recent tweet fetched so far
            (nullable).
    """
    id = DB.Column(DB.BigInteger, primary_key=True)
    name = DB.Column(DB.String(15), nullable=False)
    # Tweet IDs are ordinal ints, so the newest stored ID can be passed as
    # `since_id` to fetch only more recent tweets
    newest_tweet_id = DB.Column(DB.BigInteger)

    def __repr__(self):
        # Short debugging representation that shows only the screen name
        return '<User {}>'.format(self.name)


class Tweet(DB.Model):
    """Tweet text together with its embedding vector.

    NOTE(review): the original docstring credited Basilica for the
    embeddings, while the cells in this notebook compute them with spaCy -
    confirm which source the app actually uses.
    """
    id = DB.Column(DB.BigInteger, primary_key=True)
    text = DB.Column(DB.Unicode(300))  # Allows for the full tweet text plus links
    # Embedding is stored as a pickled Python object and is required
    embedding = DB.Column(DB.PickleType, nullable=False)
    # Every tweet must reference the User row it belongs to
    user_id = DB.Column(DB.BigInteger, DB.ForeignKey('user.id'), nullable=False)
    # Also exposes the reverse collection as `User.tweets`
    user = DB.relationship('User', backref=DB.backref('tweets', lazy=True))

    def __repr__(self):
        # Debugging representation that shows the tweet text
        return '<Tweet {}>'.format(self.text)

In [21]:
import spacy
# Load the spaCy 'en_core_web_md' pipeline; the document vectors printed in
# the embedding examples below have shape (300,)
nlp = spacy.load('en_core_web_md')

In [30]:
# Embedding example: take the first status from the filtered timeline
# (no replies, no retweets, extended mode) and turn it into a vector.
example_tweet = (
    elon.timeline(
        tweet_mode='extended',
        since_id=None,
        count=200,
        exclude_replies=True,
        include_rts=False,
    )[0]
    .full_text
)
char_count = len(example_tweet)
print(example_tweet)
print("Characters in Tweet:", char_count, "\n")

# The whole tweet is reduced to one fixed-size document vector.
vector = nlp(example_tweet).vector
print('Shape', vector.shape)
# Show only the first 30 components to keep the output short.
print(vector[:30])

The gauntlet has been thrown down! 

The prophecy will be fulfilled. 

Model S price changes to $69,420 tonight!
Characters in Tweet: 112 

Shape (300,)
[-9.4506284e-03  1.5336229e-01 -2.5372095e-03 -9.5228769e-02
 -2.7187144e-02 -1.0545822e-01 -1.6963283e-03  4.8190664e-02
 -2.1860823e-02  1.8075575e+00 -1.1649820e-01  1.1165074e-01
  5.4758918e-02 -1.8347213e-02 -4.3208938e-02 -3.8607262e-02
 -8.6710207e-02  9.1198415e-01 -1.3321900e-01 -2.7166331e-02
 -2.6441609e-02 -4.9684543e-02 -2.8614223e-02  5.8328699e-02
 -6.8295249e-03  1.0789394e-01 -8.1784010e-02  2.0494087e-02
 -2.6310230e-02  3.7932806e-02]


In [31]:
# Embedding example for the second status (index 1) of the filtered timeline.
# The text is bound to `roomy_tweet`, the name the print statements read, and
# the vector is computed from this tweet rather than from `example_tweet`,
# which belongs to the previous example.
roomy_tweet = elon.timeline(
            count=200, exclude_replies=True, include_rts=False,
            tweet_mode='extended', since_id=None)[1].full_text
print(roomy_tweet)
print("Characters in Tweet:", len(roomy_tweet), "\n")

vector = nlp(roomy_tweet).vector
print('Shape', vector.shape)
# Show only the first 30 components to keep the output short.
print(vector[:30])

Will be less roomy with 3 vacuum rocket engines added https://t.co/pKtDFdiZYC
Characters in Tweet: 77 

Shape (300,)
[-0.08404455  0.11927336 -0.13547546 -0.02787445 -0.08094203 -0.10540713
 -0.14473881 -0.14430866  0.16513301  1.3458518   0.06666209 -0.10184363
 -0.09117419  0.0021664  -0.27958182 -0.15109317 -0.01114936  1.34824
 -0.34028542  0.05826197 -0.13286272  0.01760194 -0.02729966 -0.03167208
 -0.09737719  0.09402583 -0.15103136 -0.048516    0.1156179   0.03222555]


In [32]:
# Embedding example for the status at index 8 of the filtered timeline
# (no replies, no retweets, extended mode).
example_tweet = (
    elon.timeline(
        tweet_mode='extended',
        since_id=None,
        count=200,
        exclude_replies=True,
        include_rts=False,
    )[8]
    .full_text
)
char_count = len(example_tweet)
print(example_tweet)
print("Characters in Tweet:", char_count, "\n")

# The whole tweet is reduced to one fixed-size document vector.
vector = nlp(example_tweet).vector
print('Shape', vector.shape)
# Show only the first 30 components to keep the output short.
print(vector[:30])

Rewatched Young Frankenstein this weekend. Still awesome. Ovaltine? https://t.co/WiMdyFSuiq
Characters in Tweet: 91 

Shape (300,)
[ 0.04336233  0.08990633 -0.07694017 -0.12693374 -0.01657242  0.03619684
  0.0665415  -0.09073585  0.05467108  1.3778566  -0.2515944   0.11460087
  0.08428337  0.05942682  0.00580774 -0.0925743   0.0160935   0.5512089
 -0.02123524 -0.00896758  0.171113   -0.08572566  0.11272017 -0.13973583
 -0.04344182  0.18941484 -0.16934758 -0.06531025  0.12107075  0.063259  ]


In [17]:
def add_or_update_user(username):
    """Fetch a Twitter user and their recent tweets and return an unsaved User.

    Notebook walk-through of the app's add/update flow. Nothing is written to
    a database here; the ``else`` branch only marks the point where the app
    would commit.

    Args:
        username: Twitter screen name to look up.

    Returns:
        A newly constructed ``User`` whose ``newest_tweet_id`` is set to the
        id of the first tweet returned by the timeline call, when any tweets
        come back.

    Raises:
        Exception: any error raised while calling the API or vectorizing a
            tweet is reported with ``print`` and then re-raised unchanged.
    """
    try:
        # Look the account up explicitly by screen name. A positional
        # argument is treated as a generic `id` by Tweepy 3.x and is rejected
        # by Tweepy 4+, whereas the keyword works on both.
        twitter_user = TWITTER.get_user(screen_name=username)
        # Build the User row in memory only; it is not added to a session.
        db_user = User(id=twitter_user.id, name=username)

        # db_user was constructed just above, so newest_tweet_id has not been
        # set and since_id is passed as None - the same value the exploratory
        # cells pass explicitly, i.e. no lower bound on tweet ids.
        tweets = twitter_user.timeline(
            count=200, exclude_replies=True, include_rts=False,
            tweet_mode='extended', since_id=db_user.newest_tweet_id)
        # Remember the first returned tweet (presumably the newest - confirm
        # the timeline ordering) so a later call could fetch only newer ones.
        if tweets:
            db_user.newest_tweet_id = tweets[0].id
        for tweet in tweets:
            # NOTE(review): vectorize_tweet is not defined in the visible
            # cells and must exist before this function is called. The result
            # is discarded here; the app would presumably store it on a Tweet.
            embedding = vectorize_tweet(tweet.full_text)

    except Exception as e:
        print('Error processing {}: {}'.format(username, e))
        # Bare raise re-raises the active exception with its traceback intact.
        raise

    else:
        print("Commit Changes if in App")
        return db_user
    

In [18]:
# Run the add/update flow for the MorningBrew account and keep the returned User
mb = add_or_update_user('MorningBrew')

Commit Changes if in App


In [19]:
# A tweet id was recorded, so the timeline call returned at least one tweet
mb.newest_tweet_id

1317226185118654472

In [33]:
# Example of Embedding
example_tweet = elon.timeline(
            count=200, exclude_replies=True, include_rts=False,
            tweet_mode='extended', since_id=None)[8].full_text
print(example_tweet)
print("Characters in Tweet:", len(example_tweet), "\n")

vector = nlp(example_tweet).vector
print('Shape', vector.shape)
print(vector[:30])

Rewatched Young Frankenstein this weekend. Still awesome. Ovaltine? https://t.co/WiMdyFSuiq
Characters in Tweet: 91 

Shape (300,)
[ 0.04336233  0.08990633 -0.07694017 -0.12693374 -0.01657242  0.03619684
  0.0665415  -0.09073585  0.05467108  1.3778566  -0.2515944   0.11460087
  0.08428337  0.05942682  0.00580774 -0.0925743   0.0160935   0.5512089
 -0.02123524 -0.00896758  0.171113   -0.08572566  0.11272017 -0.13973583
 -0.04344182  0.18941484 -0.16934758 -0.06531025  0.12107075  0.063259  ]


In [None]:
# Vectorizing a phrase/doc ->
# A real-valued meaning representation. 
# Defaults to an average of the token vectors.