In [1]:
import tweepy
import json
import requests
import datetime as dt
from nltk.corpus import stopwords # stopword examples, 'its', 'on', 'the', etc <---- will be helpful later
# most pythonistas will rename pandas as pd and numpy as np for short (you don't have to)
import pandas as pd
import numpy as np

In [2]:
# Path to the JSON file that holds the Twitter API credentials.
# See example.json for the expected formatting;
# you'll need to replace my file with yours.
credentials = "keys.json"
# Parse the whole file into api_tokens; the individual tokens are looked up in it by key.
with open(credentials, "r") as keys:
    api_tokens = json.load(keys)

In [3]:
# pull the five credentials out of api_tokens, one variable per key
(bearer_token, api_key, api_secret, access_token, access_secret) = (
    api_tokens[key_name]
    for key_name in (
        "bearer_token",
        "api_key",
        "api_secret",
        "access_token",
        "access_secret",
    )
)

In [4]:
# Build the Twitter API client from the credentials read above.
# return_type is set to requests.Response, which is why a later cell can call
# .json() directly on what search_recent_tweets() hands back.
client = tweepy.Client(
    bearer_token=bearer_token,
    consumer_key=api_key,
    consumer_secret=api_secret,
    access_token=access_token,
    access_token_secret=access_secret,
    return_type = requests.Response
)

Let's look up the most recent tweets (up to 100) using #WPP
<br/>We're going to use the search_recent_tweets() method with 5 parameters:
<br/><b>query: </b>The search query, <a href="https://developer.twitter.com/en/docs/twitter-api/tweets/search/integrate/build-a-query">learn more about building queries here</a>
<br/><b>max_results: </b>The maximum number of search results to be returned by a request. A number between 10 and 100. By default, a request response will return 10 results.
<br/><b>tweet_fields: </b><a href="https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/tweet">See all tweet fields here</a>
<br/><b>user_fields: </b><a href="https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/user">See all user fields here</a>
<br/><b>expansions: </b>This field will allow us to include the user_field values. <a href="https://docs.tweepy.org/en/latest/expansions_and_fields.html">Learn more about expansions here</a>

In [5]:
# Search recent tweets for #WPP, asking for the listed tweet fields and, through
# the author_id expansion, the listed user fields for each tweet's author.
tweets = client.search_recent_tweets(
    query = "#WPP -is:retweet",  # searches for #WPP while ignoring retweets
    max_results = 100,
    tweet_fields = ['author_id', 'created_at', 'text', 'source', 'lang', 'geo'],
    user_fields = ['name','username','location','verified'],
    expansions = 'author_id'
)

In [6]:
# the client was created with return_type=requests.Response, so decode the
# response's JSON body into a plain Python dictionary
tweets_dict = tweets.json()

In [7]:
tweets_dict

{'data': [{'id': '1565738244372803585',
   'source': 'Tickeron',
   'text': '$WPP enters a Downtrend as Momentum Indicator dropped below the 0 level on August 5, 2022. View odds for this and other indicators: https://t.co/gvq6YHFMzw #WPP https://t.co/a9MufN6mLe',
   'created_at': '2022-09-02T16:27:42.000Z',
   'lang': 'en',
   'author_id': '374729426'},
  {'id': '1565514952877686784',
   'source': 'Twitter Web App',
   'text': '💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰Congrats to all Team members for making huge gains with Trade Ideas $WPP #WPP \n\nhttps://t.co/rEobNg7QQw',
   'created_at': '2022-09-02T01:40:25.000Z',
   'lang': 'en',
   'author_id': '1434909340205465605'},
  {'id': '1565476711071973378',
   'source': 'Twitter for iPhone',
   'text': '@GiDavidswright Ay yo las amo, tengo botas y pilotos de cuando viví en 🇬🇧 y odio no poder usarlas tanto acá 😒 #wpp del año. Ya lo sé',
   'created_at': '2022-09-01T23:08:27.000Z',
   'lang': 'es',
   'author_id': '1260946502714904576'},
  {'id': '156539448214458

In [8]:
# split the response dictionary into its two useful parts:
#   'data'              -> the tweets themselves (this excludes the metadata)
#   'includes' > 'users' -> the user records requested with the author_id expansion
tweets_data = tweets_dict['data']
tweets_users = tweets_dict['includes']['users']

In [9]:
tweets_data

[{'id': '1565738244372803585',
  'source': 'Tickeron',
  'text': '$WPP enters a Downtrend as Momentum Indicator dropped below the 0 level on August 5, 2022. View odds for this and other indicators: https://t.co/gvq6YHFMzw #WPP https://t.co/a9MufN6mLe',
  'created_at': '2022-09-02T16:27:42.000Z',
  'lang': 'en',
  'author_id': '374729426'},
 {'id': '1565514952877686784',
  'source': 'Twitter Web App',
  'text': '💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰Congrats to all Team members for making huge gains with Trade Ideas $WPP #WPP \n\nhttps://t.co/rEobNg7QQw',
  'created_at': '2022-09-02T01:40:25.000Z',
  'lang': 'en',
  'author_id': '1434909340205465605'},
 {'id': '1565476711071973378',
  'source': 'Twitter for iPhone',
  'text': '@GiDavidswright Ay yo las amo, tengo botas y pilotos de cuando viví en 🇬🇧 y odio no poder usarlas tanto acá 😒 #wpp del año. Ya lo sé',
  'created_at': '2022-09-01T23:08:27.000Z',
  'lang': 'es',
  'author_id': '1260946502714904576'},
 {'id': '1565394482144587776',
  'source': 'Tickero

In [10]:
tweets_users

[{'id': '374729426',
  'verified': False,
  'username': 'Tickeron',
  'location': 'Sunnyvale, CA',
  'name': 'Tickeron'},
 {'id': '1434909340205465605',
  'verified': False,
  'username': 'SuccessAfter25',
  'name': 'Trader PLUS'},
 {'id': '1260946502714904576',
  'verified': False,
  'username': 'lahannadom',
  'location': 'Buenos Aires, Argentina',
  'name': 'Anto'},
 {'id': '1171873456276889600',
  'verified': False,
  'username': 'investor_feed',
  'location': 'Toronto, Canada',
  'name': 'Docoh Feed'},
 {'id': '799226790065508352',
  'verified': False,
  'username': 'MarTechSeries',
  'location': 'Hawthorne, CA',
  'name': 'MarTech Series'},
 {'id': '1340919529166221313',
  'verified': False,
  'username': 'TZokani',
  'name': 'Tarek Zokani'},
 {'id': '114739070',
  'verified': False,
  'username': 'PdelaPublicidad',
  'location': 'Madrid, Spain',
  'name': 'Periódico PublicidAD'},
 {'id': '1068561714055012352',
  'verified': False,
  'username': 'EdgarInsider',
  'name': 'SEC.rep

In [11]:
# number of tweets, then number of user records, one per line
print(len(tweets_data), len(tweets_users), sep="\n")

52
32


In [12]:
# the different counts let us know some users posted multiple tweets with #WPP
# turn each list of records into a pandas DataFrame (one row per record)
df_data = pd.json_normalize(tweets_data)
df_users = pd.json_normalize(tweets_users)

In [13]:
df_data

Unnamed: 0,id,source,text,created_at,lang,author_id,geo.place_id
0,1565738244372803585,Tickeron,$WPP enters a Downtrend as Momentum Indicator ...,2022-09-02T16:27:42.000Z,en,374729426,
1,1565514952877686784,Twitter Web App,💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰Congrats to all Team members ...,2022-09-02T01:40:25.000Z,en,1434909340205465605,
2,1565476711071973378,Twitter for iPhone,"@GiDavidswright Ay yo las amo, tengo botas y p...",2022-09-01T23:08:27.000Z,es,1260946502714904576,
3,1565394482144587776,Tickeron,$WPP's in Downtrend: Moving Average Convergenc...,2022-09-01T17:41:42.000Z,en,374729426,
4,1565331598324940802,Investor Feed,"$WPP 6-K WPP (1 Sep, 9:30am)\n\n📰 Total Vo...",2022-09-01T13:31:50.000Z,en,1171873456276889600,
5,1565326212293091329,Hootsuite Inc.,@WPP Acquires Ecommerce Consultancy Newcraft h...,2022-09-01T13:10:26.000Z,en,799226790065508352,
6,1565311917236912130,Twitter for Android,@JLD_200 #WPP\nhttps://t.co/cM0OVS2RtT,2022-09-01T12:13:37.000Z,qme,1340919529166221313,
7,1565291466947182598,Twitter Web App,#WPP anuncia la adquisición de Newcraft\nhttps...,2022-09-01T10:52:22.000Z,es,114739070,
8,1565283732222644224,Edgar Filings,SEC Form 6-K filed by $WPP $WPPGF #WPP plc ...,2022-09-01T10:21:37.000Z,en,1068561714055012352,
9,1565281015643492352,Investor Feed,"$WPP 6-K WPP (1 Sep, 6:08am)\n\n📰 Current ...",2022-09-01T10:10:50.000Z,en,1171873456276889600,


In [14]:
df_users

Unnamed: 0,id,verified,username,location,name
0,374729426,False,Tickeron,"Sunnyvale, CA",Tickeron
1,1434909340205465605,False,SuccessAfter25,,Trader PLUS
2,1260946502714904576,False,lahannadom,"Buenos Aires, Argentina",Anto
3,1171873456276889600,False,investor_feed,"Toronto, Canada",Docoh Feed
4,799226790065508352,False,MarTechSeries,"Hawthorne, CA",MarTech Series
5,1340919529166221313,False,TZokani,,Tarek Zokani
6,114739070,False,PdelaPublicidad,"Madrid, Spain",Periódico PublicidAD
7,1068561714055012352,False,EdgarInsider,,SEC.report
8,54319874,True,WunThompson,Global,Wunderman Thompson
9,22779605,False,proactive_UK,"London, England",Proactive


In [15]:
# I want to merge these two data frames together.
# The tweets frame identifies the author as 'author_id', while the users frame
# stores the same value as 'id'. Give the users frame the 'author_id' name too,
# so pandas has a shared column to merge on.
df_users = df_users.rename(columns={'id': 'author_id'})
df_users

Unnamed: 0,author_id,verified,username,location,name
0,374729426,False,Tickeron,"Sunnyvale, CA",Tickeron
1,1434909340205465605,False,SuccessAfter25,,Trader PLUS
2,1260946502714904576,False,lahannadom,"Buenos Aires, Argentina",Anto
3,1171873456276889600,False,investor_feed,"Toronto, Canada",Docoh Feed
4,799226790065508352,False,MarTechSeries,"Hawthorne, CA",MarTech Series
5,1340919529166221313,False,TZokani,,Tarek Zokani
6,114739070,False,PdelaPublicidad,"Madrid, Spain",Periódico PublicidAD
7,1068561714055012352,False,EdgarInsider,,SEC.report
8,54319874,True,WunThompson,Global,Wunderman Thompson
9,22779605,False,proactive_UK,"London, England",Proactive


In [16]:
# join every tweet to its author's record through the shared 'author_id' column
df_merged = pd.merge(df_data, df_users, on='author_id')
df_merged

Unnamed: 0,id,source,text,created_at,lang,author_id,geo.place_id,verified,username,location,name
0,1565738244372803585,Tickeron,$WPP enters a Downtrend as Momentum Indicator ...,2022-09-02T16:27:42.000Z,en,374729426,,False,Tickeron,"Sunnyvale, CA",Tickeron
1,1565394482144587776,Tickeron,$WPP's in Downtrend: Moving Average Convergenc...,2022-09-01T17:41:42.000Z,en,374729426,,False,Tickeron,"Sunnyvale, CA",Tickeron
2,1564792540645060611,Tickeron,$WPP's 10-day Moving Average crossed below its...,2022-08-31T01:49:48.000Z,en,374729426,,False,Tickeron,"Sunnyvale, CA",Tickeron
3,1564302942793961472,Tickeron,$WPP's 10-day Moving Average broke below its 5...,2022-08-29T17:24:19.000Z,en,374729426,,False,Tickeron,"Sunnyvale, CA",Tickeron
4,1563925877049671686,Tickeron,$WPP's 10-day Moving Average crossed below its...,2022-08-28T16:26:00.000Z,en,374729426,,False,Tickeron,"Sunnyvale, CA",Tickeron
5,1565514952877686784,Twitter Web App,💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰Congrats to all Team members ...,2022-09-02T01:40:25.000Z,en,1434909340205465605,,False,SuccessAfter25,,Trader PLUS
6,1564851941976457217,Twitter Web App,💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰Congrats to all Team members ...,2022-08-31T05:45:51.000Z,en,1434909340205465605,,False,SuccessAfter25,,Trader PLUS
7,1564171443243556865,Twitter Web App,💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰Congrats to all Team members ...,2022-08-29T08:41:47.000Z,en,1434909340205465605,,False,SuccessAfter25,,Trader PLUS
8,1563424047077400584,Twitter Web App,💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰Congrats to all Team members ...,2022-08-27T07:11:54.000Z,en,1434909340205465605,,False,SuccessAfter25,,Trader PLUS
9,1565476711071973378,Twitter for iPhone,"@GiDavidswright Ay yo las amo, tengo botas y p...",2022-09-01T23:08:27.000Z,es,1260946502714904576,,False,lahannadom,"Buenos Aires, Argentina",Anto


In [17]:
# the created_at time is a little difficult to read so let's fix that
# (this first attempt is expected to fail; the error is caught and printed
# so the notebook can still be run from top to bottom)
try:
    df_merged["created_at"] = df_merged["created_at"].dt.strftime('%d-%m-%Y')
except AttributeError as err:
    print(f"{type(err).__name__}: {err}")

AttributeError: Can only use .dt accessor with datetimelike values

In [18]:
# this didn't work!
# let's check the type of a "created_at" value
print(type(df_merged["created_at"].iloc[0]))

<class 'str'>


In [19]:
# created_at holds plain strings, so parse them into datetimes first and only
# then format them as day-month-year hour:minute
df_merged['created_at'] = (
    pd.to_datetime(df_merged['created_at'])
    .dt.strftime('%d-%m-%Y %H:%M')
)
df_merged

Unnamed: 0,id,source,text,created_at,lang,author_id,geo.place_id,verified,username,location,name
0,1565738244372803585,Tickeron,$WPP enters a Downtrend as Momentum Indicator ...,02-09-2022 16:27,en,374729426,,False,Tickeron,"Sunnyvale, CA",Tickeron
1,1565394482144587776,Tickeron,$WPP's in Downtrend: Moving Average Convergenc...,01-09-2022 17:41,en,374729426,,False,Tickeron,"Sunnyvale, CA",Tickeron
2,1564792540645060611,Tickeron,$WPP's 10-day Moving Average crossed below its...,31-08-2022 01:49,en,374729426,,False,Tickeron,"Sunnyvale, CA",Tickeron
3,1564302942793961472,Tickeron,$WPP's 10-day Moving Average broke below its 5...,29-08-2022 17:24,en,374729426,,False,Tickeron,"Sunnyvale, CA",Tickeron
4,1563925877049671686,Tickeron,$WPP's 10-day Moving Average crossed below its...,28-08-2022 16:26,en,374729426,,False,Tickeron,"Sunnyvale, CA",Tickeron
5,1565514952877686784,Twitter Web App,💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰Congrats to all Team members ...,02-09-2022 01:40,en,1434909340205465605,,False,SuccessAfter25,,Trader PLUS
6,1564851941976457217,Twitter Web App,💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰Congrats to all Team members ...,31-08-2022 05:45,en,1434909340205465605,,False,SuccessAfter25,,Trader PLUS
7,1564171443243556865,Twitter Web App,💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰Congrats to all Team members ...,29-08-2022 08:41,en,1434909340205465605,,False,SuccessAfter25,,Trader PLUS
8,1563424047077400584,Twitter Web App,💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰Congrats to all Team members ...,27-08-2022 07:11,en,1434909340205465605,,False,SuccessAfter25,,Trader PLUS
9,1565476711071973378,Twitter for iPhone,"@GiDavidswright Ay yo las amo, tengo botas y p...",01-09-2022 23:08,es,1260946502714904576,,False,lahannadom,"Buenos Aires, Argentina",Anto


In [20]:
# better!
# I also don't care about the ids anymore, so let's get rid of those.
# drop() returns a new DataFrame instead of changing df_merged, so the result
# has to be assigned back for the columns to really be gone.
# errors='ignore' lets this cell be re-run after the columns were already removed.
df_merged = df_merged.drop(columns=['author_id', 'id'], errors='ignore')
df_merged

Unnamed: 0,source,text,created_at,lang,geo.place_id,verified,username,location,name
0,Tickeron,$WPP enters a Downtrend as Momentum Indicator ...,02-09-2022 16:27,en,,False,Tickeron,"Sunnyvale, CA",Tickeron
1,Tickeron,$WPP's in Downtrend: Moving Average Convergenc...,01-09-2022 17:41,en,,False,Tickeron,"Sunnyvale, CA",Tickeron
2,Tickeron,$WPP's 10-day Moving Average crossed below its...,31-08-2022 01:49,en,,False,Tickeron,"Sunnyvale, CA",Tickeron
3,Tickeron,$WPP's 10-day Moving Average broke below its 5...,29-08-2022 17:24,en,,False,Tickeron,"Sunnyvale, CA",Tickeron
4,Tickeron,$WPP's 10-day Moving Average crossed below its...,28-08-2022 16:26,en,,False,Tickeron,"Sunnyvale, CA",Tickeron
5,Twitter Web App,💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰Congrats to all Team members ...,02-09-2022 01:40,en,,False,SuccessAfter25,,Trader PLUS
6,Twitter Web App,💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰Congrats to all Team members ...,31-08-2022 05:45,en,,False,SuccessAfter25,,Trader PLUS
7,Twitter Web App,💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰Congrats to all Team members ...,29-08-2022 08:41,en,,False,SuccessAfter25,,Trader PLUS
8,Twitter Web App,💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰Congrats to all Team members ...,27-08-2022 07:11,en,,False,SuccessAfter25,,Trader PLUS
9,Twitter for iPhone,"@GiDavidswright Ay yo las amo, tengo botas y p...",01-09-2022 23:08,es,,False,lahannadom,"Buenos Aires, Argentina",Anto


In [21]:
# that's a lot better!
# now let's save our data in pickled format - so that we don't have to grab it again if our machine crashes

import pickle  # kept from the original cell; to_pickle below is called on the DataFrame itself

# file name is 'twitter' followed by the current date and 12-hour time with AM/PM
path = f"twitter{dt.datetime.now():%Y-%m-%d_%I-%M-%S_%p}.pkl"
df_merged.to_pickle(path)

In [22]:
# compute a collection of all words from all tweets
# the stopword list is built once, as a set, instead of calling stopwords.words()
# again for every single word (which is what made this cell take a minute or two)

stop_words = set(stopwords.words())  # no language argument, same call as before

# NOTE(review): the stopword check is case-sensitive and runs on raw
# whitespace-separated tokens, so e.g. 'This' is kept (it shows up in the
# top-10 counts) -- confirm whether that is intended
words = []
for text in df_merged['text']:
    for word in text.split():
        # skip stopwords, links and the search hashtag itself
        if word in stop_words or 'http' in word or word == '#WPP':
            continue
        words.append(word)
words

['$WPP',
 'enters',
 'Downtrend',
 'Momentum',
 'Indicator',
 'dropped',
 '0',
 'level',
 'August',
 '5,',
 '2022.',
 'View',
 'odds',
 'indicators:',
 "$WPP's",
 'Downtrend:',
 'Moving',
 'Average',
 'Convergence',
 'Divergence',
 '(MACD)',
 'Histogram',
 'turned',
 'negative.',
 'View',
 'odds',
 'indicators:',
 "$WPP's",
 '10-day',
 'Moving',
 'Average',
 'crossed',
 '50-day',
 'Moving',
 'Average',
 'August',
 '16,',
 '2022.',
 'View',
 'odds',
 'indicators:',
 "$WPP's",
 '10-day',
 'Moving',
 'Average',
 'broke',
 '50-day',
 'Moving',
 'Average',
 'August',
 '16,',
 '2022.',
 'View',
 'odds',
 'indicators:',
 "$WPP's",
 '10-day',
 'Moving',
 'Average',
 'crossed',
 '50-day',
 'Moving',
 'Average',
 'August',
 '16,',
 '2022.',
 'View',
 'odds',
 'indicators:',
 '💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰Congrats',
 'Team',
 'members',
 'making',
 'huge',
 'gains',
 'Trade',
 'Ideas',
 '$WPP',
 '💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰💰Congrats',
 'Team',
 'members',
 'making',
 'huge',
 'gains',
 'Trade',
 'Ideas',
 '$WPP',
 '💰💰💰

In [23]:
# tally how often each collected word occurs and show the ten most frequent

from collections import Counter

c = Counter(words)
print(c.most_common(10))  # top 10

[('$WPP', 10), ('Moving', 7), ('Average', 7), ('This', 7), ('Newcraft', 6), ('$WPP.L', 6), ('average', 6), ('volume', 6), ('sign', 6), ('nice', 6)]


In [24]:
from prettytable import PrettyTable

# show the ten most frequent words as a two-column text table
pt = PrettyTable(field_names=['Word', 'Count'])
c = Counter(words)
# a plain loop, since add_row is called only for its side effect
for row in c.most_common(10):
    pt.add_row(list(row))
# Set column alignment: words to the left, counts to the right
pt.align['Word'] = 'l'
pt.align['Count'] = 'r'
print(pt)

+----------+-------+
| Word     | Count |
+----------+-------+
| $WPP     |    10 |
| Moving   |     7 |
| Average  |     7 |
| This     |     7 |
| Newcraft |     6 |
| $WPP.L   |     6 |
| average  |     6 |
| volume   |     6 |
| sign     |     6 |
| nice     |     6 |
+----------+-------+
