In [15]:
import pandas as pd
import numpy as np
import requests
import tweepy  # best for Twitter Standard API
from TwitterAPI import TwitterAPI  # needed for Twitter Premium API access
from requests_oauthlib import OAuth1
from pprint import pprint
import pickle
from pymongo import MongoClient

from twitter_api_functions import tweet_to_string, tweets_to_df, get_tweets_premium

In [2]:
# need to have Twitter Developer account in order for this to work (https://developer.twitter.com)
# see setup_twitter_instructions.md below for further instructions 

from twitter_credentials import credentials

oauth = OAuth1(credentials["TWITTER_CONSUMER_KEY"],
               credentials["TWITTER_CONSUMER_KEY_SECRET"],
               credentials["TWITTER_ACCESS_TOKEN"],
               credentials["TWITTER_ACCESS_TOKEN_SECRET"])

See [this file](./setup_twitter_instructions.md) for step-by-step instructions on how to setup a Twitter app and get access tokens.  

In [3]:
# test query of Twitter API using requests (get current tweets from @thisismetis)
response = requests.get("https://api.twitter.com/1.1/search/tweets.json?q=%40thisismetis",
                        auth = oauth)

tweets = response.json()
list(tweets.keys())

['statuses', 'search_metadata']

In [4]:
pprint(tweets['statuses'][0])

{'contributors': None,
 'coordinates': None,
 'created_at': 'Sat Aug 24 23:02:29 +0000 2019',
 'entities': {'hashtags': [],
              'symbols': [],
              'urls': [],
              'user_mentions': [{'id': 2233022917,
                                 'id_str': '2233022917',
                                 'indices': [3, 15],
                                 'name': 'Metis',
                                 'screen_name': 'thisismetis'}]},
 'favorite_count': 0,
 'favorited': False,
 'geo': None,
 'id': 1165399003271434241,
 'id_str': '1165399003271434241',
 'in_reply_to_screen_name': None,
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'is_quote_status': False,
 'lang': 'en',
 'metadata': {'iso_language_code': 'en', 'result_type': 'recent'},
 'place': None,
 'retweet_count': 1,
 'retweeted': False,
 'retweeted_status': {'contributors': None,
                      'coordinates': None,
     

In [5]:
list(tweets['statuses'][0].keys())

['created_at',
 'id',
 'id_str',
 'text',
 'truncated',
 'entities',
 'metadata',
 'source',
 'in_reply_to_status_id',
 'in_reply_to_status_id_str',
 'in_reply_to_user_id',
 'in_reply_to_user_id_str',
 'in_reply_to_screen_name',
 'user',
 'geo',
 'coordinates',
 'place',
 'contributors',
 'retweeted_status',
 'is_quote_status',
 'retweet_count',
 'favorite_count',
 'favorited',
 'retweeted',
 'lang']

In [6]:
pprint(tweets['search_metadata'])

{'completed_in': 0.048,
 'count': 15,
 'max_id': 1165399003271434241,
 'max_id_str': '1165399003271434241',
 'next_results': '?max_id=1164187044270223360&q=%40thisismetis&include_entities=1',
 'query': '%40thisismetis',
 'refresh_url': '?since_id=1165399003271434241&q=%40thisismetis&include_entities=1',
 'since_id': 0,
 'since_id_str': '0'}


In [8]:
# get 5 tweets with keywords 'Artificial Intelligence'
parameters = {"q": "Artificial Intelligence", "count": 5, "lang": "en"}
url = "https://api.twitter.com/1.1/search/tweets.json"

response = requests.get(url, params = parameters, auth = oauth)

tweets = response.json()['statuses']
for tweet in tweets:
    print(tweet_to_string(tweet).strip())
    print('\n')

Text: RT @KirkDBorne: The Black Box — Explainable Artificial Intelligence #XAI and the Limits of Human Deliberative Processes: https://t.co/VYs43…
        Hashtags: [{'text': 'XAI', 'indices': [68, 72]}]
        Name: Robert Navarro 
        Username: damarafaka
        User Description: Beyond the abyss, free thinker, simulations, provocateur, reverse engineering, security consultant; e-wheel & another algorithm War
        Social Status: 766 friends, 239 followers, 8488 favorites
        Location: 
        Geocode: None
        Date: Sun Aug 25 00:40:33 +0000 2019


Text: RT @nordicinst: Scientist to Hollywood: Artificial Intelligence Doesn't Work the Way You Think it Does. #aiethics #MachineLearning #aiforgo…
        Hashtags: [{'text': 'aiethics', 'indices': [104, 113]}, {'text': 'MachineLearning', 'indices': [114, 130]}]
        Name: Benjamin Huskinson 
        Username: Benhuskinson
        User Description: Historian of science and religion. Aggressively pro-brunch.
        Soc

In [9]:
# print the next 5 tweets
next_page = url + response.json()['search_metadata']['next_results']
tweets = requests.get(next_page, auth = oauth).json()['statuses']
for tweet in tweets:
    print(tweet_to_string(tweet).strip())
    print('\n')

Text: RT @IBMSystems: Artificial intelligence is transforming the way we communicate with each other. But can your infrastructure harness the ful…
        Hashtags: []
        Name: Shubham kumar singh 
        Username: Shubham59039315
        User Description: Fan of #jai shree ram #mahadev
        Social Status: 4 friends, 15 followers, 5 favorites
        Location: Barauni begusarai Bihar, India
        Geocode: None
        Date: Sun Aug 25 00:38:36 +0000 2019


Text: Artificial intelligence will have implications for policymakers in education, welfare and geopolitics https://t.co/iCmYbfp8Ft
        Hashtags: []
        Name: The Economist 
        Username: EconUS
        User Description: Official tweets from The Economist's US correspondents
        Social Status: 582 friends, 90503 followers, 1176 favorites
        Location: Washington
        Geocode: None
        Date: Sun Aug 25 00:38:15 +0000 2019


Text: RT @neuriot_tech: neurIOT helps farmers identify 8 different disease

In [10]:
# compile tweets as dataframe
tweets_to_df(tweets)

Unnamed: 0,id,user_name,user_screenname,user_description,user_friends,user_followers,user_favorites,retweets,date,location,geocode,hashtags,tweet
0,1165423194678784001,Shubham kumar singh,Shubham59039315,Fan of #jai shree ram #mahadev,4,15,5,638,Sun Aug 25 00:38:36 +0000 2019,"Barauni begusarai Bihar, India",,,RT @IBMSystems: Artificial intelligence is tra...
1,1165423107361914880,The Economist,EconUS,Official tweets from The Economist's US corres...,582,90503,1176,0,Sun Aug 25 00:38:15 +0000 2019,Washington,,,Artificial intelligence will have implications...
2,1165422947122565123,jayeshthakur,jayeshmthakur,"Husband, Father, Student, Security n00b. Inter...",699,1495,1271,2,Sun Aug 25 00:37:37 +0000 2019,::1,,,RT @neuriot_tech: neurIOT helps farmers identi...
3,1165422756395008000,Dean Melnyk,DeanMelnyk,"Leader SAS Western Canada Team, Dad, Husband, ...",827,415,697,43,Sun Aug 25 00:36:52 +0000 2019,"Calgary, AB Canada",,[AI],RT @SASsoftware: Our $1 billion #AI investment...
4,1165422644499554310,Jesse James Jarmon,JarmonJesse,"I'm an Atheist, a self-proclaimed Compassionat...",52,36,2481,63,Sun Aug 25 00:36:25 +0000 2019,,,,RT @sciam: Will China overtake the U.S. in art...


In [11]:
tweets_to_df(tweets)['tweet'][0]

'RT @IBMSystems: Artificial intelligence is transforming the way we communicate with each other. But can your infrastructure harness the ful…'

In [12]:
# setup API connection with Tweepy
consumer_key = credentials["TWITTER_CONSUMER_KEY"]
consumer_secret = credentials["TWITTER_CONSUMER_KEY_SECRET"]
access_token_key = credentials["TWITTER_ACCESS_TOKEN"]
access_token_secret = credentials["TWITTER_ACCESS_TOKEN_SECRET"]

auth = tweepy.OAuthHandler(consumer_key = consumer_key, consumer_secret = consumer_secret)
auth.set_access_token(access_token_key, access_token_secret)

api = tweepy.API(auth, retry_count = 1, retry_delay = 10, wait_on_rate_limit = True, 
                 wait_on_rate_limit_notify = True)

In [13]:
# pull Tweets using Tweepy and display Tweet text
for index, tweet in enumerate(tweepy.Cursor(api.search, **parameters).items(5)):
    print(str(index) + '. ' + tweet.text + '\n')

0. RT @KirkDBorne: The Black Box — Explainable Artificial Intelligence #XAI and the Limits of Human Deliberative Processes: https://t.co/VYs43…

1. RT @nordicinst: Scientist to Hollywood: Artificial Intelligence Doesn't Work the Way You Think it Does. #aiethics #MachineLearning #aiforgo…

2. RT @USATODAY: The increasing prevalence of Artificial Intelligence has boosted efficiency and reduced costs for companies but has also draw…

3. RT @claudiomkd: What's the Difference Between #ArtificialIntelligence, #MachineLearning, and #DeepLearning?
 by mvc |

Read more here: http…

4. Artificial intelligence and machine learning are the next frontiers for ETFs, says industry pro https://t.co/tkywDjEjtt



In [14]:
# make df from Tweepy tweets
cursor = tweepy.Cursor(api.search, **parameters).items(5)
tweets = [tweet._json for tweet in cursor]
tweet_df = pd.DataFrame(tweets)
tweet_df

Unnamed: 0,contributors,coordinates,created_at,entities,favorite_count,favorited,geo,id,id_str,in_reply_to_screen_name,...,metadata,place,possibly_sensitive,retweet_count,retweeted,retweeted_status,source,text,truncated,user
0,,,Sun Aug 25 00:40:33 +0000 2019,"{'hashtags': [{'text': 'XAI', 'indices': [68, ...",0,False,,1165423683567050753,1165423683567050753,,...,"{'iso_language_code': 'en', 'result_type': 're...",,,6,False,{'created_at': 'Sat Aug 24 23:49:00 +0000 2019...,"<a href=""https://mobile.twitter.com"" rel=""nofo...",RT @KirkDBorne: The Black Box — Explainable Ar...,False,"{'id': 1364473266, 'id_str': '1364473266', 'na..."
1,,,Sun Aug 25 00:40:04 +0000 2019,"{'hashtags': [{'text': 'aiethics', 'indices': ...",0,False,,1165423561701380096,1165423561701380096,,...,"{'iso_language_code': 'en', 'result_type': 're...",,,8,False,{'created_at': 'Sat Aug 24 16:10:26 +0000 2019...,"<a href=""http://twitter.com/download/android"" ...",RT @nordicinst: Scientist to Hollywood: Artifi...,False,"{'id': 1165345386019479552, 'id_str': '1165345..."
2,,,Sun Aug 25 00:39:25 +0000 2019,"{'hashtags': [], 'symbols': [], 'user_mentions...",0,False,,1165423399046397952,1165423399046397952,,...,"{'iso_language_code': 'en', 'result_type': 're...",,,19,False,{'created_at': 'Fri Aug 23 22:03:00 +0000 2019...,"<a href=""http://twitter.com/download/android"" ...",RT @USATODAY: The increasing prevalence of Art...,False,"{'id': 2314020889, 'id_str': '2314020889', 'na..."
3,,,Sun Aug 25 00:39:09 +0000 2019,{'hashtags': [{'text': 'ArtificialIntelligence...,0,False,,1165423333808070657,1165423333808070657,,...,"{'iso_language_code': 'en', 'result_type': 're...",,,4,False,{'created_at': 'Sat Aug 24 21:11:42 +0000 2019...,"<a href=""https://twitter.bot"" rel=""nofollow"">j...",RT @claudiomkd: What's the Difference Between ...,False,"{'id': 190097582, 'id_str': '190097582', 'name..."
4,,,Sun Aug 25 00:39:02 +0000 2019,"{'hashtags': [], 'symbols': [], 'user_mentions...",0,False,,1165423301646266368,1165423301646266368,,...,"{'iso_language_code': 'en', 'result_type': 're...",,False,0,False,,"<a href=""https://dlvrit.com/"" rel=""nofollow"">d...",Artificial intelligence and machine learning a...,False,"{'id': 3689058808, 'id_str': '3689058808', 'na..."


In [19]:
# Tweepy does not appear to support the Premium APIs - need TwitterAPI for older tweets
api = TwitterAPI(consumer_key, consumer_secret, access_token_key, access_token_secret)

In [21]:
# test query of Standard API (note order of terms in first argument)
r = api.request('search/tweets',
                {'q': "'Artificial Intelligence'",
                 'lang': 'en', 
                 'count': 1, 
                 'tweet_mode': 'extended'})
for tweet in r:
    pprint(tweet)

{'contributors': None,
 'coordinates': None,
 'created_at': 'Sat Aug 24 20:18:31 +0000 2019',
 'display_text_range': [0, 116],
 'entities': {'hashtags': [],
              'symbols': [],
              'urls': [{'display_url': 'twitter.com/themarkosmers/…',
                        'expanded_url': 'https://twitter.com/themarkosmers/status/1165030284313731073',
                        'indices': [117, 140],
                        'url': 'https://t.co/VZYL80gaia'}],
              'user_mentions': []},
 'favorite_count': 0,
 'favorited': False,
 'full_text': 'Point proven: artificial intelligence and self-driving cars are '
              'safer than aggressive, passive aggressive LA drivers '
              'https://t.co/VZYL80gaia',
 'geo': None,
 'id': 1165357739331031040,
 'id_str': '1165357739331031040',
 'in_reply_to_screen_name': None,
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'is_quote_status': 

In [19]:
# in order to use the '30day' and 'fullarchive' (all years) Twitter Premium APIs, must first 
# set up 'development environments' associated with the app (https://developer.twitter.com/en/dashboard)
# see setup_twitter_instructions.md file linked earlier

# test query of '30day' Premium API
r = api.request('tweets/search/30day/:development',
                    params = 
                        {'query': "Artificial Intelligence",
                         'fromDate': '201908010000',
                         'toDate': '201908150000',
                         'maxResults': 10}
                   )
tweets = r.json()
pprint(tweets)

{'next': 'eyJhdXRoZW50aWNpdHkiOiI4YTRhMmNlOTFkNWMyMTRmOTZkMDQ0Mzk4NDBkNmM0MDU1MWNmMTdkZjdhNDlhMTI0ZDdiYWNjYTdkNTA3NDAwIiwiZnJvbURhdGUiOiIyMDE5MDgwMTAwMDAiLCJ0b0RhdGUiOiIyMDE5MDgxNTAwMDAiLCJuZXh0IjoiMjAxOTA4MTUwMDAwMDAtMTE2MTc4ODY3MjA4MzAzNDExNC0wIn0=',
 'requestParameters': {'fromDate': '201908010000',
                       'maxResults': 10,
                       'toDate': '201908150000'},
 'results': [{'contributors': None,
              'coordinates': None,
              'created_at': 'Wed Aug 14 23:59:50 +0000 2019',
              'entities': {'hashtags': [],
                           'symbols': [],
                           'urls': [],
                           'user_mentions': [{'id': 879548970082721792,
                                              'id_str': '879548970082721792',
                                              'indices': [3, 18],
                                              'name': 'Rising serpent',
                                              'screen_name':

                           'urls': [{'display_url': 'twitter.com/josephfcox/sta…',
                                     'expanded_url': 'https://twitter.com/josephfcox/status/1161720780666728448',
                                     'indices': [89, 112],
                                     'url': 'https://t.co/HwfEND2Xvf'}],
                           'user_mentions': [{'id': 905154964829462528,
                                              'id_str': '905154964829462528',
                                              'indices': [76, 88],
                                              'name': 'The Spy Brief',
                                              'screen_name': 'thespybrief'}]},
              'favorite_count': 5,
              'favorited': False,
              'filter_level': 'low',
              'geo': None,
              'id': 1161789042955960321,
              'id_str': '1161789042955960321',
              'in_reply_to_screen_name': None,
              'in_reply_to_status_id

In [20]:
tweets.keys()

dict_keys(['results', 'next', 'requestParameters'])

In [21]:
tweets['requestParameters']

{'maxResults': 10, 'fromDate': '201908010000', 'toDate': '201908150000'}

In [22]:
tweets['next']  # next key provided when additional pages available

'eyJhdXRoZW50aWNpdHkiOiI4YTRhMmNlOTFkNWMyMTRmOTZkMDQ0Mzk4NDBkNmM0MDU1MWNmMTdkZjdhNDlhMTI0ZDdiYWNjYTdkNTA3NDAwIiwiZnJvbURhdGUiOiIyMDE5MDgwMTAwMDAiLCJ0b0RhdGUiOiIyMDE5MDgxNTAwMDAiLCJuZXh0IjoiMjAxOTA4MTUwMDAwMDAtMTE2MTc4ODY3MjA4MzAzNDExNC0wIn0='

In [23]:
for tweet in tweets['results']:
    print(tweet_to_string(tweet))


        Text: RT @rising_serpent: Once you combine this gigantic amorphous mass of the human gene code with artificial intelligence programming, the resu…
        Hashtags: []
        Name: Barbara Frietchie 
        Username: Barbaren7
        User Description: "Shoot, if you must, this old gray head But spare your country's flag" she said. --- John Greenleaf Whittier
I study A Course in Miracles (ACIM). #MAGA
        Social Status: 1191 friends, 937 followers, 52445 favorites
        Location: None
        Geocode: None
        Date: Wed Aug 14 23:59:50 +0000 2019
    

        Text: RT @CaliaDomenico: An animation shows the random appearance of fast radio bursts (FRBs) across the sky. Astronomers have discovered about 8…
        Hashtags: []
        Name: dmi 
        Username: dmitsag
        User Description: None
        Social Status: 145 friends, 2 followers, 1005 favorites
        Location: None
        Geocode: None
        Date: Wed Aug 14 23:59:28 +0000 2019
    

        

In [24]:
# to get full tweet text (Premium API requests truncate tweet text by default)
print(tweets['results'][0]['retweeted_status']['extended_tweet']['full_text'])

Once you combine this gigantic amorphous mass of the human gene code with artificial intelligence programming, the results are far beyond the capacity of my meager imagination.
I've ranted enough. Gotta go earn a living so that the government can give my money away.
-Fin


In [41]:
query = '("Artificial Intelligence") OR #ArtificialIntelligence OR #AI lang:en'

# test query of 30day Premium API
get_tweets_premium(api, 1, '30day', 'development', query = query, 
                   fromDate = '201908010000', toDate = '201908140000', 
                   maxResults = 10)

[{'created_at': 'Tue Aug 13 23:59:59 +0000 2019',
  'id': 1161427210252836864,
  'id_str': '1161427210252836864',
  'text': 'RT @Paula_Piccard: This app helps the visually impaired identify surroundings\n#AI #ArtificialIntelligence #ML #ComputerVision #IoT #Tech\n\n@…',
  'source': '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>',
  'truncated': False,
  'in_reply_to_status_id': None,
  'in_reply_to_status_id_str': None,
  'in_reply_to_user_id': None,
  'in_reply_to_user_id_str': None,
  'in_reply_to_screen_name': None,
  'user': {'id': 1003882296003481600,
   'id_str': '1003882296003481600',
   'name': 'Minmax',
   'screen_name': 'Seasidesevn1',
   'location': None,
   'url': None,
   'description': None,
   'translator_type': 'none',
   'protected': False,
   'verified': False,
   'followers_count': 7,
   'friends_count': 26,
   'listed_count': 2,
   'favourites_count': 62,
   'statuses_count': 245,
   'created_at': 'Tue Jun 05 06:12:45 +0000 201

In [44]:
query = '("Artificial Intelligence") OR #ArtificialIntelligence OR #AI lang:en'

# get 500 tweets from 2007 using fullarchive API
AI_tweets_2007 = get_tweets_premium(api, 5, 'fullarchive', 'development', query = query, 
                                    fromDate = '200701010000', toDate = '200801010000', 
                                    maxResults = 100)

In [46]:
len(AI_tweets_2007)  # only 200 found

199

In [48]:
for tweet in AI_tweets_2007:
    tweet['year'] = 2007  # keep track of year for later compilation
    print(tweet['text'] + '\n')

Anyone involved in artificial intelligence work? Specifically, anyone know of any AI system that can RECOGNISE puns in natural language?

読書中 Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp

Update on geekgirl LOVE AND SEX WITH ROBOTS: If advances in artificial intelligence cont.. http://tinyurl.com/2vykx7

"A.I. Artificial Intelligence" a Speilberg movie based on a Brian Aldiss  short story is very creepy.  Like it mo' better than "I Robot."

Paradigms of Artificial Intelligence Programming を注文した♪

@moltz: chill it, dude. Your twirly brain wave patterns are fucking with the lesbian sexbot artificial intelligence orbs production line

Leadership and Artificial Intelligence: Two new issues of Shrinkrapradio were published shortly one aft.. http://tinyurl.com/327etm

Class: Intro to Artificial Intelligence and Computer Simulation. Official Grade: A. Yay!

aibits: Artificial Intelligence for Advanced Problem Solving Techniques http://tinyurl.com/2rfndt

Most 

In [49]:
# get 500 tweets from 2008 using fullarchive API
AI_tweets_2008 = get_tweets_premium(api, 5, 'fullarchive', 'development', query = query, 
                                    fromDate = '200801010000', toDate = '200901010000', 
                                    maxResults = 100)

In [50]:
for tweet in AI_tweets_2008:
    tweet['year'] = 2008
len(AI_tweets_2008)

495

In [51]:
# get 500 tweets from 2009 using fullarchive API
AI_tweets_2009 = get_tweets_premium(api, 5, 'fullarchive', 'development', query = query, 
                                    fromDate = '200901010000', toDate = '201001010000', 
                                    maxResults = 100)

In [52]:
for tweet in AI_tweets_2009:
    tweet['year'] = 2009
len(AI_tweets_2009)

491

In [53]:
# get 500 tweets from 2010 using fullarchive API
AI_tweets_2010 = get_tweets_premium(api, 5, 'fullarchive', 'development', query = query, 
                                    fromDate = '201001010000', toDate = '201101010000', 
                                    maxResults = 100)

In [54]:
for tweet in AI_tweets_2010:
    tweet['year'] = 2010
len(AI_tweets_2010)

496

In [55]:
# get 500 tweets from 2011 using fullarchive API
AI_tweets_2011 = get_tweets_premium(api, 5, 'fullarchive', 'development', query = query, 
                                    fromDate = '201101010000', toDate = '201201010000', 
                                    maxResults = 100)

In [56]:
for tweet in AI_tweets_2011:
    tweet['year'] = 2011
len(AI_tweets_2011)

499

In [57]:
# get 500 tweets from 2012 using fullarchive API
AI_tweets_2012 = get_tweets_premium(api, 5, 'fullarchive', 'development', query = query, 
                                    fromDate = '201201010000', toDate = '201301010000', 
                                    maxResults = 100)

In [58]:
for tweet in AI_tweets_2012:
    tweet['year'] = 2012
len(AI_tweets_2012)

495

In [59]:
# get 500 tweets from 2013 using fullarchive API
AI_tweets_2013 = get_tweets_premium(api, 5, 'fullarchive', 'development', query = query, 
                                    fromDate = '201301010000', toDate = '201401010000', 
                                    maxResults = 100)

In [60]:
for tweet in AI_tweets_2013:
    tweet['year'] = 2013
len(AI_tweets_2013)

499

In [61]:
# get 500 tweets from 2014 using fullarchive API
AI_tweets_2014 = get_tweets_premium(api, 5, 'fullarchive', 'development', query = query, 
                                    fromDate = '201401010000', toDate = '201501010000', 
                                    maxResults = 100)

In [62]:
for tweet in AI_tweets_2014:
    tweet['year'] = 2014
len(AI_tweets_2014)

499

In [64]:
# get 500 tweets from 2016 using fullarchive API
AI_tweets_2016 = get_tweets_premium(api, 5, 'fullarchive', 'development', query = query, 
                                    fromDate = '201601010000', toDate = '201701010000', 
                                    maxResults = 100)

In [65]:
for tweet in AI_tweets_2016:
    tweet['year'] = 2016
len(AI_tweets_2016)

497

In [66]:
# get 300 tweets from 2017 using fullarchive API (not 500 to avoid going over Sandbox request limits)
AI_tweets_2017 = get_tweets_premium(api, 3, 'fullarchive', 'development', query = query, 
                                    fromDate = '201701010000', toDate = '201801010000', 
                                    maxResults = 100)

In [67]:
for tweet in AI_tweets_2017:
    tweet['year'] = 2017
len(AI_tweets_2017)

297

In [68]:
# get 500 tweets from 2018 using fullarchive API
AI_tweets_2018 = get_tweets_premium(api, 5, 'fullarchive', 'development', query = query, 
                                    fromDate = '201801010000', toDate = '201901010000', 
                                    maxResults = 100)

In [69]:
for tweet in AI_tweets_2018:
    tweet['year'] = 2018
len(AI_tweets_2018)

495

In [70]:
# get 500 tweets from 2019 using 30day API
AI_tweets_2019 = get_tweets_premium(api, 5, '30day', 'development', query = query, 
                                    fromDate = '201907170000', toDate = '201908150000', 
                                    maxResults = 100)

In [71]:
for tweet in AI_tweets_2019:
    tweet['year'] = 2019
len(AI_tweets_2019)

498

In [72]:
# compile results
tweets_all_years = AI_tweets_2007 + AI_tweets_2008 + AI_tweets_2009 + AI_tweets_2010 +\
                   AI_tweets_2011 + AI_tweets_2012 + AI_tweets_2013 + AI_tweets_2014 +\
                   AI_tweets_2016 + AI_tweets_2017 + AI_tweets_2018 + AI_tweets_2019

print(type(tweets_all_years), len(tweets_all_years))

<class 'list'> 5460


In [135]:
tweets_all_years[0:5]

[{'created_at': 'Sat Dec 29 22:43:06 +0000 2007',
  'id': 544845252,
  'id_str': '544845252',
  'text': 'Anyone involved in artificial intelligence work? Specifically, anyone know of any AI system that can RECOGNISE puns in natural language?',
  'source': '<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>',
  'truncated': False,
  'in_reply_to_status_id': None,
  'in_reply_to_status_id_str': None,
  'in_reply_to_user_id': None,
  'in_reply_to_user_id_str': None,
  'in_reply_to_screen_name': None,
  'user': {'id': 5967392,
   'id_str': '5967392',
   'name': 'Mark Harrison',
   'screen_name': 'MarkHarrisonUK',
   'location': 'Malham Close, Crawley, West Su',
   'url': 'http://markharrison.wordpress.com',
   'description': 'Serial entrepreneur, currently running a boutique programming shop in London specialising in regulated markets.',
   'translator_type': 'none',
   'protected': False,
   'verified': False,
   'followers_count': 757,
   'friends_count': 600,
   'listed_

In [73]:
tweets_all_years_df = tweets_to_df(tweets_all_years)
tweets_all_years_df.head()

Unnamed: 0,id,user_name,user_screenname,user_description,user_friends,user_followers,user_favorites,retweets,date,location,geocode,hashtags,tweet
0,544845252,Mark Harrison,MarkHarrisonUK,"Serial entrepreneur, currently running a bouti...",600,757,8,0,Sat Dec 29 22:43:06 +0000 2007,"Malham Close, Crawley, West Su",,,Anyone involved in artificial intelligence wor...
1,532130162,ヤーンポッチナズムナイルピネロピルニャー,quek,ヤーンポッチナズムナイルピネロピルニャール卿バッハナスカレシン Common Lisp 合...,620,606,2164,0,Tue Dec 25 13:29:53 +0000 2007,"JP (Japan, Japan)",,,読書中 Paradigms of Artificial Intelligence Progr...
2,531926162,geekgirl Ⓥ,rosiex,#geekgirl founder. publisher of australia's lo...,4204,4178,11324,0,Tue Dec 25 11:33:43 +0000 2007,Melbourne,,,Update on geekgirl LOVE AND SEX WITH ROBOTS: I...
3,524153882,Tom Novak,rebelsofmars,Microblogging Retro Sci Fi and Fantastic Adven...,217,232,2,0,Sat Dec 22 15:20:03 +0000 2007,Barsoom!,,,"""A.I. Artificial Intelligence"" a Speilberg mov..."
4,523822582,ヤーンポッチナズムナイルピネロピルニャー,quek,ヤーンポッチナズムナイルピネロピルニャール卿バッハナスカレシン Common Lisp 合...,620,606,2164,0,Sat Dec 22 11:29:44 +0000 2007,"JP (Japan, Japan)",,,Paradigms of Artificial Intelligence Programmi...


In [81]:
# pickle and save the results as CSV
pd.to_pickle(tweets_all_years_df, 'tweets_all_years_df.pickle')
tweets_all_years_df.to_csv('tweets_all_years_df.csv')

In [76]:
# request 24,000 tweets from 30day API (takes ~10 minutes)
AI_tweets_2019_large = get_tweets_premium(api, 240, '30day', 'development', query = query, 
                                          fromDate = '201907170000', toDate = '201908150000', 
                                          maxResults = 100)

In [77]:
len(AI_tweets_2019_large)

23864

In [78]:
for tweet in AI_tweets_2019_large:
    tweet['year'] = 2019

In [79]:
tweets_2019_large_df = tweets_to_df(AI_tweets_2019_large)
tweets_2019_large_df.head()

Unnamed: 0,id,user_name,user_screenname,user_description,user_friends,user_followers,user_favorites,retweets,date,location,geocode,hashtags,tweet
0,1161789559715196928,Barbara Frietchie,Barbaren7,"""Shoot, if you must, this old gray head But sp...",1191,937,52448,0,Wed Aug 14 23:59:50 +0000 2019,,,,RT @rising_serpent: Once you combine this giga...
1,1161789531328319488,ILoveUiPath,ILoveUiPath,#RPA lover. I believe that technology is here ...,4966,2453,533,0,Wed Aug 14 23:59:43 +0000 2019,,,"[AI, SXSWEDU]",RT @UiPath: Do you believe in the power of #AI...
2,1161789508771336195,Max 🇫🇷🇺🇸,girardmaxime33,Head of Data&Traffic http://Jeux.com and Actip...,1139,2925,2152,2,Wed Aug 14 23:59:38 +0000 2019,"Bordeaux, France",,"[AI, FutureofWork, ANN, MachineLearning]",RT MikeQuindazzi: 7 #AI terms in the #Futureof...
3,1161789506426671104,willynet,willynet,"In love with Stella, Father of Cecilia, Ximena...",3124,1667,11819,0,Wed Aug 14 23:59:37 +0000 2019,"Santiago, Chile",,[MSInspire],RT @Microsoft: What if technology could help y...
4,1161789505889820672,Max 🇫🇷🇺🇸,girardmaxime33,Head of Data&Traffic http://Jeux.com and Actip...,1139,2925,2152,2,Wed Aug 14 23:59:37 +0000 2019,"Bordeaux, France",,"[BigData, PwC, AI]",RT MikeQuindazzi: #BigData sets unlocking $45 ...


In [82]:
pd.to_pickle(tweets_2019_large_df, 'tweets_2019_large_df.pickle')
tweets_2019_large_df.to_csv('tweets_2019_large_df.csv')

In [120]:
# open MongoDB daemon and connect with pymongo

#!mongod
client = MongoClient()
client.list_database_names()

['admin', 'books', 'config', 'local', 'outings']

In [121]:
twitter = client['twitter']  # make new db
twitter.create_collection('tweets_all_years')  # make new collection
twitter.create_collection('tweets_2019_large')

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'twitter'), 'tweets_2019_large')

In [122]:
twitter.list_collection_names()

['tweets_2019_large', 'tweets_all_years']

In [123]:
all_years = twitter.tweets_all_years
large_2019 = twitter.tweets_2019_large

In [124]:
all_years.insert_many(tweets_all_years)  # insert fullarchive tweets to 'tweets_all_years' collection

<pymongo.results.InsertManyResult at 0x121a43b48>

In [125]:
large_2019.insert_many(AI_tweets_2019_large)  # insert 30day tweets to 'tweets_2019_large' collection

<pymongo.results.InsertManyResult at 0x120717208>

In [133]:
print(all_years.count_documents({}), large_2019.count_documents({}))

5460 23864
