In [8]:
import tweepy
from twitter_authentication import bearer_token
import time
import pandas as pd
import json
import pickle

In [9]:
def json_load(filepath):
    with open(filepath, "r") as json_file:
        data = json.load(json_file)
    return data

def json_dump(data, filepath, pretty_format = True):
    with open(filepath, 'w') as fw:
        if pretty_format:
            json.dump(data, fw, indent=2, sort_keys=True)
        else:
            json.dump(data, fw)

def pickle_dump(obj, pickle_filepath):
    with open(pickle_filepath, "wb") as f:
        pickle.dump(obj, f, protocol=2)

def pickle_load(pickle_filepath):
    with open(pickle_filepath, "rb") as f:
        obj = pickle.load(f)
    return obj

In [10]:
client = tweepy.Client(bearer_token, wait_on_rate_limit=True)

In [11]:
alz_tweets = []
for tweet in tweepy.Paginator(client.search_all_tweets, 
                                 query = 'alzheimers disease -is:retweet place_country:US',
                                 user_fields = ['username', 'public_metrics', 'description', 'location'],
                                 place_fields = ['place_type', 'geo'],
                                 tweet_fields = ['created_at', 'geo', 'public_metrics', 'text'],
                                 expansions = ['author_id', 'geo.place_id'],
                                 start_time = '2018-01-01T00:00:00Z',
                                 end_time = '2018-12-31T00:00:00Z',
                              max_results=500):
    time.sleep(1)
    alz_tweets.append(tweet)

In [12]:
response = alz_tweets[0]
response.includes.keys()

dict_keys(['users', 'places'])

In [53]:
#response.data

In [23]:
place_keys= ["full_name", 'id', 'contained_within', 'country', 'country_code', 'geo', 'name', 'place_type' ]
place_dict = {}
user_keys= ['id', 'name', 'username', 'created_at', 
            'description', "entities", 'location', 'pinned_tweet_id', 
            'profile_image_url', 'protected', 'public_metrics', 'url', 'verified', 'withheld']
user_dict = {}
# Loop through each response object
for response in alz_tweets:
    # Take all of the users, and put them into a dictionary of dictionaries with the info we want to keep
    for place in response.includes['places']:
        place_obj = {}
        for key in place_keys:
            place_obj[key] = place[key]
        place_dict[place_obj['id']] = place_obj
    for user in response.includes['users']:
        user_obj = {}
        for key in user_keys:
            user_obj[key] = user[key]
        user_dict[user_obj['id']] = user_obj

In [51]:
json_dump(place_dict, filepath = "place.json")

In [52]:
json_dump(user_dict, filepath = "user.json")

In [48]:
tweet_keys = ['id', 'text', 'author_id', 'context_annotations',
             'conversation_id', 'entities', 'in_reply_to_user_id', 'lang',
             'non_public_metrics', 'organic_metrics', 'possibly_sensitive',
             'promoted_metrics', 'public_metrics', 'referenced_tweets', 'reply_settings',
             'source', 'withheld']
tweet_dict = {}
for tweet in response.data:
    tweet_obj = {}
    for key in tweet_keys:
        tweet_obj[key] = getattr(tweet, key)
    tweet_dict[tweet_obj['id']] = tweet_obj

In [49]:
tweet_dict

{980784772527067136: {'id': 980784772527067136,
  'text': "Alzheimers's Disease Linked To Junk Food Not Genetics https://t.co/HBcyMzl9du via @YouTube",
  'author_id': 1849839132,
  'context_annotations': [],
  'conversation_id': None,
  'entities': None,
  'in_reply_to_user_id': None,
  'lang': None,
  'non_public_metrics': None,
  'organic_metrics': None,
  'possibly_sensitive': None,
  'promoted_metrics': None,
  'public_metrics': {'retweet_count': 0,
   'reply_count': 0,
   'like_count': 0,
   'quote_count': 0,
   'impression_count': 0},
  'referenced_tweets': None,
  'reply_settings': None,
  'source': None,
  'withheld': None},
 980745474088013824: {'id': 980745474088013824,
  'text': 'The road to Alzheimer’s disease is lined with processed foods https://t.co/Tkn5iNCmn2 via @qz',
  'author_id': 2457484022,
  'context_annotations': [],
  'conversation_id': None,
  'entities': None,
  'in_reply_to_user_id': None,
  'lang': None,
  'non_public_metrics': None,
  'organic_metrics': Non

In [50]:
json_dump(tweet_dict, filepath = "tweet.json")