In [8]:
import tweepy
from twitter_authentication import bearer_token
import time
import pandas as pd
import json
import pickle

In [9]:
def json_load(filepath):
    with open(filepath, "r") as json_file:
        data = json.load(json_file)
    return data

def json_dump(data, filepath, pretty_format = True):
    with open(filepath, 'w') as fw:
        if pretty_format:
            json.dump(data, fw, indent=2, sort_keys=True)
        else:
            json.dump(data, fw)

def pickle_dump(obj, pickle_filepath):
    with open(pickle_filepath, "wb") as f:
        pickle.dump(obj, f, protocol=2)

def pickle_load(pickle_filepath):
    with open(pickle_filepath, "rb") as f:
        obj = pickle.load(f)
    return obj

In [10]:
client = tweepy.Client(bearer_token, wait_on_rate_limit=True)

In [11]:
alz_tweets = []
for tweet in tweepy.Paginator(client.search_all_tweets, 
                                 query = 'alzheimers disease -is:retweet place_country:US',
                                 user_fields = ['username', 'public_metrics', 'description', 'location'],
                                 place_fields = ['place_type', 'geo'],
                                 tweet_fields = ['created_at', 'geo', 'public_metrics', 'text'],
                                 expansions = ['author_id', 'geo.place_id'],
                                 start_time = '2018-01-01T00:00:00Z',
                                 end_time = '2018-12-31T00:00:00Z',
                              max_results=500):
    time.sleep(1)
    alz_tweets.append(tweet)

In [12]:
response = alz_tweets[0]
response.includes.keys()

dict_keys(['users', 'places'])

In [13]:
response.data

[<Tweet id=1079428528007979008 text='Testing could help doctors pinpoint sooner the type of dementia a patient might have.\nhttps://t.co/cG3jeHx6Jw'>,
 <Tweet id=1079068032092291072 text='Learn how spinal taps are being used to diagnose or rule out #Alzheimers disease and determine dementia treatment:  https://t.co/5z4NwVat4u  #ENDALZ'>,
 <Tweet id=1078822411473223680 text='Meet Vicky Patel of Saratoga, the top fundraiser nationwide for the #walk2endalz. Vicky supports the @alzassociation to honor her father, Richard Sakers Sr, who lived with Alzheimer’s disease for over 10 years. Read her story: https://t.co/rGD8vhYlUK https://t.co/FPHqxjc7VV'>,
 <Tweet id=1077720068023373825 text='A high intake of fish is associated with a lower risk of dementia and #Alzheimers disease. One study found that individuals consuming fish once a week or more had a 60 percent lower risk of being diagnosed with Alzheimer’s compared to those who rarely or never eat fish.'>,
 <Tweet id=1076947320472723456 tex

In [16]:
place_keys= ["full_name", 'id', 'contained_within', 'country', 'country_code', 'geo', 'name', 'place_type' ]
place_dict = {}
user_keys= ['id', 'name', 'username', 'created_at', 
            'description', "entities", 'location', 'pinned_tweet_id', 
            'profile_image_url', 'protected', 'public_metrics', 'url', 'verified', 'withheld']
user_dict = {}
# Loop through each response object
for response in alz_tweets:
    # Take all of the users, and put them into a dictionary of dictionaries with the info we want to keep
    for place in response.includes['places']:
        place_obj = {}
        for key in place_keys:
            place_obj[key] = place[key]
        place_dict[place_obj['id']] = place_obj
    for user in response.includes['users']:
        user_obj = {}
        for key in user_keys:
            user_obj[key] = user[key]
        user_dict[user_obj['id']] = user_obj

In [17]:
json_dump(place_dict, filepath = "place.json")

In [19]:
tweet_keys = ['id', 'text', 'created_at', 'author_id']
tweet_dict = {}
for tweet in response.data:
    tweet_obj = {}
    for key in tweet_keys:
        tweet_obj[key] = getattr(tweet, key)
    tweet_dict[tweet_obj['id']] = tweet_obj

In [20]:
tweet_dict

{980784772527067136: {'id': 980784772527067136,
  'text': "Alzheimers's Disease Linked To Junk Food Not Genetics https://t.co/HBcyMzl9du via @YouTube",
  'created_at': datetime.datetime(2018, 4, 2, 12, 31, 26, tzinfo=datetime.timezone.utc),
  'author_id': 1849839132},
 980745474088013824: {'id': 980745474088013824,
  'text': 'The road to Alzheimer’s disease is lined with processed foods https://t.co/Tkn5iNCmn2 via @qz',
  'created_at': datetime.datetime(2018, 4, 2, 9, 55, 17, tzinfo=datetime.timezone.utc),
  'author_id': 2457484022},
 980628093357514752: {'id': 980628093357514752,
  'text': 'https://t.co/HfBRvoka0a',
  'created_at': datetime.datetime(2018, 4, 2, 2, 8, 51, tzinfo=datetime.timezone.utc),
  'author_id': 14823121},
 979348314201182208: {'id': 979348314201182208,
  'text': '@_victoriaharris or yourself, family member or friend has been afflicted by a disease state. Such as, alcoholism, addiction, mental health or Alzheimers.  Compassion comes ez then!!  Everyone faces battl