# Get User Timeline Tweets
## This script takes a list of usernames, queries Twitter for up to 3,200 of each user's most recent tweets, and saves them to JSON files with up to 100 tweets per file.

In [None]:
# -*- coding: utf-8 -*-
"""
Created on 2022-12-14

@author: jsale
"""

In [None]:
import requests
import os
import json

In [None]:
# Sample collection of Twitter handles whose timelines will be processed
test = ["CBSMornings", "nytimes", "TechCrunch"]
# Choose which sample list the remaining cells operate on
twitter_user_array = test

### Create a directory for each user's tweet data

In [None]:
##########################################
# Create one output folder per user: <cwd>/timelines/<username>

cwd_path = os.getcwd()

# Save to a 'timelines' subfolder
for username in twitter_user_array:
    new_dir = cwd_path + "/timelines/" + username
    # Re-running the cell must not report a failure for folders that are
    # already in place.
    if os.path.isdir(new_dir):
        print ("Directory %s already exists." % new_dir)
        continue
    try:
        # makedirs (unlike mkdir) also creates the missing 'timelines' parent.
        os.makedirs(new_dir)
    except OSError:
        print ("Creation of the directory %s failed" % new_dir)
    else:
        print ("Directory %s successfully created." % new_dir)


### Get user timeline tweets and save as JSON with 100 tweets per file

In [None]:
# Enter your Twitter API keys here.
# NOTE: the requests in this notebook authenticate through bearer_oauth;
# these four values are not read by the code visible in this notebook.
access_token_secret = ""
access_token = ""
consumer_secret = ""
consumer_key = ""

# Working directory of the notebook; output paths are built from it.
cwd_path = os.getcwd()

def bearer_oauth(r):
    """Attach the bearer-token Authorization header to request ``r``.

    Passed as the ``auth=`` callable to ``requests``: it sets the
    ``Authorization`` entry of ``r.headers`` and returns the same object.
    Replace the placeholder in the string below with your own token.
    """
    authorization_value = "Bearer <<Your Bearer Token Here>>"
    r.headers["Authorization"] = authorization_value
    return r

# Root output folder; each user gets a sub-folder <outdir>/<username>/.
# outdir already starts with cwd_path, so file paths below are built from
# outdir alone (prefixing cwd_path again would point at a non-existent path).
outdir = cwd_path + '/timelines/'
follower_tweet_count = []
ftc_inc = 0

# Tweet attributes requested for every timeline page.
tweet_fields = "id,text,attachments,author_id,context_annotations,conversation_id,created_at,entities,geo,in_reply_to_user_id,lang,possibly_sensitive,referenced_tweets,public_metrics,reply_settings,source,withheld"


def _get_json(url, label, params=None):
    """GET ``url`` with bearer auth and return the decoded JSON body.

    Prints ``label`` followed by the HTTP status code (progress tracking)
    and raises ``Exception`` when the status code is not 200.
    """
    response = requests.request("GET", url, auth=bearer_oauth, params=params)
    print(label + str(response.status_code))
    if response.status_code != 200:
        raise Exception(
            "Request returned an error: {} {}".format(
                response.status_code, response.text
            )
        )
    return response.json()


def _save_page(username, page_number, payload):
    """Write one page of results to <outdir>/<username>/<username>NN.json."""
    # Two-digit, zero-padded page number keeps the files sortable by name.
    filename = "{}{:02d}.json".format(username, page_number)
    with open(os.path.join(outdir, username, filename), 'w', encoding="utf-8") as outfile:
        json.dump(payload, outfile)


for username in twitter_user_array:
    # Optional print statement for tracking progress
    print("Working on " + username)

    # Get user_id from username because user_id is needed to query user timeline tweets
    json_response = _get_json(
        "https://api.twitter.com/2/users/by",
        "user_id successful ",
        params={"usernames": username, "user.fields": "id,description,created_at"},
    )
    users = json_response.get('data')
    if not users:
        # A 200 response without 'data' means the lookup returned no user.
        raise Exception(
            "No user data returned for {}: {}".format(username, json_response)
        )
    user_id = users[0]['id']

    # Get first page of user timeline tweets to initialize next_token field
    url = "https://api.twitter.com/2/users/" + str(user_id) + "/tweets"
    params = {"tweet.fields": tweet_fields, "max_results": 100}
    json_response = _get_json(url, "0 ", params=params)
    _save_page(username, 0, json_response)

    # Loop through up to 32 further pages of user timeline tweets because of 3200 tweet query limit
    for i in range(1, 33):
        next_token = json_response.get('meta', {}).get('next_token')
        if not next_token:
            # No further pages for this user; stop instead of idling
            # through the remaining iterations.
            break
        params = {
            "tweet.fields": tweet_fields,
            "max_results": 100,
            "pagination_token": next_token,
        }
        json_response = _get_json(url, str(i) + " ", params=params)
        _save_page(username, i, json_response)


### Read JSON files into array of tweets and run utilities to check data

In [None]:
from pprint import pprint

cwd_path = os.getcwd()
# Process every configured user (a fixed range(3) breaks for shorter lists
# and silently ignores longer ones).
for bot_inc, username in enumerate(twitter_user_array):
    pprint("bot_inc:" + str(bot_inc) + " | Working on user " + username)

    # Define variables
    # NOTE: all_tweets is reset for each user, so after the loop it holds the
    # tweets of the last user processed.
    all_tweets = []
    inc = 0
    # Same per-user folder the earlier cells create: <cwd>/timelines/<username>/
    tweet_dir = cwd_path + "/timelines/" + username + '/'
    # Only regular .json files, in name order so pages load as 00, 01, 02, ...
    filenames = sorted(
        name for name in os.listdir(tweet_dir)
        if name.endswith('.json') and os.path.isfile(tweet_dir + name)
    )
    for filename in filenames:
        print(filename)
        with open(tweet_dir + filename, 'r', encoding="utf-8") as f:
            data = json.load(f)
        # A page without tweets has no 'data' key; treat it as empty.
        for tweet in data.get('data', []):
            all_tweets.append(tweet)

            # Track progress, mostly for very large files: report every 100 tweets
            inc += 1
            if inc % 100 == 0:
                print(str(inc))


### Utilities

In [None]:
# Number of tweets currently held in all_tweets
len(all_tweets)

In [None]:
# Print each tweet's position in all_tweets next to its creation timestamp
for i, tweet in enumerate(all_tweets):
    print(str(i) + " " + tweet['created_at'])

In [None]:
# Report how many URL entities each tweet carries, or why none can be counted
for tweet in all_tweets:
    if "entities" not in tweet:
        print('no entities')
    elif "urls" not in tweet['entities']:
        print("no urls")
    else:
        url_size = len(tweet['entities']['urls'])
        print(str(url_size))


In [None]:
# For each tweet, print its position and the type of its first referenced
# tweet, or note that it references none
inc = 0
for tweet in all_tweets:
    if "referenced_tweets" not in tweet:
        print(str(inc) + ' no referenced_tweets')
    else:
        first_reference = tweet['referenced_tweets'][0]
        print(str(inc))
        print(first_reference['type'])
    inc += 1


In [None]:
# For each tweet, print its position plus the number and content of its
# hashtag entities, or note which part is missing
inc = 0
for tweet in all_tweets:
    if "entities" not in tweet:
        print(str(inc) + ' no entities')
    elif "hashtags" not in tweet['entities']:
        print("no hashtags")
    else:
        hashtags = tweet['entities']['hashtags']
        print(str(inc))
        print("# of hashtags: " + str(len(hashtags)))
        print(hashtags)
    inc += 1


In [None]:
# Show the creation timestamp of the first tweet in all_tweets
first_tweet = all_tweets[0]
print(first_tweet['created_at'])