# Imports, environment variables, and API authentication

In [30]:
import requests
import json
import csv
import pandas as pd
import os
from os import path
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

In [31]:
# Environment variables
# The bearer token is a secret credential, so it is read from the environment
# instead of being stored in the notebook. Set it before starting Jupyter:
#   Windows:      set BEARER_TOKEN=<your token>
#   macOS/Linux:  export BEARER_TOKEN=<your token>
bearer_token = os.environ.get("BEARER_TOKEN")
if not bearer_token:
    # Fail here with a clear message rather than later with an HTTP 401.
    raise RuntimeError(
        "The BEARER_TOKEN environment variable is not set; "
        "it must contain your Twitter API v2 bearer token."
    )
search_url = "https://api.twitter.com/2/tweets/search/recent"

# Download the VADER lexicon needed by the sentiment analyzer, then build the
# analyzer once so it can be reused for every tweet.
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

def bearer_oauth(r):
    """Add the bearer-token and user-agent headers to request ``r``.

    Used as the ``auth`` callable for ``requests.get``. The token is taken
    from the module-level ``bearer_token``. The same request object is
    returned after its headers have been set.
    """
    auth_headers = {
        "Authorization": "Bearer {}".format(bearer_token),
        "User-Agent": "v2RecentSearchPython",
    }
    for header_name, header_value in auth_headers.items():
        r.headers[header_name] = header_value
    return r

def connect_to_endpoint(url, params, timeout=30):
    """Send an authenticated GET request and return the decoded JSON body.

    Args:
        url: Endpoint URL to query.
        params: Query-string parameters forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The response body as parsed by ``response.json()``.

    Raises:
        Exception: If the HTTP status code is not 200. The exception args are
            ``(status_code, response_text)``.
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    response = requests.get(
        url, auth=bearer_oauth, params=params, timeout=timeout
    )
    # Printed on every call so the progress of a multi-page run is visible.
    print(response.status_code)
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\megan\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


# Build a query and specify number of API calls
In the string variable 'topic', specify the query used to search for the tweets you want. It can be as simple as a single keyword or a hashtag. OR operators and brackets are supported; if no operator is given between keywords, an AND operator is implied. An example of a more complex query is 'KelloggsStrike OR KelloggStrike OR BoycottKelloggs OR (Kellogg (strike OR striking OR boycott OR scab))'.

For more details on how to build a more complex query, check https://developer.twitter.com/en/docs/twitter-api/tweets/search/integrate/build-a-query

Then specify how many API calls you would like to make in the variable 'n' (up to 450). Making 450 calls takes a while, so a smaller number is recommended for testing.

Each successful API call prints '200', so you should see up to n lines of '200' (fewer if the results run out before n calls have been made).

In [32]:
# Replace the sample topic with your own query to test.
topic = 'dune'
# Number of API calls to make (each call requests up to 100 tweets).
n=10

# The same parameters are sent on every call; only 'next_token' is added
# when a follow-up page is requested.
query_params = {
    'query': topic+' lang:en',
    'tweet.fields': 'public_metrics,entities,created_at,lang,geo,referenced_tweets',
    'user.fields':'public_metrics,verified',
    'expansions':'author_id',
    'max_results':'100',
}

# Connect to the API, run the query and get the first JSON response.
json_response = connect_to_endpoint(search_url, query_params)

# Make sure the containers exist so later pages can be merged in (and the
# saved file always has these keys) even if a response carries no 'data' or
# 'includes' section.
tweets = json_response.setdefault('data', [])
users = json_response.setdefault('includes', {}).setdefault('users', [])
# Track user ids in a set so de-duplication is O(1) per user instead of
# comparing every new user dict against the whole accumulated list.
seen_user_ids = {user.get('id') for user in users}

# Pagination: keep requesting the next page while one is available and the
# call budget (n calls in total, including the first) is not used up.
next_token = json_response.get('meta', {}).get('next_token')
count = 0
while count < (n-1) and next_token is not None:
    count += 1
    query_params['next_token'] = next_token
    new_data = connect_to_endpoint(search_url, query_params)

    # Append the new page to the original JSON response.
    tweets.extend(new_data.get('data', []))
    for user in new_data.get('includes', {}).get('users', []):
        user_id = user.get('id')
        if user_id not in seen_user_ids:  # only add unique users
            seen_user_ids.add(user_id)
            users.append(user)
    next_token = new_data.get('meta', {}).get('next_token')

# Write to JSON file. The paging metadata only describes the first page, so
# it is dropped before saving.
json_response.pop('meta', None)
# The path expression is kept exactly as the loading cell spells it so both
# cells agree on the file location.
output_path = os.getcwd()+'\\json_responses\\'+'KelloggStrike.json'
output_dir = os.path.dirname(output_path)
if output_dir:
    # Create the output folder on first use instead of failing in open().
    os.makedirs(output_dir, exist_ok=True)
with open(output_path, 'w') as f:
    json.dump(json_response, f, sort_keys = True, indent=4)

200
200
200
200
200
200
200
200
200
200


# Convert the JSON file to CSV files

In [33]:
def _open_csv(out_dir, filename):
    """Open ``filename`` inside ``out_dir`` for writing CSV as UTF-8 text."""
    return open(os.path.join(out_dir, filename), 'w', encoding='utf-8', newline='')


def _write_tweet_csvs(tweets, out_dir):
    """Write Tweet.csv and Referenced_Tweet.csv from a list of tweet dicts."""
    with _open_csv(out_dir, 'Tweet.csv') as tweet_file, \
            _open_csv(out_dir, 'Referenced_Tweet.csv') as ref_file:
        tweet_writer = csv.writer(tweet_file)
        ref_writer = csv.writer(ref_file)

        # Headers are written up front so the files are valid CSV even when
        # there are no tweets.
        tweet_writer.writerow(['tweet_id','user_id','created_at','text','like_count','quote_count','reply_count','retweet_count','sentiment_score'])
        ref_writer.writerow(['tweet_id','referenced_tweet_id','type'])

        for tweet in tweets:
            text = tweet.get('text')
            # Sentiment is the 'compound' value reported by the analyzer.
            score = sia.polarity_scores(text)["compound"]
            metrics = tweet.get('public_metrics')
            # The text is wrapped in single quotes, as in the existing output
            # format (presumably expected by a downstream consumer - confirm).
            tweet_writer.writerow([
                tweet.get('id'),
                tweet.get('author_id'),
                tweet.get('created_at'),
                "'"+text+"'",
                metrics['like_count'],
                metrics['quote_count'],
                metrics['reply_count'],
                metrics['retweet_count'],
                score,
            ])

            # One row per tweet this tweet refers to.
            for ref in tweet.get('referenced_tweets', []):
                ref_writer.writerow([tweet.get('id'), ref['id'], ref['type']])


def _write_user_csv(users, out_dir):
    """Write User.csv from a list of user dicts."""
    with _open_csv(out_dir, 'User.csv') as user_file:
        user_writer = csv.writer(user_file)
        user_writer.writerow(['user_id','followers','following','tweet_count','listed_count','verified'])
        for user in users:
            metrics = user.get('public_metrics')
            user_writer.writerow([
                user.get('id'),
                metrics['followers_count'],
                metrics['following_count'],
                metrics['tweet_count'],
                metrics['listed_count'],
                user.get('verified'),
            ])


def _write_entity_csvs(tweets, out_dir):
    """Write Hashtag_Tweet.csv, URL_Tweet.csv and Mention_Tweet.csv."""
    with _open_csv(out_dir, 'Hashtag_Tweet.csv') as hashtag_file, \
            _open_csv(out_dir, 'URL_Tweet.csv') as url_file, \
            _open_csv(out_dir, 'Mention_Tweet.csv') as mention_file:
        hashtag_writer = csv.writer(hashtag_file)
        url_writer = csv.writer(url_file)
        mention_writer = csv.writer(mention_file)

        hashtag_writer.writerow(['tweet_id','hashtag'])
        url_writer.writerow(['tweet_id','urls'])
        mention_writer.writerow(['tweet_id','mentions'])

        for tweet in tweets:
            # Tweets without an 'entities' section contribute no rows.
            if 'entities' not in tweet:
                continue
            entities = tweet.get('entities')
            tweet_id = tweet.get('id')
            for hashtag in entities.get('hashtags', []):
                hashtag_writer.writerow([tweet_id, hashtag.get('tag')])
            for url in entities.get('urls', []):
                url_writer.writerow([tweet_id, url.get('expanded_url')])
            for mention in entities.get('mentions', []):
                mention_writer.writerow([tweet_id, mention.get('id')])


def jsonToCsv(jsonData, case, folder):
    """Export parsed API records to CSV files inside ``folder``.

    Args:
        jsonData: List of dicts to export: tweet dicts for the 'tweet' and
            'entities' cases, user dicts for the 'user' case.
        case: Selects which files are written:
            'tweet'    -> Tweet.csv and Referenced_Tweet.csv
            'user'     -> User.csv
            'entities' -> Hashtag_Tweet.csv, URL_Tweet.csv, Mention_Tweet.csv
            Any other value writes nothing.
        folder: Name of the output folder, resolved against the current
            working directory. It is created if it does not exist; existing
            CSV files in it are overwritten.

    The 'tweet' case uses the module-level sentiment analyzer ``sia``.
    Header rows are always written, even when ``jsonData`` is empty.
    """
    writers = {
        'tweet': _write_tweet_csvs,
        'user': _write_user_csv,
        'entities': _write_entity_csvs,
    }
    writer = writers.get(case)
    if writer is None:
        # Unknown case: nothing to write, and no folder is created.
        return

    # os.path.join keeps the path valid on every platform.
    out_dir = os.path.join(os.getcwd(), folder)
    os.makedirs(out_dir, exist_ok=True)
    writer(jsonData, out_dir)

## For testing, specify the test folder name in the variable 'folderName'
A new folder with that name will be created to hold the newly generated CSV files. If the folder already exists, the CSV files in it will be overwritten.


In [34]:
# Load the JSON file saved by the search step.
with open(os.getcwd()+'\\json_responses\\'+'KelloggStrike.json') as json_file:
    data = json.load(json_file)
# Fall back to empty lists so a saved response without tweets or users does
# not raise KeyError.
jsonData=data.get('data', [])
jsonUser=data.get('includes', {}).get('users', [])

# Specify the folder name here.
folderName = 'Test4'
# Generate the CSV files in that folder. If it already exists, the CSV files
# in it are overwritten.
location=os.getcwd()+'\\'+folderName
# exist_ok avoids the check-then-create race of testing path.exists() first.
os.makedirs(location, exist_ok=True)
jsonToCsv(jsonData, 'tweet', folderName)
jsonToCsv(jsonData, 'entities',folderName)
jsonToCsv(jsonUser, 'user',folderName)
