In [1]:
import requests
import os
import json
import pandas as pd
import time
from datetime import date

In [2]:
# where the bearer token is stored
import config as cfg

## functions

In [3]:
# === user list request

def bearer_oauth(r):
    """
    Auth callable handed to ``requests`` for bearer-token authentication.

    Looks up ``cfg.twitter_keys['BEARER_TOKEN']``, writes the
    ``Authorization`` and ``User-Agent`` headers onto the request object
    ``r`` and hands the same object back.
    """
    bearer_token = cfg.twitter_keys['BEARER_TOKEN']
    stamped_headers = (
        ("Authorization", "Bearer {}".format(bearer_token)),
        ("User-Agent", "v2RecentSearchPython"),
    )
    for header_name, header_value in stamped_headers:
        r.headers[header_name] = header_value
    return r

def get_list(url, params, timeout=30):
    """
    Send an authenticated GET request and return the decoded JSON body.

    Parameters
    ----------
    url : str
        Endpoint to call.
    params : dict
        Query-string parameters forwarded to ``requests.get``.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout ``requests`` waits indefinitely, which can hang
        the notebook cell forever on a stalled connection.

    Returns
    -------
    The parsed JSON response.

    Raises
    ------
    Exception
        With ``(status_code, response_text)`` when the status is not 200.
    """
    response = requests.get(url, auth=bearer_oauth, params=params, timeout=timeout)
    # the status code is echoed into the cell output for every call
    print(response.status_code)
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

def users_request(list_id=864088912087715840, count=5000):
    """
    Request the members of a Twitter list from the v1.1 ``lists/members`` endpoint.

    Parameters
    ----------
    list_id : int or str, optional
        Id of the list to query. Defaults to the list id this notebook was
        written for, so calling ``users_request()`` behaves as before.
    count : int or str, optional
        Value sent as the ``count`` query parameter (default 5000).

    Returns
    -------
    The JSON response returned by ``get_list``.
    """
    list_req_url = "https://api.twitter.com/1.1/lists/members.json"
    # both values are sent as strings, matching the original request
    query_params = {'list_id': str(list_id), 'count': str(count)}
    return get_list(list_req_url, query_params)

def id_list(json_response=None):
    """
    Return the user ids of the list members as a Python list.

    Parameters
    ----------
    json_response : dict, optional
        An already-fetched list-members response (a dict with a ``'users'``
        key). When omitted, ``users_request()`` is called to fetch it, which
        is the original behaviour. Passing it in avoids a second identical
        API request when the response is already at hand.

    Returns
    -------
    list
        The values of the ``'id'`` column, one per member.
    """
    if json_response is None:
        json_response = users_request()
    # extract the user IDs
    members = pd.DataFrame.from_dict(json_response['users'])
    return list(members['id'])

def user_list(json_response=None):
    """
    Return a DataFrame with the ``id``, ``name`` and ``screen_name`` of each list member.

    Parameters
    ----------
    json_response : dict, optional
        An already-fetched list-members response (a dict with a ``'users'``
        key). When omitted, ``users_request()`` is called to fetch it, which
        is the original behaviour. Passing it in avoids a second identical
        API request when the response is already at hand.

    Returns
    -------
    pandas.DataFrame
        The three selected columns, one row per member.
    """
    if json_response is None:
        json_response = users_request()
    members = pd.DataFrame.from_dict(json_response['users'])
    return members[['id', 'name', 'screen_name']]

# === user information requests

def create_headers():
    """
    Build the header dict for an API call; it carries the bearer token
    read from ``cfg.twitter_keys['BEARER_TOKEN']``.
    """
    bearer_token = cfg.twitter_keys['BEARER_TOKEN']
    return {"Authorization": f"Bearer {bearer_token}"}

def connect_to_endpoint(url, headers, timeout=30):
    """
    Connect to the API with a GET request and return the JSON response.

    Parameters
    ----------
    url : str
        Full request URL, including any query string.
    headers : dict
        Request headers (expected to carry the authorization header).
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout ``requests`` waits indefinitely, so one stalled
        connection would block a long extraction loop forever.

    Returns
    -------
    The parsed JSON response.

    Raises
    ------
    Exception
        With ``(status_code, response_text)`` when the status is not 200;
        the status code is printed first.
    """
    response = requests.request("GET", url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print(response.status_code)
        raise Exception(response.status_code, response.text)
    return response.json()

def api_request(search_url):
    """
    Issue one authenticated GET against ``search_url`` and return the parsed JSON.

    Headers come from ``create_headers()``; the request itself is made by
    ``connect_to_endpoint()``.
    """
    return connect_to_endpoint(search_url, create_headers())


def extract(user_id_list):
    """
    Request the tweet timeline (``max_results`` of 30) for every id in ``user_id_list``.

    Returns a list holding one JSON response per user id, in input order.
    Requests are issued back to back with no delay; an exception raised by
    ``api_request`` propagates to the caller.
    """
    # fields requested on every call; identical for all users
    tweet_fields = 'created_at,author_id,conversation_id,entities,in_reply_to_user_id,referenced_tweets,text,public_metrics'
    user_fields = 'username,public_metrics,verified,url,id'
    max_results = '30'

    # per-user timeline url; the first placeholder receives the user id
    url_template = "https://api.twitter.com/2/users/{}/tweets?tweet.fields={}&user.fields={}&max_results={}"

    return [
        api_request(url_template.format(user_id, tweet_fields, user_fields, max_results))
        for user_id in user_id_list
    ]


def transform_users(user_list):
    """
    Convert the member DataFrame into a plain dict ready for loading.

    The frame is serialised with ``DataFrame.to_json()`` and parsed back
    with ``json.loads``, then a top-level ``'date'`` key holding today's
    date as a string is added to record when the data was collected.
    """
    payload = json.loads(user_list.to_json())
    payload['date'] = date.today().isoformat()  # same text as str(date.today())
    return payload

def transform_tweets():
    """
    Placeholder for the tweet transformation step; for now it only prints a marker.
    """
    marker = 'do it'
    print(marker)
    

## Run the program - the main() function steps

#### table 1 - user data

In [4]:
# get the user dataframe
# NOTE(review): this assignment rebinds the name `user_list` from the function
# defined in the functions cell to the DataFrame it returns, so calling
# user_list() again (for example by re-running this cell) fails until the
# functions cell is executed again.
user_list = user_list() 

200


In [5]:
# transform it: convert the member DataFrame into a dict and add the collection date
loadable_user_list = transform_users(user_list)

In [6]:
# display the whole dict produced by transform_users (the output is long)
loadable_user_list

{'id': {'0': 1204044181037101057,
  '1': 1198299741060239360,
  '2': 1192858611879428097,
  '3': 1187132490684784642,
  '4': 1173429508034220032,
  '5': 1172345859905871872,
  '6': 1170770038208565248,
  '7': 1166717268975333376,
  '8': 1162219997977960449,
  '9': 1148197780583849984,
  '10': 1143342270399373313,
  '11': 1143229947932229632,
  '12': 1140090941195325440,
  '13': 1125543746697924609,
  '14': 1113280044934340610,
  '15': 1086084557009575936,
  '16': 1084856644687679496,
  '17': 1072606577566183424,
  '18': 1063494232126689280,
  '19': 1044738781171961856,
  '20': 1034798285389737984,
  '21': 1026225299057389569,
  '22': 989311745100566529,
  '23': 955157569894404096,
  '24': 943174774154498048,
  '25': 883774859452579840,
  '26': 876849435820474368,
  '27': 868245382148521984,
  '28': 791282631006621696,
  '29': 767735634438094850,
  '30': 737359208945844224,
  '31': 725344850414043136,
  '32': 720579941184757760,
  '33': 720083863830069249,
  '34': 710180898763890688,
  

#### table 2 - mp tweets

In [7]:
# get the user id list
# NOTE: id_list() issues its own list-members request (the status code is printed below)
user_id_list = id_list()

200


In [None]:
# get the tweets: one API request per id in user_id_list, collected into a list
mp_data = extract(user_id_list)

In [None]:
# transform the mp tweets

### IN PROGRESS WORK

In [18]:
# check that we have all the MPs
# NOTE(review): the recorded output is a NameError; `mp_data` is assigned by the
# "get the tweets" cell, which has no execution count in this saved notebook.
print(len(mp_data))

NameError: name 'mp_data' is not defined

In [67]:
# NOTE(review): in the cells shown here `aggregate_data` is only assigned inside
# extract(); presumably this cell relied on a variable left in the kernel — confirm.
print(aggregate_data[0]) # first element; the recorded output is a dict whose 'data' list holds several tweets

{'data': [{'id': '1319085046934929410', 'created_at': '2020-10-22T01:15:57.000Z', 'public_metrics': {'retweet_count': 23, 'reply_count': 0, 'like_count': 0, 'quote_count': 0}, 'conversation_id': '1319085046934929410', 'referenced_tweets': [{'type': 'retweeted', 'id': '1318947998857109504'}], 'author_id': '1223994797834297350', 'text': 'RT @BlocQuebecois: Depuis un an, 32 députés du #BlocQc portent fièrement la voix du Québec à Ottawa. Grâce à vous, nous avons le privilège…'}, {'id': '1318664109160878087', 'created_at': '2020-10-20T21:23:17.000Z', 'public_metrics': {'retweet_count': 0, 'reply_count': 0, 'like_count': 0, 'quote_count': 0}, 'conversation_id': '1318664109160878087', 'author_id': '1223994797834297350', 'text': 'Attention! Attention! La date butoir est repoussée au vendredi 23 octobre, 15 h pour le programme Nouveaux Horizons aux aînés 2020 #polcan https://t.co/mLEGV4eB6F'}, {'id': '1317208407133257728', 'created_at': '2020-10-16T20:58:51.000Z', 'public_metrics': {'retweet_c

### upload the list of users to the database as json

200
                      id                    name      screen_name
0    1223994797834297350      Charbonneau Louise  CharbonneauLou5
1    1204044181037101057           Gerald Soroka   GeraldSorokaMP
2    1198299741060239360             kyleseeback      kyleseeback
3    1192858611879428097       Mike Kelloway, MP     mikekelloway
4    1187132490684784642           Louise Chabot   LouiseChabotBQ
..                   ...                     ...              ...
316             16014404       Stéphane Bergeron        sbergeron
317             15810950       Matt Jeneroux, MP         jeneroux
318             14538949  Michelle Rempel Garner   MichelleRempel
319             14260960          Justin Trudeau    JustinTrudeau
320              3358671              Ken Hardie        KenHardie

[321 rows x 3 columns]
{'id': {'0': 1223994797834297350, '1': 1204044181037101057, '2': 1198299741060239360, '3': 1192858611879428097, '4': 1187132490684784642, '5': 1173429508034220032, '6': 11723458599

#### Single Tweet Request for Testing

In [73]:
# fields requested for the tweet and user objects in the response
tweet_fields = 'created_at,author_id,conversation_id,entities,in_reply_to_user_id,referenced_tweets,text,public_metrics'
# 'id,created_at,author_id,created_at,conversation_id,lang,entities,in_reply_to_user_id,referenced_tweets,text,public_metrics'  
user_fields = 'username,public_metrics,verified,url,id'
expansions = 'author_id'
max_results = '30'

# Alistair MacGregor (the member table lists this id under screen name AMacGregor4CML)
single_user = 256069692

# the user request url
single_url = "https://api.twitter.com/2/users/{}/tweets?expansions={}&tweet.fields={}&user.fields={}&max_results={}".format(single_user,expansions,tweet_fields,user_fields,max_results)

print(single_url)

# api_request() is the helper from the functions section that takes a URL and
# returns the parsed JSON; the previous call target `main` is not defined in this notebook
single_response = api_request(single_url)

https://api.twitter.com/2/users/256069692/tweets?expansions=author_id&tweet.fields=created_at,author_id,conversation_id,entities,in_reply_to_user_id,referenced_tweets,text,public_metrics&user.fields=username,public_metrics,verified,url,id&max_results=30
200


In [77]:
# dump the raw response and confirm its Python type
print(single_response)
print(type(single_response))

{'data': [{'id': '1421983432830115843', 'created_at': '2021-08-01T23:57:24.000Z', 'entities': {'mentions': [{'start': 0, 'end': 9, 'username': 'K_elly_B', 'id': '378826663'}], 'annotations': [{'start': 44, 'end': 50, 'probability': 0.9746, 'type': 'Person', 'normalized_text': 'Trudeau'}]}, 'conversation_id': '1421982750702145541', 'in_reply_to_user_id': '378826663', 'text': '@K_elly_B I think to highlight that this is Trudeau’s call and his alone.\n\nOnce campaign starts, messaging will shift gears.', 'referenced_tweets': [{'type': 'replied_to', 'id': '1421982750702145541'}], 'public_metrics': {'retweet_count': 0, 'reply_count': 1, 'like_count': 1, 'quote_count': 0}, 'author_id': '256069692'}, {'id': '1421949439615193093', 'created_at': '2021-08-01T21:42:19.000Z', 'conversation_id': '1421949439615193093', 'text': 'It’s not a fall election if E-Day is before September 21st. \n\nPlease adjust your headlines accordingly.\n\nThanks.', 'public_metrics': {'retweet_count': 2, 'reply_count': 2

In [82]:
# single_response['data'] is a list with one dict per tweet; show the first one
print(single_response['data'][0]) # each value in the list is a tweet - loop through all 30 to take the goods

{'id': '1421983432830115843', 'created_at': '2021-08-01T23:57:24.000Z', 'entities': {'mentions': [{'start': 0, 'end': 9, 'username': 'K_elly_B', 'id': '378826663'}], 'annotations': [{'start': 44, 'end': 50, 'probability': 0.9746, 'type': 'Person', 'normalized_text': 'Trudeau'}]}, 'conversation_id': '1421982750702145541', 'in_reply_to_user_id': '378826663', 'text': '@K_elly_B I think to highlight that this is Trudeau’s call and his alone.\n\nOnce campaign starts, messaging will shift gears.', 'referenced_tweets': [{'type': 'replied_to', 'id': '1421982750702145541'}], 'public_metrics': {'retweet_count': 0, 'reply_count': 1, 'like_count': 1, 'quote_count': 0}, 'author_id': '256069692'}


## ignore this ending part

In [87]:
# first tweet dict from the single-user response
first_tweet = single_response['data'][0]

# NOTE(review): despite its name, this holds the tweet's author_id (cast to int), not the tweet id
first_tweet_id = int(first_tweet['author_id'])

# rows of the member DataFrame whose id equals that author id
author = user_list.loc[user_list['id'] == first_tweet_id]

In [92]:
# show the matching member row(s)
print(author)

            id                name     screen_name
216  256069692  Alistair MacGregor  AMacGregor4CML


In [94]:
# Shallow-copy the response so that adding the author name below does not also
# modify `single_response` (a plain assignment would only create a second name
# for the same dict).
single_response_with_auth = dict(single_response)

# `author['name']` is a pandas Series (one entry per matching row); store the
# name of the first match as a plain value instead of the Series itself.
single_response_with_auth['author_name'] = author['name'].iloc[0]