# Summary

## Test the Phase 1/2 Twitter (X) API scraper


## Import libraries

In [1]:
import os
import pickle

import pandas as pd
import tweepy
import json


# SWB Phase 1 scraper modules (disabled here; uncomment to import them from ../scraper)
# import sys
# sys.path.insert(0, '../scraper')

# import model
# import scraper


## Read X API credentials

In [2]:
# Read the X API credentials from a local JSON file so that no secrets are
# hard-coded in the notebook.
CREDENTIALS_FILE = 'credentials.json'

# Keys that the tweepy.Client cell below looks up in `credentials`.
REQUIRED_CREDENTIAL_KEYS = (
    'bearer_token',
    'consumer_key',
    'consumer_secret',
    'access_token',
    'access_token_secret',
)

# Be explicit about the encoding instead of relying on the platform default.
with open(CREDENTIALS_FILE, "r", encoding="utf-8") as f:
    credentials = json.load(f)

# Fail fast with a readable message rather than a bare KeyError in a later cell.
missing_keys = [key for key in REQUIRED_CREDENTIAL_KEYS if key not in credentials]
if missing_keys:
    raise KeyError(
        '{} is missing required key(s): {}'.format(
            CREDENTIALS_FILE, ', '.join(missing_keys)
        )
    )
    

## Establish X API client

In [5]:
# Create the tweepy client used by the cells below, passing each credential
# read from the JSON file under the keyword of the same name.
client_credentials = {
    key: credentials[key]
    for key in (
        'bearer_token',
        'consumer_key',
        'consumer_secret',
        'access_token',
        'access_token_secret',
    )
}
client = tweepy.Client(**client_credentials)


## API V2 Test

In [7]:
# Smoke test: send one small recent-search request through the client
SMOKE_TEST_QUERY = "Tweepy"
response = client.search_recent_tweets(query=SMOKE_TEST_QUERY, max_results=10)
    
    

In [8]:
# Display the Response object returned by the test search above
response

Response(data=[<Tweet id=1725650040356536496 text='🕚 23:00 on 17 November 2023\n\nBarometer 1036.9mb, Falling\nTemperature 7.2°C, Rising\nRain today 0.3mm\nWind 0mph NNE, Gust 2mph\nHumidity 94%\nUV 0.0\nSolarRad 0W/m²\n\n#WS3085 @Raspberry_Pi\n#CumulusMX #tweepy'>, <Tweet id=1725647497278955825 text='This Tweet was Tweeted using Tweepy and Twitter API v2!!'>, <Tweet id=1725637905002148220 text='This Tweet was Tweeted using Tweepy and Twitter API v2!'>, <Tweet id=1725634952627126548 text='🕙 22:00 on 17 November 2023\n\nBarometer 1037.8mb, Falling\nTemperature 7.6°C, Rising\nRain today 0.3mm\nWind 1mph N, Gust 7mph\nHumidity 95%\nUV 0.0\nSolarRad 0W/m²\n\n#WS3085 @Raspberry_Pi\n#CumulusMX #tweepy'>, <Tweet id=1725621024362725717 text="@idowuilekura @BifolaX Yes, that's what I've been saying.\n\nNone of the SM allow data scraping without using their API, and access typically requires payment. I've tried using Tweepy to extract Twitter data but it was blocked.\n\nChatGpt is doing the same

## Look at Phase 1 and Phase 2 queries

In [6]:
# Load the cached list of search queries from the scraper directory.
# NOTE(review): pickle.load can execute arbitrary code; only load a cache file
# that comes from a trusted source.
QUERY_CACHE_FILE = '../scraper/querylist.pkl'
with open(QUERY_CACHE_FILE, "rb") as cache_file:
    query_cache = pickle.load(cache_file)

# Report how many queries were loaded.
query_count = len(query_cache)
print('There are {} queries.'.format(query_count))


There are 846 queries.


In [7]:
# Show the first entry of the query cache as a sample of its structure
first_query = query_cache[0]
print('The first query in the query cache:', first_query, sep='\n')


The first query in the query cache:
('(900-block pandora avenue OR esquimalt OR highlands OR metchosin OR pacheedaht OR saanich OR sooke OR vic west) (aboriginal coalition to end homelessness OR our place society OR victoria native friendship centre OR island health OR united way southern vancouver island OR greater victoria acting together OR victoria foundation OR aechr OR pacifica housing OR umbrella society OR unhoused OR crime) lang:en -is:retweet', '900-block pandora avenue OR esquimalt OR highlands OR metchosin OR pacheedaht OR saanich OR sooke OR vic west')


## Test the Phase 1 queries

In [11]:
# Search settings for this test run: no explicit time window, ten tweets per query.
START_TIME = None
END_TIME = None
MAX_TWEETS = 10

# Request options that are identical for every query, so they are built once
# here instead of on each pass through the loop.
TWEET_FIELDS = [
    "context_annotations",
    "public_metrics",
    "created_at",
    "text",
    "source",
    "geo",
]
USER_FIELDS = [
    "name",
    "username",
    "location",
    "verified",
    "description",
    "public_metrics",
]
PLACE_FIELDS = ["country", "geo", "name", "place_type"]
EXPANSIONS = ["author_id", "geo.place_id", "referenced_tweets.id"]

SEARCH_OPTIONS = dict(
    start_time=START_TIME,
    end_time=END_TIME,
    tweet_fields=TWEET_FIELDS,
    user_fields=USER_FIELDS,
    max_results=MAX_TWEETS,
    place_fields=PLACE_FIELDS,
    expansions=EXPANSIONS,
)

# Run the first five cached queries through the recent-search endpoint to
# check that the main scraper query is working.
for q in query_cache[:5]:
    # Element 0 of each cache entry is the query string; ampersands are removed.
    TW_QUERY = q[0].replace("&", "")
    # Wrap the standalone word "and" in double quotes -- presumably so it is
    # matched literally rather than read as an operator; TODO confirm.
    search_query = TW_QUERY.replace(" and ", ' "and" ')
    # Element 1 of each cache entry is reported as the relevant region.
    RELEVANT_REGION = q[1]

    print('TW_Query: {}'.format(TW_QUERY))
    print('Relevant Region: {}'.format(RELEVANT_REGION))

    tweets = client.search_recent_tweets(query=search_query, **SEARCH_OPTIONS)

    print('Query_result: {}'.format(tweets))
    print()


TW_Query: (900-block pandora avenue OR esquimalt OR highlands OR metchosin OR pacheedaht OR saanich OR sooke OR vic west) (aboriginal coalition to end homelessness OR our place society OR victoria native friendship centre OR island health OR united way southern vancouver island OR greater victoria acting together OR victoria foundation OR aechr OR pacifica housing OR umbrella society OR unhoused OR crime) lang:en -is:retweet
Relevant Region: 900-block pandora avenue OR esquimalt OR highlands OR metchosin OR pacheedaht OR saanich OR sooke OR vic west
Query_result: Response(data=[<Tweet id=1725600957419925709 text='Woman sentenced to 4 decades for death of newborn baby https://t.co/NVz6ltUFlM'>, <Tweet id=1725559679504338976 text="THE PARTY HOUSE - 'This will appeal to fans of psychological thrillers and crime and mystery readers who are likely to love being immersed in the Scottish Highlands location.' https://t.co/AfLsxmjDVN  #CrimeFiction #Thriller #ThePartyHouse #PartyHouseBook #LinA

Query_result: Response(data=[<Tweet id=1725650168580600117 text='#NewsRelease: An affordable housing development for Indigenous people has opened near Central Saanich. It has 28 homes and is located on the Tsawout First Nation land.\n\nLearn more: https://t.co/djrIVsZJqA\n\n#AffordableHousing #Saanich #Indigenous https://t.co/yvR44eqRMS'>, <Tweet id=1725622839888450017 text='Very excited to see this project moving forward. A big thank you to all the #Saanich neighbours who attended one of the open houses this week! \n\nPlease get in touch if you have additional thoughts to share! \n\nhttps://t.co/WUWaNQITBL'>, <Tweet id=1725230028781392123 text='"The Nellie McClung library branch would double in size, occupying two storeys, and have 10 storeys of lower-cost housing on top of it, under a proposal being pitched by the District of Saanich."\nhttps://t.co/50csxSVQGC'>, <Tweet id=1724843757085012317 text='#Saanich - Saanich looks to expand Nellie McClung library and put 10 storeys of afford