# Imports and Twitter Auth

In [17]:
# Module imports 
import tweepy
import time
import pandas as pd
import numpy as np
import json
import time
from random import randint, random
from googlesearch import search as gsearch
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import pathlib
import os
from dotenv import load_dotenv

# Locate the dotenv credentials file: it is looked up by name in the parent
# of the current working directory.
env_file_name = 'lrhf97_twit_cred.env'
parent_dir = pathlib.Path.cwd().parent
env_path = parent_dir / env_file_name

# Load the variables from that file into the process environment.
load_dotenv(env_path)
test_cred = os.environ.get("test_cred")

# Consumer key/secret are read from the environment (None when a variable is unset).
api_key = os.environ.get("API_KEY")
secret_key = os.environ.get("API_SECRET")

# The access token pair is blank in this notebook.
# NOTE(review): presumably redacted placeholders -- confirm before running.
access_token = secret_token = ''

def authenticate(api_key, secret_key, access_token, secret_token):
    """Create a Tweepy API client from OAuth credentials.

    Parameters
    ----------
    api_key, secret_key
        Consumer credentials handed to ``tweepy.OAuthHandler``.
    access_token, secret_token
        Access token pair handed to ``set_access_token`` on that handler.

    Returns
    -------
    The object returned by ``tweepy.API`` for the configured handler, created
    with ``wait_on_rate_limit`` and ``wait_on_rate_limit_notify`` both True.
    """
    handler = tweepy.OAuthHandler(api_key, secret_key)
    handler.set_access_token(access_token, secret_token)

    # NOTE(review): wait_on_rate_limit_notify is version-dependent in Tweepy --
    # confirm the installed release still accepts it.
    client_options = {
        'wait_on_rate_limit': True,
        'wait_on_rate_limit_notify': True,
    }
    return tweepy.API(handler, **client_options)


# Build the API client once; the query cells below all go through `api`.
api = authenticate(
    api_key=api_key,
    secret_key=secret_key,
    access_token=access_token,
    secret_token=secret_token,
)

## User-Based Tweet pull

In [43]:
username = 'jack'
count = 15
try:
    # Page through the user's timeline and stop after `count` tweets.
    # `username` holds a handle, so it is passed as screen_name rather than
    # through the ambiguous `id` parameter.
    tweets = tweepy.Cursor(api.user_timeline, screen_name=username).items(count)

    # One row per tweet: creation time, tweet id, tweet text.
    tweets_list = [[tweet.created_at, tweet.id, tweet.text] for tweet in tweets]

    # Columns are left unnamed (0, 1, 2); add or remove columns here as you
    # add or remove tweet attributes in the list above.
    tweets_df = pd.DataFrame(tweets_list)

except Exception as e:
    # Catch Exception rather than BaseException so that interrupting the
    # kernel (KeyboardInterrupt), e.g. during a rate-limit wait, still works.
    print(f'Failed to pull timeline for {username!r}: {e}')
    # Leave an empty frame behind so later cells neither hit a NameError nor
    # silently display results from an earlier run.
    tweets_df = pd.DataFrame()

In [44]:
# Preview the first rows of the timeline frame (its columns are unnamed: 0, 1, 2).
tweets_df.head()

Unnamed: 0,0,1,2
0,2021-09-17 19:02:40,1438941493096550407,RT @elonmusk: Please add your voice to the pub...
1,2021-09-17 18:57:50,1438940275292917761,RT @HBerkoe: During @Square's I&amp;D Week I s...
2,2021-09-17 12:49:19,1438847534131208196,RT @archillect: https://t.co/IfyeYaQuSE
3,2021-09-16 19:36:32,1438587627289071620,"RT @artistbasquiat: Bird on Money, 1981 #neoex..."
4,2021-09-16 18:53:35,1438576819117215749,“crypto”


In [22]:
# Make sure to run this cell: the helper below is used by later functions.
def extract_coordinates(row):
    """Return the coordinates stored in a row's 'Tweet Coordinates' entry.

    Parameters
    ----------
    row
        Any object indexable by the key 'Tweet Coordinates'
        (presumably a DataFrame row passed via ``apply(axis=1)`` -- confirm
        against the caller).

    Returns
    -------
    ``row['Tweet Coordinates']['coordinates']`` when the entry is present,
    otherwise None. None, empty values and NaN all count as "not present",
    since tweets frequently carry no coordinate information.
    """
    coords = row['Tweet Coordinates']

    # pandas represents missing values as NaN, which is truthy; a plain
    # truthiness check would then try to index a float and raise TypeError.
    # `x != x` is the import-free NaN test.
    is_nan = isinstance(coords, float) and coords != coords
    if not coords or is_nan:
        return None
    return coords['coordinates']
        
# Make sure to run this cell: the helper below is used by later functions.
def extract_place(row):
    """Return the place name (city, state or country) stored in a row.

    Parameters
    ----------
    row
        Any object indexable by the key 'Place Info'
        (presumably a DataFrame row passed via ``apply(axis=1)`` -- confirm
        against the caller).

    Returns
    -------
    The ``full_name`` attribute of ``row['Place Info']`` when the entry is
    present, otherwise None. None and NaN both count as "not present", since
    tweets frequently carry no place information.
    """
    place = row['Place Info']

    # pandas represents missing values as NaN, which is truthy; a plain
    # truthiness check would then read .full_name off a float and raise
    # AttributeError. `x != x` is the import-free NaN test.
    is_nan = isinstance(place, float) and place != place
    if not place or is_nan:
        return None
    return place.full_name

## Coordinate-Based Pull

In [33]:
# Search settings. This example may return no tweets once until_date falls
# outside the 7-day period counted back from the day the cell is run.
coordinates = '19.402833,-99.141051,50mi'
language = 'es'
result_type = 'recent'
until_date = '2021-09-10'
max_tweets = 150

# Build the search query from the settings above and cap it at max_tweets items.
tweets = tweepy.Cursor(
    api.search,
    geocode=coordinates,
    lang=language,
    result_type=result_type,
    until=until_date,
    count=100,
).items(max_tweets)

# One row per tweet. Add or remove attributes in the inner list to change
# which pieces of tweet information end up in the frame.
tweets_list = [
    [
        tweet.text,
        tweet.created_at,
        tweet.id_str,
        tweet.favorite_count,
        tweet.user.screen_name,
        tweet.user.id_str,
        tweet.user.location,
        tweet.user.url,
        tweet.user.verified,
        tweet.user.followers_count,
        tweet.user.friends_count,
        tweet.user.statuses_count,
        tweet.user.default_profile_image,
        tweet.lang,
    ]
    for tweet in tweets
]

# Column names are left out to keep the example short, so columns are numbered.
tweetsLOC_df = pd.DataFrame(tweets_list)

The above query pulls up to 150 recent Spanish-language tweets from within a 50-mile radius of Mexico City, created before September 10th, 2021 (the `until` date). This code snippet is a little different from the other two shown before. In order to refine the search, you'll have to add the different parameters shown in the picture above to tweepy.Cursor(geocode = coordinates, lang = language, etc.) and either pass each one a variable or hardcode its value. That way you can refine your search by location, language, or whatever else you need.

In [34]:
# Display the search results; the rendered output elides the middle rows with "...".
tweetsLOC_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,@Ivo_cachonda Ya t' sigo en tus 2s insta...,2021-09-09 23:59:59,1436117212272726016,4,RuiGarciDaviso,995365920,Tizayuca Hidalgo,,False,26,383,947,False,es
1,Les dejo este artículo que escribí sobre Silvi...,2021-09-09 23:59:59,1436117210225852418,0,ricardojescobar,70053997,Ciudad de México.,https://t.co/BgWaMyZhmf,True,19651,887,49431,False,es
2,@DeportesMinuto @Jcchavez115 Recuerdo perfecta...,2021-09-09 23:59:59,1436117210196439054,2,JhonyOAmaroNav1,1427450402216157186,"Cuernavaca, Morelos",,False,18,150,1071,False,es
3,"@Rubluoo En Banco Azteca tenemos la mejor App,...",2021-09-09 23:59:59,1436117209533894660,3,BancoAzteca,187552996,"México, DF",https://t.co/HnIF8uocOT,True,49579,3281,58662,False,es
4,@Excelsior Jajajajajajaja en tres años ese aer...,2021-09-09 23:59:58,1436117206555840517,0,RAUNA101,145802339,mexico d.f.,,False,163,603,3974,False,es
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,#Cultura | No se pierdan esta exposición fotog...,2021-09-09 23:59:02,1436116973390336003,3,pauloyolatl,148126344,"Puebla, México",https://t.co/sZfSIK0Wo2,False,17024,4001,90388,False,es
146,@miguelrb1 ¿Departamento o qué?,2021-09-09 23:59:02,1436116973373505536,0,JoorgeGalvan,95039363,Mexico City,,False,326,323,7042,False,es
147,yo en best buy:\n\n-señorita este teclado es m...,2021-09-09 23:59:02,1436116971851030537,0,ThisReedo,461540195,Toluca,,False,356,967,9255,False,es
148,@ClaromusicaPE Listo ya llegamos a la meta💜\nT...,2021-09-09 23:59:01,1436116967602089993,0,Lizethreyes2001,2882374298,Morelos México,,False,319,1049,10341,False,es


## Query based + Coordinates

In [39]:
# Search settings: a keyword plus a geocode string.
text_query = 'Coronavirus'
coordinates = '36.169786,-115.139858,50mi'
max_tweets = 150

# Build the search query from the settings above and cap it at max_tweets items.
tweets = tweepy.Cursor(
    api.search,
    q=text_query,
    geocode=coordinates,
    count=100,
).items(max_tweets)

# One row per tweet. Add or remove attributes in the inner list to change
# which pieces of tweet information end up in the frame.
tweets_list = [
    [
        tweet.text,
        tweet.created_at,
        tweet.id_str,
        tweet.favorite_count,
        tweet.user.screen_name,
        tweet.user.id_str,
        tweet.user.location,
        tweet.user.followers_count,
        tweet.coordinates,
        tweet.place,
    ]
    for tweet in tweets
]

# Column names are left out to keep the example short, so columns are numbered.
tweetsLV_df = pd.DataFrame(tweets_list)

Whether you want to scrape tweets from a specific user, search for keywords, or search for tweets within a 50-mile radius of Las Vegas, NV (Lat 36.169786, Long -115.139858) that contain the keyword Coronavirus, your tweet scraping is only limited by your imagination and the attributes and methods available in Tweepy. The query above shows how easy it is to pick and choose the methods and information you want; its results are shown below.

In [40]:
# Display the search results; the rendered output elides the middle rows with "...".
tweetsLV_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,NEW: FDA advisory panel endorses COVID-19 boos...,2021-09-17 20:28:25,1438963071116926976,0,FOX5Vegas,21021326,Las Vegas,259583,,
1,FDA panel votes against coronavirus boosters f...,2021-09-17 20:26:47,1438962661069246467,0,dianeraucher,2473825934,"North Las Vegas, NV",8966,,
2,#VaxFact: Evidence shows that infertility is n...,2021-09-17 20:22:03,1438961468691582984,0,TuSNHD,372491544,"Las Vegas, Nevada",328,,
3,French scientist who promoted one of Trump’s f...,2021-09-17 20:09:09,1438958220844810247,0,dianeraucher,2473825934,"North Las Vegas, NV",8966,,
4,You need to read this! Democrats want every Am...,2021-09-17 20:07:43,1438957860969488388,0,BarbArn,144612867,"Las Vegas, NV",20866,,
...,...,...,...,...,...,...,...,...,...,...
145,"More than 6,000 people received the first dose...",2021-09-15 19:40:00,1438226109913464833,36,FOX5Vegas,21021326,Las Vegas,259583,,
146,Want to sign your child up for a COVID-19 vacc...,2021-09-15 19:35:00,1438224851756478465,8,KTNV,16396368,Las Vegas,187404,,
147,TRACKING COVID-19: Clark County's lower COVID-...,2021-09-15 19:20:21,1438221164766244876,5,8NewsNow,15593696,"Las Vegas, Nevada",247821,,
148,"VACCINE INCENTIVE: ""With more employers mandat...",2021-09-15 19:15:00,1438219818444857349,7,FOX5Vegas,21021326,Las Vegas,259583,,
