In [2]:
import requests
import os
import json
import pandas as pd
import pickle as pkl
import TwitterUtils as TU

## Exploratory Space For Developing Twitter API Workflow
Our first step is to sample an arbitrary number of Twitter users. The sampling logic lives in `TwitterUtils.py`; this notebook is used to develop and exercise the surrounding workflow.

In [2]:
# Create the project's Twitter client (implemented in TwitterUtils.py, which is not shown here).
user_seed = TU.TwitterClient()
# get_rules prints the rule(s) configured for the current stream.
# TODO: determine whether get_stream can be repurposed for another endpoint.
rules = user_seed.get_rules()
# Ask the stream for a sample of 100 items using those rules.
# NOTE(review): presumably each item is one tweet payload — confirm against TwitterUtils.get_stream.
user_sample = user_seed.get_stream(rules, sample_size = 100)

{"data": [{"id": "1633877176914149393", "value": "(a OR the) has:geo lang:en tweets_count:50", "tag": "active_user"}], "meta": {"sent": "2023-03-09T18:04:09.149Z", "result_count": 1}}
200


In [4]:
# Collect the author id carried by every sampled payload, then display the list.
user_ids = [sampled["data"]["author_id"] for sampled in user_sample]
user_ids

In [3]:
# Restore the previously pickled places mapping from disk.
# NOTE(review): unpickling can execute arbitrary code, so only load places.pkl
# when it was produced by a trusted run of this project.
with open("places.pkl", "rb") as file:
    places = pkl.load(file)

In [4]:
# Display the loaded mapping (place id -> Place object) to inspect its structure.
places

{'67b98f17fdcf20be': Place(id='67b98f17fdcf20be', name='Boston', full_name='Boston, MA', country='United States', country_code='US', url='https://api.twitter.com/1.1/geo/id/67b98f17fdcf20be.json', place_type='city', attributes={'162772:place_id': '2507000', '189390:id': 'boston-ma', 'geotagCount': '3786', '162772:pop100': '589141'}, bounding_box=BoundingBox(type='Polygon', coordinates=[[[-71.191421, 42.227797], [-71.191421, 42.399542], [-70.986004, 42.399542], [-70.986004, 42.227797], [-71.191421, 42.227797]]]), centroid=[-71.08847799196823, 42.311398499999996], contained_within=[Place(id='988257060feb2e4b', name='BOSTON (MANCHESTER)', full_name='BOSTON (MANCHESTER)', country='United States', country_code='US', url='https://api.twitter.com/1.1/geo/id/988257060feb2e4b.json', place_type='admin', attributes={}, bounding_box=BoundingBox(type='Polygon', coordinates=[[[-73.007434, 41.238757], [-73.007434, 43.764988], [-69.926874, 43.764988], [-69.926874, 41.238757], [-73.007434, 41.238757]]]

In [5]:
# Only the Place objects are needed from here on; the dict keys are dropped.
places_unpacked = list(places.values())
def unpack_place(place):
    """Flatten a place object into a plain tuple of its descriptive fields.

    Args:
        place: Any object exposing the attributes ``id``, ``name``,
            ``full_name``, ``country``, ``country_code`` and ``place_type``.

    Returns:
        A 6-tuple ``(id, name, full_name, country, country_code, place_type)``.

    Raises:
        AttributeError: If ``place`` lacks one of the attributes above.
    """
    wanted = ("id", "name", "full_name", "country", "country_code", "place_type")
    return tuple(getattr(place, attribute) for attribute in wanted)

# Flatten every Place into a tuple, then load the tuples into a DataFrame.
# The place_type attribute is exposed under the shorter column name "type".
unpacked_places = list(map(unpack_place, places_unpacked))
place_df = pd.DataFrame(
    unpacked_places,
    columns=("id", "name", "full_name", "country", "country_code", "type"),
)


In [6]:
# Display the flattened place tuples to sanity-check the extraction.
unpacked_places

[('67b98f17fdcf20be', 'Boston', 'Boston, MA', 'United States', 'US', 'city'),
 ('5c62ffb0f0f3479d', 'Phoenix', 'Phoenix, AZ', 'United States', 'US', 'city'),
 ('8a927a7056322151', 'Botswana', 'Botswana', 'Botswana', 'BW', 'country'),
 ('06168d1feda43857',
  'South East',
  'South East, England',
  'United Kingdom',
  'GB',
  'admin'),
 ('94d47cc557aa35f4',
  'Nicholasville',
  'Nicholasville, KY',
  'United States',
  'US',
  'city'),
 ('74316979ec360e9f',
  'St Clair Shores',
  'St Clair Shores, MI',
  'United States',
  'US',
  'city'),
 ('ac88a4f17a51c7fc',
  'Portland',
  'Portland, OR',
  'United States',
  'US',
  'city'),
 ('41c331abe42d9969',
  'Apple Valley',
  'Apple Valley, CA',
  'United States',
  'US',
  'city'),
 ('e4a0d228eb6be76b',
  'Philadelphia',
  'Philadelphia, PA',
  'United States',
  'US',
  'city'),
 ('be1ec4774a795507', 'Leander', 'Leander, TX', 'United States', 'US', 'city'),
 ('003f351717d5d10f', 'Chelsea', 'Chelsea, AL', 'United States', 'US', 'city'),
 ('

In [43]:
# Count how many distinct countries appear in the places table.
len(place_df['country'].unique())


75

In [77]:
# Display the places table (pandas truncates long frames automatically).
place_df

Unnamed: 0,id,name,full_name,country,country_code,type
0,67b98f17fdcf20be,Boston,"Boston, MA",United States,US,city
1,5c62ffb0f0f3479d,Phoenix,"Phoenix, AZ",United States,US,city
2,8a927a7056322151,Botswana,Botswana,Botswana,BW,country
3,06168d1feda43857,South East,"South East, England",United Kingdom,GB,admin
4,94d47cc557aa35f4,Nicholasville,"Nicholasville, KY",United States,US,city
...,...,...,...,...,...,...
895,b90f2a335f8565c0,Arden-Arcade,"Arden-Arcade, CA",United States,US,city
896,011a05612b6a331c,Pasig City,"Pasig City, National Capital Region",Republic of the Philippines,PH,city
897,71c5606d33bd0013,Hucknall,"Hucknall, England",United Kingdom,GB,city
898,28679b23ed15b380,Belfast,"Belfast, Northern Ireland",United Kingdom,GB,city


In [49]:
# Read the raw text of users.json; it is parsed in a later cell.
# The encoding is pinned to UTF-8 so decoding does not depend on the machine's
# locale default, which is what open() falls back to when no encoding is given.
with open("users.json", "r", encoding="utf-8") as file:
    user_json = file.read()

In [66]:
def _split_concatenated_json(text):
    """Decode every top-level JSON value found back-to-back in ``text``.

    The previous approach patched the text with ``replace("}{", "},{")``, which
    also rewrites any ``}{`` that occurs inside a string value (for example in a
    free-text field) and misses object boundaries separated by whitespace or
    newlines. Decoding value by value avoids both problems.

    Args:
        text: A string holding zero or more JSON values, optionally separated
            by whitespace.

    Returns:
        A list with the decoded values, in the order they appear in ``text``.

    Raises:
        json.JSONDecodeError: If ``text`` contains something that is not valid JSON.
    """
    decoder = json.JSONDecoder()
    decoded = []
    position, end = 0, len(text)
    while True:
        # raw_decode does not accept leading whitespace, so skip it by hand.
        while position < end and text[position].isspace():
            position += 1
        if position >= end:
            break
        value, position = decoder.raw_decode(text, position)
        decoded.append(value)
    return decoded


# `test` stays a JSON string of the form {"total": [...]}, with one list entry
# per top-level object found in users.json.
test = json.dumps({"total": _split_concatenated_json(user_json)})

In [64]:
# Display the assembled JSON text to check its structure by eye.
test

'{"total": {"data": [{"username": "iam_rousey", "description": "\\ud83c\\udf37I BELIEVE IN GOD\\u270a\\ud83d\\udcaa||\\ud83d\\udc90PSALM 51 \\ud83d\\ude4f\\ud83d\\udc90||UDSM Alumni||\\ud83c\\udf43Textile Technologist\\ud83c\\udf43||Lady of Ambitions\\ud83c\\udf43||Libraqueen\\u264e\\ufe0f\\ud83c\\udf3b\\ud83c\\udf3b#WANAZI_HQ", "id": "1293183874386731008", "name": "\\ud83e\\uddb9\\u200d\\u2640\\ufe0f\\ud835\\udc07\\ud835\\udc04\\ud835\\udc11\\ud835\\udc0e\\ud835\\udc08\\ud835\\udc0d\\ud835\\udc04\\ud835\\udc11\\ud835\\udc0e\\ud835\\udc12\\ud835\\udc04 \\ud83e\\udd8b", "location": "Arusha,Tanzania"}, {"username": "Pavanasoonu", "description": "Global SAP Tech.  #Jyotishkatti. Spiritual counselor. Associate Prof. @VayuUsa. Jyotish teacher. Followed by Hon\'ble PM Shri. @narendramodi ji. #AskPanditKatti", "id": "1051074201082388480", "name": "Pavanasoonu", "location": "Bengaluru, India"}, {"username": "JohnCam14394418", "description": "", "id": "1625506362515152898", "name": "John Campbe

In [68]:
# Parse the assembled JSON text into Python objects; the parsed entries sit under the "total" key.
user_data = json.loads(test)

In [74]:
# Keep the first entry of each response's "data" list.
# NOTE(review): any further entries in a "data" list are dropped here — confirm that is intended.
users = [response["data"][0] for response in user_data["total"]]

In [75]:
# Display the extracted user records.
users

[{'username': 'iam_rousey',
  'description': '🌷I BELIEVE IN GOD✊💪||💐PSALM 51 🙏💐||UDSM Alumni||🍃Textile Technologist🍃||Lady of Ambitions🍃||Libraqueen♎️🌻🌻#WANAZI_HQ',
  'id': '1293183874386731008',
  'name': '🦹\u200d♀️𝐇𝐄𝐑𝐎𝐈𝐍𝐄𝐑𝐎𝐒𝐄 🦋',
  'location': 'Arusha,Tanzania'},
 {'description': 'Humbly Great !#TEAMLAKERS #FinsUp  Be the wave 🌊 don’t just ride them',
  'location': 'From The Norf',
  'username': 'LastKing24_',
  'id': '328095374',
  'name': 'Franklin Fish🦈'},
 {'description': 'National Spokesperson & Telangana Telugu Mahila President  @JaiTDP @TDPTelangana #TDPTwitter #100YearsOfNTR  #TDPWillBeBack',
  'username': 'jyothsna_tdp',
  'id': '1201449547429367808',
  'location': 'Hyderabad',
  'name': 'JYOTHSNA TIRUNAGARI'},
 {'username': 'lxschurm',
  'id': '43615933',
  'name': 'Leonardo NoCaprio',
  'description': 'i like sports and being outside',
  'location': 'San Francisco'},
 {'description': 'FB:Zion Jessie/Sc:lilzane.zj/ASU 25’💜💛/SoFyeEnt.🔥',
  'name': 'Zno SoFye🔥',
  'id': '1329

In [76]:
# Tabulate the user records: one row per record, one column per key.
pd.DataFrame(users)

Unnamed: 0,username,description,id,name,location
0,iam_rousey,🌷I BELIEVE IN GOD✊💪||💐PSALM 51 🙏💐||UDSM Alumni...,1293183874386731008,🦹‍♀️𝐇𝐄𝐑𝐎𝐈𝐍𝐄𝐑𝐎𝐒𝐄 🦋,"Arusha,Tanzania"
1,LastKing24_,Humbly Great !#TEAMLAKERS #FinsUp Be the wave...,328095374,Franklin Fish🦈,From The Norf
2,jyothsna_tdp,National Spokesperson & Telangana Telugu Mahil...,1201449547429367808,JYOTHSNA TIRUNAGARI,Hyderabad
3,lxschurm,i like sports and being outside,43615933,Leonardo NoCaprio,San Francisco
4,ZionJessie,FB:Zion Jessie/Sc:lilzane.zj/ASU 25’💜💛/SoFyeEnt.🔥,1329103576447193089,Zno SoFye🔥,
5,stanleyeldritch,Bear 🐻.porn maker. World Traveler. A Terror. T...,1265193606740131840,StanleyEldritchXXX↕️ Japan(end of march),"New York, USA"
6,DevilsDigest,The @Rivals affiliate for Arizona State run by...,196474263,DevilsDigest.com,"Tempe, AZ"
7,BeaverOntario,"nature, photography, reading, father, musician...",1184609650634833920,Jon,unceded Algonquin territory
8,NWJS_jobs,Find local jobs now.,1101270461986496512,Nationwide Job Search,
9,Webbdingus,"Host of The Oklahoma Baseball Experience, The ...",23009422,Johnny Baseball,OKC/Nompton


In [65]:
# Persist the assembled JSON text so it can be reloaded without rebuilding it.
# The encoding is pinned to UTF-8 so the file's bytes do not depend on the
# machine's locale default, which open() uses when no encoding is given.
with open("test.json", "w", encoding="utf-8") as f:
    f.write(test)