In [1]:
import requests
import os
import json
import pandas as pd
import pickle as pkl
import TwitterUtils as TU

## Exploratory Space For Developing Twitter API Workflow
Our first step is to sample an arbitrary number of Twitter users. The sampling logic is implemented in `TwitterUtils.py`; this notebook is used to develop the workflow around it.

In [2]:
# Instantiate the client class from the local TwitterUtils module (imported as TU).
user_seed = TU.TwitterClient()
rules = user_seed.get_rules() # Prints the rules for the current stream. TODO: determine whether get_stream can be repurposed for another endpoint.
# Pull a sample from the stream using those rules, asking for sample_size = 100.
# NOTE(review): presumably yields one item per streamed tweet — confirm in TwitterUtils.
user_sample = user_seed.get_stream(rules, sample_size = 100)

{"data": [{"id": "1633877176914149393", "value": "(a OR the) has:geo lang:en tweets_count:50", "tag": "active_user"}], "meta": {"sent": "2023-03-09T18:04:09.149Z", "result_count": 1}}
200


In [4]:
# Collect the author id carried by each sampled item, in sample order.
user_ids = [entry['data']['author_id'] for entry in user_sample]
user_ids

In [4]:
# Deserialize the `places` object stored in places.pkl.
# NOTE(security): pickle.load can execute arbitrary code while unpickling; only
# load this file if it comes from a trusted source.
# NOTE(review): a later cell calls places.values(), so this is presumably a
# dict-like mapping — confirm against whatever wrote the file.
with open("places.pkl", "rb") as file:
    places = pkl.load(file)

In [17]:
# Materialize the values of `places` as a list, keeping their iteration order.
places_unpacked = list(places.values())
def unpack_place(place):
    """Flatten a place object into a plain tuple of its descriptive attributes.

    Returns ``(id, name, full_name, country, country_code, place_type)``, each
    read from the attribute of the same name on ``place``.
    """
    fields = ("id", "name", "full_name", "country", "country_code", "place_type")
    return tuple(getattr(place, field) for field in fields)

# One tuple per place, then one DataFrame row per tuple. The last tuple slot
# (the place's `place_type`) is exposed under the column name "type".
unpacked_places = list(map(unpack_place, places_unpacked))
place_df = pd.DataFrame(
    unpacked_places,
    columns=("id", "name", "full_name", "country", "country_code", "type"),
)


In [43]:
# Count the distinct values in the `country` column of place_df.
len(place_df['country'].unique())

75

In [77]:
# Display the places frame (pandas elides the middle rows of a long frame).
place_df

Unnamed: 0,id,name,full_name,country,country_code,type
0,67b98f17fdcf20be,Boston,"Boston, MA",United States,US,city
1,5c62ffb0f0f3479d,Phoenix,"Phoenix, AZ",United States,US,city
2,8a927a7056322151,Botswana,Botswana,Botswana,BW,country
3,06168d1feda43857,South East,"South East, England",United Kingdom,GB,admin
4,94d47cc557aa35f4,Nicholasville,"Nicholasville, KY",United States,US,city
...,...,...,...,...,...,...
895,b90f2a335f8565c0,Arden-Arcade,"Arden-Arcade, CA",United States,US,city
896,011a05612b6a331c,Pasig City,"Pasig City, National Capital Region",Republic of the Philippines,PH,city
897,71c5606d33bd0013,Hucknall,"Hucknall, England",United Kingdom,GB,city
898,28679b23ed15b380,Belfast,"Belfast, Northern Ireland",United Kingdom,GB,city


## Creating The Dataframe

In [38]:
# Read the raw user dump as text. JSON is UTF-8 by specification, so decode it
# explicitly instead of relying on the platform's locale-dependent default.
with open("users.json", "r", encoding="utf-8") as file:
    user_json = file.read()

In [39]:
def _decode_concatenated_json(raw):
    """Decode a string holding several JSON documents written back to back.

    Parameters
    ----------
    raw : str
        Text such as ``'{"data": [...]}{"data": [...]}'``. Whitespace between
        documents is allowed.

    Returns
    -------
    list
        The decoded documents, in the order they appear in ``raw``.

    Raises
    ------
    json.JSONDecodeError
        If ``raw`` contains text that is not valid JSON.
    """
    decoder = json.JSONDecoder()
    documents = []
    position, end = 0, len(raw)
    while position < end:
        # raw_decode does not skip leading whitespace, so step over it here.
        if raw[position].isspace():
            position += 1
            continue
        document, position = decoder.raw_decode(raw, position)
        documents.append(document)
    return documents


# Wrap every document found in the raw text in a single {"total": [...]} JSON
# string. The documents are decoded one at a time rather than rewriting "}{" as
# "},{" in the raw text: a textual replace also rewrites any "}{" that occurs
# inside a string value and misses documents separated by whitespace/newlines.
test = json.dumps({"total": _decode_concatenated_json(user_json)})

In [40]:
# Echo the wrapped JSON string to check its structure (the whole string is displayed).
test

'{"total": [{"data": [{"username": "iam_rousey", "description": "\\ud83c\\udf37I BELIEVE IN GOD\\u270a\\ud83d\\udcaa||\\ud83d\\udc90PSALM 51 \\ud83d\\ude4f\\ud83d\\udc90||UDSM Alumni||\\ud83c\\udf43Textile Technologist\\ud83c\\udf43||Lady of Ambitions\\ud83c\\udf43||Libraqueen\\u264e\\ufe0f\\ud83c\\udf3b\\ud83c\\udf3b#WANAZI_HQ", "id": "1293183874386731008", "name": "\\ud83e\\uddb9\\u200d\\u2640\\ufe0f\\ud835\\udc07\\ud835\\udc04\\ud835\\udc11\\ud835\\udc0e\\ud835\\udc08\\ud835\\udc0d\\ud835\\udc04\\ud835\\udc11\\ud835\\udc0e\\ud835\\udc12\\ud835\\udc04 \\ud83e\\udd8b", "location": "Arusha,Tanzania"}, {"username": "Pavanasoonu", "description": "Global SAP Tech.  #Jyotishkatti. Spiritual counselor. Associate Prof. @VayuUsa. Jyotish teacher. Followed by Hon\'ble PM Shri. @narendramodi ji. #AskPanditKatti", "id": "1051074201082388480", "name": "Pavanasoonu", "location": "Bengaluru, India"}, {"username": "JohnCam14394418", "description": "", "id": "1625506362515152898", "name": "John Campb

In [41]:
# Echo the raw text read from users.json (the whole string is displayed).
user_json

'{"data": [{"username": "iam_rousey", "description": "\\ud83c\\udf37I BELIEVE IN GOD\\u270a\\ud83d\\udcaa||\\ud83d\\udc90PSALM 51 \\ud83d\\ude4f\\ud83d\\udc90||UDSM Alumni||\\ud83c\\udf43Textile Technologist\\ud83c\\udf43||Lady of Ambitions\\ud83c\\udf43||Libraqueen\\u264e\\ufe0f\\ud83c\\udf3b\\ud83c\\udf3b#WANAZI_HQ", "id": "1293183874386731008", "name": "\\ud83e\\uddb9\\u200d\\u2640\\ufe0f\\ud835\\udc07\\ud835\\udc04\\ud835\\udc11\\ud835\\udc0e\\ud835\\udc08\\ud835\\udc0d\\ud835\\udc04\\ud835\\udc11\\ud835\\udc0e\\ud835\\udc12\\ud835\\udc04 \\ud83e\\udd8b", "location": "Arusha,Tanzania"}, {"username": "Pavanasoonu", "description": "Global SAP Tech.  #Jyotishkatti. Spiritual counselor. Associate Prof. @VayuUsa. Jyotish teacher. Followed by Hon\'ble PM Shri. @narendramodi ji. #AskPanditKatti", "id": "1051074201082388480", "name": "Pavanasoonu", "location": "Bengaluru, India"}, {"username": "JohnCam14394418", "description": "", "id": "1625506362515152898", "name": "John Campbell"}, {"us

In [42]:
# Parse the wrapped string; the result is a dict whose "total" key holds the
# list of documents that were concatenated in users.json.
user_data = json.loads(test)

In [43]:
# Pull the "data" payload out of each parsed document under "total".
users = [document['data'] for document in user_data["total"]]

In [44]:
# Flatten the per-document lists into a single list of user records.
flat_list = [record for batch in users for record in batch]

In [45]:
# Display the flattened user records (the entire list is echoed).
flat_list

[{'username': 'iam_rousey',
  'description': '🌷I BELIEVE IN GOD✊💪||💐PSALM 51 🙏💐||UDSM Alumni||🍃Textile Technologist🍃||Lady of Ambitions🍃||Libraqueen♎️🌻🌻#WANAZI_HQ',
  'id': '1293183874386731008',
  'name': '🦹\u200d♀️𝐇𝐄𝐑𝐎𝐈𝐍𝐄𝐑𝐎𝐒𝐄 🦋',
  'location': 'Arusha,Tanzania'},
 {'username': 'Pavanasoonu',
  'description': "Global SAP Tech.  #Jyotishkatti. Spiritual counselor. Associate Prof. @VayuUsa. Jyotish teacher. Followed by Hon'ble PM Shri. @narendramodi ji. #AskPanditKatti",
  'id': '1051074201082388480',
  'name': 'Pavanasoonu',
  'location': 'Bengaluru, India'},
 {'username': 'JohnCam14394418',
  'description': '',
  'id': '1625506362515152898',
  'name': 'John Campbell'},
 {'username': 'realsast',
  'description': '14, 🇸🇪, ASD, ADHD, He/Him, #gdtwt, #daputwt, Backup/Alt @SASTSimon',
  'id': '1300118462329769986',
  'name': 'Simon',
  'location': 'Helsingborg, Sverige'},
 {'username': 'AmaBoukman1804',
  'description': 'I am a moderate; the world is radically unjust.\n---When black lives 

In [46]:
# Number of user records after flattening.
len(flat_list)

1700

In [47]:
# Build one row per user record; a key that is absent from a record is left
# as a missing value in that row.
users_df = pd.DataFrame(flat_list)

In [48]:
# Display the user frame (pandas elides the middle rows of a long frame).
users_df

Unnamed: 0,username,description,id,name,location,withheld
0,iam_rousey,🌷I BELIEVE IN GOD✊💪||💐PSALM 51 🙏💐||UDSM Alumni...,1293183874386731008,🦹‍♀️𝐇𝐄𝐑𝐎𝐈𝐍𝐄𝐑𝐎𝐒𝐄 🦋,"Arusha,Tanzania",
1,Pavanasoonu,Global SAP Tech. #Jyotishkatti. Spiritual cou...,1051074201082388480,Pavanasoonu,"Bengaluru, India",
2,JohnCam14394418,,1625506362515152898,John Campbell,,
3,realsast,"14, 🇸🇪, ASD, ADHD, He/Him, #gdtwt, #daputwt, B...",1300118462329769986,Simon,"Helsingborg, Sverige",
4,AmaBoukman1804,I am a moderate; the world is radically unjust...,2586324829,Ama Boukman,United States,
...,...,...,...,...,...,...
1695,KellyAnneOsbor1,I try to stay positive but have family behind ...,2624298429,Kelly Pinky Harrison💗,Nottingham,
1696,smusyoka,"Passionate about good ideas, sports, outdoors ...",28579644,Stephen Musyoka,Kenya,
1697,Ashleighisabel3,"NQN #QUBNurse | Interests in #CancerCare, #Pal...",1207822274654613506,AshleighIsabella,"Belfast, Northern Ireland",
1698,AndrewAstle5,,1502730053737713672,Andrew Astle,"Tamworth, England",


In [50]:
# Drop the `withheld` column from the user frame.
# errors='ignore' keeps this cell idempotent: it rebinds users_df, so running
# it a second time would otherwise raise KeyError once the column is gone.
users_df = users_df.drop(columns=['withheld'], errors='ignore')
users_df

Unnamed: 0,username,description,id,name,location
0,iam_rousey,🌷I BELIEVE IN GOD✊💪||💐PSALM 51 🙏💐||UDSM Alumni...,1293183874386731008,🦹‍♀️𝐇𝐄𝐑𝐎𝐈𝐍𝐄𝐑𝐎𝐒𝐄 🦋,"Arusha,Tanzania"
1,Pavanasoonu,Global SAP Tech. #Jyotishkatti. Spiritual cou...,1051074201082388480,Pavanasoonu,"Bengaluru, India"
2,JohnCam14394418,,1625506362515152898,John Campbell,
3,realsast,"14, 🇸🇪, ASD, ADHD, He/Him, #gdtwt, #daputwt, B...",1300118462329769986,Simon,"Helsingborg, Sverige"
4,AmaBoukman1804,I am a moderate; the world is radically unjust...,2586324829,Ama Boukman,United States
...,...,...,...,...,...
1695,KellyAnneOsbor1,I try to stay positive but have family behind ...,2624298429,Kelly Pinky Harrison💗,Nottingham
1696,smusyoka,"Passionate about good ideas, sports, outdoors ...",28579644,Stephen Musyoka,Kenya
1697,Ashleighisabel3,"NQN #QUBNurse | Interests in #CancerCare, #Pal...",1207822274654613506,AshleighIsabella,"Belfast, Northern Ireland"
1698,AndrewAstle5,,1502730053737713672,Andrew Astle,"Tamworth, England"


In [51]:
# (rows, columns) of the user frame.
users_df.shape

(1700, 5)

In [57]:
# Print the full `id` column as a plain Python list (every id is printed).
print(list(users_df['id']))

['1293183874386731008', '1051074201082388480', '1625506362515152898', '1300118462329769986', '2586324829', '1028124989566922752', '753776473056616448', '98787862', '592750578', '1603110062381322244', '44456655', '454000776', '28694544', '1610116290152386561', '1006917336', '4657324057', '886223978552975361', '1520280970812964866', '1591207900512403456', '165580863', '1405277396', '802954001755029506', '30156132', '2744340041', '2565776556', '263929545', '1515337761024270340', '16860525', '843492630', '1615342651368431617', '52474202', '1548247580781359105', '371231308', '333295520', '21544235', '281224181', '374231158', '1206274118', '42333137', '210729774', '1257559167352668160', '597396731', '829901064', '968959085184888832', '1426183757833416706', '15588971', '1146495374858883076', '1389783952295186435', '1450124342247972870', '22451063', '328095374', '1411689346990690306', '1534635017560801280', '1339263347062157316', '286377632', '578107566', '854360794506698752', '2998124313', '1

In [54]:
# Plain Python list of usernames, one entry per row of users_df.
# (Wrapping the column in square brackets would instead produce a one-element
# list holding the whole Series.)
users_list = list(users_df['username'])

In [58]:
# Read the tweet dump as text. JSON is UTF-8 by specification, so decode it
# explicitly instead of relying on the platform's locale-dependent default.
with open("tweets-with-place.json", "r", encoding="utf-8") as f:
    tweet_json = f.read()

In [59]:
# Echo the raw text read from tweets-with-place.json (the whole string is displayed).
tweet_json

'[\n  {\n    "user_id": "1293183874386731008",\n    "tweet_id": "1636425265797648385",\n    "tweet_text": "@Adventure_36 Ulikuwa hujui wakili wangu",\n    "place_id": "003f4a527524b7ee"\n  },\n  {\n    "user_id": "1051074201082388480",\n    "tweet_id": "1636441725240766464",\n    "tweet_text": "The unknown gives you unlimited freedom.  \\n\\n#AskPanditKatti",\n    "place_id": "5f55bb82cf16ac81"\n  },\n  {\n    "user_id": "1051074201082388480",\n    "tweet_id": "1636438839924498432",\n    "tweet_text": "@patsing10 Wonderful Sir.",\n    "place_id": "5f55bb82cf16ac81"\n  },\n  {\n    "user_id": "1051074201082388480",\n    "tweet_id": "1636436439327186944",\n    "tweet_text": "Ok.  Jaya Shree Rama https://t.co/sdW2J4OXvb",\n    "place_id": "5f55bb82cf16ac81"\n  },\n  {\n    "user_id": "1051074201082388480",\n    "tweet_id": "1636435567197171712",\n    "tweet_text": "@nallanhara Excellent. Go ahead please",\n    "place_id": "5f55bb82cf16ac81"\n  },\n  {\n    "user_id": "1625506362515152898"

In [61]:
# Parse the JSON text into Python objects. The recorded output shows a list of
# dicts with the keys user_id, tweet_id, tweet_text and place_id.
tweets_data = json.loads(tweet_json)

In [62]:
# Display the parsed tweet records (the entire list is echoed).
tweets_data

[{'user_id': '1293183874386731008',
  'tweet_id': '1636425265797648385',
  'tweet_text': '@Adventure_36 Ulikuwa hujui wakili wangu',
  'place_id': '003f4a527524b7ee'},
 {'user_id': '1051074201082388480',
  'tweet_id': '1636441725240766464',
  'tweet_text': 'The unknown gives you unlimited freedom.  \n\n#AskPanditKatti',
  'place_id': '5f55bb82cf16ac81'},
 {'user_id': '1051074201082388480',
  'tweet_id': '1636438839924498432',
  'tweet_text': '@patsing10 Wonderful Sir.',
  'place_id': '5f55bb82cf16ac81'},
 {'user_id': '1051074201082388480',
  'tweet_id': '1636436439327186944',
  'tweet_text': 'Ok.  Jaya Shree Rama https://t.co/sdW2J4OXvb',
  'place_id': '5f55bb82cf16ac81'},
 {'user_id': '1051074201082388480',
  'tweet_id': '1636435567197171712',
  'tweet_text': '@nallanhara Excellent. Go ahead please',
  'place_id': '5f55bb82cf16ac81'},
 {'user_id': '1625506362515152898',
  'tweet_id': '1636419324675264514',
  'tweet_text': '@mariewalsh18 Hahaha 🤣😂',
  'place_id': '0079932b106eb4c9'},
 

In [63]:
# Build one row per parsed tweet record.
tweets_df = pd.DataFrame(tweets_data)

In [65]:
# Preview the first five rows of the tweet frame.
tweets_df.head()

Unnamed: 0,user_id,tweet_id,tweet_text,place_id
0,1293183874386731008,1636425265797648385,@Adventure_36 Ulikuwa hujui wakili wangu,003f4a527524b7ee
1,1051074201082388480,1636441725240766464,The unknown gives you unlimited freedom. \n\n...,5f55bb82cf16ac81
2,1051074201082388480,1636438839924498432,@patsing10 Wonderful Sir.,5f55bb82cf16ac81
3,1051074201082388480,1636436439327186944,Ok. Jaya Shree Rama https://t.co/sdW2J4OXvb,5f55bb82cf16ac81
4,1051074201082388480,1636435567197171712,@nallanhara Excellent. Go ahead please,5f55bb82cf16ac81


In [66]:
# Preview the first 20 rows of the tweet frame.
tweets_df.head(20)

Unnamed: 0,user_id,tweet_id,tweet_text,place_id
0,1293183874386731008,1636425265797648385,@Adventure_36 Ulikuwa hujui wakili wangu,003f4a527524b7ee
1,1051074201082388480,1636441725240766464,The unknown gives you unlimited freedom. \n\n...,5f55bb82cf16ac81
2,1051074201082388480,1636438839924498432,@patsing10 Wonderful Sir.,5f55bb82cf16ac81
3,1051074201082388480,1636436439327186944,Ok. Jaya Shree Rama https://t.co/sdW2J4OXvb,5f55bb82cf16ac81
4,1051074201082388480,1636435567197171712,@nallanhara Excellent. Go ahead please,5f55bb82cf16ac81
5,1625506362515152898,1636419324675264514,@mariewalsh18 Hahaha 🤣😂,0079932b106eb4c9
6,1625506362515152898,1636413400808034304,@mariewalsh18 Same.. 3 days is minimal...,0079932b106eb4c9
7,1300118462329769986,1636465093947826176,@cywrId You’re not the only person in the worl...,2406f69310767a43
8,2586324829,1636463062587678727,Get ready for increases in child poverty and t...,7af0fb6f137530df
9,2586324829,1636461534225354752,I hate Republicans. 😒 https://t.co/lXKrejMODb,7af0fb6f137530df
