# Data FakeNewsNet

The FakeNewsNet dataset provides the IDs of the tweets associated with each news item; this notebook fetches the corresponding tweet data from Twitter using those IDs.

In [6]:
import os
from dotenv import load_dotenv

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Bearer token used further down to authenticate the Twitter client;
# stays None when the variable is not defined.
BEARER_TOKEN = os.environ.get("BEARER_TOKEN")


## Load Dataset

In [16]:
import pandas as pd

In [104]:
# Directory containing the FakeNewsNet CSV files.
dataset_path = 'FakeNewsNet/'

# Names of every entry found directly inside that directory.
dataset_file = list(os.listdir(dataset_path))
print(dataset_file)

['politifact_fake.csv', 'gossipcop_fake.csv', 'gossipcop_real.csv', 'politifact_real.csv']


In [105]:
# Read the PolitiFact fake-news CSV into a DataFrame.
# (The directory listing also contains politifact_real.csv.)
path = 'FakeNewsNet/politifact_fake.csv'
dataset_raw_df = pd.read_csv(filepath_or_buffer=path)

In [123]:
# Keep only the rows that have tweet ids, then give every tweet id its own row.
# `tweet_ids` holds tab-separated ids; the split + explode approach follows
# https://stackoverflow.com/a/57122617
dataset_df = (
    dataset_raw_df
    .dropna(subset=['tweet_ids'])
    .assign(tweet_ids=lambda frame: frame['tweet_ids'].str.split('\t'))
    .explode('tweet_ids')
    # reset_index() keeps the source row number in a new `index` column
    .reset_index()
)

dataset_df

Unnamed: 0,index,id,news_url,title,tweet_ids
0,0,politifact15014,speedtalk.com/forum/viewtopic.php?t=51650,BREAKING: First NFL Team Declares Bankruptcy O...,937349434668498944
1,0,politifact15014,speedtalk.com/forum/viewtopic.php?t=51650,BREAKING: First NFL Team Declares Bankruptcy O...,937379378006282240
2,0,politifact15014,speedtalk.com/forum/viewtopic.php?t=51650,BREAKING: First NFL Team Declares Bankruptcy O...,937380068590055425
3,0,politifact15014,speedtalk.com/forum/viewtopic.php?t=51650,BREAKING: First NFL Team Declares Bankruptcy O...,937384406511005696
4,0,politifact15014,speedtalk.com/forum/viewtopic.php?t=51650,BREAKING: First NFL Team Declares Bankruptcy O...,937387493451862016
...,...,...,...,...,...
165347,431,politifact15030,https://theglobalheadlines.net/breaking-intel-...,Account Suspended,1042857699682209794
165348,431,politifact15030,https://theglobalheadlines.net/breaking-intel-...,Account Suspended,1048920936467107841
165349,431,politifact15030,https://theglobalheadlines.net/breaking-intel-...,Account Suspended,1049026150230646785
165350,431,politifact15030,https://theglobalheadlines.net/breaking-intel-...,Account Suspended,1049809900581388288


## Twitter Client

In [107]:
import tweepy

# Client authenticated with the bearer token read from the environment.
client = tweepy.Client(
    bearer_token=BEARER_TOKEN,
)

In [110]:
# Fetching options: the expansions and per-object fields to request.
_requested_fields = {
    'expansions': [
        'author_id',
        'in_reply_to_user_id',
        "referenced_tweets.id",
        'geo.place_id',
        'entities.mentions.username',
        'referenced_tweets.id.author_id',
    ],
    'tweet_fields': [
        'id',
        'text',
        'author_id',
        'context_annotations',
        'conversation_id',
        'created_at',
        'entities',
        'in_reply_to_user_id',
        'lang',
        'possibly_sensitive',
        'public_metrics',
        'referenced_tweets',
        'reply_settings',
        'source',
        'withheld',
    ],
    'place_fields': [
        'contained_within',
        'country',
        'country_code',
        'full_name',
        'geo',
        'id',
        'name',
        'place_type',
    ],
    'user_fields': [
        'created_at',
        'description',
        'entities',
        'id',
        'location',
        'name',
        'pinned_tweet_id',
        'profile_image_url',
        'protected',
        'public_metrics',
        'url',
        'username',
        'verified',
        'withheld',
    ],
}

# Each list is collapsed into a single comma-separated string.
data_dict = {option: ','.join(names) for option, names in _requested_fields.items()}
data_dict

{'expansions': 'author_id,in_reply_to_user_id,referenced_tweets.id,geo.place_id,entities.mentions.username,referenced_tweets.id.author_id',
 'tweet_fields': 'id,text,author_id,context_annotations,conversation_id,created_at,entities,in_reply_to_user_id,lang,possibly_sensitive,public_metrics,referenced_tweets,reply_settings,source,withheld',
 'place_fields': 'contained_within,country,country_code,full_name,geo,id,name,place_type',
 'user_fields': 'created_at,description,entities,id,location,name,pinned_tweet_id,profile_image_url,protected,public_metrics,url,username,verified,withheld'}

In [152]:
# Tweet ids for index labels up to and including 99 (.loc slices by label and
# includes the end label), i.e. the first 100 rows of the re-indexed frame.
tweet_ids = dataset_df.loc[:99, 'tweet_ids'].to_list()
print(tweet_ids)

['937349434668498944', '937379378006282240', '937380068590055425', '937384406511005696', '937387493451862016', '937400766024896512', '937406789686980608', '937411332240011266', '937415066810503168', '937427631661768704', '937429898670600192', '937436145004302337', '937438119468699648', '937449906352152576', '937450317142286336', '937451599320027136', '937452013939494912', '937452151227510784', '937453119478423553', '937462176293437443', '937468710952013824', '937471159246950401', '937473844813508609', '937474875861749760', '937481780088598528', '937485222257586177', '937491440082522117', '937491843851231232', '937493781271732229', '937494245883068421', '937506043604750338', '937506080694947840', '937508442352373760', '937510057322995712', '937520724453089280', '937520726369816576', '937521928738185216', '937522668303147010', '937522695339638785', '937524138121478144', '937526251027402753', '937541294951411712', '937542730162540544', '937543928844374016', '937567120220618752', '93761600

In [178]:
# Look up the selected tweets in a single request. The `geo.place_id` expansion
# is requested, so the place fields configured in `data_dict` are passed as
# well; previously that entry of `data_dict` was built but never sent.
tweets = client.get_tweets(
    ids=tweet_ids,
    expansions=data_dict['expansions'],
    tweet_fields=data_dict['tweet_fields'],
    user_fields=data_dict['user_fields'],
    place_fields=data_dict['place_fields'],
)

In [240]:
# Raw dict payload (`.data`) of every user object in the response's `includes`.
user_dict = [user.data for user in tweets.includes['users']]
user_dict[0]

{'id': '4219197432',
 'verified': False,
 'created_at': '2015-11-13T03:31:28.000Z',
 'name': 'Ofelia Duchess Arizmendez',
 'profile_image_url': 'https://pbs.twimg.com/profile_images/1381669669942095879/GlKRK5k5_normal.jpg',
 'description': 'Ofelia. Arizmendez @ deplorable me. I am a proud conservative Republican and I am a Trump supporter Trump 2020 Trump 2024, the unvaccinated are not dying!!!!',
 'public_metrics': {'followers_count': 1718,
  'following_count': 2775,
  'tweet_count': 64990,
  'listed_count': 18},
 'protected': False,
 'location': 'Sugar Land, TX',
 'username': 'OfeliasHeaven'}

In [255]:
# One row per user; the nested `public_metrics` dict is expanded into its own
# columns (followers_count, following_count, ...) next to the original column.
users_df = pd.DataFrame(user_dict)
users_df = users_df.join(users_df['public_metrics'].apply(pd.Series))

# Stringify the nested columns. `entities` is not present on every user
# payload, so the column only exists when at least one user in the batch
# carries it; the guard avoids a KeyError for batches where none does.
for nested_column in ('public_metrics', 'entities'):
    if nested_column in users_df.columns:
        users_df[nested_column] = users_df[nested_column].astype(str)
users_df.head(5)

Unnamed: 0,id,verified,created_at,name,profile_image_url,description,public_metrics,protected,location,username,pinned_tweet_id,url,entities,followers_count,following_count,tweet_count,listed_count
0,4219197432,False,2015-11-13T03:31:28.000Z,Ofelia Duchess Arizmendez,https://pbs.twimg.com/profile_images/138166966...,Ofelia. Arizmendez @ deplorable me. I am a pro...,"{'followers_count': 1718, 'following_count': 2...",False,"Sugar Land, TX",OfeliasHeaven,,,,1718,2775,64990,18
1,3018973429,False,2015-02-13T23:49:44.000Z,Lorn Cramer,https://pbs.twimg.com/profile_images/566385164...,,"{'followers_count': 14, 'following_count': 102...",False,,lorn_cramer,,,,14,102,15428,0
2,115392787,False,2010-02-18T14:26:30.000Z,Information War👁,https://pbs.twimg.com/profile_images/111636433...,"Protect our Constitution,Keeping our Liberty, ...","{'followers_count': 883, 'following_count': 15...",False,top secret,GovDeception,1.1156989778846104e+18,,,883,1554,156680,52
3,817930108732342274,False,2017-01-08T03:05:09.000Z,❌William Nerbonne 🇺🇸🇮🇱,https://pbs.twimg.com/profile_images/919686834...,"John 3:3 Christian, Patriot, Providence RI, US...","{'followers_count': 13086, 'following_count': ...",False,"Rhode Island, USA",WilliamNerbonne,,,,13086,13466,6785,4
4,23162382,False,2009-03-07T04:09:55.000Z,🌹Star Chaser 🌼🇺🇸,https://pbs.twimg.com/profile_images/154808150...,Happily married conservative Pentecostal woman...,"{'followers_count': 1223, 'following_count': 1...",False,Ozarks. Missouri,starchaser57,1.3523483322035855e+18,https://t.co/mTQvTdy3Xr,"{'url': {'urls': [{'start': 0, 'end': 23, 'url...",1223,1311,76092,28


In [242]:
# Raw dict payload (`.data`) of every tweet returned in the response.
tweets_dict = [tweet.data for tweet in tweets.data]
tweets_dict[0]

{'possibly_sensitive': False,
 'reply_settings': 'everyone',
 'edit_history_tweet_ids': ['937349434668498944'],
 'created_at': '2017-12-03T15:54:54.000Z',
 'conversation_id': '937349434668498944',
 'id': '937349434668498944',
 'author_id': '4219197432',
 'text': 'BREAKING: First NFL Team Declares Bankruptcy Over Kneeling Thugs: https://t.co/dR2JLKKn7c',
 'public_metrics': {'retweet_count': 0,
  'reply_count': 0,
  'like_count': 1,
  'quote_count': 0},
 'entities': {'annotations': [{'start': 16,
    'end': 18,
    'probability': 0.8183,
    'type': 'Organization',
    'normalized_text': 'NFL'}],
  'urls': [{'start': 66,
    'end': 89,
    'url': 'https://t.co/dR2JLKKn7c',
    'expanded_url': 'http://potatriotpost.com/breaking-first-nfl-team-declares-bankruptcy-over-kneeling-thugs/',
    'display_url': 'potatriotpost.com/breaking-first…'}]},
 'lang': 'en',
 'context_annotations': [{'domain': {'id': '3',
    'name': 'TV Shows',
    'description': 'Television shows from around the world'},

In [246]:
# One row per tweet; the nested `public_metrics` dict is expanded into its own
# columns (retweet_count, reply_count, ...) next to the original column.
tweets_df = pd.DataFrame(tweets_dict)
tweets_df = tweets_df.join(tweets_df['public_metrics'].apply(pd.Series))

# Stringify the nested columns. A column only exists when at least one tweet
# in the batch carries that key, so each one is checked first to avoid a
# KeyError on batches where `entities` or `context_annotations` is absent.
for nested_column in ('public_metrics', 'entities', 'context_annotations'):
    if nested_column in tweets_df.columns:
        tweets_df[nested_column] = tweets_df[nested_column].astype(str)
tweets_df.head(5)

Unnamed: 0,possibly_sensitive,reply_settings,edit_history_tweet_ids,created_at,conversation_id,id,author_id,text,public_metrics,entities,lang,context_annotations,in_reply_to_user_id,retweet_count,reply_count,like_count,quote_count
0,False,everyone,[937349434668498944],2017-12-03T15:54:54.000Z,937349434668498944,937349434668498944,4219197432,BREAKING: First NFL Team Declares Bankruptcy O...,"{'retweet_count': 0, 'reply_count': 0, 'like_c...","{'annotations': [{'start': 16, 'end': 18, 'pro...",en,"[{'domain': {'id': '3', 'name': 'TV Shows', 'd...",,0,0,1,0
1,False,everyone,[937379378006282240],2017-12-03T17:53:54.000Z,937379378006282240,937379378006282240,3018973429,BREAKING: First NFL Team Declares Bankruptcy O...,"{'retweet_count': 0, 'reply_count': 0, 'like_c...","{'annotations': [{'start': 16, 'end': 18, 'pro...",en,"[{'domain': {'id': '3', 'name': 'TV Shows', 'd...",,0,0,0,0
2,False,everyone,[937380068590055425],2017-12-03T17:56:38.000Z,937380068590055425,937380068590055425,3018973429,BREAKING: First NFL Team Declares Bankruptcy O...,"{'retweet_count': 0, 'reply_count': 0, 'like_c...","{'annotations': [{'start': 16, 'end': 18, 'pro...",en,"[{'domain': {'id': '3', 'name': 'TV Shows', 'd...",,0,0,0,0
3,False,everyone,[937384406511005696],2017-12-03T18:13:52.000Z,937384406511005696,937384406511005696,115392787,BREAKING: First NFL Team Declares Bankruptcy O...,"{'retweet_count': 0, 'reply_count': 0, 'like_c...","{'annotations': [{'start': 16, 'end': 18, 'pro...",en,"[{'domain': {'id': '3', 'name': 'TV Shows', 'd...",,0,0,0,0
4,False,everyone,[937415066810503168],2017-12-03T20:15:42.000Z,937415066810503168,937415066810503168,817930108732342274,AND SO IT BEGINS - BREAKING: First NFL Team ...,"{'retweet_count': 1, 'reply_count': 1, 'like_c...","{'annotations': [{'start': 37, 'end': 39, 'pro...",en,"[{'domain': {'id': '3', 'name': 'TV Shows', 'd...",,1,1,0,0


In [247]:
# Attach each author's profile to their tweets. A left join keeps every tweet;
# overlapping column names get the `_tweet` / `_author` suffixes.
tweets_with_author_df = pd.merge(
    tweets_df,
    users_df,
    how='left',
    left_on='author_id',
    right_on='id',
    suffixes=['_tweet', '_author'],
)
tweets_with_author_df.head(5)

Unnamed: 0,possibly_sensitive,reply_settings,edit_history_tweet_ids,created_at_tweet,conversation_id,id_tweet,author_id,text,public_metrics_tweet,entities_tweet,...,name,profile_image_url,description,public_metrics_author,protected,location,username,pinned_tweet_id,url,entities_author
0,False,everyone,[937349434668498944],2017-12-03T15:54:54.000Z,937349434668498944,937349434668498944,4219197432,BREAKING: First NFL Team Declares Bankruptcy O...,"{'retweet_count': 0, 'reply_count': 0, 'like_c...","{'annotations': [{'start': 16, 'end': 18, 'pro...",...,Ofelia Duchess Arizmendez,https://pbs.twimg.com/profile_images/138166966...,Ofelia. Arizmendez @ deplorable me. I am a pro...,"{'followers_count': 1718, 'following_count': 2...",False,"Sugar Land, TX",OfeliasHeaven,,,
1,False,everyone,[937379378006282240],2017-12-03T17:53:54.000Z,937379378006282240,937379378006282240,3018973429,BREAKING: First NFL Team Declares Bankruptcy O...,"{'retweet_count': 0, 'reply_count': 0, 'like_c...","{'annotations': [{'start': 16, 'end': 18, 'pro...",...,Lorn Cramer,https://pbs.twimg.com/profile_images/566385164...,,"{'followers_count': 14, 'following_count': 102...",False,,lorn_cramer,,,
2,False,everyone,[937380068590055425],2017-12-03T17:56:38.000Z,937380068590055425,937380068590055425,3018973429,BREAKING: First NFL Team Declares Bankruptcy O...,"{'retweet_count': 0, 'reply_count': 0, 'like_c...","{'annotations': [{'start': 16, 'end': 18, 'pro...",...,Lorn Cramer,https://pbs.twimg.com/profile_images/566385164...,,"{'followers_count': 14, 'following_count': 102...",False,,lorn_cramer,,,
3,False,everyone,[937384406511005696],2017-12-03T18:13:52.000Z,937384406511005696,937384406511005696,115392787,BREAKING: First NFL Team Declares Bankruptcy O...,"{'retweet_count': 0, 'reply_count': 0, 'like_c...","{'annotations': [{'start': 16, 'end': 18, 'pro...",...,Information War👁,https://pbs.twimg.com/profile_images/111636433...,"Protect our Constitution,Keeping our Liberty, ...","{'followers_count': 883, 'following_count': 15...",False,top secret,GovDeception,1.1156989778846104e+18,,
4,False,everyone,[937415066810503168],2017-12-03T20:15:42.000Z,937415066810503168,937415066810503168,817930108732342274,AND SO IT BEGINS - BREAKING: First NFL Team ...,"{'retweet_count': 1, 'reply_count': 1, 'like_c...","{'annotations': [{'start': 37, 'end': 39, 'pro...",...,❌William Nerbonne 🇺🇸🇮🇱,https://pbs.twimg.com/profile_images/919686834...,"John 3:3 Christian, Patriot, Providence RI, US...","{'followers_count': 13086, 'following_count': ...",False,"Rhode Island, USA",WilliamNerbonne,,,


In [239]:
# List the column names of the merged tweet/author frame.
tweets_with_author_df.columns

Index(['possibly_sensitive', 'reply_settings', 'edit_history_tweet_ids',
       'created_at_tweet', 'conversation_id', 'id_tweet', 'author_id', 'text',
       'public_metrics_tweet', 'entities_tweet', 'lang', 'context_annotations',
       'in_reply_to_user_id', 'retweet_count', 'reply_count', 'like_count',
       'quote_count', 'id_author', 'verified', 'created_at_author', 'name',
       'profile_image_url', 'description', 'public_metrics_author',
       'protected', 'location', 'username', 'pinned_tweet_id', 'url',
       'entities_author'],
      dtype='object')