# Disinformation on Twitter

#### This cell imports the libraries used in the notebook:

In [1]:
import pandas as pd
import os

#### This records the current working folder and the `data` folder inside it, creating the `data` folder if it does not exist yet

In [2]:
# Folder the notebook is run from; the data folder is located relative to it.
PROJ_ROOT_DIR = os.getcwd()

# Folder that holds the input files.
DATA_PATH = os.path.join(PROJ_ROOT_DIR, "data")

# exist_ok=True creates the folder only when it is missing, without a separate
# check-then-create step that could race with another process.
os.makedirs(DATA_PATH, exist_ok=True)

#### This loads each CSV file in the data folder and combines them into a single data frame, dropping duplicate rows

In [4]:
def load_twitter_data(data_path=None):
    """
    Load every CSV file in the data folder into one de-duplicated data frame.

    Parameters
    ----------
    data_path : str, optional
        Folder to read the ``.csv`` files from. When omitted, the module-level
        ``DATA_PATH`` is used.

    Returns
    -------
    pandas.DataFrame
        The rows of all files stacked in file-name order, using the union of
        their columns, with exact duplicate rows removed and a fresh
        0..n-1 index.

    Raises
    ------
    ValueError
        If the folder contains no ``.csv`` files.
    """
    if data_path is None:
        data_path = DATA_PATH

    # os.listdir returns names in arbitrary order; sorting makes the row order
    # (and which copy of a duplicate is kept) reproducible between runs.
    files = sorted(f for f in os.listdir(data_path) if f.endswith(".csv"))
    if not files:
        # pd.concat would otherwise fail with an opaque "No objects to concatenate".
        raise ValueError("No .csv files found in {!r}".format(data_path))

    # One data frame per file
    frames = [
        pd.read_csv(os.path.join(data_path, filename), low_memory=False)
        for filename in files
    ]

    # drop=True discards the old index instead of inserting it as a column,
    # which also works when the data already has a column named "index".
    df_full = (
        pd.concat(frames, join='outer', sort=True)
        .drop_duplicates()
        .reset_index(drop=True)
    )

    return df_full

# Read all CSV files from the data folder into a single data frame.
tweets = load_twitter_data()

#### This selects only the columns we are interested in

In [5]:
# Keep only the account, language, content and timing columns used below
tweets_clean = tweets[[
    'user_screen_name',
    'user_display_name',
    'user_reported_location',
    'account_language',
    'tweet_language',
    'tweet_text',
    'tweet_time',
    'urls',
    'hashtags',
    'is_retweet',
]]

# First 10 rows of data
tweets_clean.head(10)

Unnamed: 0,user_screen_name,user_display_name,user_reported_location,account_language,tweet_language,tweet_text,tweet_time,urls,hashtags,is_retweet
0,akhonfellah,⁦🇮🇷⁩أخٌ‌في‌الله,Iran,en,en,one person followed me // automatically checke...,2017-01-11 05:23,['http://fllwrs.com'],[],False
1,akhonfellah,⁦🇮🇷⁩أخٌ‌في‌الله,Iran,en,fa,#IDFTerrorists\nحماسه تروریستهای اسرائیلی http...,2018-05-26 00:48,[],['IDFTerrorists'],False
2,akhonfellah,⁦🇮🇷⁩أخٌ‌في‌الله,Iran,en,en,Stop war on Yemen hospitals\n#ShameOnUN\n#Yemen,2018-06-16 20:06,[],"['ShameOnUN', 'Yemen']",False
3,akhonfellah,⁦🇮🇷⁩أخٌ‌في‌الله,Iran,en,fa,لبیک یا فقیه\n#مجزرة_الدراز https://t.co/nKfQW...,2018-05-23 18:22,[],['مجزرة_الدراز'],False
4,akhonfellah,⁦🇮🇷⁩أخٌ‌في‌الله,Iran,en,fa,اینجا تل ابیب است\nاینها اسراییلیهایی هستند که...,2019-01-28 16:56,[],['زندگی_سگی_اسرائیلیها'],False
5,akhonfellah,⁦🇮🇷⁩أخٌ‌في‌الله,Iran,en,ar,وامروز هم ....\n#زندگی_سگی_اسرائیلیها https://...,2018-09-07 10:42,[],['زندگی_سگی_اسرائیلیها'],False
6,akhonfellah,⁦🇮🇷⁩أخٌ‌في‌الله,Iran,en,fa,حکمت ثابت موندن اسم ماههای قمری بعد از تغییر ز...,2017-11-19 19:40,[],[],False
7,akhonfellah,⁦🇮🇷⁩أخٌ‌في‌الله,Iran,en,fa,جمله ای که سیدحسن امشب گفت در مورد انقلاب اسلا...,2019-02-06 18:26,[],['إن_مع_الصبر_نصرا'],False
8,akhonfellah,⁦🇮🇷⁩أخٌ‌في‌الله,Iran,en,fa,بشنوید مدح حاج محمودآقوی کریمی رو با لهجه شیرا...,2017-08-04 12:25,['https://twitter.com/khanisadiq/status/893438...,[],False
9,akhonfellah,⁦🇮🇷⁩أخٌ‌في‌الله,Iran,en,ar,RT @awadazeinab1: كم ساعة مع ابني بالمستشفى شف...,2019-01-25 17:48,[],[],True


#### This filters the tweets and keeps those that meet at least one of these conditions
 1. the reported location is Venezuela
 2. the account language is Spanish (`es`)
 3. the tweet language is Spanish (`es`)

In [6]:
# A tweet is kept when any one of the three criteria matches
tweets_clean = tweets_clean[
    (tweets_clean['user_reported_location'] == 'Venezuela')
    | (tweets_clean['account_language'] == 'es')
    | (tweets_clean['tweet_language'] == 'es')
].reset_index(drop=True)

#### This removes tweets whose reported location is in Europe or North America (US and Canada)

In [7]:
# Drop tweets whose reported location exactly matches one of the strings
# below; rows with a missing location are kept.
tweets_clean = tweets_clean[
    ~tweets_clean['user_reported_location'].isin([
        'London',
        'Manhattan, NY',
        'Brooklyn, NY',
        'Queens, NY',
        'New York, NY',
        'California, USA',
        'New Jersey, USA',
        'North Holland, The Netherlands',
        'Atlantic City, NJ',
        'Mountain View, CA',
        'New York, USA',
        'Canada',
        'San Francisco, CA',
        'Washington, USA',
        'Washington, DC',
        'España',
        'Germany',
        'Nantes, France',
        'Houston, TX',
        'Texas,San Antonio',
        'Chicago',
        'Atlanta',
        'Washington,Seattle',
        'Fremont, CA',
        'France',
        'England, United Kingdom',
        'Oregon,Portland',
        'USA',
        'Florida,Orlando',
        'Califor',
        'California,Los Angeles',
        'Illinois, USA',
        'Arizona,phoenix',
        'Pennsylvania,Pittsburgh',
        'Pennsylvania,Philadelphia',
        'Dallas, TX',
    ])
]

# First 10 rows of data
tweets_clean.head(10)

Unnamed: 0,user_screen_name,user_display_name,user_reported_location,account_language,tweet_language,tweet_text,tweet_time,urls,hashtags,is_retweet
0,akhonfellah,⁦🇮🇷⁩أخٌ‌في‌الله,Iran,en,es,No parallel in history....\n#Hussain https://t...,2018-09-19 20:04,[],['Hussain'],False
1,akhonfellah,⁦🇮🇷⁩أخٌ‌في‌الله,Iran,en,es,HABIL CAFE😂 https://t.co/5ipZYvYA8X,2017-10-04 03:28,['https://twitter.com/KnowKaduna/status/915295...,[],False
2,Romeo1997er,Romeo,,fa,es,RT @countdown2040: Gazans prepare for Hajj\nht...,2017-08-29 03:17,['http://www.countdown2040.com/ShowGallery/69/'],[],True
3,Kasia36790875,Kasia,,fa,es,RT @countdown2040: Abbas sends medical aid to ...,2017-11-18 16:36,['http://www.countdown2040.com/ShowNews/1014/'],[],True
4,Hmn90432381,H.m.n,,fa,es,RT @countdown2040: Gazans prepare for Hajj\nht...,2017-09-24 14:28,['http://www.countdown2040.com/ShowGallery/69/'],[],True
5,koInQlW0rKxQPoTuf5BmVjKyTvYJR5JdKeo8spDdrwM=,koInQlW0rKxQPoTuf5BmVjKyTvYJR5JdKeo8spDdrwM=,,fa,es,RT @countdown2040: Abbas sends medical aid to ...,2017-10-30 16:52,['http://www.countdown2040.com/ShowNews/1014/'],[],True
6,24evm+SfMta5ONKMRjQe1Qj39PdyLGPqMMXl8XYDbg=,24evm+SfMta5ONKMRjQe1Qj39PdyLGPqMMXl8XYDbg=,,en,es,RT @countdown2040: Video: Lana Del Rey Ignores...,2018-08-28 08:33,['http://www.countdown2040.com/ShowMovieList/9...,"['GroupPalestine', 'قروب_فلسطيني']",True
7,GP3PzukVFPWVoLtVWTEyy20m2lRZaMaRmN7n0lz7Bg=,GP3PzukVFPWVoLtVWTEyy20m2lRZaMaRmN7n0lz7Bg=,Earth,fa,es,RT @countdown2040: Gazans prepare for Hajj\nht...,2017-09-28 17:53,['http://www.countdown2040.com/ShowGallery/69/'],[],True
8,Richard80907,Richard,,fa,es,RT @countdown2040: Gazans prepare for Hajj\nht...,2017-11-05 15:47,['http://www.countdown2040.com/ShowGallery/69/'],[],True
9,UW2EZRTAv0C7rCy2LOI2SBiYh8IrdwmQAdI7p7yqok=,UW2EZRTAv0C7rCy2LOI2SBiYh8IrdwmQAdI7p7yqok=,,en,es,"Abbas sends medical aid to Venezuela, sparking...",2017-08-23 08:02,['http://www.UW2EZRTAv0C7rCy2LOI2SBiYh8IrdwmQA...,[],False
