In [40]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import os
import shutil
import random
import platform

%matplotlib inline

In [4]:
summary_stats_df = pd.read_csv("../data/summary_stats.csv")

summary_stats_df["country"] = summary_stats_df["country"].apply(lambda x: "GB" if x == "UK" else x)
summary_stats_df.head()

Unnamed: 0,year,month,country,publisher,num_articles,total_words
0,2010,1,AU,3AW,1,1505
1,2010,1,AU,ABC Message Stick,2,7284
2,2010,1,AU,ABC Online,143,78050
3,2010,1,AU,ABC Regional Online,55,21602
4,2010,1,AU,Architecture and Design,14,4981


In [5]:
by_publisher = summary_stats_df.groupby(["country", "publisher"])
by_publisher = by_publisher[["num_articles", "total_words"]]
by_publisher = by_publisher.sum().reset_index().rename(columns={
    "num_articles": "publisher_tot_articles", 
    "total_words": "publisher_tot_words"
})

bins=[0, 1, 2, 3, 5, 10, 25, 100, 500, 1000, np.inf]
labels=["1", "2", "3", "4-5", "6-10", "11-25", "26-100", "101-500", "501-1000", "1001+"]

by_publisher["publisher_bin"] = pd.cut(by_publisher.publisher_tot_articles, bins=bins, labels=labels)
by_publisher.head()

Unnamed: 0,country,publisher,publisher_tot_articles,publisher_tot_words,publisher_bin
0,AU,'Bourne this way,48,14834,26-100
1,AU,10 News First,37,6240,26-100
2,AU,10 daily,221,103872,101-500
3,AU,123Jump.com,3,778,3
4,AU,2DayFM,15,3771,11-25


In [6]:
big_publishers = summary_stats_df.merge(
    by_publisher.query("publisher_bin == '1001+'"), 
    how="right", on=["country", "publisher"]
)

big_publishers.head()

Unnamed: 0,year,month,country,publisher,num_articles,total_words,publisher_tot_articles,publisher_tot_words,publisher_bin
0,2016,11,AU,9Honey,14,5941,3798,1839552,1001+
1,2016,12,AU,9Honey,52,18680,3798,1839552,1001+
2,2016,11,AU,9Honey,14,5941,3798,1839552,1001+
3,2016,12,AU,9Honey,56,20555,3798,1839552,1001+
4,2017,1,AU,9Honey,75,31747,3798,1839552,1001+


In [7]:
big_publishers

Unnamed: 0,year,month,country,publisher,num_articles,total_words,publisher_tot_articles,publisher_tot_words,publisher_bin
0,2016,11,AU,9Honey,14,5941,3798,1839552,1001+
1,2016,12,AU,9Honey,52,18680,3798,1839552,1001+
2,2016,11,AU,9Honey,14,5941,3798,1839552,1001+
3,2016,12,AU,9Honey,56,20555,3798,1839552,1001+
4,2017,1,AU,9Honey,75,31747,3798,1839552,1001+
...,...,...,...,...,...,...,...,...,...
130552,2020,5,ZA,zoutnet.co.za,156,77085,2216,1014646,1001+
130553,2020,6,ZA,zoutnet.co.za,177,87570,2216,1014646,1001+
130554,2020,7,ZA,zoutnet.co.za,195,93367,2216,1014646,1001+
130555,2020,8,ZA,zoutnet.co.za,135,66479,2216,1014646,1001+


In [8]:
CAP = 10000

def get_num_samples(x, num_samples):
    pct, max_ = x["pct"], x["total_articles"]
    n = int(np.around(num_samples * pct))
    if n > max_:
        n = max_
        
    return n

sampled_counts = []
for country, country_df in big_publishers.groupby("country"):
    
    # Get rid of month
    # For each contry group by years. Articles per year. 
    articles_by_year = country_df.groupby("year")["num_articles"].sum()
    
    # Median from the number of articles that are of the year 2015 and below. 
    num_to_sample = articles_by_year[articles_by_year.index < 2016].median()
    num_to_sample = np.min([CAP, num_to_sample])
    
    # Going through each year at a time per country. 
    for year, year_df in country_df.groupby("year"):
        publishers = year_df.groupby("publisher")["num_articles"].sum().reset_index()
        publishers["country"] = country
        publishers["year"] = year
        
        publishers["pct"] = publishers.num_articles / publishers.num_articles.sum()
        publishers["total_articles"] = publishers.num_articles
        
        publishers["sampled_articles"] = publishers.apply(
            get_num_samples, axis=1, num_samples=num_to_sample
        )
    
        sampled_counts.append(publishers[[
            "country", "year", "publisher", "total_articles", "sampled_articles"
         ]].copy())
        
big_publisher_sampling = pd.concat(sampled_counts)
big_publisher_sampling.to_csv("sampling_amounts.csv", index=False)
big_publisher_sampling.head()

Unnamed: 0,country,year,publisher,total_articles,sampled_articles
0,AU,2010,ABC Local,338,248
1,AU,2010,ABC Online,2743,2010
2,AU,2010,AdNews,7,5
3,AU,2010,Architecture and Design,147,108
4,AU,2010,Australian Personal Computer,263,193


In [9]:
print(big_publisher_sampling[["total_articles", "sampled_articles"]].sum().apply(lambda x: f"{x:,d}"))

print("\nBy Year:")
big_publisher_sampling.groupby("year")[["total_articles", "sampled_articles"]].sum()

total_articles      17,336,805
sampled_articles     1,464,959
dtype: object

By Year:


Unnamed: 0_level_0,total_articles,sampled_articles
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2010,258826,84139
2011,347212,109477
2012,448073,133394
2013,510685,141804
2014,576154,142360
2015,734857,142358
2016,3006000,142357
2017,3064616,142295
2018,2575893,142342
2019,2956593,142332


In [10]:
big_publisher_sampling

Unnamed: 0,country,year,publisher,total_articles,sampled_articles
0,AU,2010,ABC Local,338,248
1,AU,2010,ABC Online,2743,2010
2,AU,2010,AdNews,7,5
3,AU,2010,Architecture and Design,147,108
4,AU,2010,Australian Personal Computer,263,193
...,...,...,...,...,...
60,ZA,2020,timeslive.co.za,2724,211
61,ZA,2020,weetracker.com,662,51
62,ZA,2020,wheels24.co.za,2323,180
63,ZA,2020,za.ign.com,2319,179


## Sampling

In [19]:
base_path = '/Users/leo/Documents/GitHub/peace-speech-project2/'

In [53]:
# Things to change
# 1. base_path
# 2. base_dir:  'clean' -> '?' to a folder name which has all full cleaned data. (Folder split into Country, Year, Publisher)

os_name = platform.system()

base_dir = os.path.join(base_path, 'data', 'clean') 

counter =0

for index, row in big_publisher_sampling.iterrows():
    row.publisher = slugify(row.publisher)
    current_dir = os.path.join(base_dir,row.country,row.publisher,str(row.year))
    
    if os_name == 'Windows':
        print(current_dir)
        first_part, second_part = current_dir.split('clean\\')
        
    else:
        print(current_dir)
        first_part, second_part = current_dir.split('/clean/')
    
    copy_dir = os.path.join(first_part,'clean_subsample',second_part)
    
    if os.path.isdir(current_dir):
        articles = os.listdir(current_dir)
        sample_num = min(row.sampled_articles, len(articles))
        sampled_articles = random.choices(articles, k=sample_num)

        if not os.path.exists(copy_dir):
            os.makedirs(copy_dir)

        for article in sampled_articles:
            shutil.copyfile(os.path.join(current_dir, article), os.path.join(copy_dir, article))
    
    else:
        counter +=1
        print("Not a directory:" + current_dir)

print("Counter: " + counter)

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/abc-local/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/abc-online/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/adnews/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/architecture-and-design/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/australian-personal-computer/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/ballarat-courier/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/bendigo-advertiser/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/brisbane-times/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/crn-australia/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/caradvice/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/computerworld-australia/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/bundaberg-news-mail/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/crn-australia/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/capsule-computers/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/caradvice/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/clarence-valley-daily-examiner/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/coffs-coast-advocate/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/computerworld-australia/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/courier-mail/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/crikey/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/e-online/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/eurosport-com-au/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/cl

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/my-sailing/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/news-com-au/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/nrl-com/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/newcastle-herald/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/newsmaker/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/noise11/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/northern-star/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/on-line-opinion/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/pedestrian-tv/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/proactive-investors-australia/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/property-observer/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/radio-australia/201

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-australian-financial-review/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-border-mail/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-cairns-post/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-canberra-times/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-conversation/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-conversation-au/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-conversation-uk/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-conversation-us/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-interpreter/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-new-daily/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-queensland-times/2014
/Users/leo/Documents/GitHub/pe

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-australian-financial-review/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-border-mail/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-cairns-post/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-canberra-times/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-conversation-au/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-conversation-uk/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-conversation-us/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-interpreter/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-new-daily/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-queensland-times/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-roar/2015
/Users/leo/Documents/GitHub/peace-spee

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/sydney-morning-herald/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/techly/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/ten-eyewitness-news/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-age/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-australian-financial-review/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-border-mail/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-cairns-post/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-canberra-times/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-conversation-au/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-conversation-uk/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/the-conversation-us/2016
/Users/leo/Documents/GitHub/peace-speech-

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/popculture-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/proactive-investors-australia/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/property-observer/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/radio-australia/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/reneweconomy/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/riverine-herald/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/rockhampton-morning-bulletin/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/rugby-com-au/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/sbs/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/sbs-the-world-game/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/shepparton-news/2017
/Users/leo/Documents/GitHub/peace-speech-project2/da

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/mondaq-news-alerts/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/money-management/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/motley-fool-australia/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/mumbrella/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/music-feeds/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/my-sailing/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/news-com-au/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/nrl-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/nt-news/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/newcastle-herald/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/noise11/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/northern-star/2018
/Users/leo/Doc

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/gladstone-observer/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/goal-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/gold-coast-bulletin/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/gympie-times/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/herald-sun/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/illawarra-mercury/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/indaily/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/international-business-times-au/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/investordaily/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/kotaku-australia/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/lawyers-weekly/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/l

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/honey-nine-com-au/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/illawarramercury-com-au/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/indaily-com-au/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/independentaustralia-net/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/kalkinemedia-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/kotaku-com-au/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/livewiremarkets-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/lowyinstitute-org/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/mamamia-com-au/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/miragenews-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/mumbrella-com-au/2019
/Users/leo/Documents/GitHub/peace-speech-pr

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/kotaku-com-au/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/livewiremarkets-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/lowyinstitute-org/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/mamamia-com-au/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/miragenews-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/mumbrella-com-au/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/mysailing-com-au/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/news-com-au/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/nrl-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/pedestrian-tv/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/perthnow-com-au/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/AU/pursuit-uni

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/canadianbusiness-com/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/cape-breton-post/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/castanet-net/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/chatham-daily-news/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/digitaljournal-com/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/edmonton-journal/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/estevan-mercury/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/exclaim/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/financial-post/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/fort-mcmurray-today/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/globe-and-mail/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/canada-com/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/canadian-lawyer-magazine/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/canadianbusiness-com/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/cape-breton-post/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/castanet-net/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/chatham-daily-news/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/delta-optimist/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/digitaljournal-com/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/edmonton-journal/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/edmonton-sun/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/energeticcity-ca/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/ubyssey-online/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/vancouver-courier/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/vancouver-sun/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/vernon-morning-star/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/victoria-times-colonist/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/waterloo-record/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/welland-tribune/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/western-hockey-league/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/western-producer/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/western-star/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/windsor-star/2011
/Users/leo/Documents/GitHub/peace-speech-project2/dat

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/stanley-cup-of-chowder/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/stockhouse/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/straight-com/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/tsn/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/techvibes/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/the-barrie-examiner/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/the-beacon-herald/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/the-catholic-register/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/the-coast-halifax/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/the-globe-and-mail/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/the-guardian-charlottetown/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/c

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/mining-com/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/macleans-ca/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/marketwired/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/mcgill-reporter/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/metronews-canada/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/mississauga/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/mondaq-news-alerts/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/montreal-gazette/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/ng-news/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/nhl-com/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/national-post/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/net-newsledger/2013
/Users/leo/

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/blackburnnews-com/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/brandon-sun/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/brantford-expositor/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/burnaby-now/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/cbc-ca/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/cp24-toronto-s-breaking-news/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/ctv-news/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/calgary-herald/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/calgary-sun/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/canada-news/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/canada-newswire/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/canada-com/2014

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/techvibes/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/the-barrie-examiner/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/the-beacon-herald/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/the-catholic-register/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/the-coast-halifax/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/the-daily-courier/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/the-globe-and-mail/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/the-guardian/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/the-guardian-charlottetown/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/the-hockey-writers/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/the-link/2014
/Users/leo/Documents/GitHub/peace-speech-project

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/good-e-reader/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/government-of-ontario-news/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/guelph-mercury/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/hamilton-spectator/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/hockey-s-future/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/horrormovies-ca/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/huffington-post-canada/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/itbusiness-ca/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/infotel-news-ltd/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/insidehalton-com/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/journal-pioneer/2015
/Users/leo/Documents/GitHub/peace-speech-projec

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/toronto-sun/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/torontoist/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/truro-daily-news/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/ubyssey-online/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/vancouver-courier/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/vancouver-sun/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/varsity/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/vernon-morning-star/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/voiceonline-com/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/waterloo-record/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/welland-tribune/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/western-hockey

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/mcgill-reporter/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/medicine-hat-news/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/metronews-canada/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/mississauga/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/mondaq-news-alerts/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/montreal-gazette/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/ng-news/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/nhl-com/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/nanaimo-news-now-press-release/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/national-observer/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/national-post/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/varsity/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/vernon-morning-star/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/voiceonline-com/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/waterloo-record/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/welland-tribune/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/western-hockey-league/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/western-producer/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/western-star/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/windows-central/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/windsor-star/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/winnipeg-free-press/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/net-newsledger/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/news-talk-650-ckom/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/news-talk-980-cjme/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/news1130/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/niagara-falls-review/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/niagarathisweek-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/north-shore-news/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/northumberland-today/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/nunatsiaq-news/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/oshawa-express/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/ottawa-business-journal/2017
/Users/leo/Documents/GitHub/peace-speech-proje

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/winnipeg-free-press/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/winnipeg-sun/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/woodstock-sentinel-review/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/yahoo-canada-sports/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/yahoo-news/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/yahoo-sports/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/yorkregion-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/durhamregion-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/inews880-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/ipolitics-ca/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/insidetoronto-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/C

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/ottawa-sun/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/owen-sound-sun-times/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/pembinavalleyonline-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/pembroke-daily-observer/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/peterborough-examiner/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/philippine-canadian-inquirer/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/powell-river-peak/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/prince-albert-daily-herald/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/prince-george-citizen/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/proactive-investors-usa-canada/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/queen-s-journal/2

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/cfjc-today-kamloops/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/cp24-toronto-s-breaking-news/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/ctv-news/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/canindia-news/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/canada-newswir/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/canada-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/canadian-lawyer-magazine/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/cape-breton-post/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/carelton-free-press/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/castanet-net/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/chatham-daily-news/2019
/Users/leo/Documents/GitHub/peace-speech-project2

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/times-colonist/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/timmins-press/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/timminstoday/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/tolerance-ca/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/toronto-life/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/toronto-star/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/truro-daily-news/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/ucalgary-news/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/ubyssey-online/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/vice/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/vancouver-courier/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/varsity/2019
/Users/leo/Doc

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/timminstoday-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/toronto-citynews-ca/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/toronto-ctvnews-ca/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/trurodaily-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/tsn-ca/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/tvo-org/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/vancourier-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/vice-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/whl-ca/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/winnipegfreepress-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/570news-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/680-news/2020
/Users/leo/D

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/theguardian-pe-ca/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/theloop-ca/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/thestar-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/thesudburystar-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/thetelegram-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/thetyee-ca/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/thevarsity-ca/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/thewhig-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/thompsoncitizen-net/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/timescolonist-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/timminstoday-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/CA/toronto-c

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/trustedreviews/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/unreality-tv/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/vitalfootball/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/walesonline/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/wired-co-uk/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/worcester-news/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/world-nuclear-news/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/yorkshire-evening-post/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/yorkshire-post/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/crash-net/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/dailymail-co-uk/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/dailyrecord-c

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-argus/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-bolton-news/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-drinks-business/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-ecologist/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-independent/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-market-oracle/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-northern-echo/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-press-york/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-quietus/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-sport-review/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-star/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-times/20

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/proactive-investors-uk/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/radio-times/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/register/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/reuters-uk/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/risk-net/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/royal-central/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/scotsman/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/scottish-daily-record/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/screen-international/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/sky-news/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/skysports/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/south-wales-argus/2012
/User

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/itproportal/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/itv-news/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/independent/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/inquirer/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/international-business-times-uk/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/international-law-office/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/ipswich-star/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/isle-of-wight-county-press/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/jewish-chronicle/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/kent-online/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/lancashire-evening-post/2013
/Users/leo/Documents/GitHub/peace-speech-project2

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/citywire-co-uk/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/clash-magazine/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/computerweekly-com/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/cycling-weekly/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/daily-echo/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/daily-mail/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/daily-post-north-wales/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/daily-star/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/den-of-geek/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/derby-telegraph/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/develop/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/dorset-echo/2014
/Use

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/time-out-london/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/tribal-football/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/trustedreviews/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/unreality-tv/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/walesonline/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/wired-co-uk/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/world-nuclear-news/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/yorkshire-evening-post/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/yorkshire-post/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/crash-net/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/expressandstar-com/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/helloma

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/oxford-mail/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/pcgamesn/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/plymouth-herald/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/pocket-lint-com/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/press-release-rocket/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/proactive-investors-uk/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/radio-times/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/reuters-uk/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/risk-net/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/royal-central/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/scotsman/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/scottish-daily-record/201

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/east-anglian-daily-times/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/echo/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/eurogamer-net/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/eurosport-co-uk/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/evening-standard/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/evening-telegraph/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/exchange-news-direct/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/exeter-express-and-echo/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/express-co-uk/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/financial-times/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/finextra/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/G

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-guardian-blog/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-independent/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-market-oracle/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-national/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-northern-echo/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-northern-echo-registration/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-press-york/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-quietus/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-register/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-scottish-sun/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-sport-review/2016
/Users/leo/Documents/GitHub/peace-speech-project2/dat

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/hereford-times/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/huddersfield-examiner/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/huffpost-uk/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/huffington-post/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/huffington-post-uk/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/hull-daily-mail/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/itproportal/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/itv-news/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/inquirer/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/international-business-times-uk/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/ipswich-star/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/is

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/worcester-news/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/world-nuclear-news/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/yahoo-finance-uk/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/yahoo-news/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/yahoo-news-uk/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/yorkshire-evening-post/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/yorkshire-post/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/crash-net/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/expressandstar-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/hellomagazine-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/inews/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/road-cc/20

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/metro-newspaper-uk/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/mirror-co-uk/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/nhs-choices/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/nme-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/nw-evening-mail/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/news-star/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/norfolk-eastern-daily-press/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/nottingham-post/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/ok-magazine/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/oxford-mail/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/pc-gamer/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/pcgamesn/2018
/Users/l

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/bristol-live/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/business-insider-uk/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/cnet/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/catholic-herald-online/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/channel-4-news/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/chroniclelive/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/city-a-m/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/clash-magazine/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/clicklancashire/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/cycling-weekly/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/daily-echo/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/daily-mail/2019
/User

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-quietus/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-register/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-scotsman/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-scottish-sun/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-stage/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-star/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-sun/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-sunday-post/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-times/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-week-uk/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-westmorland-gazette/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/the-zimbabwe-daily/2019
/Users/le

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/scotsman-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/shropshirestar-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/southwalesargus-co-uk/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/spectator-co-uk/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/standard-co-uk/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/stokesentinel-co-uk/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/sundaypost-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/swindonadvertiser-co-uk/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/techradar-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/telegraph-co-uk/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/tes-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clea

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/gq-magazine-co-uk/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/grimsbytelegraph-co-uk/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/hellomagazine-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/heraldscotland-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/huffingtonpost-co-uk/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/hulldailymail-co-uk/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/ibtimes-co-uk/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/independent-co-uk/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/inews-co-uk/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/irishnews-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GB/itv-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/c

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GH/citifmonline/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GH/ghana-broadcasting-corporation/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GH/ghanaweb/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GH/ghanasoccernet-com/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GH/goal-com/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GH/myjoyonline-com/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GH/peace-fm-online/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GH/pulse-com-gh/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GH/allghananews-com/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GH/biztech-africa/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GH/businessghana/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/GH/citif

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/HK/hong-kong-standard/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/HK/wall-street-journal/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/HK/asia-asset-management/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/HK/ej-insight/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/HK/enterprise-innovation/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/HK/financeasia/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/HK/hong-kong-standard/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/HK/wall-street-journal/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/HK/acn-newswire-press-release/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/HK/asia-asset-management/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/HK/ej-insight/2016
/Users/leo/Documents/GitHub/peace-sp

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/herald-ie/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/highland-radio/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/irish-central/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/irish-echo/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/irish-examiner/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/irish-health/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/irish-independent/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/irish-medical-times/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/irish-times/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/irishcentral/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/joe/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/kilkenny-people/2011
/Users/leo/

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/irish-times/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/irishcentral/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/joe/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/kildare-nationalist/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/kilkenny-people/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/laois-nationalist/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/leinster-express/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/leinster-leader/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/leitrim-observer/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/limerick-leader/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/limerick-post/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/longford-leade

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/adworld/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/afloat/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/agriland/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/an-phoblacht/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/anglo-celt/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/balls-ie/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/breakingnews-ie/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/clare-champion/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/connaught-telegraph/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/derry-journal/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/digital-spy/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/donegal-democrat/2015
/Users/leo/Documents/

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/digital-spy/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/donegal-democrat/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/donegal-now/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/dublin-live/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/dublin-people/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/dundalk-democrat/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/esm-the-european-supermarket-magazine/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/entertainment-ie/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/evening-echo-cork/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/extratime-ie/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/fft-ie/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/connaught-telegraph/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/dailyedge-ie/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/derry-journal/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/digital-spy/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/digitalspy-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/donegal-democrat/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/dublin-live/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/dublin-people/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/dundalk-democrat/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/esm-the-european-supermarket-magazine/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/entertainment-ie/2017
/Users/leo/Documents/GitHub/peace-speech-project2/da

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/an-phoblacht/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/anglo-celt/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/attain-news/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/balls-ie/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/breakingnews-ie/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/business-world-press-release-blog/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/buzz-ie/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/camden-new-journal-newspapers-website/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/clare-champion/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/connacht-tribune-group/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/connaught-telegraph/2018
/Users/leo/Documents/GitHub/peace-speech-p

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/westmeath-independent/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/wicklow-people/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/wicklownews-net/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/antiquestradegazette-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/autosport-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/bne-intellinews/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/frieze-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/hotpress-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/http-theliberal-ie-press-release-blog/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/image-ie/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/thejournal-ie/2018
/Users/leo/Documents/GitHub/peace-speech-project

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/thecork-ie/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/tipperarylive-ie/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/todayfm/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/trade-arabia/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/waterford-live/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/westmeath-examiner/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/westmeath-independent/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/wicklow-people/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/wicklownews-net/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/advertiser-ie/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/afloat-ie/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/agriland-ie/2019


/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/virginmediatelevision-ie/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/waterfordlive-ie/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/westmeathexaminer-ie/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/whufc-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/fiji-times/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/flightglobal/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/independent-ie/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/irish-examiner/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/irish-farmers-journal/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/laois-nationalist/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/longford-leader/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/cle

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/trtworld-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/ttgmedia-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/universitytimes-ie/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/virginmediatelevision-ie/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/waterfordlive-ie/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/westmeathexaminer-ie/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IE/whufc-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/assam-tribune/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/bollywood-mantra/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/business-standard/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/calcutta-telegraph/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/reuters-india/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/sify/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/siliconindia-com/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/super-good-movies/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/the-hindu/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/times-of-india/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/topnews/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/twocircles-net/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/vc-circle/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/wall-street-journal/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/washington-bangla-radio/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/zigwheels-com/2011
/U

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/daily-pioneer/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/deccan-herald/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/directionsmag-com/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/e-pao-net/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/espncricinfo-com/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/economic-times/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/equity-bulls/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/fibre2fashion-com/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/financial-express/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/firstpost/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/forbes-india/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/free-press-journa

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/ibnlive/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/india-prwire/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/india-today/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/india-com/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/indiaglitz/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/indian-autos-blog/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/indian-cars-bikes/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/indian-express/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/indiatimes-com/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/international-business-times-india-edition/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/investing-com/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/indiaglitz/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/indiainfoline/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/indian-autos-blog/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/indian-cars-bikes/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/indiatimes-com/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/international-business-times-india-edition/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/investing-com/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/jagran-josh/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/jagran-post/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/kashmir-images/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/kashmir-life/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/c

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/etauto-com/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/etbrandequity-com/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/etenergyworld-com/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/ettelecom-com/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/ettech-com/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/eastern-mirror/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/economic-times/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/equity-bulls/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/fibre2fashion-com/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/financial-express/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/firstpost/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/forbes-india/201

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/ahmedabad-mirror/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/all-india-radio/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/asian-correspondent/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/assam-tribune/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/avenue-mail/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/bgr-india/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/bw-businessworld/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/bangalore-mirror/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/bloombergquint/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/boldsky/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/bollywood-life/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/business-insider

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/the-better-india/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/the-echo-of-india/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/the-hans-india/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/the-hindu/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/the-hitavada/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/the-indian-express/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/the-navhind-times/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/the-new-indian-express/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/the-northlines/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/the-quint/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/the-sangai-express/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/medianama-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/mensxp-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/merinews/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/mid-day/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/millennium-post/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/moneycontrol-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/morung-express/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/mumbai-mirror/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/ndtv/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/ndtvsports-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/nyoooz/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/nagaland-post/2018
/Users/leo/Documents/GitHub

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/digit/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/doordarshan/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/espn/2019
Not a directory:/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/espn/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/etauto-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/etbrandequity-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/etenergyworld-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/ettelecom-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/ettech-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/economic-times/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/fibre2fashion-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/financial-express/2019
/User

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/analyticsindiamag-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/aninews-in/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/asianage-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/bangaloremirror-indiatimes-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/bgr-in/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/bloombergquint-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/business-standard-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/businessinsider-in/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/businesstoday-in/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/businesswireindia-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/cartoq-com/2019
/Users/leo/Documents/GitHub/peace-speech-projec

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/ahmedabadmirror-indiatimes-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/analyticsindiamag-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/aninews-in/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/asianage-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/bangaloremirror-indiatimes-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/bgr-in/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/bloombergquint-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/business-standard-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/businessinsider-in/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/businesstoday-in/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/IN/businesswireindia-com/2020
/Users/leo/Documents/GitHub

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/JM/jamaica-observer/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/JM/government-of-jamaica-jamaica-information-service/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/JM/jamaica-gleaner/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/JM/jamaica-observer/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/JM/jamaica-star-online/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/JM/government-of-jamaica-jamaica-information-service/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/JM/jamaica-gleaner/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/JM/jamaica-observer/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/JM/jamaica-star-online/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/JM/jamaica-gleaner/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/JM/jamaica

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/daily-nation/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/freshplaza/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/ghafla/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/ghafla-kenya/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/goal-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/hivisasa-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/k24-tv/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/kenya-broadcasting-corporation/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/nairobi-news/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/nairobi-news-satire-press-release-blog/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/sde-entertainment-news/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/K

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/the-standard/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/businessdailyafrica-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/capitalfm-co-ke/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/citizentv-co-ke/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/futaa-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/goal-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/hivisasa-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/kbc-co-ke/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/kenyanews-go-ke/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/kenyans-co-ke/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/lankaweb-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/KE/mobile-nation-co-ke/202

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/LK/nation-lk-the-nation-newspaper/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/LK/asian-tribune/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/LK/ceylon-daily-news/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/LK/colombo-gazette/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/LK/colombo-page/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/LK/daily-mirror/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/LK/economynext/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/LK/hiru-news/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/LK/lanka-business-online/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/LK/lankaweb/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/LK/newsfirst/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/LK/slbc/2017

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/goal-com/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/malay-mail-online/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/sin-chew-jit-poh/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/the-malay-mail-online/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/the-malaysian-insider/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/the-rakyat-post/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/the-star-online/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/the-sun-daily/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/malaysiandigest-com/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/asean-automotive-news/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/aquila-style/2014
/Users/leo/Documents/GitHub/peace-speech-project2/da

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/the-rakyat-post/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/the-star-online/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/the-sun-daily/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/fourthofficial-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/malaysiandigest-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/astro-awani/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/badzine/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/business-insider/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/daily-express/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/edgeprop-my/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/fox-sports-malaysia/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/fourfourt

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/nst-com-my/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/paultan-org/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/says-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/soyacincau-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/theborneopost-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/theedgemarkets-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/themalaysianinsight-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/themalaysianreserve-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/thestar-com-my/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/thesundaily-my/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/MY/worldofbuzz-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/nigerian-entertainment-today/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/osun-defender/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/p-m-news/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/pain-in-the-arsenal/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/pulse-nigeria/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/saharareporters-com/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/thisday-live/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/the-eagle-online/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/the-nation-newspaper/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/the-punch/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/the-streetjournal/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/c

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/news24-nigeria/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/nigeria-master-web/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/nigeria-news/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/nigeria-today/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/nigerian-entertainment-today/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/osun-defender-blog/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/p-m-news/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/pain-in-the-arsenal/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/politics-nigeria/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/proshare-nigeria-limited-press-release-registration/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/pulse-nigeria/2017
/Users/leo/Documents

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/tvc-news/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/techpoint-ng/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/the-eagle-online/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/the-nation-newspaper/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/the-news/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/the-punch/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/the-streetjournal/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/the-tide/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/the-whistler-ng/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/thecable/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/thenewsguru/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/ventures-africa/2018
/Users/leo/Do

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/osundefender-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/pmnewsnigeria-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/premiumtimesng-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/proshareng-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/pulse-ng/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/punchng-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/ripplesnigeria-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/saharareporters-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/signalng-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/sports247-ng/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/sundiatapost-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NG/sunne

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/voxy/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/waikato-times/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/sharechat/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/3news-nz/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/auckland-stuff-co-nz/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/computerworld-new-zealand/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/gameplanet/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/horsetalk/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/independent-community-newspaper/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/indian-newslink/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/indian-weekender/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/intere

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/timaru-herald/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/topnews-new-zealand/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/voxy/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/waikato-times/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/weatherwatch-co-nz/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/sharechat/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/sportal-co-nz/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/3news-nz/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/auckland-stuff-co-nz/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/cio-new-zealand/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/computerworld-new-zealand/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/gameplane

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/independent-community-newspaper/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/indian-newslink/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/indian-weekender/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/interest-co-nz/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/manawatu-standard/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/marlborough-express/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/nzcity/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/nzgamer-com/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/nelson-mail/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/new-zealand-doctor-online/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/new-zealand-herald/2016
/Users/leo/Documents/GitHub/peace-speech-projec

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/wairarapa-times-age/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/weatherwatch-co-nz/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/yahoo-news/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/sharechat/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/sportal-co-nz/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/waateanews-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/asia-pacific-report/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/auckland-stuff-co-nz/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/cio-new-zealand/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/computerworld-new-zealand/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/gameplanet/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/cle

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/southland-times/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/stuff-co-nz/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/sunlive/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/tvnz/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/taranaki-daily-news/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/the-dominion-post/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/the-national-business-review/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/the-nelson-mail/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/the-press/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/the-press-west-coast/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/the-southland-times/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/NZ/th

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/minda-news/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/philboxing-com/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/philippine-star/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/tempo/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/visayan-daily-star/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/zamboanga-today-online/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/bulatlat/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/businessworld-online-edition/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/davao-today/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/inquirer-net/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/minda-news/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/philboxing-co

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/minda-news/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/mobile-id-world/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/northbound-philippines-news-online/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/officially-kmusic/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/philboxing-com/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/philippine-information-agency/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/philippine-star/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/rappler/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/sports-interactive-network-philippines/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/study-international-news/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/sun-star/2016
/Users/leo/Document

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/pna-gov-ph/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/abs-cbn-news/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/autoindustriya-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/business-mirror/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/businessworld-online/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/cnn-philippines/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/cosmopolitan-philippines/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/cricket-addictor/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/davao-today/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/espn/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/fox-sports-asia/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PH/fox-sp

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/the-nation-pakistan/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/the-news-international/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/business-recorder/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/dawn-com/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/news-tribe/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/paktribune-com/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/pakistan-christian-post/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/pakistan-observer/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/pakistantoday-com-pk/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/the-express-tribune/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/the-nation/2011
/Users/leo/Documents/GitHub/peace-speech-project2/d

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/pakistan-observer/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/pakistan-today/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/pakistan-today-blog/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/pakistanherald/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/the-express-tribune/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/the-express-tribune-blog/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/the-nation/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/the-news-international/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/the-news-on-sunday/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/24-news-hd/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/ary-news/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/en-dailypakistan-com-pk/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/enews-hamariweb-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/mangobaaz-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/nation-com-pk/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/oyeyeah-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/pakistantoday-com-pk/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/phoneworld-com-pk/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/profit-pakistantoday-com-pk/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/radio-gov-pk/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/researchsnipers-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/PK/samaa-tv/2019
/Users/leo/Documents/GitHub/peace-speech-project

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/the-business-times/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/todayonline/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/techgoondu/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/televisionpost/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/the-new-paper/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/the-online-citizen/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/the-straits-times/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/the-tokyo-reporter/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/traveldailynews-asia-pacific/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/vr-zone/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/e27/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/eco-busin

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/asian-scientist-magazine/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/billionaire-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/biotechin-asia/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/business-insider/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/cfo-innovation-asia/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/campaign-asia-pacific/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/catch-news/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/channel-newsasia/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/connected-to-india/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/dealstreetasia/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/datacenternews-asia/2017
/Users/leo/Documents/GitHub/peace-speech-pro

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/yahoo-singapore-news/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/yourstory-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/asianews-network/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/e27/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/eco-business-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/finews-asia/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/www-micetimes-asi/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/www-micetimes-asia-press-release/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/asiaone/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/business-insider/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/cna/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/campaign-asi

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/humanresourcesonline-net/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/ibtimes-sg/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/ig-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/jantakareporter-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/marketing-interactive-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/nationnews-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/prestigeonline-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/sbr-com-sg/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/sea-ign-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/sg-asiatatler-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/SG/sg-news-yahoo-com/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clea

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/columbus-dispatch/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/computerworld/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/consequence-of-sound/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/counterpunch/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/daily-beast/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/daily-caller/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/daily-herald/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/daily-press/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/dallas-morning-news/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/deadline-com/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/deadspin/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/democracy-now/2010


/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/perezhilton-com/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/plain-dealer/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/playbill-com/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/politifact/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/politico/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/popmatters/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/popular-science/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/press-herald/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/project-syndicate/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/psychcentral-com/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/radiofreeeurope-radioliberty/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/raw-story/2

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/barron-s/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/billboard/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/billings-gazette/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/bleacher-report/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/bloomberg/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/boing-boing/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/boston-com/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/brookings-institution/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/business-insider/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/business-wire/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/cbs-local/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/cbs-new-york/2011
/Users/leo/

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/masslive-com/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/mediaite/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/medical-news-today/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/medical-xpress/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/milwaukee-journal-sentinel/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/minneapolis-star-tribune/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/mother-jones/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/mother-nature-network/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/mynorthwest-com/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/nbc-bay-area/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/nbc-chicago/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/cl

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/upi-com/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/us-department-of-state/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/usa-today/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/usa-today/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/vanity-fair/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/variety/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/ventura-county-star/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/venturebeat/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/voice-of-america/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wbtv/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wbur/2011
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wctv/2011
/Users/leo/Documents/GitHub/peace-spe

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/e-online/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/espn/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/engadget/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/entertainment-weekly/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/entrepreneur/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/foxsports-com/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/fairbanks-daily-news-miner/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/fast-company/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/flavorwire/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/florida-today/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/forbes/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/foreign-policy/2012
/Users/leo/Doc

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/pc-magazine/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/pr-newswire/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/pri/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/palm-beach-post/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/patch-com/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/pennlive-com/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/people-magazine/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/perezhilton-com/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/phys-org/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/plain-dealer/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/playbill-com/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/politifact/2012
/Users/leo/Documents/GitHub

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/zdnet/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/al-com/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/fox13now-com/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/fox4kc-com/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/mxdwn-com/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/nwitimes-com/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/film/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/89-3-kpcc/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/9news-com/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/a-v-club/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/abc-news/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/adage-com/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clea

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/honolulu-star-advertiser/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/hot-air/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/hotnewhiphop/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/houston-chronicle/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/huffington-post/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/ign/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/imperial-valley-press/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/inc-com/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/indianapolis-star/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/infoworld/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/insurance-journal/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/international-bus

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/sb-nation/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/sfgate/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/silive-com/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/stltoday-com/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/sacramento-bee/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/salon/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/san-francisco-chronicle/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/scientific-american/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/search-engine-land/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/seeking-alpha/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/shacknews/2013
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/slashgear/2013
/Users/leo/Do

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/al-com/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/adage-com/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/adweek/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/albuquerque-journal/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/aljazeera-com/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/allentown-morning-call/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/android-authority/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/arizona-daily-star/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/ars-technica/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/asheville-citizen-times/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/atlanta-journal-constitution/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/cl

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/gizmodo/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/gothamist/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/greenwich-time/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/hartford-courant/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/heavy-com/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/hollywood-life/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/hollywood-reporter/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/honolulu-star-advertiser/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/hoodline/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/hot-air/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/hotnewhiphop/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/houston-chronicle/2014
/User

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/perezhilton-com/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/philly-com/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/phys-org/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/plain-dealer/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/playbill-com/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/politifact/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/politico/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/polygon/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/popmatters/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/popular-science/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/press-herald/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/press-enterprise/2014
/Users/leo/Documents/GitH

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/washington-examiner/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/washington-post/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/washington-times/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/we-got-this-covered/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/webmd/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/winston-salem-journal/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wired/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wired-news/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/women-s-wear-daily/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/yahoo-finance/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/yahoo-news/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/yahoo-sports/20

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/digital-trends/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/discovery-news/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/duluth-news-tribune/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/e-online/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/espn/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/engadget/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/entertainment-tonight/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/entertainment-weekly/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/entrepreneur/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/fox-61/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/fox31-denver/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/fox40/2015
/Users/leo/Docume

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/nbcsports-com/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/ndtv/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/necn/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/nfl-com/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/nj-com/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/nola-com/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/npr/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/naples-daily-news/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/nasdaq/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/national-catholic-reporter/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/national-geographic/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/national-institutes-of-health/2015
/Users/leo/Documen

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/the-root/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/the-san-diego-union-tribune/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/the-seattle-times/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/the-southern/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/the-telegraph/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/the-verge/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/the-week-magazine/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/theblaze-com/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/thestreet-com/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/thewrap/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/thinkprogress/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/thomson-reuters-fo

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/buffalo-news/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/business-insider/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/business-wire/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/business-wire-press-release/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/bustle/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/cbs-local/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/cbs-news/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/cbssports-com/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/cnbc/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/cnet/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/cnn/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/cnn-international/2016
/Users/leo/Documents/GitHub/

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/jezebel/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/journal-record/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/just-jared/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/kabc-tv/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/kbtx/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/kcci-des-moines/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/ketv-omaha/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/kgo-tv/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/kiro-seattle/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/komo-news/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/ksat-san-antonio/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/ksl-com/2016
/Users/leo/Documents/GitHub/peace-speech-pr

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/polygon/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/popmatters/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/popular-science/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/press-herald/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/press-enterprise/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/project-syndicate/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/psychcentral-com/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/q13-fox/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/quartz/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/rt/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/radiofreeeurope-radioliberty/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/raw-story/2016
/Users/leo/Docu

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/weau/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wfmj/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wfmz-allentown/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wftv-orlando/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wgem/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wgn-tv/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/whsv/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wifr/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wilx-tv/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wired/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/witn/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wkyt/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wls-tv/2016
/Users/leo/D

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/christian-science-monitor/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/cinema-blend/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/cleveland-daily-banner/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/clutchpoints/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/co-exist/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/coindesk/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/cointelegraph/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/collider-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/colorado-springs-gazette/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/columbus-dispatch/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/comicbook-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/larry-brown-sports/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/las-vegas-review-journal/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/las-vegas-sun/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/lexington-herald-leader/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/lifehacker/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/lincoln-journal-star/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/los-angeles-times/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/mlive-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/mspoweruser/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/mtv-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/madison-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/mar

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/salon/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/san-antonio-express-news/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/san-francisco-chronicle/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/saturday-down-south/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/science-daily/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/scientific-american/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/search-engine-land/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/seattle-times/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/seeking-alpha/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/shacknews/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/slashgear/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/s

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/witn/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wkyt/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wls-tv/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wral-com/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wsb-atlanta/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wsoc-charlotte/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wtop/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/wall-street-journal/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/washington-examiner/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/washington-post/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/washington-times/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/watertown-public-opinion/2017
/Users/leo/Docum

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/deadline/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/deadspin/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/deseret-news/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/detroit-free-press/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/deutsche-welle/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/digital-trends/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/dispatch-tribunal/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/dot-esports/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/duluth-news-tribune/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/e-online/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/espn/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/espn-blog/2018
/Users/leo/Documen

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/mspoweruser/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/mtv-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/madison-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/marketwatch/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/mashable/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/masslive-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/mediaite/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/medical-news-today/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/medical-xpress/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/metro-us/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/miami-herald/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/midland-reporter-telegram/2018
/Users/leo/Docu

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/slate-magazine-blog/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/sports-illustrated/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/stereogum/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/sun-sentinel/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/syracuse-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/time/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/tmz/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/tmz-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/tampa-bay-times/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/tampabay-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/taste-of-country/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/techcrunch/2018
/Users/leo/Documents/Gi

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/cleveland-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/euronews/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/fox13now-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/fox2now-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/fox4kc-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/fox6now-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/fox8-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/kfor-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/mxdwn-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/myfox8-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/nwitimes-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/seattlepi-com/2018
/Users/leo/Documents/GitHub/peace-s

Not a directory:/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/ksl-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/ktar-news/2019
Not a directory:/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/ktar-news/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/ktla/2019
Not a directory:/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/ktla/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/ktrk-tv/2019
Not a directory:/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/ktrk-tv/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/ktuu-com/2019
Not a directory:/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/ktuu-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/kvoa-4/2019
Not a directory:/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/kvoa-4/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clea

Not a directory:/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/arkansasonline-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/arstechnica-com/2019
Not a directory:/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/arstechnica-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/baltimoresun-com/2019
Not a directory:/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/baltimoresun-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/bostonglobe-com/2019
Not a directory:/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/bostonglobe-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/bostonherald-com/2019
Not a directory:/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/bostonherald-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/buffalonews-com/2019
Not a directory:/Users/leo/Documents/GitHub/peace-speech-projec

Not a directory:/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/law360/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/lifehacker/2020
Not a directory:/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/lifehacker/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/lincoln-journal-star/2020
Not a directory:/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/lincoln-journal-star/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/los-angeles-daily-news/2020
Not a directory:/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/los-angeles-daily-news/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/los-angeles-times/2020
Not a directory:/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/los-angeles-times/2020
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/US/mlb/2020
Not a directory:/Users/leo/Documents/GitHub/peace-speech-project2/data/cl

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/el33tonline/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/esi-africa/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/fair-lady/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/fin24/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/goal-com/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/grocott-s-mail-online/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/health24-com/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/it-news-africa/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/independent-online/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/kickoff/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/lazygamer/2010
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/mail-guardian-online/2010
/Users/leo/Docum

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/south-african-broadcasting-corporation/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/sowetan/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/supersport/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/supersport-com/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/techcentral/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/the-media-online/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/times-live/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/university-world-news/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/ventureburn/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/zoutnet/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/eprop-co-za/2012
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/gearburn/2

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/lazygamer/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/mail-guardian-online/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/marklives-com/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/media-update/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/memeburn/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/mining-mx/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/mining-review/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/mining-technology/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/moneyweb-co-za/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/mybroadband/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/news24/2014
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/politicsweb/2014
/Users/leo/Docume

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/roodepoort-record/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/sa-commercial-prop-news/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/sandton-chronicle/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/soccer-laduma/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/south-africa-info/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/south-african-broadcasting-corporation/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/south-coast-herald/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/southern-courier/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/southern-times-africa/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/southlands-sun/2015
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/sowetan-live/2015
/Users/leo/Documents/Git

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/reuters-africa/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/rising-sun-chatsworth/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/rising-sun-overport/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/roodepoort-northsider/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/roodepoort-record/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/sa-commercial-prop-news/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/sandton-chronicle/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/soccer-laduma/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/south-africa-info/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/south-african-broadcasting-corporation/2016
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/south-coast-herald/2016
/Users/leo/D

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/north-coast-courier/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/northglen-news/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/political-analysis-south-africa/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/politicsweb/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/premier-soccer-league/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/pretoria-east-rekord/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/rnews/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/randburg-sun/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/randfontein-herald/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/reuters-africa/2017
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/rising-sun-chatsworth/2017
/Users/leo/Documents/GitHub/peace-speech-pro

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/marklives-com/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/media-update/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/memeburn/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/mining-mx/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/mining-review/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/mining-technology/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/moneyweb-co-za/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/mpumalanga-news/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/mybroadband/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/mygaming/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/news24/2018
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/north-coast-courier/2018
/Users/leo/Docu

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/highway-mail/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/ign-africa/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/imiesa/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/it-news-africa/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/it-online/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/itweb/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/independent-online/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/kickoff-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/krugersdorp-news/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/lowvelder/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/mail-and-guardian/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/malawi24/2019
/Users/leo/Documents/GitH

/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/southcoastherald-co-za/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/sport24-co-za/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/talkofthetown-co-za/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/techcentral-co-za/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/themediaonline-co-za/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/thesouthafrican-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/timeslive-co-za/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/weetracker-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/wheels24-co-za/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/za-ign-com/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data/clean/ZA/zoutnet-co-za/2019
/Users/leo/Documents/GitHub/peace-speech-project2/data

TypeError: can only concatenate str (not "int") to str

### Number of paths where the directory wasn't found.

In [54]:
counter

1350

#### Two reasons for this. 
#### 1. The folder wasn't created beacuse none of the files in the folder were selected when subsampled. <br> (i.e. our 'clean' folder is output of running Clean Text Files- SAMPLE.ipynb, with MAX_SIZE = 50). <br> For example, none of the articles from year 2010 of the publisher skysports-com were selected thus the folder wasn't created. This won't be a problem on your side since you are using all the data. 
#### 2. Most of the US publishers don't have a 2019/2020 year folder. I remember you saying that there was a problem in how some of the US raw files were formatted. I'm not sure if you solved this problem but I think this is the reason why there are so many US 2019/2020 directories not found. 