# TF-IDF

The goal of this analysis is to find how humanitarian is used in combination with other important words in the corpus. The results from this exercise will be represented on a map that joins countries using arrow lines. This visualisation answers the data holder's question regarding the direction of humanitarian aid.

In [8]:
# Import the necessary libraries
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd # For dataframe analysis
import numpy as np
import re # Regix to remove punctuation from strings I split
import requests # To get Lat Long via API Request
import urllib.parse

In [10]:
# Declare variables
yearMonths = ['201912', '202001', '202002', '202003', '202004', '202005', '202006', '202007', '202008']
yearMonthsWord = ['Dec 2019', 'Jan 2020', 'Feb 2020', 'Mar 2020', 'Apr 2020', 'May 2020', 'Jun 2020', 'Jul 2020', 'Aug 2020']
countryGroup = ['Gulf Countries', 'Euro-Atlantic Countries', 'New Global Media Players']

In [41]:
# Get stopwords and initiate vectorizer
tfidf = TfidfVectorizer(stop_words='english')

## Functions

Declaring important functions at the top so I can call them later.

In [121]:
def get_top_tf_idf_words(response, top_n=3):
    sorted_nzs = np.argsort(response.data)[:-(top_n+1):-1]
    return feature_names[response.indices[sorted_nzs]]

def cleanTextInDf(mystring):
    mystring = mystring.lower() # Text normalization: make string lowercase
    mystring = re.sub(r'[^\w\s]','', mystring) # Text normalization: remove punctuation
    return mystring

def checkYearMonth(row):
    value = row['date']
    return str(value)[0:6]

# def customTfidf(term, corpus):
#     corpusLen = len(corpus)
#     trainingLen = corpusLen * 0.2 # Train it on 20% of the text items
#     trainingLen = int(trainingLen)
#     trainArr = corpus[:trainingLen] # First 20% of corpus
#     restArr = corpus[-(len(corpus)-trainingLen-2):] # Last 80% of corpus
#     X = tfidf.fit_transform(trainArr)
#     feature_names = np.array(tfidf.get_feature_names())
#     responses = tfidf.transform(restArr)
#     resultsArr = [get_top_tf_idf_words(response,2) for response in responses]
#     return [l.tolist() for l in resultsArr]

In [27]:
# Get CSV data and combine it into a dataframe

ae_df = pd.read_csv('Clean Data/AE_cleandf.csv')
cn_df = pd.read_csv('Clean Data/CN_cleandf.csv')
de_df = pd.read_csv('Clean Data/DE_cleandf.csv')
fr_df = pd.read_csv('Clean Data/FR_cleandf.csv')
ir_df = pd.read_csv('Clean Data/IR_cleandf.csv')
kw_df = pd.read_csv('Clean Data/KW_cleandf.csv')
qa_df = pd.read_csv('Clean Data/QA_cleandf.csv')
ru_df = pd.read_csv('Clean Data/RU_cleandf.csv')
sa_df = pd.read_csv('Clean Data/SA_cleandf.csv')
tr_df = pd.read_csv('Clean Data/TR_cleandf.csv')
uk_df = pd.read_csv('Clean Data/UK_cleandf.csv')
us_df = pd.read_csv('Clean Data/US_cleandf.csv')
df = pd.concat([ae_df, cn_df, de_df, fr_df, ir_df, kw_df, qa_df, ru_df, sa_df, tr_df, uk_df, us_df], ignore_index=True)

In [44]:
# Clean the dataframe
cleanText = lambda text: cleanTextInDf(text) # Lambda function applies to all cells in a column
cleandf = pd.DataFrame(df.text.apply(cleanText)) # .apply() the function to all cells
df['text'] = cleandf['text']
df['yearmonth'] = df.apply(checkYearMonth, axis=1)
df

Unnamed: 0,name,path,country,network,date,token_freq,text,yearmonth
0,20200619_AE_EmiratesNewsAgency_NEXIS212.txt,Raw text/AEClean/20200619_AE_EmiratesNewsAgenc...,AE,EmiratesNewsAgency,20200619,4,melbourne 18th june 2020 wam the uae general c...,202006
1,20200422_AE_AlArabiya_FACTIVA2484.txt,Raw text/AEClean/20200422_AE_AlArabiya_FACTIVA...,AE,AlArabiya,20200422,13,020 al arabiya all rights reserved provided by...,202004
2,20200328_AE_TheNational_GDELT136558.txt,Raw text/AEClean/20200328_AE_TheNational_GDELT...,AE,TheNational,20200328,10,uae offers to help syria counter coronavirus t...,202003
3,20200616_AE_KhaleejTimes_NEXIS59.txt,Raw text/AEClean/20200616_AE_KhaleejTimes_NEXI...,AE,KhaleejTimes,20200616,3,loréal middle east has launched a uae solidari...,202006
4,20200318_AE_TheNational_NEXIS18486.txt,Raw text/AEClean/20200318_AE_TheNational_NEXIS...,AE,TheNational,20200318,16,medical staff push a patient on a gurney to a ...,202003
...,...,...,...,...,...,...,...,...
13426,20200402_US_AssociatedPress_SERP11978.txt,Raw text/USClean/20200402_US_AssociatedPress_S...,US,AssociatedPress,20200402,5,world food program usa allocates 333000 in eme...,202004
13427,20200629_US_TheNewYorkTimes_NEXIS794361.txt,Raw text/USClean/20200629_US_TheNewYorkTimes_N...,US,TheNewYorkTimes,20200629,13,the country has been hit with a triplewhammy r...,202006
13428,20200520_US_CNN_GNAPI69344.txt,Raw text/USClean/20200520_US_CNN_GNAPI69344.txt,US,CNN,20200520,18,cnnchinese leader xi jinping made preserving d...,202005
13429,20200402_US_VOA_GDELT131430.txt,Raw text/USClean/20200402_US_VOA_GDELT131430.txt,US,VOA,20200402,22,washington north koreas decision to protect it...,202004


## Test run with US

Testing the TF-IDF algorithm with one country before I map it to the block of countries.

In [138]:
# make a list from the text column of US articles
dfUs = df[df['country']=='US']
arr = dfUs["text"].to_numpy()
arr = arr.tolist()

In [139]:
# Train and run TF-IDF

corpus = arr
corpusLen = len(corpus)
trainingLen = corpusLen * 0.2 # Train it on 20% of the text items
trainingLen = int(trainingLen)
trainArr = corpus[:trainingLen] # First 20% of corpus
restArr = corpus[-(len(corpus)-trainingLen-4):] # Last 80% of corpus
X = tfidf.fit_transform(trainArr)
feature_names = np.array(tfidf.get_feature_names())
responses = tfidf.transform(restArr)
resultsArr = [get_top_tf_idf_words(response,4) for response in responses]
finalArr = [l.tolist() for l in resultsArr]

In [140]:
# Print items with 'interesting keywords' only. Note, we are only interested in knowing about aid and support

for item in finalArr:
    for subitem in item:
        if subitem in interestingKeywords:
            print(item)

['groups', 'rumours', 'aid', 'migrants']
['aid', 'border', 'ngos', 'asylum']
['updated', 'april', 'aid', 'migrants']
['idlib', 'province', 'groups', 'aid']
['faso', 'burkina', 'registration', 'aid']
['yemen', 'houthis', 'yemens', 'aid']
['donated', 'japan', 'yen', 'center']
['vegas', 'las', 'nevada', 'donation']
['tnh', 'aid', 'victims', 'samoa']
['yemen', 'aid', 'lowcock', 'email']
['usaid', 'programs', 'aid', 'foreign']
['billion', 'aid', 'grants', 'spending']
['aid', 'funding', 'generosity', 'administration']
['cross', 'red', 'donations', 'party']
['shop', 'donated', 'retailer', 'donate']
['shop', 'donated', 'retailer', 'donate']
['pakistan', 'aid', 'pakistans', 'groups']
['yemen', 'aid', 'houthis', 'uae']
['yemen', 'houthis', 'aid', 'programs']
['aid', 'yemen', 'rebels', 'programs']
['yemen', 'houthis', 'aid', 'programs']
['yemen', 'wfp', 'yemeni', 'aid']
['funding', 'aid', 'billion', 'spending']
['aid', 'relief', 'money', 'agency']
['advertisement', 'aid', 'relief', 'money']
['3m'

In [141]:
finalArr

[['vaccines', 'china', 'medicines', 'vaccine'],
 ['irgc', 'device', 'jahanpur', 'iranian'],
 ['moscow', 'reset', 'kremlin', 'treaty'],
 ['canada', '3m', 'n95', 'masks'],
 ['indias', 'villages', 'walking', 'kilometers'],
 ['libya', 'hifter', 'dujarric', 'pause'],
 ['food', 'hunger', 'farmers', 'global'],
 ['brewer', 'pastors', 'bhargava', 'cuba'],
 ['afghanistan', 'deliberate', 'kabul', 'afghan'],
 ['mogadishu', 'somalia', 'mohamed', 'ali'],
 ['weather', 'assume', 'romania', 'fauci'],
 ['north', 'korean', 'korea', 'news'],
 ['groups', 'rumours', 'aid', 'migrants'],
 ['iran', 'sanctions', 'iranian', 'hardliners'],
 ['___', 'says', 'lockdown', 'india'],
 ['camps', 'refugee', 'camp', 'refugees'],
 ['health', 'global', 'methodist', 'social'],
 ['yemen', 'saeed', 'initiative', 'hayel'],
 ['venezuelan', 'venezuela', 'maduro', 'caracas'],
 ['iran', 'sanctions', 'iranianbacked', 'iraq'],
 ['robots', 'robotics', 'yang', 'robot'],
 ['friday', 'sanofi', 'said', 'tests'],
 ['aid', 'border', 'ngos',

## For all blocks

Running the same analysis for all blocks of countries.

In [145]:
# Declare global variables

gulfCountries = ['AE', 'KW', 'QA', 'SA']
eaCountries = ['US', 'UK', 'DE', 'FR']
ngmpCountries = ['CN', 'RU', 'IR', 'TR']
interestingKeywords = ['human', 'humanitarian', 'humanitarianism', 'humane', 'donation', 'donations', 'donate',
                       'donated', 'donating', 'aid', 'support', 'medical', 'food', 'supply', 'help', 'helped',
                       'cooperation', 'relief', 'fund']

In [144]:
# Run the script for Gulf countries

for country in gulfCountries:
    print('\n\nProcessing',country,'...\n')
    dfCountry = df[df['country']==country] # Get the df for the country
    arr = dfCountry["text"].to_numpy() # Get the text as array
    corpus = arr.tolist() # Convert to list
    corpusLen = len(corpus) # Get length of list
    trainingLen = corpusLen * 0.2 # Train it on 20% of the text items
    trainingLen = int(trainingLen)
    trainArr = corpus[:trainingLen] # First 20% of corpus
    restArr = corpus[-(len(corpus)-trainingLen-4):] # Last 80% of corpus
    X = tfidf.fit_transform(trainArr) # Fit transform
    feature_names = np.array(tfidf.get_feature_names())
    responses = tfidf.transform(restArr) # TFIDF Transform
    resultsArr = [get_top_tf_idf_words(response,4) for response in responses]
    finalArr = [l.tolist() for l in resultsArr] # Final array output
    for item in finalArr: # Print the Results
        for subitem in item:
            if subitem in interestingKeywords:
                print(item)
    



Processing AE ...

['labour', 'human', 'pakistanis', 'leave']
['yemen', 'food', 'decline', 'reverse']
['yemen', 'august', 'billion', 'aid']
['food', 'afp', 'world', 'programme']
['iran', 'sanctions', 'relief', 'spending']
['water', 'management', 'supply', 'region']
['meals', 'campaign', 'uae', 'donations']
['erc', 'fund', 'initiatives', 'cooperation']
['erc', 'fund', 'initiatives', 'cooperation']
['fund', 'community', 'dubai', 'society']
['metric', 'uae', 'tonnes', 'medical']
['aid', 'idlib', 'extension', 'mandate']
['mbrch', 'fund', 'melha', 'bu']
['donated', 'sector', 'community', 'intercoil']
['uae', 'cooperation', 'assisting', 'covid19']
['water', 'supply', 'region', 'mena']
['uae', 'aid', 'medical', 'evacuated']
['uae', 'aid', 'medical', 'evacuated']
['tallest', 'box', 'meals', 'donation']
['medical', 'aid', 'iraqi', 'iran']
['medical', 'aid', 'iraqi', 'iran']
['erc', 'food', 'segments', 'parcels']
['canadian', 'friendly', 'cooperation', 'abdullah']
['donation', 'million', 'duba

From the above analysis, we can see the following relationships:

* UAE → Yemen: Aid in billions in August 2020
* UAE → UAE: The World's Tallest Donation box initiative where the Burj Khalifa in Dubai transformed into a donation box as part of the campaign aimed at providing meals to Coronavirus-hit communities across the United Arab Emirates.
* UAE → Iran: Coronavirus relief funds
* UAE → Iraq: Coronavirus relief funds
* UAE → Karazkhistan: Metric tons of medical supplies and testing kits sent to Kazakhstan.
* UAE → South Africa: UAE delivers planeload of medical supplies to South Africa 
* UAE → Sudan: Emirati aircraft carrying 38 tonnes of aid


* Qatar → Syria: Qatar pledges food support
* Qatar → Vanuatu: Support for cyclone hit areas
* Qatar → Ethiopia: Qatar deliver relief aid to Ethiopian refugees
* Qatar → Rwanda: Qatar sends tonnes of medical supplies to Rwanda


* Saudi Arabia → Lebanon: KSrelief affiliates take part in Beirut rescue operations
* Saudi Arabia → Palestine: The King Salman Humanitarian Aid and Relief Center (KSRelief) delivered the second batch of medical aid to the Palestine

In [146]:
# Run the script for Euro-Atlantic Countries

for country in eaCountries:
    print('\n\nProcessing',country,'...\n')
    dfCountry = df[df['country']==country] # Get the df for the country
    arr = dfCountry["text"].to_numpy() # Get the text as array
    corpus = arr.tolist() # Convert to list
    corpusLen = len(corpus) # Get length of list
    trainingLen = corpusLen * 0.2 # Train it on 20% of the text items
    trainingLen = int(trainingLen)
    trainArr = corpus[:trainingLen] # First 20% of corpus
    restArr = corpus[-(len(corpus)-trainingLen-4):] # Last 80% of corpus
    X = tfidf.fit_transform(trainArr) # Fit transform
    feature_names = np.array(tfidf.get_feature_names())
    responses = tfidf.transform(restArr) # TFIDF Transform
    resultsArr = [get_top_tf_idf_words(response,4) for response in responses]
    finalArr = [l.tolist() for l in resultsArr] # Final array output
    for item in finalArr: # Print the Results
        for subitem in item:
            if subitem in interestingKeywords:
                print(item)



Processing US ...

['food', 'hunger', 'farmers', 'global']
['groups', 'rumours', 'aid', 'migrants']
['aid', 'border', 'ngos', 'asylum']
['california', 'sacramento', 'newsom', 'food']
['beasley', 'wfp', 'sudan', 'food']
['updated', 'april', 'aid', 'migrants']
['idlib', 'province', 'groups', 'aid']
['faso', 'burkina', 'registration', 'aid']
['says', 'north', 'food', 'quintana']
['yemen', 'houthis', 'yemens', 'aid']
['food', 'insects', 'farmers', 'africa']
['malnutrition', 'malnourished', 'wasting', 'food']
['myanmars', 'abiy', 'food', 'rakhine']
['donated', 'japan', 'yen', 'center']
['food', 'says', 'farmers', 'rice']
['3m', 'direct', 'relief', 'providers']
['vegas', 'las', 'nevada', 'donation']
['tnh', 'aid', 'victims', 'samoa']
['yemen', 'aid', 'lowcock', 'email']
['andrés', 'food', 'says', 'kitchen']
['direct', 'icu', 'relief', 'reliefs']
['meals', 'salvation', 'food', 'hollander']
['humanitarianism', 'explore', 'genocide', 'crises']
['subsaharan', 'food', 'climate', 'africa']
['fao

['people', 'food', 'children', 'foundation']
['wfp', 'food', 'people', 'starvation']
['taliban', 'afghanistan', 'health', 'helped']
['care', 'health', 'zimbabwe', 'medical']
['food', 'africa', 'wfp', 'zimbabwe']
['crisis', 'human', 'far', 'corona']
['wfp', 'food', 'arif', 'hunger']


Processing FR ...

['iran', 'iranian', 'march', 'help']
['food', 'wfp', 'report', 'number']
['northeast', 'syria', 'supplies', 'medical']
['russia', 'russian', 'medical', 'plane']
['nigeria', 'production', 'farmers', 'food']
['idlib', 'syria', 'aid', 'displaced']
['communities', 'food', 'ensure', 'starvation']
['iraq', 'iraqis', 'food', 'iraqi']
['donations', 'blood', 'london', 'italy']
['yemen', 'aid', 'arabia', 'saudi']
['food', 'children', 'hundreds', 'supply']
['food', 'children', 'hundreds', 'supply']
['russia', 'moscow', 'medical', 'sent']
['children', 'malnutrition', 'hunger', 'food']
['yemen', 'saudi', 'aid', 'conference']
['yemen', 'food', 'report', 'grande']
['food', 'children', 'hundreds', 'supp

* US → Japan: YOSHIKI Donates 10 Million Yen To Japan's National Center For Global Health And Medicine
* US → Yemen: U.S. announces 225 million USD in emergency aid to Yemen
* US → Venezuela: The United States will be committing USD36 million in aid to Venezuela, as part of a United Nations program to provide relief to the country
* US → Syria: The United States Announces Additional Humanitarian Assistance for the Syrian People
* Taiwan → US: Taiwan Donates Over 51 Million Masks to Countries Worldwide


* UK → Vanuatu: UK emergency supplies for cyclone victims arrive in Vanuatu
* UK → Yemen: The UK pledged $196 million this year 
* UK → Bangladesh: UK announces extra £87 million funding for Rohingya crisis 
* UK → Sudan: Emergency food relief from UK Government for vulnerable households in South Sudan
* UK → Uganda: WFP In Uganda Welcomes UK Contribution For Refugees


* Germany → Zimbabwe: Amid COVID-19 crises, Germany donates to protect African countries


Mostly, News in France 'reported' humanitarian efforts rather than making humanitarian efforts:

* France → Syria: France sends medical aid to Assad's Syria after deal 
* France → Syria: France unlocked an 50 million euros for humanitarian aid to Syria, half of which will be destined for the Idlib region.

In [147]:
# Run the script for New Global Media Players

for country in ngmpCountries:
    print('\n\nProcessing',country,'...\n')
    dfCountry = df[df['country']==country] # Get the df for the country
    arr = dfCountry["text"].to_numpy() # Get the text as array
    corpus = arr.tolist() # Convert to list
    corpusLen = len(corpus) # Get length of list
    trainingLen = corpusLen * 0.2 # Train it on 20% of the text items
    trainingLen = int(trainingLen)
    trainArr = corpus[:trainingLen] # First 20% of corpus
    restArr = corpus[-(len(corpus)-trainingLen-4):] # Last 80% of corpus
    X = tfidf.fit_transform(trainArr) # Fit transform
    feature_names = np.array(tfidf.get_feature_names())
    responses = tfidf.transform(restArr) # TFIDF Transform
    resultsArr = [get_top_tf_idf_words(response,4) for response in responses]
    finalArr = [l.tolist() for l in resultsArr] # Final array output
    for item in finalArr: # Print the Results
        for subitem in item:
            if subitem in interestingKeywords:
                print(item)



Processing CN ...

['kyrgyzstan', 'medical', 'china', 'du']
['wfp', 'hunger', 'zimbabwe', 'food']
['china', 'cooperation', 'sides', 'mohammed']
['epidemic', 'treatment', 'china', 'medical']
['africa', 'kenya', 'african', 'food']
['report', 'support', 'recovery', 'roadmap']
['xi', 'chinese', 'cooperation', 'china']
['rights', 'china', 'human', 'poverty']
['unemployment', 'insurance', 'relief', 'hearing']
['verde', 'cape', 'donation', 'archipelago']
['dujarric', 'humanitarian', 'food', 'said']
['dujarric', 'humanitarian', 'food', 'said']
['cooperation', 'china', 'world', 'global']
['college', 'medical', 'wuhan', 'hospital']
['lowcock', 'fund', 'money', 'dujarric']
['china', 'solidarity', 'cooperation', 'wha']
['myanmar', '90000', 'pyi', 'donation']
['rights', 'human', 'epidemic', 'lives']
['globalization', 'outbreak', 'cooperation', 'global']
['kyrgyzstan', 'medical', 'du', 'batch']
['rights', 'human', 'law', 'china']
['zimbabwe', 'mnangagwa', 'guo', 'donation']
['ecw', 'education', 'f

* China → Kyrgyzstan: China delivers more medical supplies to Kyrgyzstan
* China → Zimbabwe: Food Assistance
* China → Cape Verde: Medical Donation
* Myanmar → China: Medical Equipment Donation
* China → Lebanon: China donates more medical supplies to Lebanon
* China → Syria: China Contributes USD2 Million To WFP's Response Inside Syria 
* US → China: Non-profit donates to China 
* China → Botswana: China continues to support Botswana


* Russia → Venezuela: Russia Sends Humanitarian Support In Medicine to Venezuela
* Russia → Yemen: Russia earmarks USD4 mln to provide food assistance to Yemen
* Russia → US: Russia sends planeload of medical supplies to US
* Russia → Kyrgyzstan: Russian military medics arrive in Kyrgyzstan


* China → Iran: China to send new humanitarian aid shipments to Iran.
* Iran → Kyrgyzstan: Iran delivers second shipment of anti-corona aid to Kyrgyzstan
* Iran → Venezuela: Venezuela says flight arrives from Iran carrying COVID-19 aid
* Japan → Iran: WFP Iran receives historical contribution from Japan
* Georgia → Iran: Georgian pharmacists dispatch humanitarian aid cargo to Iran
* Qatar → Iran: Qatar delivers 2nd consignment to Iran to fight coronavirus
* South Korea → Iran: S. Korea sends humanitarian medical aid worth USD500000 to Iran
* Turkmenistan → Iran: Turkmenistan sends humanitarian aid to neighboring Iran


* Turkey → Yemen: Turkish charity distributes food aid in Yemen
* Turkey → Iraq: Turkey sends medical aid to Iraq
* Turkey → Azerbaijan: Turkey sends Azerbaijan medical aid to fight COVID-19
* Turkey → South Africa: COVID-19: Turkish medical aid arrives in South Africa
* Turkey → Afghanistan: Turkish aid agency sends medical aid to Afghanistan
* Turkey → Chad: Turkey sends medical aid to Chad
* Turkey → Lebanon: Turkish agency distributes 400 tons of wheat in Lebanon
* Turkey → Tunisia: Turkey sends medical supplies to Tunisia amid outbreak


## Data for Visualisation

Now that we have the direction of where the aid is coming from and where is it moving, we can represent it on a graph using `AmChart's map module`. To do that, we need latitude and longitudes of origin and destination countries.

We can use `OpenStreetMap.org` to get the lat, long. To make a `GET request`, we will need to use the `request` library.

In [150]:
# Test run

address = 'Pakistan'
url = 'https://nominatim.openstreetmap.org/search/' + urllib.parse.quote(address) +'?format=json'

response = requests.get(url).json()
print(response[0]["lat"])
print(response[0]["lon"])

30.3308401
71.247499


In [164]:
# Now running with New Global Media Players

humanitarianCountries = [
    ['Turkey', 'Yemen'],
    ['Turkey', 'Iraq'],
    ['Turkey', 'Azerbaijan'],
    ['Turkey', 'South Africa'],
    ['Turkey', 'Afghanistan'],
    ['Turkey', 'Chad'],
    ['Turkey', 'Lebanon'],
    ['Turkey', 'Tunisia'],
    
    ['China', 'Iran'],
    ['Iran', 'Kyrgyzstan'],
    ['Iran', 'Venezuela'],
    ['Japan', 'Iran'],
    ['Georgia', 'Iran'],
    ['Qatar', 'Iran'],
    ['South Korea', 'Iran'],
    ['Turkmenistan', 'Iran'],
    
    
    ['China', 'Kyrgyzstan'],
    ['China', 'Zimbabwe'],
    ['China', 'Cape Verde'],
    ['Myanmar', 'China'],
    ['China', 'Lebanon'],
    ['China', 'Syria'],
    ['US', 'China'],
    ['China', 'Botswana'],
    ['Russia', 'Venezuela'],
    ['Russia', 'Yemen'],
    ['Russia', 'United States'],
    ['Russia', 'Kyrgyzstan'],
]


In [6]:
# Make a function to get Lat Long in a list

def getLatLong(address):
    url = 'https://nominatim.openstreetmap.org/search/' + urllib.parse.quote(address) +'?format=json'
    response = requests.get(url).json()
    return [response[0]["lat"], response[0]["lon"]]

In [166]:
# Test run

getLatLong('Yemen')

['16.3471243', '47.8915271']

In [167]:
# Print lat long in the right format that can be copy-pasted into AmCharts

for countries in humanitarianCountries:
    latLongA = getLatLong(countries[0])
    latLongB = getLatLong(countries[1])
    print("{ latitudes: ["+latLongA[0]+", "+latLongB[0]+"], longitudes: ["+latLongA[1]+", "+latLongB[1]+"]},")

{ latitudes: [38.9597594, 16.3471243], longitudes: [34.9249653, 47.8915271]},
{ latitudes: [38.9597594, 33.0955793], longitudes: [34.9249653, 44.1749775]},
{ latitudes: [38.9597594, 40.3936294], longitudes: [34.9249653, 47.7872508]},
{ latitudes: [38.9597594, -28.8166236], longitudes: [34.9249653, 24.991639]},
{ latitudes: [38.9597594, 33.7680065], longitudes: [34.9249653, 66.2385139]},
{ latitudes: [38.9597594, 15.6134137], longitudes: [34.9249653, 19.0156172]},
{ latitudes: [38.9597594, 33.8750629], longitudes: [34.9249653, 35.843409]},
{ latitudes: [38.9597594, 33.8439408], longitudes: [34.9249653, 9.400138]},
{ latitudes: [35.000074, 32.6475314], longitudes: [104.999927, 54.5643516]},
{ latitudes: [32.6475314, 41.5089324], longitudes: [54.5643516, 74.724091]},
{ latitudes: [32.6475314, 8.0018709], longitudes: [54.5643516, -66.1109318]},
{ latitudes: [36.5748441, 32.6475314], longitudes: [139.2394179, 54.5643516]},
{ latitudes: [32.3293809, 32.6475314], longitudes: [-83.1137366, 54.

In [168]:
# Next, we need to add markers on the map, we can do this using another loop

# Function to remove duplicates from list

def remove_duplicates(l):
    return list(set(l))

# Make a list from list of lists

allCountries = [item for sublist in humanitarianCountries for item in sublist]
uniqueCountries = remove_duplicates(allCountries)

# Print JS object using a loop

for country in uniqueCountries:
    latLong = getLatLong(country)
    print('{')
    print('svgPath: targetSVG,')
    print('title: "'+country+'",')
    print('latitude: '+latLong[0]+',')
    print('longitude: '+latLong[1]+',')
    print('scale: 0.5')
    print('},')

{
svgPath: targetSVG,
title: "Chad",
latitude: 15.6134137,
longitude: 19.0156172,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Myanmar",
latitude: 17.1750495,
longitude: 95.9999652,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Syria",
latitude: 34.6401861,
longitude: 39.0494106,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Russia",
latitude: 64.6863136,
longitude: 97.7453061,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Venezuela",
latitude: 8.0018709,
longitude: -66.1109318,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Botswana",
latitude: -23.1681782,
longitude: 24.5928742,
scale: 0.5
},
{
svgPath: targetSVG,
title: "South Korea",
latitude: 36.638392,
longitude: 127.6961188,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Qatar",
latitude: 25.3336984,
longitude: 51.2295295,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Iraq",
latitude: 33.0955793,
longitude: 44.1749775,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Cape Verde",
latitude: 16.0000552,
longitude: -24.0083947,
scale: 0.5
},
{
svgPath

In [4]:
# Doing the same for EURO ATLANTIC COUNTRIES
humanitarianCountries = [
    ['United States', 'Japan'],
    ['United States', 'Yemen'],
    ['United States', 'Venezuela'],
    ['United States', 'Syria'],
    ['Taiwan', 'United States'],
    
    ['United Kingdom', 'Vanuatu'],
    ['United Kingdom', 'Yemen'],
    ['United Kingdom', 'Bangladesh'],
    ['United Kingdom', 'Sudan'],
    ['United Kingdom', 'Uganda'],
    
    ['Germany', 'Zimbabwe'],
    ['France', 'Syria'],
]

In [9]:
for countries in humanitarianCountries:
    latLongA = getLatLong(countries[0])
    latLongB = getLatLong(countries[1])
    print("{ latitudes: ["+latLongA[0]+", "+latLongB[0]+"], longitudes: ["+latLongA[1]+", "+latLongB[1]+"]},")

{ latitudes: [39.7837304, 36.5748441], longitudes: [-100.4458825, 139.2394179]},
{ latitudes: [39.7837304, 16.3471243], longitudes: [-100.4458825, 47.8915271]},
{ latitudes: [39.7837304, 8.0018709], longitudes: [-100.4458825, -66.1109318]},
{ latitudes: [39.7837304, 34.6401861], longitudes: [-100.4458825, 39.0494106]},
{ latitudes: [23.59829785, 39.7837304], longitudes: [120.83536313817521, -100.4458825]},
{ latitudes: [54.7023545, -16.5255069], longitudes: [-3.2765753, 168.1069154]},
{ latitudes: [54.7023545, 16.3471243], longitudes: [-3.2765753, 47.8915271]},
{ latitudes: [54.7023545, 24.4768783], longitudes: [-3.2765753, 90.2932426]},
{ latitudes: [54.7023545, 14.5844444], longitudes: [-3.2765753, 29.4917691]},
{ latitudes: [54.7023545, 1.5333554], longitudes: [-3.2765753, 32.2166578]},
{ latitudes: [51.0834196, -18.4554963], longitudes: [10.4234469, 29.7468414]},
{ latitudes: [46.603354, 34.6401861], longitudes: [1.8883335, 39.0494106]},


In [10]:
def remove_duplicates(l):
    return list(set(l))

allCountries = [item for sublist in humanitarianCountries for item in sublist]
uniqueCountries = remove_duplicates(allCountries)

for country in uniqueCountries:
    latLong = getLatLong(country)
    print('{')
    print('svgPath: targetSVG,')
    print('title: "'+country+'",')
    print('latitude: '+latLong[0]+',')
    print('longitude: '+latLong[1]+',')
    print('scale: 0.5')
    print('},')

{
svgPath: targetSVG,
title: "United States",
latitude: 39.7837304,
longitude: -100.4458825,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Uganda",
latitude: 1.5333554,
longitude: 32.2166578,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Bangladesh",
latitude: 24.4768783,
longitude: 90.2932426,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Sudan",
latitude: 14.5844444,
longitude: 29.4917691,
scale: 0.5
},
{
svgPath: targetSVG,
title: "France",
latitude: 46.603354,
longitude: 1.8883335,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Venezuela",
latitude: 8.0018709,
longitude: -66.1109318,
scale: 0.5
},
{
svgPath: targetSVG,
title: "United Kingdom",
latitude: 54.7023545,
longitude: -3.2765753,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Germany",
latitude: 51.0834196,
longitude: 10.4234469,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Zimbabwe",
latitude: -18.4554963,
longitude: 29.7468414,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Taiwan",
latitude: 23.59829785,
longitude: 120.83536313817521,
s

In [13]:
# Doing the same for GULF DONORS

humanitarianCountries = [
    ['United Arab Emirates', 'Yemen'],
    ['United Arab Emirates', 'Iran'],
    ['United Arab Emirates', 'Kazakhstan'],
    ['United Arab Emirates', 'Iraq'],
    ['United Arab Emirates', 'South Africa'],
    ['United Arab Emirates', 'Sudan'],
    ['Qatar', 'Sudan'],
    ['Qatar', 'Vanuatu'],
    ['Qatar', 'Ethiopia'],
    ['Qatar', 'Rwanda'],
    ['Saudi Arabia', 'Lebanon'],
    ['Saudi Arabia', 'Palestine'],
]

In [14]:
for countries in humanitarianCountries:
    latLongA = getLatLong(countries[0])
    latLongB = getLatLong(countries[1])
    print("{ latitudes: ["+latLongA[0]+", "+latLongB[0]+"], longitudes: ["+latLongA[1]+", "+latLongB[1]+"]},")

{ latitudes: [24.0002488, 16.3471243], longitudes: [53.9994829, 47.8915271]},
{ latitudes: [24.0002488, 32.6475314], longitudes: [53.9994829, 54.5643516]},
{ latitudes: [24.0002488, 47.2286086], longitudes: [53.9994829, 65.2093197]},
{ latitudes: [24.0002488, 33.0955793], longitudes: [53.9994829, 44.1749775]},
{ latitudes: [24.0002488, -28.8166236], longitudes: [53.9994829, 24.991639]},
{ latitudes: [24.0002488, 14.5844444], longitudes: [53.9994829, 29.4917691]},
{ latitudes: [25.3336984, 14.5844444], longitudes: [51.2295295, 29.4917691]},
{ latitudes: [25.3336984, -16.5255069], longitudes: [51.2295295, 168.1069154]},
{ latitudes: [25.3336984, 10.2116702], longitudes: [51.2295295, 38.6521203]},
{ latitudes: [25.3336984, -1.9646631], longitudes: [51.2295295, 30.0644358]},
{ latitudes: [25.6242618, 33.8750629], longitudes: [42.3528328, 35.843409]},
{ latitudes: [25.6242618, 31.462420950000002], longitudes: [42.3528328, 34.27495946066872]},


In [15]:
def remove_duplicates(l):
    return list(set(l))

allCountries = [item for sublist in humanitarianCountries for item in sublist]
uniqueCountries = remove_duplicates(allCountries)

for country in uniqueCountries:
    latLong = getLatLong(country)
    print('{')
    print('svgPath: targetSVG,')
    print('title: "'+country+'",')
    print('latitude: '+latLong[0]+',')
    print('longitude: '+latLong[1]+',')
    print('scale: 0.5')
    print('},')

{
svgPath: targetSVG,
title: "Rwanda",
latitude: -1.9646631,
longitude: 30.0644358,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Vanuatu",
latitude: -16.5255069,
longitude: 168.1069154,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Iraq",
latitude: 33.0955793,
longitude: 44.1749775,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Lebanon",
latitude: 33.8750629,
longitude: 35.843409,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Sudan",
latitude: 14.5844444,
longitude: 29.4917691,
scale: 0.5
},
{
svgPath: targetSVG,
title: "United Arab Emirates",
latitude: 24.0002488,
longitude: 53.9994829,
scale: 0.5
},
{
svgPath: targetSVG,
title: "South Africa",
latitude: -28.8166236,
longitude: 24.991639,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Ethiopia",
latitude: 10.2116702,
longitude: 38.6521203,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Saudi Arabia",
latitude: 25.6242618,
longitude: 42.3528328,
scale: 0.5
},
{
svgPath: targetSVG,
title: "Palestine",
latitude: 31.462420950000002,
longitude: 34.2749

## Conclusion

The insights from the TF-IDF into what the articles were talking about was generally better than topic modelling for our use case, mainly because we mapped just a few topics from topic modelling exercise. We were able to filter out interesting keywords and get the data on direction of aid ready for visualisation.