In [1]:
import re
import random
import numpy as np
import shapefile as spf
from shapely.geometry import shape, Point

In [2]:
# Switch off Jedi-based completion in the IPython completer for this session.
%config Completer.use_jedi = False

In [3]:
def _first_vertex(coordinates):
    """Return the first ``[x, y]`` pair found by following index 0 of a nested sequence.

    Returns None when the structure is empty, is not a list/tuple, or the
    innermost sequence holds fewer than two numbers.
    """
    node = coordinates
    # Restricting the descent to lists/tuples guarantees termination
    # (a str, for example, would index into itself forever).
    while isinstance(node, (list, tuple)) and len(node) > 0:
        if isinstance(node[0], (int, float)):
            return [node[0], node[1]] if len(node) >= 2 else None
        node = node[0]
    return None


def get_rcoordinates(country, path='World_Countries.shp'):
    """Return one representative ``[x, y]`` vertex for a country in a shapefile.

    The records of the shapefile at ``path`` are scanned in order and the
    first one whose string representation contains ``country`` is selected.
    The first vertex of that record's geometry is returned.

    Parameters
    ----------
    country : str
        Text to look for. It is matched literally (no regex interpretation)
        and case-sensitively against ``str(record)``, so it may match any
        attribute field of a record, not only a name field.
    path : str, optional
        Path of the shapefile to read.

    Returns
    -------
    list or None
        ``[x, y]`` in the order the geometry stores it (x first), or None
        when no record matches or the matched shape has no usable
        coordinates.
    """
    # The context manager closes the underlying file handles on exit.
    with spf.Reader(path) as reader:
        # Use the positional index of the record rather than parsing a number
        # out of its repr, and stop at the first hit.
        match_index = next(
            (
                index
                for index, record in enumerate(reader.iterRecords())
                if country in str(record)
            ),
            None,
        )
        if match_index is None:
            return None

        # Read only the matching shape instead of materialising every geometry.
        geometry = reader.shape(match_index).__geo_interface__ or {}

    return _first_vertex(geometry.get('coordinates', ()))

In [4]:
import pandas as pd

In [5]:
# Load the node table; header=0 takes the column names from the first CSV row.
nodes = pd.read_csv('../data/nodes_sample.csv', header=0)

In [6]:
# Preview the first five rows of the node table.
nodes.head(5)

Unnamed: 0,group_id,group_name,startyear,endyear,active,complete,description,new_description,on_any_map,map_name,...,hq_country,init_size_members,max_size_members,avg_size_members,us_designated,un_designated,other_designated,state_sponsor,state_sponsor_names,Notes
0,1.0,The Islamic State,2002.0,0.0,1.0,1.0,The Islamic State (IS) also known as the Islam...,,1.0,Global Al Qaeda,...,Iraq,500.0,30000.0,1.0,1.0,0.0,0.0,0,,
1,3.0,Mujahideen Army,2004.0,0.0,1.0,1.0,The Mujahideen Army (MA) is a Sunni Iraqi mili...,,1.0,Global Al Qaeda,...,Iraq,,,0.0,0.0,0.0,0.0,0,No size estimates,
2,5.0,Islamic Army in Iraq,2003.0,0.0,1.0,1.0,The Islamic Army in Iraq (IAI) is a Sunni Isla...,,1.0,Global Al Qaeda,...,Iraq,10400.0,10400.0,0.0,0.0,0.0,0.0,0,,
3,13.0,Ansar al-Islam,2001.0,2014.0,0.0,1.0,Ansar al-Islam (AI) is a predominantly Kurdish...,,1.0,Global Al Qaeda,...,Iraq,350.0,1000.0,1.0,1.0,1.0,1.0,0,"Australia, New Zealand, Canada, and the EU; Iran",
4,19.0,Al Qaeda in the Arabian Peninsula,2009.0,0.0,1.0,1.0,Al Qaeda in the Arabian Peninsula (AQAP) is a ...,,1.0,Global Al Qaeda,...,Yemen,100.0,3000.0,1.0,1.0,1.0,0.0,0,"Saudi Arabia, UAE",


In [7]:
# Distinct headquarters countries with missing values removed, renumbered
# from zero. reset_index() turns the unnamed series into a two-column frame
# whose second column (labelled 0) holds the country names.
maps = (
    pd.Series(nodes.hq_country.unique())
    .dropna()
    .reset_index()
    .iloc[:, 1]
)
maps

0               Iraq
1              Yemen
2        Afghanistan
3            Somalia
4            Algeria
5           Pakistan
6            Morocco
7        Philippines
8          Indonesia
9              Egypt
10             Syria
11           Nigeria
12              Mali
13            Russia
14           Germany
15     United States
16            Sweden
17           Ukraine
18    United Kingdom
19           Estonia
Name: 0, dtype: object

In [8]:
# Distinct, non-null headquarters countries in order of first appearance.
maps = pd.Series(nodes.hq_country.dropna().unique())

maps_coords = {}    # country name -> [x, y] as returned by get_rcoordinates
missing_maps = []   # names for which get_rcoordinates returned None
complete_maps = []  # names that were resolved to coordinates

# Iterate over the values directly rather than looking each one up by label.
for map_name in maps:
    coordinates = get_rcoordinates(map_name)
    if coordinates is None:
        missing_maps.append(map_name)
        continue
    maps_coords[map_name] = coordinates
    complete_maps.append(map_name)

In [9]:
# Names for which get_rcoordinates returned None (an empty list means every name was resolved).
missing_maps

[]

In [10]:
# Names for which get_rcoordinates returned coordinates.
complete_maps

['Iraq',
 'Yemen',
 'Afghanistan',
 'Somalia',
 'Algeria',
 'Pakistan',
 'Morocco',
 'Philippines',
 'Indonesia',
 'Egypt',
 'Syria',
 'Nigeria',
 'Mali',
 'Russia',
 'Germany',
 'United States',
 'Sweden',
 'Ukraine',
 'United Kingdom',
 'Estonia']

In [11]:
# For now, fill in invalid map_name with random valid location name.
# A dedicated generator with a fixed seed makes the placeholder assignment
# identical every time this cell is re-run.
fallback_rng = random.Random(0)

if missing_maps and not complete_maps:
    raise ValueError('no resolved country is available to use as a fallback location')

for imn in missing_maps:
    fallback_name = fallback_rng.choice(complete_maps)
    # Reuse the coordinates already stored for the chosen country instead of
    # reading the shapefile again; copy so two entries never share one list.
    maps_coords[imn] = list(maps_coords[fallback_name])

# Disabled manual assignments (not executed):
# maps_coords['Global Al Qaeda'] = get_rcoordinates('Iraq')
# maps_coords['Global Islamic State'] = get_rcoordinates('Pakistan')
# maps_coords['Kurdistan'] = get_rcoordinates('Iraq')
# maps_coords['North Africa'] = get_rcoordinates('Africa')
# maps_coords['Pakistan -- All'] = get_rcoordinates('Pakistan')
# maps_coords['Northern Ireland'] = get_rcoordinates('Ireland')
# maps_coords['North Caucasus'] = get_rcoordinates('Iraq')
# maps_coords['Aleppo'] = get_rcoordinates('Iraq')
# maps_coords['Global Right-Wing Extremism'] = get_rcoordinates('Iraq')

In [16]:
# Show the name -> [x, y] mapping built above.
maps_coords

{'Iraq': [39.19674301147461, 32.15494155883789],
 'Yemen': [48.6863899230957, 14.03749942779541],
 'Afghanistan': [61.27655792236328, 35.60724639892578],
 'Somalia': [42.073883056640625, 4.176146507263184],
 'Algeria': [-5.152134895324707, 30.180469512939453],
 'Pakistan': [63.230438232421875, 29.473697662353516],
 'Morocco': [-3.3861136436462402, 31.726106643676758],
 'Philippines': [121.51332092285156, 19.249160766601562],
 'Indonesia': [123.21846199035645, -10.809165954589787],
 'Egypt': [32.649024963378906, 29.78835678100586],
 'Syria': [35.66961669921875, 33.25171661376953],
 'Nigeria': [11.799439430236816, 7.296663284301758],
 'Mali': [-9.865219116210938, 12.054810523986816],
 'Russia': [58.061378479003906, 81.68775939941406],
 'Germany': [7.369014739990234, 49.16877746582031],
 'United States': [-134.97500610351562, 58.64582824707031],
 'Sweden': [11.769547462463379, 59.21753692626953],
 'Ukraine': [31.852527618408203, 46.522674560546875],
 'United Kingdom': [-0.8350000381469727

In [17]:
# Each value in maps_coords is an [x, y] pair, i.e. longitude first (for
# example the first entry for 'United States' is about -135, which cannot be
# a latitude). Label the values in that order, then select the columns as
# lat, long so the column layout written downstream stays the same.
maps_coord = pd.DataFrame.from_dict(
    maps_coords, orient='index', columns=['long', 'lat']
)[['lat', 'long']]

In [19]:
# Write the coordinate table to disk; the country names form the index column.
maps_coord.to_csv('../data/maps_coord.csv')