In [31]:
import re
import random
import numpy as np
import shapefile as spf
from shapely.geometry import shape, Point

In [32]:
# Turn off Jedi for the IPython tab-completer in this session.
%config Completer.use_jedi = False

In [33]:
def get_rcoordinates(country, path='World_Countries.shp'):
    """Return one representative coordinate pair for a country in a shapefile.

    The shapefile's attribute records are scanned in file order and the first
    record having any attribute whose string form contains ``country`` is
    selected. The pair returned is the first vertex of that record's geometry
    (first polygon, first ring, first point).

    Parameters
    ----------
    country : str
        Literal, case-sensitive text to look for (substring match). It is NOT
        interpreted as a regular expression, so names containing characters
        such as ``(``, ``.`` or ``+`` are matched verbatim.
    path : str, optional
        Path of the shapefile to read.

    Returns
    -------
    list or None
        ``[x, y]`` of the first vertex. For a shapefile in geographic
        coordinates this is ``[longitude, latitude]`` (x first).
        ``None`` when ``country`` is not a non-blank string (e.g. NaN coming
        from pandas), when no record matches, or when the matched shape has
        no coordinates.
    """
    # NaN / None / blank queries can never name a country; bail out before
    # touching the file.
    if not isinstance(country, str) or not country.strip():
        return None

    # The context manager guarantees the underlying file handles are closed.
    with spf.Reader(path) as reader:
        # enumerate() yields the record's position directly, so there is no
        # need to parse an index back out of the record's printed form.
        match_index = next(
            (index
             for index, record in enumerate(reader.iterRecords())
             if any(country in str(value) for value in record)),
            None,
        )
        if match_index is None:
            return None

        # Read only the matching geometry instead of loading every shape.
        geometry = reader.shape(match_index).__geo_interface__ or {}

    # Descend through the nesting (multi-polygon -> polygon -> ring) until the
    # innermost sequence, whose items are numbers rather than sequences.
    node = geometry.get('coordinates') or ()
    while node and isinstance(node[0], (list, tuple)):
        node = node[0]

    # Empty geometry, or something that is not at least an (x, y) pair.
    if len(node) < 2:
        return None

    # Keep x and y only; any trailing z / m value is dropped.
    return [node[0], node[1]]

In [34]:
# Probe the lookup with a name that is presumably absent from the shapefile;
# get_rcoordinates returns None when no record matches.
tmp = get_rcoordinates('Ellala')

In [35]:
# Show the probe result (a bare None produces no cell output).
tmp

In [36]:
import pandas as pd

In [37]:
# Load the relationship edge list; the first line of the CSV is the header row.
edges = pd.read_csv('../data/relationshipsmmp.csv', header=0)

In [38]:
# Preview the first five rows of the edge list.
edges.head(5)

Unnamed: 0,link_id,type,group1_id,group2_id,description,group1_name,group2_name,year,multiple,map_name,primary
0,1633.0,Rivals,61.0,113.0,Hizbul Islam fighters attacked the Somali bord...,Al Shabaab,Hizbul Islam,2010.0,0.0,Somalia,Somalia
1,1635.0,Allies,61.0,113.0,Hizbul Islam and Al Shabaab conducted a joint ...,Al Shabaab,Hizbul Islam,2009.0,0.0,Somalia,Somalia
2,1475.0,Allies,5.0,3.0,Although the Mujahideen Army and the Islamic A...,Islamic Army in Iraq,Mujahideen Army,2005.0,1.0,Global Al Qaeda,Iraq
3,1431.0,Allies,5.0,3.0,The Islamic Army of Iraq claimed that it and t...,Islamic Army in Iraq,Mujahideen Army,2014.0,1.0,Global Al Qaeda,Iraq
4,1475.0,Allies,5.0,3.0,Although the Mujahideen Army and the Islamic A...,Islamic Army in Iraq,Mujahideen Army,2005.0,1.0,Iraq,Iraq


In [39]:
# Look up a coordinate pair for every distinct value of the map_name column.
maps = edges.map_name.unique()
maps_coords = {}     # map name -> result of get_rcoordinates
missing_maps = []    # names for which get_rcoordinates returned None
complete_maps = []   # names for which a result was stored in maps_coords
for map_name in maps:
    coordinates = get_rcoordinates(map_name)
    if coordinates is None:
        missing_maps.append(map_name)
        continue
    maps_coords[map_name] = coordinates
    complete_maps.append(map_name)

In [40]:
# Map names for which get_rcoordinates returned None.
missing_maps

['Global Al Qaeda',
 'Global Islamic State',
 'Kurdistan',
 'North Africa',
 'Pakistan -- All',
 'Northern Ireland',
 'North Caucasus',
 'Aleppo',
 nan,
 'Global Right-Wing Extremism']

In [41]:
# Map names that were resolved to a coordinate pair.
complete_maps

['Somalia',
 'Iraq',
 'Syria',
 'Colombia',
 'Pakistan',
 'Italy',
 'Germany',
 'Philippines',
 'Sri Lanka',
 'Mexico']

In [42]:
# For now, give every unresolved map name the coordinates of a randomly chosen
# resolved one. These are placeholders, not real geographic assignments.
# A fixed seed keeps the placeholders (and the CSV written later) identical
# across "Restart Kernel -> Run All".
PLACEHOLDER_SEED = 0
placeholder_rng = np.random.default_rng(PLACEHOLDER_SEED)

for imn in missing_maps:
    donor = placeholder_rng.choice(complete_maps)
    # Every name in complete_maps already has its coordinates in maps_coords,
    # so reuse them instead of re-reading the shapefile; copy the list so the
    # placeholder entry does not share one list object with its donor.
    maps_coords[imn] = list(maps_coords[donor])

# TODO: replace the random placeholders with a curated mapping, e.g.:
# maps_coords['Global Al Qaeda'] = get_rcoordinates('Iraq')
# maps_coords['Global Islamic State'] = get_rcoordinates('Pakistan')
# maps_coords['Kurdistan'] = get_rcoordinates('Iraq')
# maps_coords['North Africa'] = get_rcoordinates('Africa')
# maps_coords['Pakistan -- All'] = get_rcoordinates('Pakistan')
# maps_coords['Northern Ireland'] = get_rcoordinates('Ireland')
# maps_coords['North Caucasus'] = get_rcoordinates('Iraq')
# maps_coords['Aleppo'] = get_rcoordinates('Iraq')
# maps_coords['Global Right-Wing Extremism'] = get_rcoordinates('Iraq')

In [43]:
# Inspect the name -> coordinate mapping, placeholder entries included.
maps_coords

{'Somalia': [42.073883056640625, 4.176146507263184],
 'Iraq': [39.19674301147461, 32.15494155883789],
 'Syria': [35.66961669921875, 33.25171661376953],
 'Colombia': [-81.71305847167969, 12.490276336669922],
 'Pakistan': [63.230438232421875, 29.473697662353516],
 'Italy': [12.75357437133789, 43.97100067138672],
 'Germany': [7.369014739990234, 49.16877746582031],
 'Philippines': [121.51332092285156, 19.249160766601562],
 'Sri Lanka': [81.78526306152344, 6.67249870300293],
 'Mexico': [-97.77687072753906, 22.268054962158203],
 'Global Al Qaeda': [-81.71305847167969, 12.490276336669922],
 'Global Islamic State': [12.75357437133789, 43.97100067138672],
 'Kurdistan': [42.073883056640625, 4.176146507263184],
 'North Africa': [35.66961669921875, 33.25171661376953],
 'Pakistan -- All': [-97.77687072753906, 22.268054962158203],
 'Northern Ireland': [39.19674301147461, 32.15494155883789],
 'North Caucasus': [-81.71305847167969, 12.490276336669922],
 'Aleppo': [81.78526306152344, 6.67249870300293],

In [44]:
# Each value in maps_coords is an [x, y] pair taken from the shape's
# coordinates, i.e. longitude first and latitude second (e.g. Somalia is
# stored as [42.07, 4.18]). Label the values in that order, then reorder the
# columns so the table still exposes 'lat' followed by 'long'.
maps_coord = pd.DataFrame.from_dict(
    maps_coords, orient='index', columns=['long', 'lat']
)[['lat', 'long']]

In [45]:
# Persist the table; the map names (the frame's index) become the first CSV column.
maps_coord.to_csv('../data/maps_coord.csv')