In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from geopy.geocoders import Nominatim

In [2]:
# read in sample drought data
data = pd.read_csv('resources/test_spi_date.csv')
data

Unnamed: 0.1,Unnamed: 0,lat,lon,time,spi12
0,745,-55.5,-70.5,2012-01-16,-1.013668
1,746,-55.5,-70.5,2012-02-15,-1.142956
2,747,-55.5,-70.5,2012-03-16,-0.407607
3,748,-55.5,-70.5,2012-04-16,-0.726904
4,749,-55.5,-70.5,2012-05-16,-0.798656
...,...,...,...,...,...
206317,13015466,83.5,-25.5,2012-08-16,-0.060361
206318,13015467,83.5,-25.5,2012-09-16,-0.055499
206319,13015468,83.5,-25.5,2012-10-16,-0.056157
206320,13015469,83.5,-25.5,2012-11-16,-0.056157


In [3]:
# drop nuisance column
data = data.drop(['Unnamed: 0'], axis=1)

In [4]:
# convert lat, lon from float to string
data.lat = [str(x) for x in data.lat]
data.lon = [str(x) for x in data.lon]

In [5]:
# use concat method to create unique lat, lon string pairing
data['coor1'] = data['lat'].str.cat(data['lon'], sep=' ')

# use numpy .values method to create lat, lon pairing as a list
data['coor2'] = data[['lat','lon']].values.tolist()

In [6]:
data

Unnamed: 0,lat,lon,time,spi12,coor1,coor2
0,-55.5,-70.5,2012-01-16,-1.013668,-55.5 -70.5,"[-55.5, -70.5]"
1,-55.5,-70.5,2012-02-15,-1.142956,-55.5 -70.5,"[-55.5, -70.5]"
2,-55.5,-70.5,2012-03-16,-0.407607,-55.5 -70.5,"[-55.5, -70.5]"
3,-55.5,-70.5,2012-04-16,-0.726904,-55.5 -70.5,"[-55.5, -70.5]"
4,-55.5,-70.5,2012-05-16,-0.798656,-55.5 -70.5,"[-55.5, -70.5]"
...,...,...,...,...,...,...
206317,83.5,-25.5,2012-08-16,-0.060361,83.5 -25.5,"[83.5, -25.5]"
206318,83.5,-25.5,2012-09-16,-0.055499,83.5 -25.5,"[83.5, -25.5]"
206319,83.5,-25.5,2012-10-16,-0.056157,83.5 -25.5,"[83.5, -25.5]"
206320,83.5,-25.5,2012-11-16,-0.056157,83.5 -25.5,"[83.5, -25.5]"


In [7]:
# create list of coordinates
coordinates = data.coor1.unique().tolist()

# create list of lat and lng combinations
lat_lngs = [x.split() for x in coordinates]

In [8]:
print(lat_lngs)

[['-55.5', '-70.5'], ['-55.5', '-69.5'], ['-55.5', '-68.5'], ['-55.5', '-67.5'], ['-54.5', '-72.5'], ['-54.5', '-71.5'], ['-54.5', '-70.5'], ['-54.5', '-69.5'], ['-54.5', '-68.5'], ['-54.5', '-67.5'], ['-54.5', '-66.5'], ['-54.5', '-65.5'], ['-54.5', '-64.5'], ['-54.5', '-37.5'], ['-54.5', '158.5'], ['-54.5', '159.5'], ['-53.5', '-73.5'], ['-53.5', '-72.5'], ['-53.5', '-71.5'], ['-53.5', '-70.5'], ['-53.5', '-69.5'], ['-53.5', '-68.5'], ['-53.5', '73.5'], ['-52.5', '-74.5'], ['-52.5', '-73.5'], ['-52.5', '-72.5'], ['-52.5', '-71.5'], ['-52.5', '-70.5'], ['-52.5', '-69.5'], ['-52.5', '-68.5'], ['-52.5', '-60.5'], ['-52.5', '-59.5'], ['-51.5', '-74.5'], ['-51.5', '-73.5'], ['-51.5', '-72.5'], ['-51.5', '-71.5'], ['-51.5', '-70.5'], ['-51.5', '-69.5'], ['-51.5', '-60.5'], ['-51.5', '-59.5'], ['-50.5', '-75.5'], ['-50.5', '-74.5'], ['-50.5', '-73.5'], ['-50.5', '-72.5'], ['-50.5', '-71.5'], ['-50.5', '-70.5'], ['-50.5', '-69.5'], ['-50.5', '-68.5'], ['-49.5', '-75.5'], ['-49.5', '-74.5'], 

In [9]:
# initialize Nominatim API with the geoapiExercises parameter
geolocator = Nominatim(user_agent="geoapiExercises")

In [10]:
# assign lat and long into a geolocator.reverse() method
lat = '-47.5'
lon = '-67.5'

location = geolocator.reverse(lat+","+lon)
print(location)

Deseado, Santa Cruz, Argentina


In [11]:
# get information from the given list and parsed into a dict with raw function()
address = location.raw['address']
print(address)

{'state_district': 'Deseado', 'state': 'Santa Cruz', 'country': 'Argentina', 'country_code': 'ar'}


In [12]:
# traverse names for country name
country = address.get('country', '')
print(country)

Argentina


In [14]:
# full implementation using a for loop and exception handler
coor1_new = []
countries = []

for lat_lng in lat_lngs:
    lat = lat_lng[0]
    lon = lat_lng[1]
    location = geolocator.reverse(lat+","+lon)
    
    if location is not None:
        address = location.raw['address']
        country = address.get('country', '')
        coor1_new.append(lat_lng)
        countries.append(country)
    else:
        print(f'{lat_lng}: None')

['-55.5', '-70.5']: None
['-54.5', '-64.5']: None
['-54.5', '-37.5']: None
['-54.5', '158.5']: None
['-54.5', '159.5']: None
['-53.5', '73.5']: None
['-52.5', '-60.5']: None
['-50.5', '-68.5']: None
['-43.5', '-176.5']: None
['-43.5', '-64.5']: None
['-40.5', '145.5']: None
['-40.5', '147.5']: None
['-39.5', '-61.5']: None
['-39.5', '173.5']: None
['-39.5', '177.5']: None
['-38.5', '144.5']: None
['-38.5', '147.5']: None
['-37.5', '150.5']: None
['-36.5', '137.5']: None
['-36.5', '139.5']: None
['-36.5', '150.5']: None
['-35.5', '117.5']: None
['-34.5', '22.5']: None
['-34.5', '23.5']: None
['-34.5', '24.5']: None
['-34.5', '25.5']: None
['-34.5', '122.5']: None
['-34.5', '151.5']: None
['-33.5', '124.5']: None
['-32.5', '126.5']: None
['-32.5', '127.5']: None
['-32.5', '132.5']: None
['-31.5', '153.5']: None
['-31.5', '159.5']: None
['-30.5', '153.5']: None
['-26.5', '153.5']: None
['-25.5', '14.5']: None
['-25.5', '33.5']: None
['-25.5', '46.5']: None
['-25.5', '153.5']: None
['-24.5

GeocoderUnavailable: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /reverse?lat=49.5&lon=-0.5&format=json&addressdetails=1 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Read timed out. (read timeout=1)"))

In [15]:
print(countries)

['Chile', 'Chile', 'Chile', 'Chile', 'Chile', 'Chile', 'Chile', 'Argentina', 'Argentina', 'Argentina', 'Argentina', 'Chile', 'Chile', 'Chile', 'Chile', 'Chile', 'Argentina', 'Chile', 'Chile', 'Chile', 'Chile', 'Chile', 'Chile', 'Argentina', 'Falkland Islands', 'Chile', 'Chile', 'Chile', 'Argentina', 'Argentina', 'Argentina', 'Falkland Islands', 'Falkland Islands', 'Chile', 'Chile', 'Chile', 'Argentina', 'Argentina', 'Argentina', 'Argentina', 'Chile', 'Chile', 'Chile', 'Argentina', 'Argentina', 'Argentina', 'Argentina', 'Argentina', 'Argentina', 'France', 'France', 'France', 'Chile', 'Chile', 'Chile', 'Chile', 'Argentina', 'Argentina', 'Argentina', 'Argentina', 'Argentina', 'Argentina', 'Chile', 'Chile', 'Chile', 'Argentina', 'Argentina', 'Argentina', 'Argentina', 'Argentina', 'Argentina', 'Argentina', 'Chile', 'Chile', 'Chile', 'Chile', 'Argentina', 'Argentina', 'Argentina', 'Argentina', 'Argentina', 'New Zealand / Aotearoa', 'New Zealand / Aotearoa', 'New Zealand / Aotearoa', 'Chile',

In [16]:
len(countries)

10147

In [17]:
len(coor1_new)

10147

In [19]:
print(len(lat_lngs) - len(coor1_new))

7047


In [20]:
# create new dataframe to merge country labels with
country_df = pd.DataFrame({'coor1': coor1_new, 'country': countries})
country_df

Unnamed: 0,coor1,country
0,"[-55.5, -69.5]",Chile
1,"[-55.5, -68.5]",Chile
2,"[-55.5, -67.5]",Chile
3,"[-54.5, -72.5]",Chile
4,"[-54.5, -71.5]",Chile
...,...,...
10142,"[49.5, -56.5]",Canada
10143,"[49.5, -55.5]",Canada
10144,"[49.5, -54.5]",Canada
10145,"[49.5, -2.5]",Guernsey


In [21]:
# list comprehension to apply join method to coor1 column
country_df.coor1 = [" ".join(coor) for coor in country_df.coor1]
country_df

Unnamed: 0,coor1,country
0,-55.5 -69.5,Chile
1,-55.5 -68.5,Chile
2,-55.5 -67.5,Chile
3,-54.5 -72.5,Chile
4,-54.5 -71.5,Chile
...,...,...
10142,49.5 -56.5,Canada
10143,49.5 -55.5,Canada
10144,49.5 -54.5,Canada
10145,49.5 -2.5,Guernsey


In [22]:
data

Unnamed: 0,lat,lon,time,spi12,coor1,coor2
0,-55.5,-70.5,2012-01-16,-1.013668,-55.5 -70.5,"[-55.5, -70.5]"
1,-55.5,-70.5,2012-02-15,-1.142956,-55.5 -70.5,"[-55.5, -70.5]"
2,-55.5,-70.5,2012-03-16,-0.407607,-55.5 -70.5,"[-55.5, -70.5]"
3,-55.5,-70.5,2012-04-16,-0.726904,-55.5 -70.5,"[-55.5, -70.5]"
4,-55.5,-70.5,2012-05-16,-0.798656,-55.5 -70.5,"[-55.5, -70.5]"
...,...,...,...,...,...,...
206317,83.5,-25.5,2012-08-16,-0.060361,83.5 -25.5,"[83.5, -25.5]"
206318,83.5,-25.5,2012-09-16,-0.055499,83.5 -25.5,"[83.5, -25.5]"
206319,83.5,-25.5,2012-10-16,-0.056157,83.5 -25.5,"[83.5, -25.5]"
206320,83.5,-25.5,2012-11-16,-0.056157,83.5 -25.5,"[83.5, -25.5]"


In [23]:
# merge country information and original data
data_merge = pd.merge(data, country_df, how='inner', on='coor1')
data_merge

Unnamed: 0,lat,lon,time,spi12,coor1,coor2,country
0,-55.5,-69.5,2012-01-16,-2.178343,-55.5 -69.5,"[-55.5, -69.5]",Chile
1,-55.5,-69.5,2012-02-15,-2.066250,-55.5 -69.5,"[-55.5, -69.5]",Chile
2,-55.5,-69.5,2012-03-16,-1.502927,-55.5 -69.5,"[-55.5, -69.5]",Chile
3,-55.5,-69.5,2012-04-16,-1.350606,-55.5 -69.5,"[-55.5, -69.5]",Chile
4,-55.5,-69.5,2012-05-16,-1.475947,-55.5 -69.5,"[-55.5, -69.5]",Chile
...,...,...,...,...,...,...,...
121759,49.5,-1.5,2012-08-16,-0.288657,49.5 -1.5,"[49.5, -1.5]",France
121760,49.5,-1.5,2012-09-16,-0.227863,49.5 -1.5,"[49.5, -1.5]",France
121761,49.5,-1.5,2012-10-16,0.738760,49.5 -1.5,"[49.5, -1.5]",France
121762,49.5,-1.5,2012-11-16,1.282952,49.5 -1.5,"[49.5, -1.5]",France


In [25]:
# drop the coor2 column
data_merge = data_merge.drop('coor2', axis=1)

In [26]:
# export to CSV for testing
data_merge.to_csv('resources/dirty_countries.csv', index=False, header=True)