# Load CSV file into memory

In [1]:
import pandas as pd
import requests
import json
import time
from pandas.io.json import json_normalize
 
# Read the country table and keep only the name and coordinate columns.
df = pd.read_csv('Global_country_populations_2013.csv')[
    ['CountryName', 'lat', 'lon']
]
df.head()

Unnamed: 0,CountryName,lat,lon
0,Aruba,12.52368,-70.03487
1,Andorra,42.5592,1.5702
2,Afghanistan,34.53156,69.12535
3,Angola,-12.3329,16.8309
4,Albania,41.32233,19.82516


# Define a function that calls the LocationIQ API to reverse-geocode a latitude/longitude pair

In [2]:
def get_reverse_geocode_data(row, key, pause=0.5, timeout=10):
    """Reverse-geocode one row's coordinates with the LocationIQ API.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide 'lat' and 'lon' entries; each is converted with str().
    key : str
        LocationIQ API key, sent as the ``key`` query parameter.
    pause : float, optional
        Seconds to sleep after the response has been decoded (default 0.5),
        so consecutive calls are spaced out when applied row by row.
    timeout : float, optional
        Seconds to wait for the server before requests gives up (default 10).
        Without a timeout a stalled connection would block indefinitely.

    Returns
    -------
    The decoded JSON body of the response. The HTTP status code is not
    checked, so an error payload sent by the API is returned as-is.

    Raises
    ------
    requests.exceptions.RequestException
        On connection failure or timeout.
    ValueError
        If the response body is not valid JSON.
    """
    query = {
        'key': key,
        'lat': str(row['lat']),
        'lon': str(row['lon']),
        'format': 'json',
    }
    # Passing the values via `params` lets requests URL-encode them instead of
    # splicing raw strings into the URL.
    response = requests.get(
        'https://us1.locationiq.org/v1/reverse.php',
        params=query,
        timeout=timeout,
    )
    response_json = response.json()
    time.sleep(pause)
    return response_json

# Call get_reverse_geocode_data for each row of df, passing the API key

In [11]:
# Read the API key from config/secret.json (expects a top-level 'key' entry).
# The `with` block closes the file on exit, so no explicit close() is needed.
with open("config/secret.json", "r") as read_file:
    data = json.load(read_file)
    key = data['key']

# axis=1 hands each row to the function; `key` is forwarded as its second
# argument. One API call is made per row.
df['API_response'] = df.apply(get_reverse_geocode_data, args=(key,), axis=1)
df['API_response'].head()


0    {'place_id': '179547064', 'licence': '© Locati...
1    {'place_id': '102241428', 'licence': '© Locati...
2    {'place_id': '79678403', 'licence': '© Locatio...
3    {'place_id': '119510435', 'licence': '© Locati...
4    {'place_id': '65011078', 'licence': '© Locatio...
Name: API_response, dtype: object

# Normalize or flatten the JSON response and add unique identifiers.

In [14]:
# Flatten the per-row JSON responses into one column per field.
new_df = json_normalize(df['API_response'])
# .copy() makes the column subset an independent frame, so adding columns to
# new_df later does not raise SettingWithCopyWarning.
new_df = new_df[['lat','lon','display_name']].copy()
new_df[0:10]

Unnamed: 0,lat,lon,display_name
0,12.5235876,-70.0342341218734,"University Of Aruba. UA, Caya Ernesto O. Petr..."
1,42.556065,1.5716344,"CS-340, Ordino, AD300, Andorra"
2,34.5297584,69.1233639,"Kārte Parwān, Kabul, 0093, Afghanistan"
3,-12.3641268,16.8400437,"Kuito, Bié, Angola"
4,41.3221849,19.825391,"Tobacco Station, Rruga e Elbasanit, Lapraka, T..."
5,24.4747961,54.3705762,"Abu Dhabi, 2157, United Arab Emirates"
6,-34.60881915,-58.3737098017414,"Cabildo, 65, Bolívar, Microcentro, Comuna 1, M..."
7,40.17387085,44.514053861989,Pedagogical University after Khachatur Abovyan...
8,-14.2710076,-170.6968418,"Route 001, Anua, Ma'oputasi County, Eastern Di..."
9,17.0843995,-61.8108206,"Sea View Farm, ANU, Antigua and Barbuda"


Add a unique identifier to each row.

In [13]:
import uuid

# Generate one uuid1 value per row and attach them as the 'id' column.
new_df['id'] = pd.Series(
    [uuid.uuid1() for _ in range(len(new_df))]
)
new_df.head(10)

Unnamed: 0,lat,lon,display_name,id
0,12.5235876,-70.0342341218734,"University Of Aruba. UA, Caya Ernesto O. Petr...",d36a38e8-807a-11e8-b743-3c15c2e27af4
1,42.556065,1.5716344,"CS-340, Ordino, AD300, Andorra",d36b863a-807a-11e8-b596-3c15c2e27af4
2,34.5297584,69.1233639,"Kārte Parwān, Kabul, 0093, Afghanistan",d36b8fd8-807a-11e8-a5ce-3c15c2e27af4
3,-12.3641268,16.8400437,"Kuito, Bié, Angola",d36b900a-807a-11e8-8537-3c15c2e27af4
4,41.3221849,19.825391,"Tobacco Station, Rruga e Elbasanit, Lapraka, T...",d36b903a-807a-11e8-a410-3c15c2e27af4
5,24.4747961,54.3705762,"Abu Dhabi, 2157, United Arab Emirates",d36b9064-807a-11e8-b14e-3c15c2e27af4
6,-34.60881915,-58.3737098017414,"Cabildo, 65, Bolívar, Microcentro, Comuna 1, M...",d36b908c-807a-11e8-adcb-3c15c2e27af4
7,40.17387085,44.514053861989,Pedagogical University after Khachatur Abovyan...,d36b90be-807a-11e8-a3b9-3c15c2e27af4
8,-14.2710076,-170.6968418,"Route 001, Anua, Ma'oputasi County, Eastern Di...",d36b91c2-807a-11e8-9a0e-3c15c2e27af4
9,17.0843995,-61.8108206,"Sea View Farm, ANU, Antigua and Barbuda",d36b921c-807a-11e8-a00e-3c15c2e27af4


# Generate CSV file and upload to S3 bucket

In [None]:
import os

import tinys3

# NOTE(review): `file_name` was used but not defined in the visible notebook
# cells; it is defined here so the cell runs on a fresh kernel. Adjust as needed.
file_name = 'reverse_geocode_results.csv'
new_df.to_csv(path_or_buf=file_name, index=False)

# Placeholder values: fill these in locally and do not commit real credentials.
access_key = 'xxxxxxxxx'
secret_key = 'xxxxxxxxx'
endpoint = 'xxxxxxxx'
Bucket_name = 'xxxxxxxx'

# `endpoint` must be passed by keyword: a positional argument after
# `tls=False` is a SyntaxError.
conn = tinys3.Connection(access_key, secret_key, tls=False, endpoint=endpoint)

# The context manager closes the file even if the upload raises.
with open(file_name, 'rb') as f:
    conn.upload(file_name, f, Bucket_name)

# Remove the local copy only after the upload call has returned.
os.remove(file_name)