In [1]:
#Import dependencies
import pandas as pd

In [2]:
#Get the null dataframe to work on
file_path='../Resources/TrynaBDifficult.csv'
nulls=pd.read_csv(file_path)
nulls.head(30)

Unnamed: 0.1,Unnamed: 0,City,State/Province,Country,Latitude,Longitude
0,2,Cook Islands​,,Cook Islands​,,
1,13,Antigua and Barbuda​,,Antigua and Barbuda​,,
2,16,Turks and Caicos Islands​,,Turks and Caicos Islands​,,
3,24,Great Ocean Road,Victoria,Australia,-24.776109,134.755
4,27,Pembrokeshire,Wales,United Kingdom,54.702355,-3.276575
5,29,Mauritius​,,Mauritius​,,
6,31,Cayman Islands​,,Cayman Islands​,,
7,32,Seychelles​,,Seychelles​,,
8,36,Cumbria,England,United Kingdom,54.702355,-3.276575
9,41,Greater Geelong & the Bellarine,Victoria,Australia,-24.776109,134.755


In [3]:
#Drop the null row
nulls=nulls.dropna(subset=['City'])
nulls.head(30)

Unnamed: 0.1,Unnamed: 0,City,State/Province,Country,Latitude,Longitude
0,2,Cook Islands​,,Cook Islands​,,
1,13,Antigua and Barbuda​,,Antigua and Barbuda​,,
2,16,Turks and Caicos Islands​,,Turks and Caicos Islands​,,
3,24,Great Ocean Road,Victoria,Australia,-24.776109,134.755
4,27,Pembrokeshire,Wales,United Kingdom,54.702355,-3.276575
5,29,Mauritius​,,Mauritius​,,
6,31,Cayman Islands​,,Cayman Islands​,,
7,32,Seychelles​,,Seychelles​,,
8,36,Cumbria,England,United Kingdom,54.702355,-3.276575
9,41,Greater Geelong & the Bellarine,Victoria,Australia,-24.776109,134.755


In [4]:
#Manually fix the Honolulu row
nulls.loc[nulls['City'] == 'Honolulu', 'Country'] = 'United States'
import requests

In [5]:
#run the same lat-lng function from the other jupyter notebook on this dataframe, but drop the 'city' part of the request 
#running just country should net us the islands we want
def lat_lng(city, state, country):
    if city is None:
        return None, None
    if state is None:
        return None, None
    location=f'{city},{country}'
    response=requests.get(f"https://nominatim.openstreetmap.org/search?country={country}&format=json")
    if response.status_code==200:
        Json=response.json()
        if len(Json) > 0:
            # Extract latitude and longitude coordinates
            lat = float(Json[0]['lat'])
            lng = float(Json[0]['lon'])
            return lat, lng
        else:
            return None, None
    else:
        return None, None
nulls['Latitude'], nulls['Longitude'] = zip(*nulls.apply(lambda row: lat_lng(row['City'], row['State/Province'], row['Country']), axis=1))
nulls.head(30)


Unnamed: 0.1,Unnamed: 0,City,State/Province,Country,Latitude,Longitude
0,2,Cook Islands​,,Cook Islands​,-19.997,-157.786
1,13,Antigua and Barbuda​,,Antigua and Barbuda​,17.2235,-61.9555
2,16,Turks and Caicos Islands​,,Turks and Caicos Islands​,21.7217,-71.5528
3,24,Great Ocean Road,Victoria,Australia,-24.7761,134.755
4,27,Pembrokeshire,Wales,United Kingdom,54.7024,-3.27658
5,29,Mauritius​,,Mauritius​,-20.2759,57.5704
6,31,Cayman Islands​,,Cayman Islands​,19.7032,-79.9175
7,32,Seychelles​,,Seychelles​,-4.6575,55.454
8,36,Cumbria,England,United Kingdom,54.7024,-3.27658
9,41,Greater Geelong & the Bellarine,Victoria,Australia,-24.7761,134.755


In [6]:
#Drop everything that didn't populate a geolocation & drop duplicates as well
nulls=nulls.dropna(subset=['Latitude'])
nulls.drop_duplicates(subset=['Latitude'], inplace=True)

nulls.head(30)

Unnamed: 0.1,Unnamed: 0,City,State/Province,Country,Latitude,Longitude
0,2,Cook Islands​,,Cook Islands​,-19.997,-157.786
1,13,Antigua and Barbuda​,,Antigua and Barbuda​,17.2235,-61.9555
2,16,Turks and Caicos Islands​,,Turks and Caicos Islands​,21.7217,-71.5528
3,24,Great Ocean Road,Victoria,Australia,-24.7761,134.755
4,27,Pembrokeshire,Wales,United Kingdom,54.7024,-3.27658
5,29,Mauritius​,,Mauritius​,-20.2759,57.5704
6,31,Cayman Islands​,,Cayman Islands​,19.7032,-79.9175
7,32,Seychelles​,,Seychelles​,-4.6575,55.454
10,46,French Polynesia,France,France,46.6034,1.88833
11,47,Barbados​,,Barbados​,13.15,-59.525


In [7]:
from citipy import citipy

In [8]:
#Check that these are correct by running the lat-lng pairs through citipy
latitudes_list=nulls["Latitude"].tolist()
longitudes_list=nulls["Longitude"].tolist()
lat_lngs=zip(latitudes_list, longitudes_list)

print(lat_lngs)

<zip object at 0x00000194970B1708>


In [9]:
cities=[]
for lat_lng in lat_lngs:
    city= citipy.nearest_city(lat_lng[0],lat_lng[1]).city_name
    if city not in cities:
        cities.append(city)
print(cities)

['avarua', 'carlisle', 'cockburn harbour', 'alice springs', 'annan', 'dubreuil', 'bodden town', 'victoria', 'chateauroux', 'blackmans', 'albert town', 'meiringen', 'rentina', 'lexington', 'isangel', 'micoud', 'andros town']


In [10]:
#confirmed that these are not correct geolocations for anything that isn't an island so we're going to drop all the rows that have
#bad data
Islands=nulls[nulls['State/Province'].isnull()]
Islands.head(30) 


Unnamed: 0.1,Unnamed: 0,City,State/Province,Country,Latitude,Longitude
0,2,Cook Islands​,,Cook Islands​,-19.997,-157.786
1,13,Antigua and Barbuda​,,Antigua and Barbuda​,17.2235,-61.9555
2,16,Turks and Caicos Islands​,,Turks and Caicos Islands​,21.7217,-71.5528
5,29,Mauritius​,,Mauritius​,-20.2759,57.5704
6,31,Cayman Islands​,,Cayman Islands​,19.7032,-79.9175
7,32,Seychelles​,,Seychelles​,-4.6575,55.454
11,47,Barbados​,,Barbados​,13.15,-59.525
14,56,Jamaica​,,Jamaica​,18.1851,-77.3948
19,86,Fiji​,,Fiji​,-18.124,179.012
20,89,Saint Lucia​,,Saint Lucia​,13.825,-60.975


In [12]:
#roll that out to a csv to pick back up on other notebook
Islands.to_csv('../Resources/FixedNulls.csv',index=False)