In [1]:
import pandas as pd
import difflib
from IPython.display import clear_output

In [2]:
# Load city names
cities_names = pd.read_csv('wikitravel_city_URLs.csv', index_col = 0)
cities_names.columns = ['Country','City','URL']
cities_names.head()

Unnamed: 0,Country,City,URL
0,Afghanistan,Herat,wikitravel.org/en/Herat
1,Afghanistan,Kabul,wikitravel.org/en/Kabul
2,Afghanistan,Jalalabad,wikitravel.org/en/Jalalabad
3,Afghanistan,Kandahar,wikitravel.org/en/Kandahar
4,Afghanistan,Kunduz,wikitravel.org/en/Kunduz


In [3]:
# Long, lat, altitude information from
# https://github.com/bahar/WorldCityLocations

locations = pd.read_csv('data/World_Cities_Location_table.csv',sep=';', header=None)
locations.drop(columns = 0,inplace = True)
locations.columns = ['Country','City','Lat','Lon','Alt']

In [4]:
# Build extra column for later joining
locations['City_alternative'] = locations['City']

In [5]:
locations.head()

Unnamed: 0,Country,City,Lat,Lon,Alt,City_alternative
0,Afghanistan,Kabul,34.516667,69.183334,1808.0,Kabul
1,Afghanistan,Kandahar,31.61,65.699997,1015.0,Kandahar
2,Afghanistan,Mazar-e Sharif,36.706944,67.112221,369.0,Mazar-e Sharif
3,Afghanistan,Herat,34.34,62.189999,927.0,Herat
4,Afghanistan,Jalalabad,34.42,70.449997,573.0,Jalalabad


In [6]:
# Second source for lon lat
# https://www.maxmind.com/en/free-world-cities-database

locations2 = pd.read_table('data/worldcitiespop.txt', encoding='latin-1', delimiter=',')
locations2.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,Country,City,AccentCity,Region,Population,Latitude,Longitude
0,ad,aixas,Aixàs,6,,42.483333,1.466667
1,ad,aixirivali,Aixirivali,6,,42.466667,1.5
2,ad,aixirivall,Aixirivall,6,,42.466667,1.5
3,ad,aixirvall,Aixirvall,6,,42.466667,1.5
4,ad,aixovall,Aixovall,6,,42.466667,1.483333


In [7]:
cities_df = pd.read_csv('cities_df', index_col = 0)
cities_df['City'] = cities_df['City'].str.replace(r" \(.*\)","") # Remove brackets from city names
cities_df['Country'] = cities_df['Country'].str.replace("United States of America","United States") 
cities_df['Country'] = cities_df['Country'].str.replace("Russian Federation","Russia") 
cities_df.head()

Unnamed: 0,Country,City,Understand,Get in,Get around,See,Do,Buy,Eat,Sleep,Cope,Stay safe,Get out
0,Afghanistan,Herat,Herat is the second largest city in Afghanista...,The Herat International Airport is situated 15...,,"Ghala Ekhteyaradin, Takht Safar, Bagh Milat, ...",Take a shower in the huge subterranean Hammams.,There are several antique shops on the north ...,Herat has a mixture of traditional and modern ...,"Budget[edit] Mowafaq Hotel, northeast corner ...",,Herat is one of the safer cities in Afghanista...,Chisht-i-Sharif is some 177 km from Herat city...
1,Afghanistan,Kabul,Kabul is a very historic city of the region th...,By plane[edit] Kabul International Airport (IA...,Maps of Kabul are available from Afghanistan I...,Bagh-e Babur (Gardens of Babur). The gardens ...,Kabul Wall. A pleasant hike with rewarding vi...,The Share-e Naw area has some shops. The Kab...,The once thriving restaurant scene in Kabul ha...,"Kabul is not a cheap place to stay, principall...",Read the Scene magazine for restaurant reviews...,Kabul is generally considered one of the safer...,Most expats take any opportunity they can to l...
2,Afghanistan,Jalalabad,,Jalalabad is on the major highway that links K...,,The large reservoir near Sarobi. This is loca...,Go fishing along the river side. Go swimming .,Handi Craft Handicraft is one of most famous ...,Pakora go for special “Pakora” Food in Hindu ...,The government hotel Spingar is on the easter...,,,This article is an outline and needs more co...
3,Afghanistan,Kandahar,Kandahār or Qandahār (Pashto: کندهار ) (Persia...,By plane[edit] Kandahar International Airport ...,,"Kandahar Museum, (Western end of the Eidgah ...",,"Afghanistan International Bank (AIB), Kabul Ba...","There are plenty of food choices in Kandahar, ...","Budget[edit] Armani Hotel, (Two miles from c...",,"As of 2013, the city centre of Kandahar is qui...",Kabul Helmand Harat Zabul Spin Boldak (Spin Bu...
4,Afghanistan,Kunduz,,Most visitors will arrive by road from Pol-e K...,"The city is fairly small. The local roads, li...",There's not much for tourists here.,,,The usual Afghan fare - kebab or lamb with ric...,,,,This article is an outline and needs more ...


In [8]:
# Auto correct city names by finding closest match in country

def nameguess(row):
    possible_rows = locations[locations['Country']==row['Country']]
    possible_cities = possible_rows['City'].values.tolist()
    cleaned_possible_cities = [item for item in possible_cities if isinstance(item, str)]
    best_match = difflib.get_close_matches(row['City'], cleaned_possible_cities,1)
    if len(best_match)==1:
        return best_match[0]
    else:
        return None

cities_df['City_alternative'] = cities_df.apply(nameguess, axis = 1)

In [9]:
cities_df.head()

Unnamed: 0,Country,City,Understand,Get in,Get around,See,Do,Buy,Eat,Sleep,Cope,Stay safe,Get out,City_alternative
0,Afghanistan,Herat,Herat is the second largest city in Afghanista...,The Herat International Airport is situated 15...,,"Ghala Ekhteyaradin, Takht Safar, Bagh Milat, ...",Take a shower in the huge subterranean Hammams.,There are several antique shops on the north ...,Herat has a mixture of traditional and modern ...,"Budget[edit] Mowafaq Hotel, northeast corner ...",,Herat is one of the safer cities in Afghanista...,Chisht-i-Sharif is some 177 km from Herat city...,Herat
1,Afghanistan,Kabul,Kabul is a very historic city of the region th...,By plane[edit] Kabul International Airport (IA...,Maps of Kabul are available from Afghanistan I...,Bagh-e Babur (Gardens of Babur). The gardens ...,Kabul Wall. A pleasant hike with rewarding vi...,The Share-e Naw area has some shops. The Kab...,The once thriving restaurant scene in Kabul ha...,"Kabul is not a cheap place to stay, principall...",Read the Scene magazine for restaurant reviews...,Kabul is generally considered one of the safer...,Most expats take any opportunity they can to l...,Kabul
2,Afghanistan,Jalalabad,,Jalalabad is on the major highway that links K...,,The large reservoir near Sarobi. This is loca...,Go fishing along the river side. Go swimming .,Handi Craft Handicraft is one of most famous ...,Pakora go for special “Pakora” Food in Hindu ...,The government hotel Spingar is on the easter...,,,This article is an outline and needs more co...,Jalalabad
3,Afghanistan,Kandahar,Kandahār or Qandahār (Pashto: کندهار ) (Persia...,By plane[edit] Kandahar International Airport ...,,"Kandahar Museum, (Western end of the Eidgah ...",,"Afghanistan International Bank (AIB), Kabul Ba...","There are plenty of food choices in Kandahar, ...","Budget[edit] Armani Hotel, (Two miles from c...",,"As of 2013, the city centre of Kandahar is qui...",Kabul Helmand Harat Zabul Spin Boldak (Spin Bu...,Kandahar
4,Afghanistan,Kunduz,,Most visitors will arrive by road from Pol-e K...,"The city is fairly small. The local roads, li...",There's not much for tourists here.,,,The usual Afghan fare - kebab or lamb with ric...,,,,This article is an outline and needs more ...,Konduz


In [10]:
# Join dataframes to add lat/lon to cities df
cities_geo_df = pd.merge(cities_df, locations[['Country','City_alternative','Lat','Lon','Alt']], how = 'left',
                         on = ['Country','City_alternative'])

In [11]:
cities_geo_df.head()

Unnamed: 0,Country,City,Understand,Get in,Get around,See,Do,Buy,Eat,Sleep,Cope,Stay safe,Get out,City_alternative,Lat,Lon,Alt
0,Afghanistan,Herat,Herat is the second largest city in Afghanista...,The Herat International Airport is situated 15...,,"Ghala Ekhteyaradin, Takht Safar, Bagh Milat, ...",Take a shower in the huge subterranean Hammams.,There are several antique shops on the north ...,Herat has a mixture of traditional and modern ...,"Budget[edit] Mowafaq Hotel, northeast corner ...",,Herat is one of the safer cities in Afghanista...,Chisht-i-Sharif is some 177 km from Herat city...,Herat,34.34,62.189999,927.0
1,Afghanistan,Kabul,Kabul is a very historic city of the region th...,By plane[edit] Kabul International Airport (IA...,Maps of Kabul are available from Afghanistan I...,Bagh-e Babur (Gardens of Babur). The gardens ...,Kabul Wall. A pleasant hike with rewarding vi...,The Share-e Naw area has some shops. The Kab...,The once thriving restaurant scene in Kabul ha...,"Kabul is not a cheap place to stay, principall...",Read the Scene magazine for restaurant reviews...,Kabul is generally considered one of the safer...,Most expats take any opportunity they can to l...,Kabul,34.516667,69.183334,1808.0
2,Afghanistan,Jalalabad,,Jalalabad is on the major highway that links K...,,The large reservoir near Sarobi. This is loca...,Go fishing along the river side. Go swimming .,Handi Craft Handicraft is one of most famous ...,Pakora go for special “Pakora” Food in Hindu ...,The government hotel Spingar is on the easter...,,,This article is an outline and needs more co...,Jalalabad,34.42,70.449997,573.0
3,Afghanistan,Kandahar,Kandahār or Qandahār (Pashto: کندهار ) (Persia...,By plane[edit] Kandahar International Airport ...,,"Kandahar Museum, (Western end of the Eidgah ...",,"Afghanistan International Bank (AIB), Kabul Ba...","There are plenty of food choices in Kandahar, ...","Budget[edit] Armani Hotel, (Two miles from c...",,"As of 2013, the city centre of Kandahar is qui...",Kabul Helmand Harat Zabul Spin Boldak (Spin Bu...,Kandahar,31.61,65.699997,1015.0
4,Afghanistan,Kunduz,,Most visitors will arrive by road from Pol-e K...,"The city is fairly small. The local roads, li...",There's not much for tourists here.,,,The usual Afghan fare - kebab or lamb with ric...,,,,This article is an outline and needs more ...,Konduz,36.72,68.860001,394.0


In [12]:
len(cities_geo_df)

2706

In [13]:
# How many cities do not have Lon/Lat info from first pass
cities_geo_df['Lat'].isna().sum()

658

In [14]:
# Try second locations
def nameguess2(row):
    if row['City_alternative']==None: # If city has no match with coordinate base the first round
        print(row['Country']+' '+row['City'])
        clear_output(wait=True)
        second_match = difflib.get_close_matches(row['City'], locations2['AccentCity'].values,1)
        if len(second_match)==1:
            return second_match[0]
        else:
            return None
    else:
        return None

cities_geo_df['City_alternative2'] = cities_geo_df.apply(nameguess2, axis = 1)

KeyboardInterrupt: 

In [None]:
cities_geo_df[cities_geo_df['Lat'].isna()]

In [None]:
cities_geo_df.to_csv('TEMP SAVE WITH SECOND FILLED CITIES.csv')

In [None]:
cities_geo_df.to_csv('cities_geo_df.csv')