In [48]:
import pandas as pd
import difflib
from IPython.display import clear_output

In [2]:
# Read the city list; the file's first column becomes the index and the
# remaining three columns are renamed.
cities_names = pd.read_csv(
    'wikitravel_city_URLs.csv',
    index_col=0,
)
cities_names.columns = pd.Index(['Country', 'City', 'URL'])
cities_names.head(5)

Unnamed: 0,Country,City,URL
0,Afghanistan,Herat,wikitravel.org/en/Herat
1,Afghanistan,Kabul,wikitravel.org/en/Kabul
2,Afghanistan,Jalalabad,wikitravel.org/en/Jalalabad
3,Afghanistan,Kandahar,wikitravel.org/en/Kandahar
4,Afghanistan,Kunduz,wikitravel.org/en/Kunduz


In [3]:
# Latitude, longitude and altitude per city, taken from
# https://github.com/bahar/WorldCityLocations
#
# The file is ';'-separated and has no header row. The column labelled 0 is
# dropped and the remaining five columns are named explicitly.
locations = pd.read_csv(
    'data/World_Cities_Location_table.csv',
    sep=';',
    header=None,
).drop(columns=0)
locations.columns = ['Country', 'City', 'Lat', 'Lon', 'Alt']

In [4]:
# Duplicate the city name under the key used by the later join
locations = locations.assign(City_alternative=locations['City'])

In [5]:
# Preview the first rows of the coordinates table
locations.head(5)

Unnamed: 0,Country,City,Lat,Lon,Alt,City_alternative
0,Afghanistan,Kabul,34.516667,69.183334,1808.0,Kabul
1,Afghanistan,Kandahar,31.61,65.699997,1015.0,Kandahar
2,Afghanistan,Mazar-e Sharif,36.706944,67.112221,369.0,Mazar-e Sharif
3,Afghanistan,Herat,34.34,62.189999,927.0,Herat
4,Afghanistan,Jalalabad,34.42,70.449997,573.0,Jalalabad


In [6]:
# Second source for lon/lat
# https://www.maxmind.com/en/free-world-cities-database
#
# The file is comma-separated, so read_csv (whose default separator is ',')
# replaces read_table(..., delimiter=',').
# low_memory=False makes pandas infer each column's dtype from the whole
# file rather than chunk by chunk; chunk-wise inference can leave a column
# with mixed types, which pandas reports as a DtypeWarning.
locations2 = pd.read_csv('data/worldcitiespop.txt', encoding='latin-1', low_memory=False)
locations2.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,Country,City,AccentCity,Region,Population,Latitude,Longitude
0,ad,aixas,Aixàs,6,,42.483333,1.466667
1,ad,aixirivali,Aixirivali,6,,42.466667,1.5
2,ad,aixirivall,Aixirivall,6,,42.466667,1.5
3,ad,aixirvall,Aixirvall,6,,42.466667,1.5
4,ad,aixovall,Aixovall,6,,42.466667,1.483333


In [7]:
# Per-city table read from the file 'cities_df'; its first column is the index.
cities_df = pd.read_csv('cities_df', index_col = 0)

# Remove a parenthesised qualifier (and the space before it) from city names.
# regex=True is spelled out because the pattern is a regular expression:
# recent pandas versions treat the pattern as a literal string by default,
# which would leave the brackets in place.
cities_df['City'] = cities_df['City'].str.replace(r" \(.*\)", "", regex=True)

# Shorten two country names (presumably to match the spelling used in the
# coordinates table -- TODO confirm). These are plain-text replacements.
cities_df['Country'] = cities_df['Country'].str.replace("United States of America", "United States", regex=False)
cities_df['Country'] = cities_df['Country'].str.replace("Russian Federation", "Russia", regex=False)
cities_df.head()

Unnamed: 0,Country,City,Understand,Get in,Get around,See,Do,Buy,Eat,Sleep,Cope,Stay safe,Get out
0,Afghanistan,Herat,Herat is the second largest city in Afghanista...,The Herat International Airport is situated 15...,,"Ghala Ekhteyaradin, Takht Safar, Bagh Milat, ...",Take a shower in the huge subterranean Hammams.,There are several antique shops on the north ...,Herat has a mixture of traditional and modern ...,"Budget[edit] Mowafaq Hotel, northeast corner ...",,Herat is one of the safer cities in Afghanista...,Chisht-i-Sharif is some 177 km from Herat city...
1,Afghanistan,Kabul,Kabul is a very historic city of the region th...,By plane[edit] Kabul International Airport (IA...,Maps of Kabul are available from Afghanistan I...,Bagh-e Babur (Gardens of Babur). The gardens ...,Kabul Wall. A pleasant hike with rewarding vi...,The Share-e Naw area has some shops. The Kab...,The once thriving restaurant scene in Kabul ha...,"Kabul is not a cheap place to stay, principall...",Read the Scene magazine for restaurant reviews...,Kabul is generally considered one of the safer...,Most expats take any opportunity they can to l...
2,Afghanistan,Jalalabad,,Jalalabad is on the major highway that links K...,,The large reservoir near Sarobi. This is loca...,Go fishing along the river side. Go swimming .,Handi Craft Handicraft is one of most famous ...,Pakora go for special “Pakora” Food in Hindu ...,The government hotel Spingar is on the easter...,,,This article is an outline and needs more co...
3,Afghanistan,Kandahar,Kandahār or Qandahār (Pashto: کندهار ) (Persia...,By plane[edit] Kandahar International Airport ...,,"Kandahar Museum, (Western end of the Eidgah ...",,"Afghanistan International Bank (AIB), Kabul Ba...","There are plenty of food choices in Kandahar, ...","Budget[edit] Armani Hotel, (Two miles from c...",,"As of 2013, the city centre of Kandahar is qui...",Kabul Helmand Harat Zabul Spin Boldak (Spin Bu...
4,Afghanistan,Kunduz,,Most visitors will arrive by road from Pol-e K...,"The city is fairly small. The local roads, li...",There's not much for tourists here.,,,The usual Afghan fare - kebab or lamb with ric...,,,,This article is an outline and needs more ...


In [8]:
# Auto correct city names by finding closest match in country

def nameguess(row):
    """Return the closest-spelled city listed in ``locations`` for the row's country.

    Parameters
    ----------
    row : mapping
        Must provide 'Country' and 'City' entries (e.g. a DataFrame row).

    Returns
    -------
    str or None
        The best ``difflib.get_close_matches`` candidate among the cities that
        ``locations`` lists for ``row['Country']``. None when the city name is
        not a string (e.g. NaN) or when no candidate is close enough.
    """
    city = row['City']
    # difflib can only compare strings; a missing city (NaN) cannot be matched
    # and would otherwise raise inside get_close_matches.
    if not isinstance(city, str):
        return None

    # Restrict the candidates to cities of the same country.
    same_country = locations.loc[locations['Country'] == row['Country'], 'City']
    # Drop non-string entries (e.g. NaN) so difflib only sees strings.
    candidates = [name for name in same_country.tolist() if isinstance(name, str)]

    best_match = difflib.get_close_matches(city, candidates, n=1)
    return best_match[0] if best_match else None

# Row-wise lookup: each city gets its closest in-country match from `locations`, or None.
cities_df['City_alternative'] = cities_df.apply(nameguess, axis = 1)

In [9]:
# Preview the table with the new City_alternative column
cities_df.head(5)

Unnamed: 0,Country,City,Understand,Get in,Get around,See,Do,Buy,Eat,Sleep,Cope,Stay safe,Get out,City_alternative
0,Afghanistan,Herat,Herat is the second largest city in Afghanista...,The Herat International Airport is situated 15...,,"Ghala Ekhteyaradin, Takht Safar, Bagh Milat, ...",Take a shower in the huge subterranean Hammams.,There are several antique shops on the north ...,Herat has a mixture of traditional and modern ...,"Budget[edit] Mowafaq Hotel, northeast corner ...",,Herat is one of the safer cities in Afghanista...,Chisht-i-Sharif is some 177 km from Herat city...,Herat
1,Afghanistan,Kabul,Kabul is a very historic city of the region th...,By plane[edit] Kabul International Airport (IA...,Maps of Kabul are available from Afghanistan I...,Bagh-e Babur (Gardens of Babur). The gardens ...,Kabul Wall. A pleasant hike with rewarding vi...,The Share-e Naw area has some shops. The Kab...,The once thriving restaurant scene in Kabul ha...,"Kabul is not a cheap place to stay, principall...",Read the Scene magazine for restaurant reviews...,Kabul is generally considered one of the safer...,Most expats take any opportunity they can to l...,Kabul
2,Afghanistan,Jalalabad,,Jalalabad is on the major highway that links K...,,The large reservoir near Sarobi. This is loca...,Go fishing along the river side. Go swimming .,Handi Craft Handicraft is one of most famous ...,Pakora go for special “Pakora” Food in Hindu ...,The government hotel Spingar is on the easter...,,,This article is an outline and needs more co...,Jalalabad
3,Afghanistan,Kandahar,Kandahār or Qandahār (Pashto: کندهار ) (Persia...,By plane[edit] Kandahar International Airport ...,,"Kandahar Museum, (Western end of the Eidgah ...",,"Afghanistan International Bank (AIB), Kabul Ba...","There are plenty of food choices in Kandahar, ...","Budget[edit] Armani Hotel, (Two miles from c...",,"As of 2013, the city centre of Kandahar is qui...",Kabul Helmand Harat Zabul Spin Boldak (Spin Bu...,Kandahar
4,Afghanistan,Kunduz,,Most visitors will arrive by road from Pol-e K...,"The city is fairly small. The local roads, li...",There's not much for tourists here.,,,The usual Afghan fare - kebab or lamb with ric...,,,,This article is an outline and needs more ...,Konduz


In [10]:
# Left-join the coordinates onto the city table on (Country, City_alternative);
# cities without a matching row in `locations` keep NaN in Lat/Lon/Alt.
cities_geo_df = cities_df.merge(
    locations[['Country', 'City_alternative', 'Lat', 'Lon', 'Alt']],
    how='left',
    on=['Country', 'City_alternative'],
)

In [11]:
# Preview the joined table
cities_geo_df.head(5)

Unnamed: 0,Country,City,Understand,Get in,Get around,See,Do,Buy,Eat,Sleep,Cope,Stay safe,Get out,City_alternative,Lat,Lon,Alt
0,Afghanistan,Herat,Herat is the second largest city in Afghanista...,The Herat International Airport is situated 15...,,"Ghala Ekhteyaradin, Takht Safar, Bagh Milat, ...",Take a shower in the huge subterranean Hammams.,There are several antique shops on the north ...,Herat has a mixture of traditional and modern ...,"Budget[edit] Mowafaq Hotel, northeast corner ...",,Herat is one of the safer cities in Afghanista...,Chisht-i-Sharif is some 177 km from Herat city...,Herat,34.34,62.189999,927.0
1,Afghanistan,Kabul,Kabul is a very historic city of the region th...,By plane[edit] Kabul International Airport (IA...,Maps of Kabul are available from Afghanistan I...,Bagh-e Babur (Gardens of Babur). The gardens ...,Kabul Wall. A pleasant hike with rewarding vi...,The Share-e Naw area has some shops. The Kab...,The once thriving restaurant scene in Kabul ha...,"Kabul is not a cheap place to stay, principall...",Read the Scene magazine for restaurant reviews...,Kabul is generally considered one of the safer...,Most expats take any opportunity they can to l...,Kabul,34.516667,69.183334,1808.0
2,Afghanistan,Jalalabad,,Jalalabad is on the major highway that links K...,,The large reservoir near Sarobi. This is loca...,Go fishing along the river side. Go swimming .,Handi Craft Handicraft is one of most famous ...,Pakora go for special “Pakora” Food in Hindu ...,The government hotel Spingar is on the easter...,,,This article is an outline and needs more co...,Jalalabad,34.42,70.449997,573.0
3,Afghanistan,Kandahar,Kandahār or Qandahār (Pashto: کندهار ) (Persia...,By plane[edit] Kandahar International Airport ...,,"Kandahar Museum, (Western end of the Eidgah ...",,"Afghanistan International Bank (AIB), Kabul Ba...","There are plenty of food choices in Kandahar, ...","Budget[edit] Armani Hotel, (Two miles from c...",,"As of 2013, the city centre of Kandahar is qui...",Kabul Helmand Harat Zabul Spin Boldak (Spin Bu...,Kandahar,31.61,65.699997,1015.0
4,Afghanistan,Kunduz,,Most visitors will arrive by road from Pol-e K...,"The city is fairly small. The local roads, li...",There's not much for tourists here.,,,The usual Afghan fare - kebab or lamb with ric...,,,,This article is an outline and needs more ...,Konduz,36.72,68.860001,394.0


In [12]:
# Number of rows after the first join
cities_geo_df.shape[0]

2706

In [13]:
# Count the rows that still lack a latitude after the first pass
cities_geo_df['Lat'].isnull().sum()

658

In [14]:
# Try second locations
def nameguess2(row):
    """Fuzzy-match a city against ``locations2['AccentCity']`` when the first pass failed.

    Parameters
    ----------
    row : mapping
        Must provide 'Country', 'City' and 'City_alternative' entries.

    Returns
    -------
    str or None
        The best ``difflib.get_close_matches`` candidate from
        ``locations2['AccentCity']``. None when the row already has a
        'City_alternative', when its city name is not a string, or when no
        candidate is close enough.

    Notes
    -----
    Candidates are not restricted to the row's country, so the match may be a
    same-named place elsewhere.
    """
    # pd.isna treats both None and NaN as "no first-pass match"; a plain
    # `== None` comparison misses NaN.
    if not pd.isna(row['City_alternative']):
        return None
    # difflib can only compare strings.
    if not isinstance(row['City'], str):
        return None

    # Progress indicator: show the city being processed, replacing the previous line.
    print(row['Country']+' '+row['City'])
    clear_output(wait=True)

    second_match = difflib.get_close_matches(row['City'], locations2['AccentCity'].values, n=1)
    return second_match[0] if second_match else None

# Row-wise second pass: each still-unmatched city is compared against every entry of locations2['AccentCity'].
cities_geo_df['City_alternative2'] = cities_geo_df.apply(nameguess2, axis = 1)

Western Sahara El Aaiun


In [15]:
# Preview the cities that got no coordinates from the first pass.
# Bounded with head() so the cell never renders the whole filtered frame.
cities_geo_df[cities_geo_df['Lat'].isna()].head(10)

Unnamed: 0,Country,City,Understand,Get in,Get around,See,Do,Buy,Eat,Sleep,Cope,Stay safe,Get out,City_alternative,Lat,Lon,Alt,City_alternative2
13,Algeria,Ghardaïa,,There's only one way you are likely to get to ...,,M'zab architecture in each of the fortified c...,,,,,,,This article is an outline and needs more ...,,,,,Ghardaïa
19,Algeria,Sétif,,,,,,,,,,,Djemila a UNESCO World Heritage Site due to i...,,,,,Sétif
21,American Samoa,Pago Pago,,By plane[edit] Pago Pago is approximately 16 m...,,Jean P. Haydon Museum. A museum that showcases...,,,,"Sadie's by the Sea, ☎ (684) 633-5981 ([email ...",,,,,,,,Pago Pago
28,Antigua and Barbuda,Saint John's,,By plane[edit] VC Bird International Airport i...,Many Taxi services exist and one may even rent...,Carnival - annual festival from late July to ...,Sample local cuisine and drinks. Mingle with...,"See this topic under ""Antigua"". (edit) 1000...",,At one of the many hotels or guest houses. For...,"Embassies[edit] Venezuela, Old Parham Road P...",,Barbuda Falmouth English Harbour This art...,,,,,Saint John's
39,Argentina,Morón,,The main highway from the Carretera Central in...,,,Visit the Laguna de Leche (Lake of Milk) - the...,,,"The Hotel Morón, close to the centre of town, ...",,,This article is an outline and needs more co...,,,,,Morón
53,Aruba,Oranjestad,Oranjestad consists of hundreds of local store...,,To get into the downtown one may suggest a sim...,"Fort Zoutman/Historical Museum [9], Aruba’s ol...",Oranjestad is a fun place for water activities...,For those travelers who have a budget keep to ...,While in Oranjestad you can look any which dir...,When one thinks of hospitality they think of h...,Supporting facilities are also very prevalent ...,Most likely the only crime one may come across...,This article is an outline and needs more co...,,,,,Oranjestad
66,Australia,Sunshine Coast,Renowned for its relaxed approach to Queenslan...,By car[edit] The Sunshine Coast is between 1 a...,"All Age Car Rentals, [6]. Choice Car Rental...","Australia Zoo [16], owned and run by the famil...",Dive the wreck of the HMAS Brisbane with Sunre...,,Thai Street - Located in Alexandra Headland a...,,,,Brisbane Gold Coast This article is an o...,,,,,Sunshine Camp
76,Bahamas,Nassau,Founded around 1650 by the British as Charles ...,By air[edit] Nassau's Lynden Pindling Internat...,By water taxi[edit] A water taxi service is an...,"Take a walk around Old Town, an interesting mi...",The bus tours are pretty interesting. They'll...,"Straw Market, Bay St. Originally a locals' m...",Get out of the hotel and try real Bahamaian fa...,Many of Nassau's hotels are located outside th...,"Embassies[edit] Greece, Olympia Building, We...","The ""Over-the-Hill"" area south of downtown is ...",Paradise Island Located just across a bridge ...,,,,,Nassau
93,Belarus,Gomel,,"By train or car from Minsk, Kiev, and Moscow.",Trolley buses and normal buses in Gomel are pl...,There is a fantastic military museum about one...,,,Located in one corner of the Gomel market is a...,"Hotel Homel, (next door to train station). Ni...",,Gomel is a very safe city. You would have to b...,This article is an outline and needs more ...,,,,,Gomel
97,Belarus,Mogilev,"Mahilioŭ is a city in eastern Belarus, the thi...",By plane[edit] Threre are no direct flights to...,"Local transportation system includes buses, tr...",The best is to walk throgh pedestrian Leninska...,Festivals[edit] М@rt.kantakt [7] — internatio...,The most well known grocery stores are - Gippo...,"Budget[edit] Buffet - 5, Kamsamoĺskaja St. (i...",Budget[edit] Mid-range[edit] Hotel Mogilev - ...,,Mahilioŭ usually is a safe place but everyone ...,Smolensk - A very worthwhile day or half-day ...,,,,,Mogilev


In [38]:
# Left-join the second-source coordinates onto the city table, matching
# City_alternative2 against AccentCity.
# NOTE(review): the join key is the city name alone (no country), so a name
# that occurs several times in locations2 yields several rows per city.
cities_geo2_df = cities_geo_df.merge(
    locations2[['Latitude', 'Longitude', 'AccentCity']],
    how='left',
    left_on=['City_alternative2'],
    right_on=['AccentCity'],
)

In [39]:
# Preview only the first rows instead of rendering the whole frame
cities_geo2_df.head(10)

Unnamed: 0,Country,City,Understand,Get in,Get around,See,Do,Buy,Eat,Sleep,...,Stay safe,Get out,City_alternative,Lat,Lon,Alt,City_alternative2,Latitude,Longitude,AccentCity
0,Afghanistan,Herat,Herat is the second largest city in Afghanista...,The Herat International Airport is situated 15...,,"Ghala Ekhteyaradin, Takht Safar, Bagh Milat, ...",Take a shower in the huge subterranean Hammams.,There are several antique shops on the north ...,Herat has a mixture of traditional and modern ...,"Budget[edit] Mowafaq Hotel, northeast corner ...",...,Herat is one of the safer cities in Afghanista...,Chisht-i-Sharif is some 177 km from Herat city...,Herat,34.340000,62.189999,927.0,,,,
1,Afghanistan,Kabul,Kabul is a very historic city of the region th...,By plane[edit] Kabul International Airport (IA...,Maps of Kabul are available from Afghanistan I...,Bagh-e Babur (Gardens of Babur). The gardens ...,Kabul Wall. A pleasant hike with rewarding vi...,The Share-e Naw area has some shops. The Kab...,The once thriving restaurant scene in Kabul ha...,"Kabul is not a cheap place to stay, principall...",...,Kabul is generally considered one of the safer...,Most expats take any opportunity they can to l...,Kabul,34.516667,69.183334,1808.0,,,,
2,Afghanistan,Jalalabad,,Jalalabad is on the major highway that links K...,,The large reservoir near Sarobi. This is loca...,Go fishing along the river side. Go swimming .,Handi Craft Handicraft is one of most famous ...,Pakora go for special “Pakora” Food in Hindu ...,The government hotel Spingar is on the easter...,...,,This article is an outline and needs more co...,Jalalabad,34.420000,70.449997,573.0,,,,
3,Afghanistan,Kandahar,Kandahār or Qandahār (Pashto: کندهار ) (Persia...,By plane[edit] Kandahar International Airport ...,,"Kandahar Museum, (Western end of the Eidgah ...",,"Afghanistan International Bank (AIB), Kabul Ba...","There are plenty of food choices in Kandahar, ...","Budget[edit] Armani Hotel, (Two miles from c...",...,"As of 2013, the city centre of Kandahar is qui...",Kabul Helmand Harat Zabul Spin Boldak (Spin Bu...,Kandahar,31.610000,65.699997,1015.0,,,,
4,Afghanistan,Kunduz,,Most visitors will arrive by road from Pol-e K...,"The city is fairly small. The local roads, li...",There's not much for tourists here.,,,The usual Afghan fare - kebab or lamb with ric...,,...,,This article is an outline and needs more ...,Konduz,36.720000,68.860001,394.0,,,,
5,Afghanistan,Mazar-i-Sharif,"With a population of more than 300,000, Mazar-...",By road From Kabul can take as little as 5 ho...,Taxis are plentiful and should cost 30 Afg or...,In the way of sights Mazar has little to offer...,The mosque is open for daily prayers. sufi ha...,Hand woven Afghan rugs and other souvenirs ...,Between the Farhat Hotel and the Blue Mosque ...,Budget Local hotels offer multi-bed rooms from...,...,Aside from the inherent risks faced by coming...,By plane Kam Air [10] flies to Kabul on Thurs...,Mazar-e Sharif,36.706944,67.112221,369.0,,,,
6,Albania,Tirana,"Sulejman Pasha Bargjini, a native feudal lord ...",By plane[edit] Tirana is served by Tirana Inte...,By bus[edit] Public transportation in Tirana c...,"Clock Tower (Kulla e Sahatit), Skanderbeg Squ...",Go for a walk in the evening along the main st...,There are many small-scale shops in Tirana. Th...,"There are many excellent, inexpensive restaura...",Budget[edit] Budget options in Tirana are hind...,...,Violent crime and drug related crime are a rea...,Durrës Shkodra Elbasan Vlorë Pogradec ...,Tirana,41.327500,19.818890,103.0,,,,
7,Albania,Durres,,"By boat[edit] To Bari in Italy [5], [6] and [...",Many taxis are available in Durres. You can fi...,"Durres Amphitheater, the biggest in the Balkan...",Go for an evening walk along the new beach fro...,Souvenirs Kokomani Vineyard wine,Bar Restorant Kajser. Located near Kig Zog Vil...,Durres Hostel Address: Sheshi Liria street. Fr...,...,,Bari Ancona Trieste Saranda Vlora Tirana Thi...,Durres,41.323056,19.441389,-9999.0,,,,
8,Algeria,Algiers,Algiers is the capital city of Algeria in Nort...,By plane[edit] Houari Boumediene Airport (ICAO...,Public transportation[edit] Algiers Public Tra...,"Monument of Martyrs, (in city center). edit...",,,Eat the lovely couscous.,Sofitel Algiers Hamma Garden. Mercure Alger...,...,This article is an outline and needs more co...,,Algiers,36.763056,3.050556,1.0,,,,
9,Algeria,Annaba,,By plane Annaba is best reached by air. Its a...,From the airport there is a regular bus shuttl...,One of the must sees of Annaba is the Church o...,A visit to Annaba is not complete without enjo...,,Annaba has countless places to eat. Whether th...,,...,,There are a number of places to visit that are...,Annaba,36.900000,7.766667,1.0,,,,


In [40]:
# Fill Lat/Lon gaps with the coordinates obtained on the second pass.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on a column selection: that selection can be a
# temporary copy (and always is under pandas Copy-on-Write), in which case
# the in-place fill never reaches the DataFrame.
cities_geo2_df['Lat'] = cities_geo2_df['Lat'].fillna(cities_geo2_df['Latitude'])
cities_geo2_df['Lon'] = cities_geo2_df['Lon'].fillna(cities_geo2_df['Longitude'])

In [41]:
# Count the rows that still lack a latitude after the second pass
cities_geo2_df['Lat'].isnull().sum()

1

In [42]:
# Number of rows after the second join
cities_geo2_df.shape[0]

6554

In [43]:
# Keep a single row per (Country, City): the last one in the current row order
cities_geo2_df.drop_duplicates(
    subset=['Country', 'City'],
    keep='last',
    inplace=True,
)

In [44]:
# Number of rows left after de-duplication
cities_geo2_df.shape[0]

2671

In [61]:
# List the cities that are still without coordinates
cities_geo2_df.loc[cities_geo2_df['Lat'].isna()]

Unnamed: 0,Country,City,Understand,Get in,Get around,See,Do,Buy,Eat,Sleep,...,Stay safe,Get out,City_alternative,Lat,Lon,Alt,City_alternative2,Latitude,Longitude,AccentCity
3523,Poland,Łódź,Name and history[edit] The exact origin of the...,By plane[edit] Łódź Władysław Reymont Airport ...,The Ultimate Tram Łódź was one of the firs...,Łódź is well-known for its architectural monum...,The main idea for promoting Łódź is make it a ...,Shopping areas[edit] Piotrkowska Street is the...,Łódź does not offer a lot when it comes to Pol...,This guide uses the following price ranges ...,...,Łódź has a rough reputation among Polish peopl...,Useful for one day trips: Łęczyca a medieval...,,,,,,,,


In [65]:
# Manual fill for Łódź (Poland), the one city left without coordinates.
# The row is selected by country and city name rather than by a hard-coded
# index label: the label depends on how many rows the joins produced, and
# assigning through .loc with a label that is absent would silently append a
# new row instead of filling the intended one.
lodz_rows = (cities_geo2_df['Country'] == 'Poland') & (cities_geo2_df['City'] == 'Łódź')
if lodz_rows.sum() != 1:
    raise ValueError('Expected exactly one row for Łódź, Poland; found %d' % lodz_rows.sum())
cities_geo2_df.loc[lodz_rows, 'Lat'] = 51.75
cities_geo2_df.loc[lodz_rows, 'Lon'] = 19.46667

In [67]:
# Write the geocoded city table to disk (the index is written as the first column)
cities_geo2_df.to_csv('cities_geo2_df.csv', index=True)