In [19]:
from geopy.geocoders import Nominatim
import pandas as pd
import numpy as np
import folium
from folium import Marker

In [10]:
geolocator = Nominatim(user_agent="kaggle_learn")
location = geolocator.geocode("HD1 3AY")

print(location.point)
print(location.address)

53 38m 21.444s N, 1 46m 49.44s W
HD1 3AY, Kirklees, West Yorkshire, England, United Kingdom


In [11]:
point = location.point

print(location.latitude)
print(location.longitude)



53.63929
-1.7804


In [12]:
universities = pd.read_csv('top_universities.csv')

universities.head()

Unnamed: 0,Name
0,University of Oxford
1,University of Cambridge
2,Imperial College London
3,ETH Zurich
4,UCL


In [13]:
def my_geocoder(row):
    try:
        point = geolocator.geocode(row).point
        return pd.Series({'Latitude': point.latitude, 'Longitude': point.longitude})
    except:
        return None

universities[['Latitude', 'Longitude']] = universities.apply(lambda x: my_geocoder(x['Name']), axis=1)


print("{}% of addresses were geocoded!".format(
    (1 - sum(np.isnan(universities["Latitude"])) / len(universities)) * 100))

90.0% of addresses were geocoded!


In [14]:
# Drop universities that were not successfully geocoded
import geopandas as gpd
universities = universities.loc[~np.isnan(universities["Latitude"])]
universities = gpd.GeoDataFrame(
    universities, geometry=gpd.points_from_xy(universities.Longitude, universities.Latitude), crs='epsg:4326')
# universities.crs = {'init': 'epsg:4326'}
universities.head()

Unnamed: 0,Name,Latitude,Longitude,geometry
0,University of Oxford,51.758708,-1.255668,POINT (-1.25567 51.75871)
1,University of Cambridge,52.210946,0.092005,POINT (0.092 52.21095)
2,Imperial College London,51.498959,-0.175641,POINT (-0.17564 51.49896)
3,ETH Zurich,47.413218,8.537491,POINT (8.53749 47.41322)
4,UCL,51.521785,-0.135151,POINT (-0.13515 51.52179)


In [22]:
m = folium.Map(location=[54, 15], tiles='openstreetmap', zoom_start=2)

for idx, row in universities.iterrows():
     Marker([row['Latitude'], row['Longitude']], popup=row['Name']).add_to(m)

m

In [81]:
from common_utils import EUROPEAN_COUNTRIES
world = gpd.read_file('../natural-earth-countries-1_110m.geojson')
europe = world[world.sovereignt.isin(EUROPEAN_COUNTRIES)].reset_index(drop=True)

europe_stats = europe[['featurecla', 'scalerank', 'sovereignt']]
europe_boundries = europe[['sovereignt', 'geometry']]


europe_stats.head()

Unnamed: 0,featurecla,scalerank,sovereignt
0,Admin-0 country,1,Croatia
1,Admin-0 country,1,France
2,Admin-0 country,1,Ukraine
3,Admin-0 country,1,Switzerland
4,Admin-0 country,1,Luxembourg


In [85]:
europe = europe_boundries.merge(europe_stats, on='sovereignt')
europe.head()

Unnamed: 0,sovereignt,geometry,featurecla,scalerank
0,Croatia,"POLYGON ((16.56481 46.50375, 16.88252 46.38063...",Admin-0 country,1
1,France,"POLYGON ((165.77999 -21.08, 166.59999 -21.7000...",Admin-0 country,1
2,France,"POLYGON ((165.77999 -21.08, 166.59999 -21.7000...",Admin-0 country,3
3,France,"POLYGON ((165.77999 -21.08, 166.59999 -21.7000...",Admin-0 country,1
4,Ukraine,"POLYGON ((31.78599 52.10168, 32.15944 52.06125...",Admin-0 country,1


In [91]:
european_universities = gpd.sjoin(universities, europe)


print("We located {} universities.".format(len(universities)))
print("Only {} of the universities were located in Europe (in {} different countries).".format(
    len(european_universities), len(european_universities.sovereignt.unique())))

european_universities.head()

We located 90 universities.
Only 124 of the universities were located in Europe (in 15 different countries).


Unnamed: 0,Name,Latitude,Longitude,geometry,index_right,sovereignt,featurecla,scalerank
0,University of Oxford,51.758708,-1.255668,POINT (-1.25567 51.75871),11,United Kingdom,Admin-0 country,1
0,University of Oxford,51.758708,-1.255668,POINT (-1.25567 51.75871),12,United Kingdom,Admin-0 country,1
1,University of Cambridge,52.210946,0.092005,POINT (0.092 52.21095),11,United Kingdom,Admin-0 country,1
1,University of Cambridge,52.210946,0.092005,POINT (0.092 52.21095),12,United Kingdom,Admin-0 country,1
2,Imperial College London,51.498959,-0.175641,POINT (-0.17564 51.49896),11,United Kingdom,Admin-0 country,1


'common_utils'