In [20]:
from geopandas.tools import geocode
import geopy
import pandas as pd
import geopandas as gpd
import folium
from folium import Marker
import math
from folium.plugins import MarkerCluster
from IPython.display import IFrame

In [2]:
result = geocode("The Great Pyramid of Giza", provider=geopy.geocoders.GeocodeFarm)
result

Unnamed: 0,geometry,address
0,POINT (31.08151 29.98449),"The Ring Road, Giza, Egypt"


In [3]:
point = result.geometry.iloc[0]
print("Latitude:", point.y)
print("Longitude:", point.x)

Latitude: 29.984491259822
Longitude: 31.0815092373299


In [4]:
universities = pd.read_csv("../Data/top_universities.csv")
universities.head()

Unnamed: 0,Name
0,University of Oxford
1,University of Cambridge
2,Imperial College London
3,ETH Zurich
4,UCL


In [7]:
def my_geocoder(row):
    try:
        point = geocode(row, provider=geopy.geocoders.GeocodeFarm).geometry.iloc[0]
        return pd.Series({'Latitude': point.y, 'Longitude': point.x, 'geometry': point})
    except:
        return None

universities[['Latitude', 'Longitude', 'geometry']] = universities.apply(lambda x: my_geocoder(x['Name']), axis=1)

print("{}% of addresses were geocoded!".format(
    (1 - sum(np.isnan(universities["Latitude"])) / len(universities)) * 100))

# Drop universities that were not successfully geocoded
universities = universities.loc[~np.isnan(universities["Latitude"])]
universities = gpd.GeoDataFrame(universities, geometry=universities.geometry)
universities.crs = {'init': 'epsg:4326'}
universities.head()

98.0% of addresses were geocoded!


  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,Name,Latitude,Longitude,geometry
0,University of Oxford,51.756802,-1.254726,POINT (-1.25473 51.75680)
1,University of Cambridge,52.205303,0.116613,POINT (0.11661 52.20530)
2,Imperial College London,51.498997,-0.175495,POINT (-0.17549 51.49900)
3,ETH Zurich,47.376415,8.548102,POINT (8.54810 47.37642)
4,UCL,51.523815,-0.13306,POINT (-0.13306 51.52382)


In [12]:
# Create a map
m = folium.Map(location=[54, 15], tiles='openstreetmap', zoom_start=2)

# Add points to the map
for idx, row in universities.iterrows():
    Marker([row['Latitude'], row['Longitude']], popup=row['Name']).add_to(m)

# Display the map
m

In [13]:
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
europe = world.loc[world.continent == 'Europe'].reset_index(drop=True)

europe_stats = europe[["name", "pop_est", "gdp_md_est"]]
europe_boundaries = europe[["name", "geometry"]]

In [14]:
europe_boundaries.head()

Unnamed: 0,name,geometry
0,Russia,"MULTIPOLYGON (((178.725 71.099, 180.000 71.516..."
1,Norway,"MULTIPOLYGON (((15.143 79.674, 15.523 80.016, ..."
2,France,"MULTIPOLYGON (((-51.658 4.156, -52.249 3.241, ..."
3,Sweden,"POLYGON ((11.027 58.856, 11.468 59.432, 12.300..."
4,Belarus,"POLYGON ((28.177 56.169, 29.230 55.918, 29.372..."


In [15]:
europe_stats.head()

Unnamed: 0,name,pop_est,gdp_md_est
0,Russia,142257519,3745000.0
1,Norway,5320045,364700.0
2,France,67106161,2699000.0
3,Sweden,9960487,498100.0
4,Belarus,9549747,165400.0


In [17]:
# Use an attribute join to merge data about countries in Europe
europe = europe_boundaries.merge(europe_stats, on="name")
europe.head()

Unnamed: 0,name,geometry,pop_est,gdp_md_est
0,Russia,"MULTIPOLYGON (((178.725 71.099, 180.000 71.516...",142257519,3745000.0
1,Norway,"MULTIPOLYGON (((15.143 79.674, 15.523 80.016, ...",5320045,364700.0
2,France,"MULTIPOLYGON (((-51.658 4.156, -52.249 3.241, ...",67106161,2699000.0
3,Sweden,"POLYGON ((11.027 58.856, 11.468 59.432, 12.300...",9960487,498100.0
4,Belarus,"POLYGON ((28.177 56.169, 29.230 55.918, 29.372...",9549747,165400.0


In [18]:
# Use spatial join to match universities to countries in Europe
european_universities = gpd.sjoin(universities, europe)

# Investigate the result
print("We located {} universities.".format(len(universities)))
print("Only {} of the universities were located in Europe (in {} different countries).".format(
    len(european_universities), len(european_universities.name.unique())))

european_universities.head()

We located 98 universities.
Only 96 of the universities were located in Europe (in 15 different countries).


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4326 +type=crs
Right CRS: EPSG:4326

  


Unnamed: 0,Name,Latitude,Longitude,geometry,index_right,name,pop_est,gdp_md_est
0,University of Oxford,51.756802,-1.254726,POINT (-1.25473 51.75680),28,United Kingdom,64769452,2788000.0
1,University of Cambridge,52.205303,0.116613,POINT (0.11661 52.20530),28,United Kingdom,64769452,2788000.0
2,Imperial College London,51.498997,-0.175495,POINT (-0.17549 51.49900),28,United Kingdom,64769452,2788000.0
4,UCL,51.523815,-0.13306,POINT (-0.13306 51.52382),28,United Kingdom,64769452,2788000.0
5,London School of Economics and Political Science,51.513889,-0.11694,POINT (-0.11694 51.51389),28,United Kingdom,64769452,2788000.0


In [21]:
def embed_map(m, file_name):
    m.save(file_name)
    return IFrame(file_name, width='100%', height='500px')

In [22]:
# Load and preview Starbucks locations in California
starbucks = pd.read_csv("../Data/starbucks_locations.csv")
starbucks.head()

Unnamed: 0,Store Number,Store Name,Address,City,Longitude,Latitude
0,10429-100710,Palmdale & Hwy 395,14136 US Hwy 395 Adelanto CA,Adelanto,-117.4,34.51
1,635-352,Kanan & Thousand Oaks,5827 Kanan Road Agoura CA,Agoura,-118.76,34.16
2,74510-27669,Vons-Agoura Hills #2001,5671 Kanan Rd. Agoura Hills CA,Agoura Hills,-118.76,34.15
3,29839-255026,Target Anaheim T-0677,8148 E SANTA ANA CANYON ROAD AHAHEIM CA,AHAHEIM,-117.75,33.87
4,23463-230284,Safeway - Alameda 3281,2600 5th Street Alameda CA,Alameda,-122.28,37.79


In [26]:
print(starbucks.isnull().sum())

# View rows with missing locations
rows_with_missing = starbucks[starbucks["City"]=="Berkeley"]
rows_with_missing

Store Number    0
Store Name      0
Address         0
City            0
Longitude       5
Latitude        5
dtype: int64


Unnamed: 0,Store Number,Store Name,Address,City,Longitude,Latitude
153,5406-945,2224 Shattuck - Berkeley,2224 Shattuck Avenue Berkeley CA,Berkeley,,
154,570-512,Solano Ave,1799 Solano Avenue Berkeley CA,Berkeley,,
155,17877-164526,Safeway - Berkeley #691,1444 Shattuck Place Berkeley CA,Berkeley,,
156,19864-202264,Telegraph & Ashby,3001 Telegraph Avenue Berkeley CA,Berkeley,,
157,9217-9253,2128 Oxford St.,2128 Oxford Street Berkeley CA,Berkeley,,


In [27]:
def my_geocoder(row):
    point = geocode(row, provider=geopy.geocoders.GeocodeFarm).geometry[0]
    return pd.Series({'Longitude': point.x, 'Latitude': point.y})

berkeley_locations = rows_with_missing.apply(lambda x: my_geocoder(x['Address']), axis=1)
starbucks.update(berkeley_locations)

In [28]:
starbucks[starbucks["City"]=="Berkeley"]

Unnamed: 0,Store Number,Store Name,Address,City,Longitude,Latitude
153,5406-945,2224 Shattuck - Berkeley,2224 Shattuck Avenue Berkeley CA,Berkeley,-122.26812,37.868762
154,570-512,Solano Ave,1799 Solano Avenue Berkeley CA,Berkeley,-122.28002,37.891621
155,17877-164526,Safeway - Berkeley #691,1444 Shattuck Place Berkeley CA,Berkeley,-122.269998,37.88148
156,19864-202264,Telegraph & Ashby,3001 Telegraph Avenue Berkeley CA,Berkeley,-122.25942,37.85562
157,9217-9253,2128 Oxford St.,2128 Oxford Street Berkeley CA,Berkeley,-122.26602,37.870362


In [29]:
m_2 = folium.Map(location=[37.88,-122.26], zoom_start=13)

# Add a marker for each Berkeley location
for idx, row in starbucks[starbucks["City"]=='Berkeley'].iterrows():
    Marker([row['Latitude'], row['Longitude']]).add_to(m_2)

embed_map(m_2, 'starbucks_berkeley.html')