In [None]:
!pip install --upgrade numpy

In [None]:
!pip install --upgrade pandas

In [None]:
!pip install --upgrade geopandas

In [None]:
!pip install --upgrade geopy

In [None]:
!pip install --upgrade folium

In [2]:
import folium
from folium import Marker
import geopandas as gpd
from geopy.geocoders import Nominatim
import pandas as pd
import numpy as np

In [3]:
# Geocode a single place name with Nominatim.
#
# A successful lookup returns a geopy.location.Location object; two of its
# attributes are used here:
#   - "point":   the (latitude, longitude) location
#   - "address": the full address
geolocator = Nominatim(user_agent="mrembo")
location = geolocator.geocode("Konza Technopolis")

# One value per output line: first the point, then the address.
print(location.point, location.address, sep="\n")

1 41m 15.7279s S, 37 11m 4.6788s E
Konza Technopolis, Kalama ward, Machakos Town, Machakos, Eastern, Kenya


In [4]:
# Pull the numeric coordinates out of the geocoded point and print them.
point = location.point
print("Konza Technopolis Latitude : {}".format(point.latitude))
print("Konza Technopolis longitude : {}".format(point.longitude))

Konza Technopolis Latitude : -1.6877022
Konza Technopolis longitude : 37.184633


# Read a list of universities from a text file and geocode them

In [5]:
# Load the university names into a one-column frame. Every line of the file
# is treated as data (no header row) and the column is named explicitly.
sa_universities = pd.read_csv(
    "south_african_universities.txt",
    header=None,
    names=['university_name'],
)
sa_universities

Unnamed: 0,university_name
0,University of Cape Town
1,University of Witwatersrand
2,University of Pretoria
3,University of South Africa
4,tshwane university of technology
5,Maseno Univeristy
6,Egerton University
7,Makelele University
8,Kampala University
9,University of Cape Town


In [6]:
# Remove repeated university names and renumber the remaining rows from 0
sa_universities = sa_universities.drop_duplicates(ignore_index=True)
sa_universities

Unnamed: 0,university_name
0,University of Cape Town
1,University of Witwatersrand
2,University of Pretoria
3,University of South Africa
4,tshwane university of technology
5,Maseno Univeristy
6,Egerton University
7,Makelele University
8,Kampala University
9,University of Fort Hare


In [7]:
def university_geocoder(row, geocoder=None):
    """Geocode one university name into a latitude/longitude pair.

    Parameters
    ----------
    row : str
        The free-text name to look up (callers pass one ``university_name``
        value per call).
    geocoder : object, optional
        Any object exposing ``geocode(query)`` whose result has a ``point``
        attribute with ``latitude`` and ``longitude``. Defaults to the
        notebook-level ``geolocator``.

    Returns
    -------
    pandas.Series
        A Series indexed by ``'Latitude'`` and ``'Longitude'``. Both values
        are NaN when the geocoder returns nothing or the lookup fails, so
        every call yields the same shape and the result of ``DataFrame.apply``
        does not depend on whether the first row happened to be found.
    """
    if geocoder is None:
        geocoder = geolocator

    not_found = pd.Series({'Latitude': np.nan, 'Longitude': np.nan})

    try:
        location = geocoder.geocode(row)
        if location is None:
            # The geocoder found no match for this name.
            return not_found
        point = location.point
        return pd.Series(
            {
                'Latitude': point.latitude,
                'Longitude': point.longitude
            })
    except Exception:
        # Best-effort lookup: one failing name must not abort the whole run.
        # Catching Exception (rather than a bare except) still lets
        # KeyboardInterrupt / SystemExit stop a long geocoding loop.
        return not_found

In [8]:
# Geocode every university name row by row and store the results in two new
# columns; names that could not be geocoded show up as NaN.
sa_universities[['Latitude', 'Longitude']] = sa_universities.apply(
    lambda record: university_geocoder(record['university_name']),
    axis=1,
)
sa_universities

Unnamed: 0,university_name,Latitude,Longitude
0,University of Cape Town,-33.956756,18.467578
1,University of Witwatersrand,-26.188877,28.024791
2,University of Pretoria,-25.754335,28.230858
3,University of South Africa,-25.750999,28.192093
4,tshwane university of technology,-25.751184,28.186667
5,Maseno Univeristy,,
6,Egerton University,-0.368569,35.932296
7,Makelele University,,
8,Kampala University,0.260518,32.635627
9,University of Fort Hare,-32.786067,26.850842


# Notice that some universities were not geocoded

In [9]:
# Number of universities without a longitude, i.e. not geocoded

int(sa_universities['Longitude'].isna().sum())

2

In [10]:
# Percentage of universities that were not geocoded:
# (rows with a missing longitude) / (all rows) * 100

int(sa_universities['Longitude'].isna().sum()) / len(sa_universities) * 100

9.523809523809524

In [11]:
# Percentage of universities that were successfully geocoded: the complement
# of the share of rows with a missing longitude, rounded to 3 decimal places.
successfully_geocoded = np.around(
    (1 - int(sa_universities["Longitude"].isna().sum()) / len(sa_universities)) * 100,
    decimals=3,
)

print(f"{successfully_geocoded}% of universities were successfully geocoded!")

90.476% of universities were successfully geocoded!


In [12]:
# Show the table again; rows with NaN in Latitude/Longitude were not geocoded.
sa_universities


Unnamed: 0,university_name,Latitude,Longitude
0,University of Cape Town,-33.956756,18.467578
1,University of Witwatersrand,-26.188877,28.024791
2,University of Pretoria,-25.754335,28.230858
3,University of South Africa,-25.750999,28.192093
4,tshwane university of technology,-25.751184,28.186667
5,Maseno Univeristy,,
6,Egerton University,-0.368569,35.932296
7,Makelele University,,
8,Kampala University,0.260518,32.635627
9,University of Fort Hare,-32.786067,26.850842


# drop the unsuccessfully geocoded universities

In [13]:
# Boolean mask: True for every row whose longitude is missing (not geocoded)
sa_universities["Longitude"].isna()

0     False
1     False
2     False
3     False
4     False
5      True
6     False
7      True
8     False
9     False
10    False
11    False
12    False
13    False
14    False
15    False
16    False
17    False
18    False
19    False
20    False
Name: Longitude, dtype: bool

In [14]:
# Keep only the rows that have a longitude, then renumber the index from 0
sa_universities = (
    sa_universities
    .dropna(subset=["Longitude"])
    .reset_index(drop=True)
)
sa_universities

Unnamed: 0,university_name,Latitude,Longitude
0,University of Cape Town,-33.956756,18.467578
1,University of Witwatersrand,-26.188877,28.024791
2,University of Pretoria,-25.754335,28.230858
3,University of South Africa,-25.750999,28.192093
4,tshwane university of technology,-25.751184,28.186667
5,Egerton University,-0.368569,35.932296
6,Kampala University,0.260518,32.635627
7,University of Fort Hare,-32.786067,26.850842
8,University of the Free State,-29.110668,26.186476
9,University of KwaZulu-Natal,-29.866443,30.98131


# Adding a geometry column to the dataset

In [19]:
# Turn the table into a GeoDataFrame with one point per university
# (x = longitude, y = latitude).
# The CRS is passed as an "EPSG:4326" authority string: the older
# {'init': 'epsg:4326'} dict form is deprecated and emits a warning.
sa_universities = gpd.GeoDataFrame(
    sa_universities,
    geometry=gpd.points_from_xy(sa_universities.Longitude, sa_universities.Latitude),
    crs="EPSG:4326",
)
sa_universities

  in_crs_string = _prepare_from_proj_string(in_crs_string)


Unnamed: 0,university_name,Latitude,Longitude,geometry
0,University of Cape Town,-33.956756,18.467578,POINT (18.46758 -33.95676)
1,University of Witwatersrand,-26.188877,28.024791,POINT (28.02479 -26.18888)
2,University of Pretoria,-25.754335,28.230858,POINT (28.23086 -25.75434)
3,University of South Africa,-25.750999,28.192093,POINT (28.19209 -25.751)
4,tshwane university of technology,-25.751184,28.186667,POINT (28.18667 -25.75118)
5,Egerton University,-0.368569,35.932296,POINT (35.9323 -0.36857)
6,Kampala University,0.260518,32.635627,POINT (32.63563 0.26052)
7,University of Fort Hare,-32.786067,26.850842,POINT (26.85084 -32.78607)
8,University of the Free State,-29.110668,26.186476,POINT (26.18648 -29.11067)
9,University of KwaZulu-Natal,-29.866443,30.98131,POINT (30.98131 -29.86644)


# Visualize the locations

In [18]:
# Create a map centred on the geocoded universities (the mean of their
# coordinates) so the markers are in view when the map first renders.
# folium and Marker are already imported in the notebook's import cell.
m = folium.Map(
    location=[sa_universities['Latitude'].mean(), sa_universities['Longitude'].mean()],
    tiles='openstreetmap',
    zoom_start=4,
)

# Add one marker per university, using its name as the popup text
for name, latitude, longitude in zip(
    sa_universities['university_name'],
    sa_universities['Latitude'],
    sa_universities['Longitude'],
):
    Marker([latitude, longitude], popup=name).add_to(m)

# Display the map
m