We shall begin by importing the following libraries.

In [1]:
import pandas as pd
import numpy as np
import geocoder
from geopy.geocoders import Nominatim
import folium
import requests
from sklearn.cluster import KMeans

Let us obtain the Ogun data by Scraping the wikipedia page:https://en.wikipedia.org/wiki/List_of_Ogun_State_local_government_areas_by_population by pandas using read_html() function.
The dataframe obtained is cleaned by dropping columns that are not needed to obtain the dataframe below.        

In [2]:
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_Ogun_State_local_government_areas_by_population')
df1 = df[0]
#df1.head()
df_1 = df1.drop(['Rank','Population (1991)','Rank.1'], axis = 1)
df_1.head()

Unnamed: 0,LGA,Population (2006)
0,Ifo,539170
1,Ado-Odo/Ota,527242
2,Ijebu North,280520
3,Shagamu,255885
4,Abeokuta South,250295


Further cleaning results in the dataframe df2

In [3]:
df2 = df_1[0:20]
df2

Unnamed: 0,LGA,Population (2006)
0,Ifo,539170
1,Ado-Odo/Ota,527242
2,Ijebu North,280520
3,Shagamu,255885
4,Abeokuta South,250295
5,Obafemi-Owode,235071
6,Abeokuta North,198793
7,Egbado North,183844
8,Egbado South,168336
9,Ijebu Ode,157161


The latitude and longitude of the various local government aress(LGA) of the state is read using geopy library package.
The information is processed into a dataframe called Ogun_data.

In [7]:
location = [x for x in df2['LGA'].unique().tolist() 
            if type(x) == str]
latitude = []
longitude =  []
for i in range(0, len(location)):
    
    try:
        address = location[i] 
        geolocator = Nominatim(user_agent="ny_explorer")
        loc = geolocator.geocode(address)
        latitude.append(loc.latitude)
        longitude.append(loc.longitude)        
    except:
        # in the case the geolocator does not work, then add nan element to list
        # to keep the right size
        latitude.append(np.nan)
        longitude.append(np.nan)
# create a dataframe with the locatio, latitude and longitude
Ogun_data = pd.DataFrame({'LGA':location, 
                    'Latitude': latitude,
                    'Longitude':longitude})
Ogun_data



Unnamed: 0,LGA,Latitude,Longitude
0,Ifo,6.756297,3.241557
1,Ado-Odo/Ota,6.624476,3.082307
2,Ijebu North,7.032131,3.991457
3,Shagamu,6.847716,3.644055
4,Abeokuta South,7.159289,3.364475
5,Obafemi-Owode,6.924008,3.444462
6,Abeokuta North,7.234495,3.201259
7,Egbado North,7.126063,2.940857
8,Egbado South,6.777802,2.958357
9,Ijebu Ode,6.814008,3.915167


The latitude and longitude of Ogun state is obtained using geopy library package in order to draw the map of the state.

In [8]:
address = 'Ogun State'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Ogun State are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Ogun State are 6.9788582, 3.4389293.


Using folium package, the map of the state is drawn at a zoom level of 11.

In [9]:
map_Ogun = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(Ogun_data['Latitude'], Ogun_data['Longitude'], Ogun_data['LGA']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Ogun)  
    
map_Ogun

We need to get the client id, client_secret, version and limit before making foursquare API calls
to retrieve the geolocation information about each venue in the LGAs

In [10]:
CLIENT_ID = 'EQXML33DSFG2S5PDDJWTIHIWRRMXHAMZFAOBXFPYNLKKEHFH'
CLIENT_SECRET = 'XO2QIJCNWWXQTLWO35HXE1XCIS321JJVUNLOCMFB10BVPEGB' 
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: EQXML33DSFG2S5PDDJWTIHIWRRMXHAMZFAOBXFPYNLKKEHFH
CLIENT_SECRET:XO2QIJCNWWXQTLWO35HXE1XCIS321JJVUNLOCMFB10BVPEGB


Define a function getNearbyVenues to explore the venue of each LGA of the state making use of foursquare API calls.

In [11]:
def getNearbyVenues(names, latitudes, longitudes, radius=20000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['LGA', 
                  'LGA Latitude', 
                  'LGA Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

The resulting dataframe returned by the function is called Ogun_venues.

In [12]:
Ogun_venues = getNearbyVenues(names=Ogun_data['LGA'],
                                   latitudes=Ogun_data['Latitude'],
                                   longitudes=Ogun_data['Longitude']
                                  )

Ifo
Ado-Odo/Ota
Ijebu North
Shagamu
Abeokuta South
Obafemi-Owode
Abeokuta North
Egbado North
Egbado South
Ijebu Ode
Ipokia
Odogbolu
Ikenne
Odeda
Ijebu East
Imeko Afon
Ogun Waterside
Ijebu North East
Remo North
Ewekoro


Display the shape of Ogun_venues dataframe and its first five rows.

In [13]:
print(Ogun_venues.shape)
Ogun_venues

(92, 7)


Unnamed: 0,LGA,LGA Latitude,LGA Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Ifo,6.756297,3.241557,comedyonradio.com,6.703059,3.258409,Comedy Club
1,Ifo,6.756297,3.241557,justrite superstore,6.677279,3.284469,Shopping Mall
2,Ifo,6.756297,3.241557,Tantalisers,6.649299,3.265609,Burger Joint
3,Ifo,6.756297,3.241557,New Afrika Shrine,6.622918,3.356946,Performing Arts Venue
4,Ifo,6.756297,3.241557,Tastee Fried Chicken,6.631432,3.339814,Fast Food Restaurant
...,...,...,...,...,...,...,...
87,Ewekoro,6.961699,3.173441,Ewekoro,6.903178,3.206988,Scenic Lookout
88,Ewekoro,6.961699,3.173441,Abokı Spot,6.869381,3.192989,BBQ Joint
89,Ewekoro,6.961699,3.173441,Food Court,6.824277,3.211899,Seafood Restaurant
90,Ewekoro,6.961699,3.173441,Ilaro Main Park,6.897541,3.022292,Bus Station


Group the dataframe by LGA and find the number of venue category.

In [14]:
Ogun_venues.groupby('LGA').count()

Unnamed: 0_level_0,LGA Latitude,LGA Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
LGA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Abeokuta North,4,4,4,4,4,4
Abeokuta South,7,7,7,7,7,7
Ado-Odo/Ota,7,7,7,7,7,7
Egbado North,1,1,1,1,1,1
Egbado South,7,7,7,7,7,7
Ewekoro,7,7,7,7,7,7
Ifo,10,10,10,10,10,10
Ijebu East,3,3,3,3,3,3
Ijebu North,6,6,6,6,6,6
Ijebu North East,5,5,5,5,5,5


In [15]:
print('There are {} uniques categories.'.format(len(Ogun_venues['Venue Category'].unique())))

There are 42 uniques categories.


In [16]:
Ogun_onehot = pd.get_dummies(Ogun_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Ogun_onehot['LGA'] = Ogun_venues['LGA'] 

# move neighborhood column to the first column
fixed_columns = [Ogun_onehot.columns[-1]] + list(Ogun_onehot.columns[:-1])
Ogun_onehot = Ogun_onehot[fixed_columns]

Ogun_onehot.head(30)

Unnamed: 0,LGA,African Restaurant,Athletics & Sports,BBQ Joint,Bank,Bar,Bed & Breakfast,Bistro,Border Crossing,Botanical Garden,...,Performing Arts Venue,Plaza,Pub,Rock Climbing Spot,Scenic Lookout,Seafood Restaurant,Shopping Mall,Stadium,Strip Club,Tea Room
0,Ifo,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Ifo,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2,Ifo,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Ifo,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,Ifo,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,Ifo,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Ifo,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,Ifo,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,Ifo,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Ifo,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
Ogun_onehot.shape

(92, 43)

In [18]:
Ogun_grouped = Ogun_onehot.groupby('LGA').mean().reset_index()
Ogun_grouped

Unnamed: 0,LGA,African Restaurant,Athletics & Sports,BBQ Joint,Bank,Bar,Bed & Breakfast,Bistro,Border Crossing,Botanical Garden,...,Performing Arts Venue,Plaza,Pub,Rock Climbing Spot,Scenic Lookout,Seafood Restaurant,Shopping Mall,Stadium,Strip Club,Tea Room
0,Abeokuta North,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Abeokuta South,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0
2,Ado-Odo/Ota,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.142857,0.0,0.0,0.0,0.142857,0.0,0.142857,0.0
3,Egbado North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Egbado South,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.142857,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0
5,Ewekoro,0.0,0.0,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.142857,0.142857,0.142857,0.0,0.0,0.0,0.0
6,Ifo,0.0,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,...,0.1,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0
7,Ijebu East,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.333333,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Ijebu North,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667
9,Ijebu North East,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.2,0.0,0.0


In [19]:
Ogun_grouped.shape

(20, 43)

In [20]:
num_top_venues = 5

for hood in Ogun_grouped['LGA']:
    #print("----"+hood+"----")
    temp = Ogun_grouped[Ogun_grouped['LGA'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
   # print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))


In [21]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Find the top ten venues across all the local government areas in the state.

In [22]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['LGA']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['LGA'] = Ogun_grouped['LGA']

for ind in np.arange(Ogun_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Ogun_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head(10)

Unnamed: 0,LGA,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Abeokuta North,African Restaurant,Golf Course,Hotel Bar,Bus Stop,Food Truck,Fast Food Restaurant,Convenience Store,Comedy Club,Chinese Restaurant,Campground
1,Abeokuta South,Football Stadium,Rock Climbing Spot,Bus Stop,Golf Course,Hotel Bar,Park,African Restaurant,Pub,Botanical Garden,Chinese Restaurant
2,Ado-Odo/Ota,Hotel,Convenience Store,Market,Strip Club,Shopping Mall,Pub,Comedy Club,Chinese Restaurant,Campground,Café
3,Egbado North,Motorcycle Shop,Tea Room,Fast Food Restaurant,Convenience Store,Comedy Club,Chinese Restaurant,Campground,Café,Bus Stop,Bus Station
4,Egbado South,Bus Station,Shopping Mall,Plaza,Park,Tea Room,Burger Joint,Convenience Store,Comedy Club,Chinese Restaurant,Campground
5,Ewekoro,Bus Station,BBQ Joint,Seafood Restaurant,Scenic Lookout,Rock Climbing Spot,Bank,Mobile Phone Shop,Tea Room,Convenience Store,Comedy Club
6,Ifo,Fast Food Restaurant,Athletics & Sports,Shopping Mall,BBQ Joint,Comedy Club,Chinese Restaurant,Performing Arts Venue,Hotel,Burger Joint,Market
7,Ijebu East,Kids Store,Bistro,Border Crossing,Tea Room,Bus Stop,Fast Food Restaurant,Convenience Store,Comedy Club,Chinese Restaurant,Campground
8,Ijebu North,Lounge,Tea Room,Mobile Phone Shop,Bank,Campground,Fast Food Restaurant,Convenience Store,Comedy Club,Chinese Restaurant,Café
9,Ijebu North East,Market,Stadium,Shopping Mall,Bank,Campground,Tea Room,Convenience Store,Comedy Club,Chinese Restaurant,Café


In [23]:
kclusters = 5

Ogun_grouped_clustering = Ogun_grouped.drop('LGA', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Ogun_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 2, 0, 4, 0, 0, 3, 3, 3, 3])

In [24]:
print('Thank You')

Thank You
