**Importing packages**

In [0]:
import requests
import pandas as pd
import re
# import json
import pandas as pd
from bs4 import BeautifulSoup as BSoup
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

**Initializing source and destination variables**

In [0]:
BASE_URI = 'https://en.wikipedia.org'

# Download the list page. The timeout keeps the cell from hanging forever on a
# stalled connection.
page = requests.get(BASE_URI + '/wiki/List_of_neighbourhoods_in_Toronto', timeout=10)
# Fail loudly on an HTTP error; otherwise an error page would be parsed and the
# selectors below would silently match nothing.
page.raise_for_status()
soup = BSoup(page.content, 'html.parser')

# Headings and multi-column tables inside the article body. Later code pairs
# them by position (heading i with table i).
boroughs_list = soup.select(".mw-parser-output h3")
neighbours_list = soup.select(".mw-parser-output div table.multicol")

city_info = list()

column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)


**Making use of List of neighbourhoods page to scrape neighbourhoods in Toronto**

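The latitude and longitude values are looked up with the Nominatim geocoder first; when it finds no match, they are scraped from the neighbourhood's own Wikipedia page instead, using these functions: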

In [0]:
def geo_calculator(value):
  """Convert split degree/minute/second parts into signed decimal degrees.

  Args:
    value: sequence of strings whose last item is the hemisphere letter and
      whose leading items are degrees, optionally followed by minutes and
      seconds -- i.e. 2, 3 or 4 items in total.

  Returns:
    The coordinate as a float: positive for N/E, negative for S/W.

  Raises:
    ValueError: if the number of parts is not 2-4, a numeric part cannot be
      parsed, or the hemisphere is not one of N, E, S, W.
  """
  part_count = len(value)
  if part_count not in (2, 3, 4):
    raise ValueError(
        'expected 2 to 4 coordinate parts, got %d: %r' % (part_count, value))

  decimal = float(value[0])
  if part_count >= 3:
    decimal = decimal + (float(value[1])/60)
  if part_count == 4:
    decimal = decimal + (float(value[2])/3600)

  # Normalise and validate the hemisphere so that a lowercase letter is still
  # recognised and a missing/garbled one is reported instead of silently
  # flipping the sign.
  hemisphere = value[-1].strip().upper()
  if hemisphere not in ('N', 'E', 'S', 'W'):
    raise ValueError('unrecognised hemisphere %r in %r' % (value[-1], value))
  return decimal if hemisphere in ('N', 'E') else -decimal

def _coordinate_from_soup(soup_, dms_class, decimal_index):
  """Return one coordinate of a parsed page as a float in decimal degrees."""
  dms_matches = soup_.select('.geo-default .geo-dms .' + dms_class)
  if dms_matches:
    # Preferred source: the degree/minute/second element, split on the
    # degree, minute and second symbols.
    return geo_calculator(re.split(u'[°′″]', dms_matches[0].get_text()))

  # Fallback: the decimal element, whose text is split on '; ' into
  # latitude (index 0) and longitude (index 1).
  decimal_matches = soup_.select('.geo-default .geo')
  if not decimal_matches:
    raise IndexError('no coordinate element found in the page')
  # Convert to float so both paths return the same type.
  return float(decimal_matches[0].get_text().split('; ')[decimal_index])


def scrape_geodata(url, timeout=10):
  """Scrape the latitude and longitude shown on a web page.

  Args:
    url: absolute URL of the page to download.
    timeout: seconds to wait for the server before giving up.

  Returns:
    A (latitude, longitude) tuple of floats in decimal degrees.

  Raises:
    IndexError: if the page has neither a DMS nor a decimal coordinate
      element.
    ValueError: if a coordinate's text cannot be parsed.
  """
  page_ = requests.get(url, timeout=timeout)
  soup_ = BSoup(page_.content, 'html.parser')
  latitude = _coordinate_from_soup(soup_, 'latitude', 0)
  longitude = _coordinate_from_soup(soup_, 'longitude', 1)
  return latitude, longitude

def get_geodata(url, user_agent='toronto-neighbourhoods-notebook'):
  """Return (latitude, longitude) for the article at a site-relative URL.

  The last path segment of ``url`` (underscores turned into spaces) is sent
  to the Nominatim geocoder; if that yields no match, the coordinates are
  scraped from the article page itself.

  Args:
    url: article path relative to BASE_URI.
    user_agent: application identifier sent to Nominatim. geopy requires a
      custom value rather than its built-in default.

  Returns:
    A (latitude, longitude) tuple.
  """
  geolocator = Nominatim(user_agent=user_agent)
  # NOTE(review): only the bare article title is geocoded, with no city
  # context, so an ambiguous name may resolve to a place elsewhere -- confirm
  # the results are plausible.
  geo_name = url.split('/')[-1].replace('_', ' ')
  location = geolocator.geocode(geo_name)
  if location:
    return location.latitude, location.longitude
  return scrape_geodata(BASE_URI + url)

# Build one record per linked neighbourhood, tagged with its borough.
neighbourhood_data = list()
for index, neighbours_table in enumerate(neighbours_list):
  # Heading i is taken to be the borough for table i (paired by position).
  this_borough = boroughs_list[index].find('a').get_text()
  # Drop non-ASCII characters but decode back to text, so the frame holds
  # strings rather than bytes objects.
  borough_label = this_borough.encode('ascii', 'ignore').decode('ascii')
  for neighbours in neighbours_table.select('td li'):
    link = neighbours.find('a')
    if link is None:
      # No article link means nothing to name or geocode; skip the item.
      continue
    neighbour_url = link.get('href')
    neighbour_name = link.get_text()
    latitude, longitude = get_geodata(neighbour_url)
    neighbourhood_data.append({
        'Borough': borough_label,
        'Neighborhood': neighbour_name.encode('ascii', 'ignore').decode('ascii'),
        'Latitude': latitude,
        'Longitude': longitude,
    })

# Build the frame once from all records: DataFrame.append was removed in
# pandas 2.0, and row-by-row appends copy the frame every time. This also
# makes re-running the cell idempotent.
neighborhoods = pd.DataFrame(neighbourhood_data, columns=column_names)

print(neighborhoods.shape)