# Toronto Child Care Centres
---

[Data Source](https://www.toronto.ca/data/children/dmc/a2z/a2za.html)

### Importing libraries

In [1]:
import string
import urllib
import urllib.request  # `import urllib` alone does not load the `request` submodule

import geocoder
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup as bs

%matplotlib inline

### Collecting Data

In [2]:
# One entry per lowercase ASCII letter, used to build the A-Z index page URLs.
page_index = list(string.ascii_lowercase)

In [3]:
# Exclude the letters 'x' and 'z' from the index
# (presumably no listing pages exist for them - TODO confirm against the site).
# Filtering into a new list, rather than calling list.remove(), keeps this cell
# safe to re-run: remove() raises ValueError once the letter is already gone.
page_index = [letter for letter in page_index if letter not in ('x', 'z')]

In [4]:
# Build one index-page URL per remaining letter.
pages = [
    'https://www.toronto.ca/data/children/dmc/a2z/a2z' + letter + '.html'
    for letter in page_index
]

In [5]:
# pages

In [6]:
# Collect the href of every anchor on every index page.
# Entries are raw attribute values: an anchor without an href contributes None.
centres = []

for page in pages:
    # Use the response as a context manager so the connection is closed
    # as soon as the body has been read, instead of being leaked.
    with urllib.request.urlopen(urllib.request.Request(page)) as response:
        link_html = response.read()
    soup_obj = bs(link_html, 'html.parser')

    # find_all is the current bs4 name; findAll is a legacy alias.
    centres.extend(link.get('href') for link in soup_obj.find_all('a'))

In [7]:
# Number of hrefs collected from all index pages, before any filtering.
len(centres)

1870

In [8]:
# Keep only the links whose href contains 'webreg'.
# The truthiness check skips None entries (anchors with no href attribute),
# which would otherwise raise TypeError in the substring test.
centresfiltered = [centre for centre in centres if centre and 'webreg' in centre]
len(centresfiltered)

1030

In [9]:
# centresfiltered[1000:1004]

In [10]:
# Turn each filtered href into an absolute URL. The first three characters of
# the href are discarded (presumably a leading '../' - TODO confirm) and the
# remainder is appended to the site's base path.
centre_links = [
    'https://www.toronto.ca/data/children/dmc/' + href[3:]
    for href in centresfiltered
]

In [11]:
# centre_links[1000:1004]

In [12]:
%%time

name = []
address = []
coordinates = []
latitude = []
longitude = []

counter = 0

for i in centre_links:
    
    link_html = urllib.request.urlopen(urllib.request.Request(i)).read()
    soup_obj = bs(link_html, 'html.parser')
    
    nameX = soup_obj.findAll("h2", {"class", "csd_title"})[0].text[:-7]
    name.append(nameX)
      
    address1 = soup_obj.find("p").text
    address2 = address1.replace("\r", "")
    address3 = address2.replace("\t", "")
    address4 = address3.replace("\n", "")
    address5 = address4.replace("\xa0", "")
    address6 = address5.replace(")", "")
    address7 = address6.split("(")
    addressX = address7[0] + ', Toronto, Ontario'
    address.append(addressX)
    
    g = geocoder.google(addressX)
    coordinateX = g.latlng
    coordinates.append(coordinateX)
    
    try:
        latitudeX = coordinateX[0]
    except:
        latitudeX = ''
        
    latitude.append(latitudeX)
    
    try:
        longitudeX = coordinateX[1]
    except:
        longitudeX = ''
        
    longitude.append(longitudeX)

    counter = counter + 1
    
    # print(counter,". " ,nameX,"\t",addressX,"\t",coordinateX,"\t",latitudeX,"\t",longitudeX)


Wall time: 12min 21s


### Checking Data Collected

In [13]:
# Number of records found - one length per collected list, printed on one line
print(*(len(column) for column in (name, address, coordinates, latitude, longitude)))

1030 1030 1030 1030 1030


In [14]:
# Assemble the scraped lists into a table with one row per centre.
column_order = ['name', 'address', 'coordinates', 'latitude', 'longitude']
scraped = dict(zip(column_order, [name, address, coordinates, latitude, longitude]))

# Drop every row that has any missing value, then renumber the index from 0
# without keeping the old index as a column.
df = (
    pd.DataFrame(scraped, columns=column_order)
    .dropna(axis=0, how='any')
    .reset_index(drop=True)
)

# sample of data collection (table view)
df.sample(5)

Unnamed: 0,name,address,coordinates,latitude,longitude
198,Holy Family Childcare Centre-Queen Victoria,"141 Close Ave, Toronto, Ontario","[43.63857410000001, -79.4346151]",43.6386,-79.4346
419,St. Clement's Early Learning School,"70 St Clements Ave, Toronto, Ontario","[43.7128576, -79.4020283]",43.7129,-79.402
493,Waterfront Montessori Childrens Centre,"18 Wyandot Ave, Toronto, Ontario","[43.6264831, -79.3621758]",43.6265,-79.3622
16,Balmy Beach Community Day Care Centre,"14 Pine Ave,Room 107, Toronto, Ontario","[43.6764445, -79.2898302]",43.6764,-79.2898
281,Martingrove Child Care Centre,"256 John Garland Blvd,131 & 132, Toronto, Ontario","[43.7346432, -79.5910821]",43.7346,-79.5911


In [15]:
# Total number of records in df, i.e. the rows left after rows with missing values were dropped
len(df)

517

In [16]:
import folium
from folium.plugins import MarkerCluster

In [17]:
# Centre the map on the mean latitude/longitude of the centres in df.
location = np.mean(df['latitude']), np.mean(df['longitude'])

# One [latitude, longitude] pair per row, in row order.
locationlist = df[['latitude', 'longitude']].values.tolist()

labels = df['name']

# Named centre_map rather than `map` so the builtin map() is not shadowed
# for the rest of the kernel session.
centre_map = folium.Map(location=location,
                        zoom_start=12,
                        tiles='OpenStreetMap')

marker_cluster = MarkerCluster().add_to(centre_map)

# Pair coordinates and names by position. Indexing the Series with an integer
# (labels[i]) is a label lookup and is only correct while the index is exactly 0..n-1.
for point, label in zip(locationlist, labels):
    folium.Marker(
        point,
        popup=folium.Popup(label, parse_html=True),
        icon=folium.Icon(color='blue', icon='fas fa-child', prefix='fa'),
    ).add_to(marker_cluster)

display(centre_map)