# Toronto Child Care Centres
---

[Data Source](https://www.toronto.ca/data/children/dmc/a2z/a2za.html)

### Importing libraries

In [1]:
import string
import urllib
import urllib.request

import geocoder
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup as bs

%matplotlib inline

### Collecting Data

In [3]:
# Index letters for the A-Z listing pages: one entry per lowercase ASCII letter.
page_index = list(string.ascii_lowercase)

In [4]:
# Drop the letters 'x' and 'z' from the page index (presumably the site has no
# listing pages for them -- TODO confirm).
# The list is rebuilt instead of calling .remove() so the cell is safe to
# re-run: list.remove raises ValueError once the letter is already gone.
page_index = [letter for letter in page_index if letter not in ('x', 'z')]

In [5]:
# One A-Z listing URL per remaining index letter.
pages = [
    'https://www.toronto.ca/data/children/dmc/a2z/a2z' + letter + '.html'
    for letter in page_index
]

In [6]:
# Collect the href of every anchor found on each A-Z listing page.
centres = []

for page in pages:
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(urllib.request.Request(page)) as response:
        link_html = response.read()
    soup_obj = bs(link_html, 'html.parser')

    # href=True skips anchors that have no href attribute, so `centres` never
    # contains None (which would make a later "'webreg' in centre" test raise
    # TypeError).
    centres.extend(link['href'] for link in soup_obj.find_all('a', href=True))
        

In [7]:
# Total number of hrefs collected across all listing pages.
len(centres)

1868

In [8]:
# Peek at the first few collected hrefs.
centres[:5]

['#main',
 'https://www.toronto.ca/',
 'https://www.toronto.ca/globalnav/header/',
 'https://www.toronto.ca/',
 'https://www.toronto.ca/community-people/']

In [9]:
# Keep only the hrefs that contain 'webreg'. The `centre and` guard skips
# None/empty hrefs instead of raising TypeError on the `in` test.
centresfiltered = [centre for centre in centres if centre and 'webreg' in centre]
len(centresfiltered)

1028

In [9]:
# centresfiltered[1000:1004]

In [10]:
# Build absolute centre-page URLs: drop the first three characters of each
# href (presumably a leading '../' -- TODO confirm) and prepend the site base.
centre_links = [
    'https://www.toronto.ca/data/children/dmc/' + href[3:]
    for href in centresfiltered
]

In [10]:
# Spot-check a few of the generated centre URLs.
centre_links[1000:1004]

['https://www.toronto.ca/data/children/dmc/webreg/gcreg4110.html',
 'https://www.toronto.ca/data/children/dmc/webreg/gcreg1938.html',
 'https://www.toronto.ca/data/children/dmc/webreg/gcreg13586.html',
 'https://www.toronto.ca/data/children/dmc/webreg/gcreg1934.html']

In [31]:
%%time

data = []

counter = 0

for i in centre_links:
    
    link_html = urllib.request.urlopen(urllib.request.Request(i)).read()
    soup_obj = bs(link_html, 'html.parser')
    
    nameX = soup_obj.findAll("h2", {"class", "csd_title"})[0].text[:-7]

    paragraph = soup_obj.findAll('p')
    par_clean_1 = paragraph[1]
    par_clean_2 = par_clean_1.get_text()
    par_clean_3 = par_clean_2.replace("\r", "")
    par_clean_4 = par_clean_3.replace("\n", "")
    par_clean_5 = par_clean_4.replace("\t", " ")
    par_clean_6 = par_clean_5.replace("  ", " ")
    
    if 'not' in par_clean_6:
        subsidy = 'No'
    else:
        subsidy = 'Yes'

    address = soup_obj.findAll('p')
    add_clean_1 = address[0]
    add_clean_2 = add_clean_1.get_text()
    add_clean_3 = add_clean_2.replace("\r", "")
    add_clean_4 = add_clean_3.replace("\n", "")
    add_clean_5 = add_clean_4.replace("\t", " ")
    add_clean_6 = add_clean_5.replace("\xa0", "")
    add_clean_7 = add_clean_6.replace("  ", " ")
    add_clean_8 = add_clean_7 + ', Toronto, Ontario'

    g = geocoder.google(add_clean_8)
    coordinateX = g.latlng
    
    try:
        latitudeX = coordinateX[0]
    except:
        latitudeX = ''
    
    try:
        longitudeX = coordinateX[1]
    except:
        longitudeX = ''

    counter = counter + 1
    
    data.append([nameX, par_clean_6, subsidy, add_clean_8, latitudeX, longitudeX])
    
    if counter % 10 == 0:
        print(counter,". " ,nameX,"\t", subsidy,"\t",latitudeX,"\t",longitudeX)
    
df1 = pd.DataFrame(data, columns = ['Centre Name', 'Description', 'Subsidized', 'Address', 'Latitude', 'Longitude'])

#df1.dropna(axis=0, how = 'any', inplace = True)

10 .  Absorbent Minds (Op1756925 Ontario Inc.)  	 No 	 43.784063 	 -79.1698905
20 .  Albion Heights School Age YMCA 	 Yes 	  	 
30 .  Alternative Primary School Parent Group 	 Yes 	  	 
40 .  Antica Village Children's Day Care Centre (646508 Ontario Ltd) 	 Yes 	 43.7734741 	 -79.5233442
50 .  Ascot Avenue Community Daycare  	 Yes 	  	 
60 .  Banting And Best Child Care Centre (380 Goldhawk) 	 Yes 	 43.8267879 	 -79.2798585
70 .  Beechgrove Sunshine Child Care 	 No 	  	 
80 .  Blacksmith Child Care Centre 	 Yes 	  	 
90 .  Blue Bell Academy Inc.  	 No 	  	 
100 .  Bowmore Child Enrichment Centre (OpEarl Haig)(80 Bowmore Road) 	 Yes 	 43.6777766 	 -79.3144026
110 .  Bright Minds Childcare & Montessori School  	 No 	 43.7138075 	 -79.3073254
120 .  Brookside Child Care Centre  	 Yes 	  	 
130 .  Candy Factory Swallows Day Care (The) 	 Yes 	 43.6572069 	 -79.4473515
140 .  Castlebar YMCA  	 Yes 	  	 
150 .  Centennial College East York Day Care (Plains Rd) 	 Yes 	 43.6951878 	 -79.33208739

In [35]:
# Save the full scraped table (all rows of df1) to CSV.
df1.to_csv('Toronto Licensed Child Care Centres.csv')

In [36]:
# Keep only the rows whose Latitude is not the '' placeholder and save them
# to a separate CSV.
has_latitude = df1['Latitude'] != ''
df = df1[has_latitude]
df.to_csv('Toronto_Licensed_Daycare_clean.csv')

### Checking Data Collected

In [19]:
# Reload the saved centres and discard the 'Unnamed: 0' column.
df = pd.read_csv('Toronto_Licensed_Daycare_clean.csv').drop(columns='Unnamed: 0')
# Marker colour per centre: 'red' where Subsidized is 'Yes', 'green' otherwise.
df['Subsidized_color'] = df['Subsidized'].eq('Yes').map({True: 'red', False: 'green'})
df.sample(10)

Unnamed: 0,Centre Name,Description,Subsidized,Address,Latitude,Longitude,Subsidized_color
495,Scarborough Citadel Child Care,"This non-profit, licensed child care, s...",Yes,"2021 Lawrence Ave E (Lawrence / Warden), Toron...",43.746297,-79.294565,red
567,Teach Me To Fly Preschool,"This commercial, licensed child care, s...",No,"2560 Gerrard St E, Toronto, Ontario",43.689183,-79.278422,green
223,Grand Avenue Montessori School - 600,"This commercial, licensed child care, s...",No,"600 The Queensway (Queensway / Park Lawn Rd.),...",43.629454,-79.490412,green
573,Tfs Canada's International School (Mildenhall ...,"This French language, non-profit...",No,101 Mildenhall Rd (Lawrence Ave East & Bayview...,43.727267,-79.381567,green
84,Casa Vera Montessori School,"This commercial, licensed child care, s...",No,"2000 Keele St (Keele St. / Gulliver Rd.), Toro...",43.697365,-79.475881,green
210,General Mercer Early Learning Centre,"This non-profit, licensed child care, s...",Yes,"30 Turnberry Ave (Old Weston/St. Clair), Toron...",43.679068,-79.464016,red
75,Brookhaven Child Care,"This non-profit, licensed child care, s...",Yes,"70 Brookhaven Dr (Black Creek / Lawrence W.), ...",43.702114,-79.494889,red
242,Heritage Child Care & Early Learning Centre,"This non-profit, licensed child care, s...",Yes,"44 Port Royal Trail (Brimley / McNicoll), Toro...",43.819475,-79.287661,red
160,Earl Haig Community Day Care (15 Earl Haig Ave...,"This non-profit, licensed child care, s...",Yes,"15 Earl Haig Ave (Danforth Ave. / Coxwell), To...",43.681287,-79.321338,red
164,East Scarb. Child Care Centre - East Scarb. Bo...,"This non-profit, licensed child care, s...",Yes,230 Morningside Ave (Morningside / Lawrence E....,43.766922,-79.186554,red


In [18]:
# Column dtypes and non-null counts for the reloaded table.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 657 entries, 0 to 656
Data columns (total 7 columns):
Centre Name         657 non-null object
Description         657 non-null object
Subsidized          657 non-null object
Address             657 non-null object
Latitude            657 non-null float64
Longitude           657 non-null float64
Subsidized_color    657 non-null object
dtypes: float64(2), object(5)
memory usage: 36.0+ KB


In [3]:
import folium
from folium.plugins import MarkerCluster

In [11]:
# import IPython.display.display

from IPython.display import display 

In [29]:
# Centre the map on the mean coordinate of all centres. The earlier version
# indexed [0] before averaging, which centred the map on the first row only.
location = df['Latitude'].mean(), df['Longitude'].mean()

locationlist = df[['Latitude', 'Longitude']].values.tolist()

labels = df['Centre Name']

subsidy = df['Subsidized_color']

# NOTE(review): `map` shadows the built-in; the name is kept in case other
# cells refer to it.
map = folium.Map(location = location,
                 zoom_start=12,
                 tiles = 'OpenStreetMap')

marker_cluster = MarkerCluster().add_to(map)

# zip pairs the values positionally, so the loop does not depend on df having
# a default 0..n-1 index (labels[point] was a label-based lookup).
for coords, label, colour in zip(locationlist, labels, subsidy):
    popup = folium.Popup(label, parse_html = True, max_width=500)
    folium.Marker(coords,
                  popup=popup,
                  icon = folium.Icon(color = colour, icon='fas fa-child', prefix='fa')).add_to(marker_cluster)
display(map)