In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import urllib.request

# Raise pandas' display limits so that larger frames are shown in the notebook.
for option_name, option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

### En esta sección se lee la URL para extraer la información ###

In [2]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# The target data frame has 3 columns: PostalCode, Borough, and Neighborhood.
# Download the raw page bytes. The `with` block closes the HTTP response as
# soon as the body has been read instead of leaving it open.
with urllib.request.urlopen(url) as response:
    source = response.read()

### En esta sección, utilizando Beautiful Soup, parseamos el HTML previamente extraído ###

In [3]:
# Parse the downloaded page with Python's built-in HTML parser.
soup = BeautifulSoup(markup=source, features='html.parser')

In [4]:
## Table header data ####

# The first three <th> cells of the page are used as the column names.
header_cells = soup.find_all('th')
if len(header_cells) < 3:
    raise ValueError(
        "Expected at least 3 <th> cells, found %d" % len(header_cells)
    )

# get_text(strip=True) returns only the text inside each cell, so the names
# stay correct even when the tag carries attributes or the text has
# surrounding whitespace (slicing str(tag) at fixed offsets breaks in both
# cases).
postcode, borough, vecinda = (
    cell.get_text(strip=True) for cell in header_cells[:3]
)


In [5]:
#### Table data #####

# Take the first <table> in the document and all of its rows.
table = soup.find('table')
table_rows = table.find_all('tr')

# One list of cell texts per <tr>. A row without <td> cells (e.g. the header
# row) yields an empty list, which becomes an all-missing row in the frame and
# is removed by dropna() below.
rows = []
for tr in table_rows:
    rows.append([cell.text for cell in tr.find_all('td')])

data = pd.DataFrame(rows, columns=[postcode, borough, vecinda])

# Discard rows whose borough is 'Not assigned', then rows with missing cells.
# The borough column is addressed through the scraped header name, and
# dropna() returns a new frame rather than mutating a filtered slice in place.
data = data[data[borough] != 'Not assigned'].dropna()

### Limpiamos el data frame y lo ajustamos a los requerimientos ###

In [6]:
# Remove newline characters from every neighbourhood name in one vectorised
# call instead of writing back into the frame while iterating over its rows.
# regex=False makes "\n" a literal substring, matching str.replace semantics.
data['Neighbourhood'] = data['Neighbourhood'].str.replace("\n", "", regex=False)

In [7]:
# A row whose neighbourhood is 'Not assigned' takes the name of its own
# borough. Copying the borough per row, rather than writing the literal
# "Queen's Park", gives the same result for a Queen's Park row and stays
# correct for any other borough.
no_neighbourhood = data['Neighbourhood'] == 'Not assigned'
data.loc[no_neighbourhood, 'Neighbourhood'] = data.loc[no_neighbourhood, 'Borough']

In [8]:
# Collapse to one row per postcode, joining the distinct values of every other
# column with commas. unique() keeps values in order of first appearance, so
# the joined string is the same on every run; iterating a set() of strings
# gives an order that can change between kernel sessions.
data = data.groupby("Postcode").agg(lambda x: ','.join(x.unique()))

In [9]:
# Where the grouped neighbourhood is exactly "Not assigned", fall back to the
# borough name of the same row.
unassigned = data['Neighbourhood'] == "Not assigned"
data.loc[unassigned, 'Neighbourhood'] = data.loc[unassigned, 'Borough']

In [10]:
# Show the dimensions of the grouped frame as (rows, columns).
data.shape

(103, 2)

In [11]:
# Print the grouped frame as plain text to inspect the result.
print(data)

                   Borough                                      Neighbourhood
Postcode                                                                     
M1B            Scarborough                                      Rouge,Malvern
M1C            Scarborough               Port Union,Highland Creek,Rouge Hill
M1E            Scarborough                    Guildwood,West Hill,Morningside
M1G            Scarborough                                             Woburn
M1H            Scarborough                                          Cedarbrae
M1J            Scarborough                                Scarborough Village
M1K            Scarborough          Ionview,Kennedy Park,East Birchmount Park
M1L            Scarborough                      Clairlea,Golden Mile,Oakridge
M1M            Scarborough      Cliffcrest,Cliffside,Scarborough Village West
M1N            Scarborough                         Cliffside West,Birch Cliff
M1P            Scarborough  Wexford Heights,Scarborough Town Cen

In [12]:
# Read the CSV served at this URL into a DataFrame.
localizacion=pd.read_csv("https://cocl.us/Geospatial_data")
# Bare last expression so the notebook displays the loaded frame.
localizacion

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [15]:
# Attach the coordinates by joining on the postal code rather than by row
# position: positional assignment silently pairs codes with the wrong
# coordinates whenever the two tables are not in exactly the same order.
coordinates = localizacion[['Postal Code', 'Latitude', 'Longitude']].rename(
    columns={
        'Postal Code': 'Postcode',
        'Latitude': 'Latitud',
        'Longitude': 'Longitud',
    }
)

# reset_index() turns the 'Postcode' index into a regular column to join on.
# how='left' keeps every row of `data` in its current order, and
# validate='one_to_one' raises if a postal code is duplicated on either side
# instead of silently multiplying rows.
data = data.reset_index().merge(
    coordinates, on='Postcode', how='left', validate='one_to_one'
)
data

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitud,Longitud
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Port Union,Highland Creek,Rouge Hill",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,West Hill,Morningside",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Ionview,Kennedy Park,East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Cliffside West,Birch Cliff",43.692657,-79.264848


In [17]:
!conda install -c conda-forge folium --yes

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2019.9.11  |       hecc5488_0         144 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    altair-3.2.0               |           py36_0         770 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    folium-0.10.0              |             py_0          59 KB  conda-forge
    certifi-2019.9.11          |           py36_0         147 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.3 MB

The following NEW packages will be INSTAL

In [21]:
import folium

# Centre the map on the mean latitude/longitude of all rows.
latitud = data['Latitud'].mean()
longitud = data['Longitud'].mean()

mapa = folium.Map(location=[latitud, longitud], zoom_start=11)

# Appearance shared by every marker.
marker_style = dict(
    radius=4,
    color='red',
    fill=True,
    fill_color='blue',
    fill_opacity=0.6,
)

# One circle marker per row, with the borough name as its popup.
for marker_lat, marker_lng, borough_label in zip(
    data['Latitud'], data['Longitud'], data['Borough']
):
    marker = folium.CircleMarker(
        location=[marker_lat, marker_lng],
        popup=borough_label,
        **marker_style
    )
    marker.add_to(mapa)

# Bare last expression so the notebook renders the map.
mapa