# Test Week 3 First Part

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Fail fast on network problems: the timeout keeps the cell from hanging forever,
# and raise_for_status() stops an HTTP error page from being parsed as if it were data.
response = requests.get(url, timeout=30)
response.raise_for_status()
html_data = response.text
soup = BeautifulSoup(html_data, "lxml")

# Only the first <table> on the page is parsed.
table = soup.find("table")
if table is None:
    raise ValueError(f"No <table> element found at {url}")

# One dict per assigned postal code, with keys CódigoPostal / Municipio / Vecindario.
table_contents = []
for row in table.find_all("td"):
    span_text = row.span.text
    if span_text == "Not assigned":
        continue

    # The span text has the form "Borough(Neighbourhood / Neighbourhood ...)".
    parts = span_text.split("(")
    neighbourhoods = (
        parts[1]
        .strip(")")
        .replace(" /", ",")   # " / " separators become ", "
        .replace(")", " ")    # a ")" left inside the text becomes a space
        .strip(" ")
    )

    cell = {
        "CódigoPostal": row.p.text[:3],  # first three characters of the <p> text
        "Municipio": parts[0],
        "Vecindario": neighbourhoods,
    }
    table_contents.append(cell)

# Turn the scraped records (a list of dicts) into a DataFrame.
df = pd.DataFrame(table_contents)

# Exact scraped borough strings that need cleaning, mapped to the label to use instead.
# Values not listed here are left untouched by replace().
borough_fixes = {
    "Downtown TorontoStn A PO Boxes25 The Esplanade": "Downtown Toronto Stn A",
    "East TorontoBusiness reply mail Processing Centre969 Eastern": "East Toronto Business",
    "EtobicokeNorthwest": "Etobicoke Northwest",
    "East YorkEast Toronto": "East York/East Toronto",
    "MississaugaCanada Post Gateway Processing Centre": "Mississauga",
}
df["Municipio"] = df["Municipio"].replace(borough_fixes)

print(df)

# Print the number of rows in the DataFrame.
row_count = len(df)
print("\nNúmero de filas en el marco de datos: ", row_count)


    CódigoPostal              Municipio  \
0            M3A             North York   
1            M4A             North York   
2            M5A       Downtown Toronto   
3            M6A             North York   
4            M7A           Queen's Park   
..           ...                    ...   
98           M8X              Etobicoke   
99           M4Y       Downtown Toronto   
100          M7Y  East Toronto Business   
101          M8Y              Etobicoke   
102          M8Z              Etobicoke   

                                            Vecindario  
0                                            Parkwoods  
1                                     Victoria Village  
2                            Regent Park, Harbourfront  
3                     Lawrence Manor, Lawrence Heights  
4                        Ontario Provincial Government  
..                                                 ...  
98       The Kingsway, Montgomery Road, Old Mill North  
99                         

# Test Week 3 Second Part

In [2]:
import pandas as pd

# Location of the coordinates CSV file.
coordinates_url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/Geospatial_Coordinates.csv"

# Load the CSV and rename its 'Postal Code' column to 'CódigoPostal' so that it
# shares a key column name with the scraped DataFrame `df`.
coordinates_df = pd.read_csv(coordinates_url).rename(columns={"Postal Code": "CódigoPostal"})

# Join the scraped table with the coordinates on 'CódigoPostal'
# (default inner join: only postal codes present in both frames are kept).
df_with_coordinates = df.merge(coordinates_df, on="CódigoPostal")

print(df_with_coordinates)


    CódigoPostal              Municipio  \
0            M3A             North York   
1            M4A             North York   
2            M5A       Downtown Toronto   
3            M6A             North York   
4            M7A           Queen's Park   
..           ...                    ...   
98           M8X              Etobicoke   
99           M4Y       Downtown Toronto   
100          M7Y  East Toronto Business   
101          M8Y              Etobicoke   
102          M8Z              Etobicoke   

                                            Vecindario   Latitude  Longitude  
0                                            Parkwoods  43.753259 -79.329656  
1                                     Victoria Village  43.725882 -79.315572  
2                            Regent Park, Harbourfront  43.654260 -79.360636  
3                     Lawrence Manor, Lawrence Heights  43.718518 -79.464763  
4                        Ontario Provincial Government  43.662301 -79.389494  
..       

# Test Week 3 Third Part 


In [3]:
import folium
import requests
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as colors

# Keep only the rows whose borough name contains the word 'Toronto'.
is_toronto = df_with_coordinates['Municipio'].str.contains('Toronto')
toronto_data = df_with_coordinates[is_toronto].reset_index(drop=True)

# Coordinates used as the map centre for Toronto.
latitude = 43.6532
longitude = -79.3832

# Create the Toronto map and add one marker per row of toronto_data.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

for lat, lng, label in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Vecindario']):
    # parse_html belongs to Popup; CircleMarker has no such parameter, so it is
    # only passed here. A separate name avoids rebinding `label` to a Popup.
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

# Display the map.
map_toronto

In [4]:
# Number of clusters.
k = 5

# Make the cell safe to re-run: discard labels left by a previous execution.
# Without this, the old label column would be fed to KMeans as a feature and
# the insert() below would raise "cannot insert ..., already exists".
toronto_data = toronto_data.drop(columns=['EtiquetaAgrupamiento'], errors='ignore')

# Features for clustering: everything except the descriptive text columns.
toronto_grouped_clustering = toronto_data.drop(['CódigoPostal', 'Municipio', 'Vecindario'], axis=1)

# Run KMeans (fixed random_state for reproducible labels).
kmeans = KMeans(n_clusters=k, random_state=0).fit(toronto_grouped_clustering)

# Add the cluster labels as the first column of the DataFrame.
toronto_data.insert(0, 'EtiquetaAgrupamiento', kmeans.labels_)

# Helper to obtain colors.
def get_color(n, max_n):
    """Return the viridis colormap color for ``n`` on a 0..``max_n`` scale.

    Args:
        n: Value to colour (here, a cluster index).
        max_n: Value mapped to the top end of the colormap; 0 maps to the bottom.

    Returns:
        The colormap's output for the normalized value, i.e. an RGBA tuple of
        floats. Convert it (e.g. with ``matplotlib.colors.to_hex``) before using
        it where a CSS colour string is required.
    """
    norm = colors.Normalize(vmin=0, vmax=max_n)
    # matplotlib.cm.get_cmap() was removed in Matplotlib 3.9; the colormap is
    # also exposed as a module attribute, which works on old and new versions.
    cmap = cm.viridis
    return cmap(norm(n))

# Create the map of neighbourhood clusters.
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# One colour per cluster, computed once. get_color() yields an RGBA float tuple,
# which is not a valid CSS colour for the map markers, so convert it to a
# '#rrggbb' hex string.
cluster_colors = [colors.to_hex(get_color(cluster_id, k)) for cluster_id in range(k)]

for lat, lng, label, cluster in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Vecindario'], toronto_data['EtiquetaAgrupamiento']):
    # parse_html is a Popup option; CircleMarker has no such parameter.
    popup = folium.Popup(f"Cluster {cluster}: {label}", parse_html=True)
    marker_color = cluster_colors[cluster]
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7,
    ).add_to(map_clusters)

# Display the cluster map.
map_clusters
