In [1]:
import pandas as pd
import geopandas as gpd
import folium
from json import loads, dumps
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
from shapely.geometry import Point
import time
import googlemaps
import osmnx as ox
from sklearn.cluster import KMeans
import numpy as np

In [123]:
df = pd.read_csv('ibsj_sector.csv')
df = df.drop(['prefix', 'name', 'city', 'state'], axis=1)
#df = df[:39]

null_address = len(df.loc[df['address_1'].isnull()])
print("El archivo tiene {} registros".format(len(df)))
#print("Hay {} direcciones vacias en el archivo".format(null_address))

df = df[df['address_1'].notna()]
df = df[df['address_1'] != 'Actualizar']
df['address_1'] = df['address_1'].replace({'#':' No. ', 'C/':'Calle '}, regex=True)
df = df.reset_index()
print("Luego de eliminar los registros sin direccion, quedan {} registros".format(len(df)))



El archivo tiene 772 registros
Luego de eliminar los registros sin direccion, quedan 657 registros


In [124]:

# Configurar la API de Google Maps
API_KEY = "AIzaSyDE6vYAGZphN5uHNma_0Vg6uMrIBGu5Aeg"  # Reemplaza con tu clave de Google Maps
gmaps = googlemaps.Client(key=API_KEY)

# Función para obtener coordenadas y datos extra
def obtener_datos_direccion(direccion):
    try:
        geocode_result = gmaps.geocode(direccion, components={"country": "DO"})  # Restringido a República Dominicana
        if geocode_result:
            location = geocode_result[0]
            lat = location["geometry"]["location"]["lat"]
            lng = location["geometry"]["location"]["lng"]
            address_components = {comp["types"][0]: comp["long_name"] for comp in location["address_components"]}

            return pd.Series({
                "geometry": Point(lng, lat),
                "calle": address_components.get("route", ""),
                "numero": address_components.get("street_number", ""),
                "barrio": address_components.get("sublocality", address_components.get("political", address_components.get("sublocality_level_1", ""))),
                "ciudad": address_components.get("locality", address_components.get("administrative_area_level_2", "")),
                "estado": address_components.get("administrative_area_level_1", ""),
                "pais": address_components.get("country", ""),
                "codigo_postal": address_components.get("postal_code", "")
            })
    except Exception as e:
        print(f"Error con {direccion}: {e}")

    # Si falla, devolver valores vacíos
    return pd.Series({"geometry": None, "calle": "", "numero": "", "barrio": "", "ciudad": "", "estado": "", "pais": "", "codigo_postal": ""})


# Aplicar la función a cada dirección
df_info = df["address_1"].apply(obtener_datos_direccion)

# Unir los datos con el dataframe original
df = pd.concat([df, df_info], axis=1)

# Convertir a GeoDataFrame
gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")

# Eliminar filas sin coordenadas
#gdf = gdf[gdf["geometry"].notna()]

# Guardar resultado en un archivo CSV
gdf.to_csv("coordenadas_resultado.csv", index=False, encoding="utf-8")


In [131]:

def get_cluster_color(cluster_label):
    colors = [
        'purple', 'black', 'cadetblue', 'pink', 'red', 'blue', 'darkgreen', 
        'darkred', 'lightgreen', 'orange', 'beige', 'darkpurple', 'darkblue', 'green', 'gray', 'lightgray', 'lightred', 'lightblue'
    ]
    return colors[cluster_label]

# Cargar el archivo CSV con coordenadas y datos extra
df = gdf

# Asegurar que las coordenadas estén en formato correcto
df["geometry"] = gpd.points_from_xy(df["geometry"].apply(lambda x: x.x), df["geometry"].apply(lambda x: x.y))
gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")

# Crear un mapa centrado en República Dominicana
mapa = folium.Map(location=[18.7357, -70.1627], zoom_start=9)

# Extraer las coordenadas (latitud y longitud)
coordinates = np.array([(point.y, point.x) for point in gdf.geometry])

# Aplicar KMeans para clasificar los puntos en clusters
kmeans = KMeans(n_clusters=8)  # Cambia el número de clusters (5 en este caso)
gdf['cluster'] = kmeans.fit_predict(coordinates)


gdf_barrios = ox.features_from_place(
    "Ozama,  Dominican Republic",
    {"boundary": "administrative","admin_level": "6"}
)


# gdf_barrios = gdf_barrios[gdf_barrios['admin_level'] == '6']
gdf_barrios = gdf_barrios[gdf_barrios['name'].isin(
            ['Pedro Brand','Santo Domingo Norte', 
             'Distrito Nacional', 'Boca Chica',           
             'Los Alcarrizos','San Antonio de Guerra',
             'San Luis','Santo Domingo Este',
             'Santo Domingo Norte','Santo Domingo Oeste']
)]

# Agregar cada polígono al mapa
# for _, row in gdf_barrios.iterrows():
#     if row.geometry is not None:
#         folium.GeoJson(row.geometry, tooltip=row.get("name")).add_to(mapa)

gdf["poligono"] = gdf.geometry.apply(
    lambda punto: gdf_barrios[gdf_barrios.contains(punto)]["name"].values[0] 
    if not gdf_barrios[gdf_barrios.contains(punto)].empty else "Fuera de cualquier polígono"
)

# Agregar puntos al mapa con información detallada
for idx, row in gdf.iterrows():
    popup_text = f"""
    <b>Dirección Original:</b> {row["address_1"]}<br>
    <b>Nombre:</b> {row["first_name"]}<br>
    <b>Apellido:</b> {row["last_name"]}<br>
    <b>Calle:</b> {row["calle"]}<br>
    <b>Número:</b> {row["numero"]}<br>
    <b>Barrio:</b> {row["barrio"]}<br>
    <b>Ciudad:</b> {row["poligono"]}<br>
    <b>País:</b> {row["pais"]}<br>
    <b>Código Postal:</b> {row["codigo_postal"]}<br>
    <b>Cluster:</b> {row["cluster"]}
    """

    marker_color = get_cluster_color(row['cluster'])
    
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],  # Latitud, Longitud
        popup=folium.Popup(popup_text, max_width=300),  # Información al hacer clic
        tooltip=row["address_1"],  # Muestra la dirección al pasar el mouse
        icon=folium.Icon(color=marker_color, icon="info-sign")  # Color y estilo del marcador
    ).add_to(mapa)

# Guardar el mapa como HTML y mostrarlo
mapa.save("mapa_interactivo.html")

mapa


In [126]:
gdf.head(10)
gdf.poligono.value_counts()

poligono
Distrito Nacional              500
Santo Domingo Norte             58
Santo Domingo Este              51
Santo Domingo Oeste             26
Los Alcarrizos                  16
Pedro Brand                      5
Fuera de cualquier polígono      1
Name: count, dtype: int64

In [6]:
gdf['cluster'].value_counts()

cluster
1     135
14    108
0      78
5      52
9      37
11     35
16     33
4      28
13     28
2      27
12     27
15     23
6      22
17     15
10      8
7       4
3       2
8       1
Name: count, dtype: int64

In [96]:
gdf.loc[gdf['poligono'] == 'Fuera de cualquier polígono'].to_csv('arreglar.csv',index=False)

In [None]:
#147

In [122]:
gmaps = googlemaps.Client(key=API_KEY)
direccion = "INAPERSA I, Altos de Arroyo Hondo"  # Reemplázala con una dirección específica
geocode_result = gmaps.geocode(direccion, components={"country": "DO"})

location = geocode_result[0]
lat = location["geometry"]["location"]["lat"]
lng = location["geometry"]["location"]["lng"]
address_components = {comp["types"][0]: comp["long_name"] for comp in location["address_components"]}


df_test = pd.DataFrame(data={'lat': [lat], 'lng': [lng]})
df_test['geometry'] = Point(lng, lat)

mapa = folium.Map(location=[18.7357, -70.1627], zoom_start=9)
folium.Marker(
           location=[df_test.loc[0,'geometry'].y, df_test.loc[0,'geometry'].x]
        ).add_to(mapa)

mapa

In [331]:

import geopandas as gpd
import folium

# Descargar los polígonos de los barrios de Santo Domingo
gdf_barrios = ox.features_from_place(
    "Ozama,  Dominican Republic",
    {"boundary": "administrative","admin_level": "6"}
)
# gdf_barrios = ox.features_from_place(
#     "Santo Domingo, Dominican Republic",
#     {"boundary": "administrative", "admin_level": "10"}
# )

# gdf_barrios = gdf_barrios[gdf_barrios['admin_level'] == '6']
gdf_barrios = gdf_barrios[gdf_barrios['name'].isin(
            ['Pedro Brand','Santo Domingo Norte', 
             'Distrito Nacional', 'Boca Chica',           
             'Los Alcarrizos','San Antonio de Guerra',
             'San Luis','Santo Domingo Este',
             'Santo Domingo Norte','Santo Domingo Oeste']
)]

# gdf_barrios_filtered = gdf_barrios[gdf_barrios['name'].isin(
#     ['Distrito Nacional', 'Santo Domingo Este', 'Santo Domingo Oeste', 'Santo Domingo Norte', 'Los Alcarrizos'])]

# Crear un mapa centrado en Santo Domingo
mapa = folium.Map(location=[18.5, -69.9], zoom_start=9)

# Agregar cada polígono al mapa
for _, row in gdf_barrios.iterrows():
    if row.geometry is not None:
        folium.GeoJson(row.geometry, tooltip=row.get("name")).add_to(mapa)

# Mostrar el mapa
mapa.save("mapa_barrios_santo_domingo.html")
mapa


In [326]:
gdf_barrios


Unnamed: 0_level_0,Unnamed: 1_level_0,geometry,admin_level,name,name:am,name:ar,name:azb,name:be,name:be-tarask,name:bg,name:bo,...,official_name:pa,official_name:pl,official_name:sr,official_name:ug,official_name:ur,official_name:zh,short_name,short_name:en,timezone,one_munid
element,id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
relation,53161,"POLYGON ((-69.99886 18.50277, -69.99867 18.503...",4.0,Distrito Nacional,,,,,,,,...,,,,,,,,,,
relation,307828,"POLYGON ((-72.06963 19.9463, -72.05013 19.9697...",2.0,República Dominicana,ዶሚኒካን ሪፐብሊክ,جمهورية الدومينيكان,دومینیکن جومهوریتی,Дамініканская Рэспубліка,Дамініканская Рэспубліка,Доминиканска република,ཌོ་མི་ནི་ཀན་སྤྱི་མཐུན་རྒྱལ་ཁབ།,...,ਦੋਮੀਨੀਕਾਨਾ ਗਣਰਾਜ,Republika Dominikany,Доминиканска Република,دومىنىكان جۇمھۇرىيەتى,جمہوریہ ڈومینیکن,多明尼加共和國,R.D.,D.R.,America/Santo_Domingo,
relation,3422565,"POLYGON ((-70.1651 18.72795, -70.16379 18.7285...",4.0,Santo Domingo,,,,,,,,...,,,,,,,,,,
relation,7407678,"POLYGON ((-69.99886 18.50277, -69.99867 18.503...",6.0,Santo Domingo de Guzmán,,,,,,,,...,,,,,,,,,,100101.0
relation,7407685,"POLYGON ((-69.89794 18.51432, -69.89787 18.514...",6.0,Santo Domingo Este,,,,,,,,...,,,,,,,,,,103201.0
relation,7407687,"POLYGON ((-70.07111 18.50997, -70.07049 18.509...",6.0,Santo Domingo Oeste,,,,,,,,...,,,,,,,,,,103202.0
relation,7407691,"POLYGON ((-70.01795 18.66002, -70.01793 18.660...",6.0,Santo Domingo Norte,,,,,,,,...,,,,,,,,,,103203.0
relation,7407715,"POLYGON ((-70.09215 18.53496, -70.09189 18.535...",6.0,Los Alcarrizos,,,,,,,,...,,,,,,,,,,103206.0
relation,9608154,"POLYGON ((-70.1651 18.72795, -70.16379 18.7285...",,Ozama,,,,,,,,...,,,,,,,,,,
relation,10326757,"MULTIPOLYGON (((-69.60113 19.08188, -69.60112 ...",,Sureste,,,,,,,,...,,,,,,,,,,


In [305]:
# gdf_barrios[['name', 'admin_level', 'geometry']].head(25)
gdf_barrios.loc[gdf_barrios['name'] == 'Ensanche Naco', ['geometry', 'name', 'place', 'boundary']]

Unnamed: 0_level_0,Unnamed: 1_level_0,geometry,name,place,boundary
element,id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
way,530633112,"POLYGON ((-69.93135 18.47548, -69.92678 18.475...",Ensanche Naco,neighbourhood,postal_code
way,530633113,"POLYGON ((-69.92771 18.47835, -69.92598 18.478...",Ensanche Naco,neighbourhood,postal_code
way,530633114,"POLYGON ((-69.92903 18.48262, -69.92752 18.482...",Ensanche Naco,neighbourhood,postal_code
