# Chile datos territoriales

In [1]:
import pandas as pd


def apply_replacements(text, replacements):
    """Return ``text`` after applying every substring replacement in turn.

    Args:
        text: String to clean up.
        replacements: Mapping of substring to look for -> substring to put
            in its place. Entries are applied in the mapping's iteration
            order, each one on the result of the previous one.

    Returns:
        The string with all replacements applied.
    """
    result = text
    for pair in replacements.items():
        # ``pair`` is (wrong, correct): exactly the arguments str.replace takes.
        result = result.replace(*pair)
    return result

## Carga de DataFrames

In [None]:
# Load sources/ley-regiones.csv with every column read as a string
# (presumably so that codes keep their leading zeros -- confirm against the file).
ley_regiones_df = pd.read_csv(
    filepath_or_buffer="sources/ley-regiones.csv",
    dtype=str,
)

# Bare expression: displays the frame in the notebook.
ley_regiones_df

In [None]:
# Load sources/observatoriologistico.csv with every column read as a string
# (presumably so that codes keep their leading zeros -- confirm against the file).
observatoriologistico_df = pd.read_csv(
    filepath_or_buffer="sources/observatoriologistico.csv",
    dtype=str,
)

# Bare expression: displays the frame in the notebook.
observatoriologistico_df

In [None]:
# Subdivision-name fragments from the ISO source mapped to the spelling that
# is used as the join key ("corrected_name") later in the notebook.
replacements = {
    "Aisén del General Carlos Ibañez del Campo": "Aysén del General Carlos Ibáñez del Campo",
    "Magallanes": "Magallanes y de la Antártica Chilena",
    "Región Metropolitana de Santiago": "Metropolitana de Santiago",
}

iso_df = pd.read_csv("sources/iso.csv")
# Keep "Subdivision name" untouched and store the normalised spelling in a
# separate column; `replacements` is forwarded as the second positional
# argument of apply_replacements.
iso_df["corrected_name"] = iso_df["Subdivision name"].apply(
    apply_replacements, args=(replacements,)
)
# Bare expression: displays the frame in the notebook.
iso_df

## Transformaciones

In [None]:
# Pre-process iso_df so the ISO code can be used directly: drop the literal
# "*" markers. regex=False makes the literal match explicit -- the default of
# `regex` differs between pandas versions, and a lone "*" is not a valid
# regular expression.
iso_df["clean_iso_code"] = iso_df["3166-2 code"].str.replace("*", "", regex=False)

# Attach the cleaned ISO code to every row by matching the region name against
# the corrected ISO name. how="left" keeps rows without a match (their ISO
# code is then missing).
merged_df = observatoriologistico_df.merge(
    iso_df[["corrected_name", "clean_iso_code"]],
    left_on="Nombre Región",
    right_on="corrected_name",
    how="left",
)
# Attach the "nombre" column from ley_regiones_df, matched on the region code.
merged_df = merged_df.merge(
    ley_regiones_df[["código", "nombre"]],
    left_on="Código Región",
    right_on="código",
    how="left",
)
# Bare expression: displays the frame in the notebook.
merged_df

## Construcción de Regiones CSV

In [None]:
# Build the final regions table: pick the needed columns, rename them through
# an explicit old -> new mapping (so the pairing does not depend on column
# position), and keep a single row per region id.
regions_df = (
    merged_df[
        ["Código Región", "nombre", "Nombre Región", "Abreviatura Región", "clean_iso_code"]
    ]
    .rename(
        columns={
            "Código Región": "id",
            "nombre": "name",
            "Nombre Región": "short_name",
            "Abreviatura Región": "abbreviation",
            "clean_iso_code": "iso_code",
        }
    )
    .drop_duplicates(subset=["id"], keep="last")
    # A "provinces" column is added to this frame later in the notebook, so
    # finish with an explicit copy: the frame is then fully independent of
    # merged_df, as the provinces/communes frames already are.
    .copy()
)

# Save to CSV and display the DataFrame
regions_df.to_csv("csv/regions.csv", index=False)
regions_df

## Construcción de Provincias CSV

In [None]:
# Provinces table: province code and name plus the code of the owning region,
# renamed to the output column names.
provinces_df = (
    observatoriologistico_df[["Código Provincia", "Nombre Provincia", "Código Región"]]
    .copy()
    .rename(
        columns={
            "Código Provincia": "id",
            "Nombre Provincia": "name",
            "Código Región": "region_id",
        }
    )
)

# Keep one row per province id (the last occurrence wins).
provinces_df = provinces_df.drop_duplicates(subset=["id"], keep="last")

# Save to CSV and display the DataFrame
provinces_df.to_csv("csv/provinces.csv", index=False)
provinces_df

## Construcción de Comunas CSV

In [None]:
# Communes table: commune code and name plus the code of the owning province,
# renamed to the output column names. No de-duplication is applied here.
communes_df = (
    observatoriologistico_df[["Código Comuna 2018", "Nombre Comuna", "Código Provincia"]]
    .copy()
    .rename(
        columns={
            "Código Comuna 2018": "id",
            "Nombre Comuna": "name",
            "Código Provincia": "province_id",
        }
    )
)

# Save to CSV and display the DataFrame
communes_df.to_csv("csv/communes.csv", index=False)
communes_df

## Construcción de JSON

In [None]:
import json

# Group communes by province_id so each province can collect its communes.
communes_grouped = communes_df.groupby("province_id")


def _communes_of(province_id):
    """Return {commune id: {"id", "name"}} for one province, or {} if it has none."""
    # Membership must be checked BEFORE get_group: get_group raises KeyError
    # for an unknown key, so a filter inside the comprehension is never reached.
    if province_id not in communes_grouped.groups:
        return {}
    return {
        row["id"]: {"id": row["id"], "name": row["name"]}
        for _, row in communes_grouped.get_group(province_id).iterrows()
    }


provinces_df["communes"] = provinces_df["id"].apply(_communes_of)

# Group provinces by region_id so each region can collect its provinces.
provinces_grouped = provinces_df.groupby("region_id")


def _provinces_of(region_id):
    """Return {province id: {"id", "name", "communes"}} for one region, or {} if it has none."""
    # Same ordering requirement as in _communes_of: guard first, then get_group.
    if region_id not in provinces_grouped.groups:
        return {}
    return {
        row["id"]: {"id": row["id"], "name": row["name"], "communes": row["communes"]}
        for _, row in provinces_grouped.get_group(region_id).iterrows()
    }


regions_df["provinces"] = regions_df["id"].apply(_provinces_of)

# Build the final dictionary for the country (hard-coded to Chile).
cl_country = {
    "country": "Chile",
    "regions": {
        row["id"]: {
            "id": row["id"],
            "name": row["name"],
            "short_name": row["short_name"],
            "abbreviation": row["abbreviation"],
            "iso_code": row["iso_code"],
            "provinces": row["provinces"],
        }
        for _, row in regions_df.iterrows()
    },
}

# Serialise to JSON; ensure_ascii=False keeps accented characters readable.
json_data = json.dumps(cl_country, ensure_ascii=False, indent=2)

# Save the JSON to a file
with open("json/data.json", "w", encoding="utf-8") as f:
    f.write(json_data)

# Print the JSON for a visual check
print(json_data)

## Construcción de Región SQL

In [14]:
# Table definition
table_model = """
CREATE TABLE region (
  id VARCHAR(2) PRIMARY KEY,
  name VARCHAR(255),
  short_name VARCHAR(255),
  abbreviation VARCHAR(10),
  iso_code VARCHAR(10)
);
"""

# Generate one INSERT statement per region. The quote-doubling is done outside
# the f-string: reusing the enclosing quote character inside a replacement
# field is a SyntaxError before Python 3.12. Only the two name columns are
# escaped, as before; the remaining values are interpolated unchanged.
insert_lines = []
for _, row in regions_df.iterrows():
    escaped_name = row["name"].replace("'", "''")
    escaped_short_name = row["short_name"].replace("'", "''")
    insert_lines.append(
        "INSERT INTO region (id, name, short_name, abbreviation, iso_code) "
        f"VALUES ('{row['id']}', '{escaped_name}', '{escaped_short_name}', "
        f"'{row['abbreviation']}', '{row['iso_code']}');\n"
    )
# Join once instead of growing a string with += on every row.
insert_statements = "".join(insert_lines)

# Combine the table definition and the INSERT statements
sql_script = table_model + "\n" + insert_statements

# Save the SQL script to a file
with open("sql/regions.sql", "w", encoding="utf-8") as f:
    f.write(sql_script)

# Print the SQL script for a visual check
print(sql_script)


CREATE TABLE region (
  id VARCHAR(2) PRIMARY KEY,
  name VARCHAR(255),
  short_name VARCHAR(255),
  abbreviation VARCHAR(10),
  iso_code VARCHAR(10)
);

INSERT INTO region (id, name, short_name, abbreviation, iso_code) VALUES ('01', 'Región de Tarapacá', 'Tarapacá', 'TPCA', 'CL-TA');
INSERT INTO region (id, name, short_name, abbreviation, iso_code) VALUES ('02', 'Región de Antofagasta', 'Antofagasta', 'ANTOF', 'CL-AN');
INSERT INTO region (id, name, short_name, abbreviation, iso_code) VALUES ('03', 'Región de Atacama', 'Atacama', 'ATCMA', 'CL-AT');
INSERT INTO region (id, name, short_name, abbreviation, iso_code) VALUES ('04', 'Región de Coquimbo', 'Coquimbo', 'COQ', 'CL-CO');
INSERT INTO region (id, name, short_name, abbreviation, iso_code) VALUES ('05', 'Región de Valparaíso', 'Valparaíso', 'VALPO', 'CL-VS');
INSERT INTO region (id, name, short_name, abbreviation, iso_code) VALUES ('06', 'Región del Libertador General Bernardo O''Higgins', 'Libertador General Bernardo O''Higgins', 

In [15]:
# Table definition
provinces_table_model = """
CREATE TABLE province (
  id VARCHAR(3) PRIMARY KEY,
  name VARCHAR(255),
  region_id VARCHAR(2),
  FOREIGN KEY (region_id) REFERENCES region(id)
);
"""

# Generate one INSERT statement per province. The quote-doubling is done
# outside the f-string: reusing the enclosing quote character inside a
# replacement field is a SyntaxError before Python 3.12. Only the name is
# escaped, as before; the remaining values are interpolated unchanged.
provinces_insert_lines = []
for _, row in provinces_df.iterrows():
    escaped_name = row["name"].replace("'", "''")
    provinces_insert_lines.append(
        "INSERT INTO province (id, name, region_id) "
        f"VALUES ('{row['id']}', '{escaped_name}', '{row['region_id']}');\n"
    )
# Join once instead of growing a string with += on every row.
provinces_insert_statements = "".join(provinces_insert_lines)

# Combine the table definition and the INSERT statements
provinces_sql_script = provinces_table_model + "\n" + provinces_insert_statements

# Save the SQL script to a file
with open("sql/provinces.sql", "w", encoding="utf-8") as f:
    f.write(provinces_sql_script)

# Print the SQL script for a visual check
print(provinces_sql_script)


CREATE TABLE province (
  id VARCHAR(3) PRIMARY KEY,
  name VARCHAR(255),
  region_id VARCHAR(2),
  FOREIGN KEY (region_id) REFERENCES region(id)
);

INSERT INTO province (id, name, region_id) VALUES ('011', 'Iquique', '01');
INSERT INTO province (id, name, region_id) VALUES ('014', 'Tamarugal', '01');
INSERT INTO province (id, name, region_id) VALUES ('021', 'Antofagasta', '02');
INSERT INTO province (id, name, region_id) VALUES ('022', 'El Loa', '02');
INSERT INTO province (id, name, region_id) VALUES ('023', 'Tocopilla', '02');
INSERT INTO province (id, name, region_id) VALUES ('031', 'Copiapó', '03');
INSERT INTO province (id, name, region_id) VALUES ('032', 'Chañaral', '03');
INSERT INTO province (id, name, region_id) VALUES ('033', 'Huasco', '03');
INSERT INTO province (id, name, region_id) VALUES ('041', 'Elqui', '04');
INSERT INTO province (id, name, region_id) VALUES ('042', 'Choapa', '04');
INSERT INTO province (id, name, region_id) VALUES ('043', 'Limarí', '04');
INSERT INT

In [17]:
# Table definition
communes_table_model = """
CREATE TABLE commune (
  id VARCHAR(5) PRIMARY KEY,
  name VARCHAR(255),
  province_id VARCHAR(3),
  FOREIGN KEY (province_id) REFERENCES province(id)
);
"""

# Generate one INSERT statement per commune. The quote-doubling is done
# outside the f-string: reusing the enclosing quote character inside a
# replacement field is a SyntaxError before Python 3.12. Only the name is
# escaped, as before; the remaining values are interpolated unchanged.
communes_insert_lines = []
for _, row in communes_df.iterrows():
    escaped_name = row["name"].replace("'", "''")
    communes_insert_lines.append(
        "INSERT INTO commune (id, name, province_id) "
        f"VALUES ('{row['id']}', '{escaped_name}', '{row['province_id']}');\n"
    )
# Join once instead of growing a string with += on every row.
communes_insert_statements = "".join(communes_insert_lines)

# Combine the table definition and the INSERT statements
communes_sql_script = communes_table_model + "\n" + communes_insert_statements

# Save the SQL script to a file
with open("sql/communes.sql", "w", encoding="utf-8") as f:
    f.write(communes_sql_script)

# Print the SQL script for a visual check
print(communes_sql_script)


CREATE TABLE commune (
  id VARCHAR(5) PRIMARY KEY,
  name VARCHAR(255),
  province_id VARCHAR(3),
  FOREIGN KEY (province_id) REFERENCES province(id)
);

INSERT INTO commune (id, name, province_id) VALUES ('01101', 'Iquique', '011');
INSERT INTO commune (id, name, province_id) VALUES ('01107', 'Alto Hospicio', '011');
INSERT INTO commune (id, name, province_id) VALUES ('01401', 'Pozo Almonte', '014');
INSERT INTO commune (id, name, province_id) VALUES ('01402', 'Camiña', '014');
INSERT INTO commune (id, name, province_id) VALUES ('01403', 'Colchane', '014');
INSERT INTO commune (id, name, province_id) VALUES ('01404', 'Huara', '014');
INSERT INTO commune (id, name, province_id) VALUES ('01405', 'Pica', '014');
INSERT INTO commune (id, name, province_id) VALUES ('02101', 'Antofagasta', '021');
INSERT INTO commune (id, name, province_id) VALUES ('02102', 'Mejillones', '021');
INSERT INTO commune (id, name, province_id) VALUES ('02103', 'Sierra Gorda', '021');
INSERT INTO commune (id, n