# DANE 2018 Urban Demographic Data

### Project Data Overview
(DANE Census and Geospatial Data)

In [1]:
import os

In [2]:
os.listdir()

['Stars_Project-Urban_Demographic_Map_Colombia.qgz',
 'Stars_Population_DANE',
 'Stars_Population_DANE.csv',
 'DANE_2018_UrbanDemographicData.ipynb',
 'Stars_Map_Geospatial_Data_Colombia.gpkg',
 'Final Maps',
 'DANE_keywords_es_en_dict.csv',
 'desktop.ini']

In [3]:
# List .gpkg project layers
# gpkg_file and layers are notebook-level names: get_crs() reads gpkg_file and
# the CRS loop in the next cell iterates over layers.

import fiona

gpkg_file = 'Stars_Map_Geospatial_Data_Colombia.gpkg'

layers = fiona.listlayers(gpkg_file)

list(layers)

['MGN_ANM_SECCION_URBANA',
 'MGN_URB_AREA_CENSAL',
 'MGN_URB_SECTOR',
 'RedVial_OD_5',
 'MGN_MUNICIPIO_POLITICO',
 'Centroides_Cabecera_Municipios',
 'COLOMBIA_Departamentos',
 'COLOMBIA_VIAS',
 'Colombia_Disuelto',
 'COLOMBIA_AMERICA',
 'América',
 'Colombia_Vecinos',
 'Estrellas_DANE',
 'layer_styles']

In [4]:
# Function to get CRS from a layer
def get_crs(layer_name, gpkg_path=None):
    """Return the coordinate reference system recorded for one GeoPackage layer.

    Parameters
    ----------
    layer_name : str
        Name of the layer to open.
    gpkg_path : str, optional
        Path of the GeoPackage to read. When omitted, the notebook-level
        ``gpkg_file`` is used, so existing ``get_crs(layer_name)`` calls
        behave exactly as before.

    Returns
    -------
    The ``crs`` attribute of the opened fiona collection, returned unchanged.
    """
    source_path = gpkg_file if gpkg_path is None else gpkg_path
    # The context manager closes the dataset handle as soon as the CRS is read.
    with fiona.open(source_path, layer=layer_name) as layer:
        return layer.crs

# Print layer names and their CRS
for layer_name in layers:
    crs = get_crs(layer_name)
    print(f"Layer: {layer_name}, CRS: {crs}")

Layer: MGN_ANM_SECCION_URBANA, CRS: EPSG:4686
Layer: MGN_URB_AREA_CENSAL, CRS: EPSG:4686
Layer: MGN_URB_SECTOR, CRS: EPSG:4686
Layer: RedVial_OD_5, CRS: EPSG:4686
Layer: MGN_MUNICIPIO_POLITICO, CRS: EPSG:4686
Layer: Centroides_Cabecera_Municipios, CRS: EPSG:4686
Layer: COLOMBIA_Departamentos, CRS: EPSG:4686
Layer: COLOMBIA_VIAS, CRS: EPSG:4326
Layer: Colombia_Disuelto, CRS: EPSG:4686
Layer: COLOMBIA_AMERICA, CRS: EPSG:4326
Layer: América, CRS: EPSG:4326
Layer: Colombia_Vecinos, CRS: EPSG:4686
Layer: Estrellas_DANE, CRS: EPSG:4686
Layer: layer_styles, CRS: 


### Opening 2018 DANE Census Data

Preprocessing

In [5]:
# import geopandas as gpd
import pandas as pd

In [6]:
DANE_2018 = "https://www.dane.gov.co/files/censo2018/informacion-tecnica/PERSONAS_DEMOGRAFICO_Cuadros_CNPV_2018.xlsx"

# Load the workbook from the DANE URL into a DataFrame.
# No sheet_name is passed, so df holds one sheet only, not the whole workbook.
df = pd.read_excel(DANE_2018, engine='openpyxl')

# On a DataFrame, keys() returns the column labels
df.keys()

Index(['Unnamed: 0', 'Unnamed: 1'], dtype='object')

In [7]:
# NOTE(review): df is a single DataFrame, so df.keys() yields its column labels,
# not the workbook's sheet names. pd.ExcelFile(DANE_2018).sheet_names would list the sheets.
df_names = df.keys()  # Or df.columns if stored as columns

print("DataFrames inside df:")
for names in df_names:
    print(names)

DataFrames inside df:
Unnamed: 0
Unnamed: 1


In [8]:
# In case of wanting to extract all the sheets
'''import pandas as pd

excel_dict = {}
excel = pd.ExcelFile(DANE_2018)
for sheet in excel.sheet_names:
    print(sheet)
    columns = excel.parse(sheet).columns
    converters = {col: str for col in columns}
    '''

'import pandas as pd\n\nexcel_dict = {}\nexcel = pd.ExcelFile(DANE_2018)\nfor sheet in excel.sheet_names:\n    print(sheet)\n    columns = excel.parse(sheet).columns\n    converters = {col: str for col in columns}\n    '

In [9]:
# Open the workbook and parse only the sheet used in this project
excel = pd.ExcelFile(DANE_2018)
sheet_name = '5.1PM'  # the sheet's own title row reads "Cuadro 5.1PM Municipal"

# No header options are passed, so the columns come out as 'Unnamed: n'
sheet = excel.parse(sheet_name)

##### Selecting Keywords

In [22]:
# Municipal_Pop is a second name for the parsed sheet; no copy is made here
Municipal_Pop = sheet
# The first rows are title/caption text and the table's own header rows
Municipal_Pop.head(10)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20
0,,,,,,,,,,,...,,,,,,,,,,
1,Censo Nacional de Población y Vivienda - CNPV ...,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,PERSONAS (Demográfico),,,,,,,,,,...,,,,,,,,,,
4,"Población censada en hogares particulares, por...",,,,,,,,,,...,,,,,,,,,,
5,Cuadro 5.1PM Municipal,,,,,,,,,,...,,,,,,,,,,
6,,,,,,,,,,,...,,,,,,,,,,
7,2018,,,,,,,,,,...,,,,,,,,,,
8,"Grupos de edad y áreas (Total, Cabecera y Cent...",,,,Total,Relación o parentesco con el Jefe(a) del hogar,,,,,...,,,,,,,,,,
9,,,,,,Jefe(a) del hogar,"Pareja (Conyuge, compañero(a), esposo(a))",Hijo(a),Hijastro(a),Yerno/nuera,...,"Padrastro, madrastra",Suegro(a),Hermano(a),Hermanastro(a),Cuñado(a),Nieto(a),Abuelo(a),Otro pariente,Empleado(a) del servicio doméstico,No pariente


In [23]:
Municipal_Pop.shape

(80994, 21)

### Project Keywords - 2018 DANE Census keys
#### Spanish-English Keywords Dictionary

In [10]:
'''
If you don't want to see the process, 
skip the following sections 'Selecting Keywords' and 'Creating Spanish - English Dictionary ...'
'''

"\nIf you don't want to see the process, \nskip the following sections 'Selecting Keywords' and 'Creating Spanish - English Dictionary ...'\n"

In [11]:
import pandas as pd

In [12]:
DANE_es_en = pd.read_csv('DANE_keywords_es_en_dict.csv', header = 0)
DANE_es_en

Unnamed: 0,Spanish,English
0,PERSONAS (Demográfico),INDIVIDUALS (Demographic)
1,"Grupos de edad y áreas (Total, Cabecera y Cent...","Age groups and areas (Total, Urban and Populat..."
2,Total Nacional,National Total Population
3,Fuente: DANE - Censo Nacional de Población y V...,Source: DANE - National Population and Housing...
4,Cabecera,Municipal Center
5,Centro Poblado,Population Center
6,Rural disperso,Dispersed Rural
7,Departamento y país,Department and country
8,Municipio,Municipality
9,Área Población,Population Area


In [16]:
col_mun = list(Municipal_Pop.columns)
print(col_mun)

['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4', 'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13', 'Unnamed: 14', 'Unnamed: 15', 'Unnamed: 16', 'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19', 'Unnamed: 20']


In [17]:
len(col_mun)

21

In [18]:
# Municipal_Pop has 21 columns, so the 0:22 bound is clamped and every column is selected
header  = Municipal_Pop.iloc[:, 0:22]

In [19]:
# Walk the columns in order and collect each column's distinct text cells,
# skipping anything that is not a string or that starts with a digit.
# A label shared by several columns is collected once per column.
keys = [
    cell
    for column_label in col_mun
    for cell in header[column_label].unique()
    if isinstance(cell, str) and not cell[0].isdigit()
]

# Show the collected labels
list(keys)

['Censo Nacional de Población y Vivienda - CNPV 2018',
 'PERSONAS (Demográfico)',
 'Población censada en hogares particulares, por relación o parentesco con el jefe(a) de hogar, según municipio, área (Total, Cabecera y Centros poblados y Rural disperso)  y grupo de edad ',
 'Cuadro 5.1PM Municipal',
 'Grupos de edad y áreas (Total, Cabecera y Centros poblados y Rural disperso) y ',
 'Total Nacional',
 'Fuente: DANE - Censo Nacional de Población y Vivienda 2018',
 'Total',
 'Cabecera',
 'Centro Poblado',
 'Rural disperso',
 'Total',
 'Total',
 'Relación o parentesco con el Jefe(a) del hogar',
 'Jefe(a) del hogar',
 'Pareja (Conyuge, compañero(a), esposo(a))',
 'Hijo(a)',
 'Hijastro(a)',
 'Yerno/nuera',
 'Padre/madre',
 'Padrastro, madrastra',
 'Suegro(a)',
 'Hermano(a)',
 'Hermanastro(a)',
 'Cuñado(a)',
 'Nieto(a)',
 'Abuelo(a)',
 'Otro pariente',
 'Empleado(a) del servicio doméstico',
 'No pariente']

In [20]:
# Keep keys[1] through keys[10]: the slice skips the census title at index 0
# and ends at 'Rural disperso', before the household-relationship labels.
keywords = keys[1:11]
keywords

['PERSONAS (Demográfico)',
 'Población censada en hogares particulares, por relación o parentesco con el jefe(a) de hogar, según municipio, área (Total, Cabecera y Centros poblados y Rural disperso)  y grupo de edad ',
 'Cuadro 5.1PM Municipal',
 'Grupos de edad y áreas (Total, Cabecera y Centros poblados y Rural disperso) y ',
 'Total Nacional',
 'Fuente: DANE - Censo Nacional de Población y Vivienda 2018',
 'Total',
 'Cabecera',
 'Centro Poblado',
 'Rural disperso']

In [None]:
# Entries that are table captions or generic labels rather than project keywords
drop = ['Cuadro 5.1PM Municipal','Total','Población censada en hogares particulares, por relación o parentesco con el jefe(a) de hogar, según municipio, área (Total, Cabecera y Centros poblados y Rural disperso)  y grupo de edad ']

# Filter rather than call list.remove(): remove() raises ValueError once an
# entry is already gone, which made this cell fail when it was run a second time.
keywords = [keyword for keyword in keywords if keyword not in drop]

list(keywords)

In [None]:
# Column labels used later in the notebook that must also be translated
add = 'Departamento y país', 'Municipio', 'Área Población', 'Edad Población', 'Cabecera'

# Append only what is missing: 'Cabecera' is already among the keywords, and an
# unconditional append would add every entry again each time the cell is re-run.
for keyword in add:
    if keyword not in keywords:
        keywords.append(keyword)

list(keywords)

##### Creating Spanish-English Dictionary of Project Keywords in 2018 DANE Census Variables

In [None]:
# De-duplicate the keywords while keeping their first-seen order
# (dict keys are unique and preserve insertion order).
es_variables = list(dict.fromkeys(keywords))
list(es_variables)

In [None]:
# English labels, written in the same order as es_variables: the dictionary
# cell below pairs the two lists position by position with zip().
en_variables = ['INDIVIDUALS (Demographic)',
    'Age groups and areas (Total, Urban and Populated Centers, and Rural Dispersed)',
    'National Total Population',
    'Source: DANE - National Population and Housing Census 2018',
    'Municipal Center',
    'Population Center',
    'Dispersed Rural',
    'Department and country',
    'Municipality',
    'Population Area',
    'Population Age',
    ]

list(en_variables)

In [None]:
# Create a Spanish - English Dictionary

# zip() stops at the shorter input, so a length mismatch would silently drop
# keywords or leave the pairing incomplete. Fail loudly instead.
if len(es_variables) != len(en_variables):
    raise ValueError(
        "Expected one English label per Spanish keyword, got "
        f"{len(es_variables)} Spanish and {len(en_variables)} English entries"
    )

# Pair the two lists position by position
dict_es_en = dict(zip(es_variables, en_variables))

dict_es_en

In [None]:
# Save the dictionary to a csv file for easy access outside the notebook

import csv

csv_file = "DANE_keywords_es_en_dict.csv"

# newline='' lets the csv module control line endings; utf-8 keeps the accented
# Spanish characters intact.
with open(csv_file, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    # Header row, then one (Spanish, English) row per dictionary entry
    writer.writerow(['Spanish', 'English'])
    writer.writerows(dict_es_en.items())

print(f"Dictionary correctly exported to {csv_file}")


### Exploring Data - Stars Project

In [24]:
# Variable Municipal_Pop (Still including National Total Population)

# Slice the parsed sheet (which Municipal_Pop has aliased up to this point) rather than
# Municipal_Pop itself: re-slicing the already-sliced frame on a second run of this
# cell would silently discard more rows.
# Row 9 is the table's second header row, dropped by the next cell. Of the 21 columns,
# only the first five (location, area, age group, total) are kept.
# NOTE(review): the 80992 stop leaves out the last two rows of the 80994-row sheet,
# presumably the footer/source note. Confirm if the workbook is ever updated.
Municipal_Pop = sheet.iloc[9:80992, 0:5]
Municipal_Pop.tail(23) # Range of all values for a location (Municipality, Department or Country level)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4
80969,,,,70 a 74,8
80970,,,,75 a 79,4
80971,,,,80 a 84,1
80972,,,,85 y más,4
80973,,,Rural disperso,Total,36514
80974,,,,0 a 4,5705
80975,,,,5 a 9,5419
80976,,,,10 a 14,4769
80977,,,,15 a 19,4044
80978,,,,20 a 24,3294


In [25]:
Municipal_Pop.columns

Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], dtype='object')

In [26]:
# Label the five retained columns by what they hold
column_names = ['Departamento y país', 'Municipio', 'Área Población', 'Edad Población', 'Total']

# Apply the labels, discard the first retained row (the leftover header row),
# and renumber the rows from zero.
Municipal_Pop = (
    Municipal_Pop
    .set_axis(column_names, axis=1)
    .iloc[1:]
    .reset_index(drop=True)
)

Municipal_Pop.head()

Unnamed: 0,Departamento y país,Municipio,Área Población,Edad Población,Total
0,Total Nacional,,Total,Total,43835324
1,,,,0 a 4,3033307
2,,,,5 a 9,3319877
3,,,,10 a 14,3579469
4,,,,15 a 19,3788628


In [27]:
# Columns replacement of NaN
#
# In the columns 'Departamento y país', 'Municipio' and 'Área Población', a value is
# written once and followed by a run of NaNs that belong to the same location/area.
#
# Forward-fill replaces every NaN with the closest non-missing value above it in the
# same column. NaNs that come before the first value of a column have nothing to
# inherit and stay NaN (e.g. 'Municipio' on the national-total rows).
#
# ffill() does this in one vectorised call and, unlike a positional .at[i, ...] loop,
# does not depend on the row labels being exactly 0..n-1.

columns_to_replace = ['Departamento y país', 'Municipio', 'Área Población']

Municipal_Pop[columns_to_replace] = Municipal_Pop[columns_to_replace].ffill()

# Now, the NaNs were filled in the corresponding columns with previous assigned values

In [28]:
Municipal_Pop.head()

Unnamed: 0,Departamento y país,Municipio,Área Población,Edad Población,Total
0,Total Nacional,,Total,Total,43835324
1,Total Nacional,,Total,0 a 4,3033307
2,Total Nacional,,Total,5 a 9,3319877
3,Total Nacional,,Total,10 a 14,3579469
4,Total Nacional,,Total,15 a 19,3788628


In [29]:
list(Municipal_Pop['Área Población'].unique())

['Total', 'Cabecera', 'Centro Poblado', 'Rural disperso']

In [30]:
Municipal_Pop.tail(20)

Unnamed: 0,Departamento y país,Municipio,Área Población,Edad Población,Total
80962,99_Vichada,99773_Cumaribo,Centro Poblado,85 y más,4
80963,99_Vichada,99773_Cumaribo,Rural disperso,Total,36514
80964,99_Vichada,99773_Cumaribo,Rural disperso,0 a 4,5705
80965,99_Vichada,99773_Cumaribo,Rural disperso,5 a 9,5419
80966,99_Vichada,99773_Cumaribo,Rural disperso,10 a 14,4769
80967,99_Vichada,99773_Cumaribo,Rural disperso,15 a 19,4044
80968,99_Vichada,99773_Cumaribo,Rural disperso,20 a 24,3294
80969,99_Vichada,99773_Cumaribo,Rural disperso,25 a 29,2628
80970,99_Vichada,99773_Cumaribo,Rural disperso,30 a 34,2134
80971,99_Vichada,99773_Cumaribo,Rural disperso,35 a 39,1808


In [31]:
Municipal_Pop.columns

Index(['Departamento y país', 'Municipio', 'Área Población', 'Edad Población',
       'Total'],
      dtype='object')

Separating General Data at National Level

In [32]:
# Keep only the national-aggregate rows; the two location columns are then redundant
is_national = Municipal_Pop['Departamento y país'] == 'Total Nacional'
National_Pop = Municipal_Pop.loc[is_national].drop(columns=['Departamento y país', 'Municipio'])


In [33]:
# Drop the 'Total' age rows so that only the individual age groups remain
National_Pop = National_Pop[National_Pop['Edad Población'] != 'Total']
National_Pop.head()

Unnamed: 0,Área Población,Edad Población,Total
1,Total,0 a 4,3033307
2,Total,5 a 9,3319877
3,Total,10 a 14,3579469
4,Total,15 a 19,3788628
5,Total,20 a 24,3887189


In [34]:
National_Pop.columns

Index(['Área Población', 'Edad Población', 'Total'], dtype='object')

In [35]:
# Grouping only Municipal Center data (Cabecera) at National Level

National_Pop_Cabecera = National_Pop[National_Pop['Área Población'] == 'Cabecera'][['Edad Población', 'Total']]
# NOTE(review): reset_index() returns a new frame that is only displayed here. It is not
# assigned, so National_Pop_Cabecera keeps the row labels inherited from National_Pop.
National_Pop_Cabecera.reset_index()
# National_Pop_Cabecera.head()

Unnamed: 0,index,Edad Población,Total
0,20,0 a 4,2215389
1,21,5 a 9,2403348
2,22,10 a 14,2595088
3,23,15 a 19,2835687
4,24,20 a 24,3071811
5,25,25 a 29,2937101
6,26,30 a 34,2655681
7,27,35 a 39,2546663
8,28,40 a 44,2148068
9,29,45 a 49,2074223


In [36]:
National_Pop_Cabecera.columns

Index(['Edad Población', 'Total'], dtype='object')

# Integrating DANE Census Data with Geospatial Data

#### Working specifically with municipal data

In [37]:
# Selecting information related to Municipal Centers (Cabeceras)

# One row per municipality: the Cabecera area, all ages combined,
# leaving out the national aggregate.
is_cabecera = Municipal_Pop['Área Población'] == 'Cabecera'
is_all_ages = Municipal_Pop['Edad Población'] == 'Total'
is_not_national = Municipal_Pop['Departamento y país'] != 'Total Nacional'

Municipal_Centers = Municipal_Pop[is_cabecera & is_all_ages & is_not_national]

In [38]:
# The national rows are gone, so the first column now holds departments only;
# rename it accordingly and renumber the rows from zero.
Municipal_Centers = (
    Municipal_Centers
    .rename(columns={'Departamento y país': 'Departamento'})
    .reset_index(drop=True)
)

In [39]:
Municipal_Centers

Unnamed: 0,Departamento,Municipio,Área Población,Edad Población,Total
0,05_Antioquia,05001_Medellín,Cabecera,Total,2320248
1,05_Antioquia,05002_Abejorral,Cabecera,Total,6931
2,05_Antioquia,05004_Abriaquí,Cabecera,Total,708
3,05_Antioquia,05021_Alejandría,Cabecera,Total,2202
4,05_Antioquia,05030_Amagá,Cabecera,Total,12938
...,...,...,...,...,...
1097,97_Vaupés,97666_Taraira,Cabecera,Total,512
1098,99_Vichada,99001_Puerto Carreño,Cabecera,Total,13591
1099,99_Vichada,99524_La Primavera,Cabecera,Total,5171
1100,99_Vichada,99624_Santa Rosalía,Cabecera,Total,2299


Integrating Geographic Information with the DataFrame

In [41]:
import geopandas as gpd

gpkg_file = 'Stars_Map_Geospatial_Data_Colombia.gpkg'

# Read the layer 'Centroides_Cabecera_Municipios' in a GeoDataFrame

''' This layer contains the centroids of Colombian urban areas or municipal centers,
and shows the urban distribution in a more precise manner, compared to when using the centroids
of municipal limits, which include rural areas or other kinds of occupations '''

centroides_gdf = gpd.read_file(gpkg_file, layer='Centroides_Cabecera_Municipios')

In [42]:
centroides_gdf.crs

<Geographic 2D CRS: EPSG:4686>
Name: MAGNA-SIRGAS
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: Colombia - onshore and offshore. Includes San Andres y Providencia, Malpelo Islands, Roncador Bank, Serrana Bank and Serranilla Bank.
- bounds: (-84.77, -4.23, -66.87, 15.51)
Datum: Marco Geocentrico Nacional de Referencia
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [43]:
centroides_gdf.head()

Unnamed: 0,COD_DPTO,COD_MPIO,COD_CLAS,COD_SETR,COD_SECR,COD_CPOB,NOM_CPOB,COD_DANE,CPOB_AREA,ALTITUD,SHAPE_Leng,SHAPE_Area,geometry
0,5,5001,1,0,0,5001000,MEDELLÍN,5001100000000,121.135753,1500.0,1.22011,0.009898,POINT (-75.58178 6.24666)
1,5,5002,1,0,0,5002000,ABEJORRAL,5002100000000,1.238012,2032.0,0.071662,0.000101,POINT (-75.42874 5.78932)
2,5,5004,1,0,0,5004000,ABRIAQUÍ,5004100000000,0.156639,1915.0,0.026742,1.3e-05,POINT (-76.06430 6.63228)
3,5,5021,1,0,0,5021000,ALEJANDRÍA,5021100000000,0.401987,1817.0,0.040012,3.3e-05,POINT (-75.14135 6.37606)
4,5,5030,1,0,0,5030000,AMAGÁ,5030100000000,1.546071,1380.0,0.089251,0.000126,POINT (-75.70204 6.03882)


In [44]:
# Create new columns in 'Municipal_Centers'
#
# 'COD_DPTO' refers to Department Code
# 'NOM_DPTO' refers to Department Name
# 'COD_MPIO' refers to Municipality Code
# 'NOM_CPOB' refers to Municipality Name
#
# 'Departamento' and 'Municipio' hold '<code>_<name>' strings (e.g. '05_Antioquia').
# Split on the first underscore only (maxsplit=1), so that a name which itself
# contains '_' is kept whole instead of being cut at its second underscore.

Municipal_Centers['COD_DPTO'] = Municipal_Centers['Departamento'].apply(lambda x: x.split('_', 1)[0])
Municipal_Centers['NOM_DPTO'] = Municipal_Centers['Departamento'].apply(lambda x: x.split('_', 1)[1])

Municipal_Centers['COD_MPIO'] = Municipal_Centers['Municipio'].apply(lambda x: x.split('_', 1)[0])
Municipal_Centers['NOM_CPOB'] = Municipal_Centers['Municipio'].apply(lambda x: x.split('_', 1)[1])

# Merge 'Municipal_Centers' with 'centroides_gdf' according to Municipality Code.
# The inner join keeps only municipalities present in both tables. Columns that exist
# on both sides ('COD_DPTO', 'NOM_CPOB') get the suffixes _x (centroid layer) and _y (census).
Cabeceras_gdf = centroides_gdf.merge(Municipal_Centers, on='COD_MPIO', how='inner')

# Drop the combined '<code>_<name>' columns and the constant area column from 'Cabeceras_gdf'
columns_to_drop = ['Departamento', 'Municipio', 'Área Población']
Cabeceras_gdf = Cabeceras_gdf.drop(columns=columns_to_drop)

# Change column name for 'Total' to 'Población Cabecera' (Municipal Center Population)
Cabeceras_gdf = Cabeceras_gdf.rename(columns={'Total': 'Población Cabecera'})


In [45]:
Cabeceras_gdf.head()

Unnamed: 0,COD_DPTO_x,COD_MPIO,COD_CLAS,COD_SETR,COD_SECR,COD_CPOB,NOM_CPOB_x,COD_DANE,CPOB_AREA,ALTITUD,SHAPE_Leng,SHAPE_Area,geometry,Edad Población,Población Cabecera,COD_DPTO_y,NOM_DPTO,NOM_CPOB_y
0,5,5001,1,0,0,5001000,MEDELLÍN,5001100000000,121.135753,1500.0,1.22011,0.009898,POINT (-75.58178 6.24666),Total,2320248,5,Antioquia,Medellín
1,5,5002,1,0,0,5002000,ABEJORRAL,5002100000000,1.238012,2032.0,0.071662,0.000101,POINT (-75.42874 5.78932),Total,6931,5,Antioquia,Abejorral
2,5,5004,1,0,0,5004000,ABRIAQUÍ,5004100000000,0.156639,1915.0,0.026742,1.3e-05,POINT (-76.06430 6.63228),Total,708,5,Antioquia,Abriaquí
3,5,5021,1,0,0,5021000,ALEJANDRÍA,5021100000000,0.401987,1817.0,0.040012,3.3e-05,POINT (-75.14135 6.37606),Total,2202,5,Antioquia,Alejandría
4,5,5030,1,0,0,5030000,AMAGÁ,5030100000000,1.546071,1380.0,0.089251,0.000126,POINT (-75.70204 6.03882),Total,12938,5,Antioquia,Amagá


In [46]:
Cabeceras_gdf.columns

Index(['COD_DPTO_x', 'COD_MPIO', 'COD_CLAS', 'COD_SETR', 'COD_SECR',
       'COD_CPOB', 'NOM_CPOB_x', 'COD_DANE', 'CPOB_AREA', 'ALTITUD',
       'SHAPE_Leng', 'SHAPE_Area', 'geometry', 'Edad Población',
       'Población Cabecera', 'COD_DPTO_y', 'NOM_DPTO', 'NOM_CPOB_y'],
      dtype='object')

In [47]:
# Select the desired columns, avoiding repeated data: of each _x/_y pair only the
# census-side (_y) column is kept, and 'Edad Población' is left out.
selected_columns = [
    'COD_MPIO', 'COD_CLAS', 'COD_SETR', 'COD_SECR',
    'COD_CPOB', 'NOM_CPOB_y', 'COD_DANE', 'CPOB_AREA', 'ALTITUD',
    'SHAPE_Leng', 'SHAPE_Area', 'geometry',
    'Población Cabecera', 'COD_DPTO_y', 'NOM_DPTO',
]

Cabeceras_gdf = Cabeceras_gdf[selected_columns]

In [48]:
# Rename columns

Cabeceras_gdf = Cabeceras_gdf.rename(columns={'NOM_CPOB_y': 'NOM_CPOB','COD_DPTO_y':'COD_DPTO'})

In [51]:
# Order columns: department and municipality identifiers first, then the
# population figure, then the remaining layer attributes, geometry last.
ordered_columns = [
    'COD_DPTO', 'NOM_DPTO', 'COD_MPIO',
    'NOM_CPOB', 'Población Cabecera', 'COD_CLAS', 'COD_SETR',
    'COD_SECR', 'COD_CPOB', 'COD_DANE', 'CPOB_AREA', 'ALTITUD',
    'SHAPE_Leng', 'SHAPE_Area', 'geometry',
]

Cabeceras_gdf = Cabeceras_gdf[ordered_columns]

In [50]:
Cabeceras_gdf.columns

Index(['COD_DPTO', 'NOM_DPTO', 'COD_MPIO', 'NOM_CPOB', 'Población Cabecera',
       'COD_CLAS', 'COD_SETR', 'COD_SECR', 'COD_CPOB', 'COD_DANE', 'CPOB_AREA',
       'ALTITUD', 'SHAPE_Leng', 'SHAPE_Area', 'geometry'],
      dtype='object')

In [None]:
#!pip install contextily

## Export Geospatial Data

In [None]:
gpkg_file = 'Stars_Map_Geospatial_Data_Colombia.gpkg'
exported_layer_name = 'Estrellas_DANE'

# Export from GeoDataFrame to GeoPackage
# NOTE(review): the target is the same GeoPackage the source layers are read from, and
# 'Estrellas_DANE' already appears in its layer list. Presumably this call replaces
# that layer; confirm before re-running.
Cabeceras_gdf.to_file(gpkg_file, layer=exported_layer_name, driver='GPKG')

print(f'The GeoDataFrame has been exported to {gpkg_file} with layer name {exported_layer_name}.')


In [53]:
# Check layers in the Geopackage

gpkg_file = 'Stars_Map_Geospatial_Data_Colombia.gpkg'


layers = fiona.listlayers(gpkg_file)

list(layers)

['MGN_ANM_SECCION_URBANA',
 'MGN_URB_AREA_CENSAL',
 'MGN_URB_SECTOR',
 'RedVial_OD_5',
 'MGN_MUNICIPIO_POLITICO',
 'Centroides_Cabecera_Municipios',
 'COLOMBIA_Departamentos',
 'COLOMBIA_VIAS',
 'Colombia_Disuelto',
 'COLOMBIA_AMERICA',
 'América',
 'Colombia_Vecinos',
 'Estrellas_DANE',
 'layer_styles']

In [54]:
# Export to CSV file

import geopandas as gpd
import pandas as pd

gpkg_file = 'Stars_Map_Geospatial_Data_Colombia.gpkg'

# Read the 'Estrellas_DANE' layer into a GeoDataFrame
Estrellas_gdf = gpd.read_file(gpkg_file, layer='Estrellas_DANE')

# Export as a .csv file:

Estrellas_gdf.to_csv('Stars_Population_DANE.csv', sep=',')

#### Continue with the layout design in QGIS