Imports

In [1]:
from pathlib import Path
import datetime
import geopandas as gpd
import pandas as pd

In [2]:
def get_shapefiles(root_dir='.'):
    """
    Recursively finds all .shp files in a directory and its subdirectories.

    Paths are visited in sorted order, so the returned dictionary has a
    reproducible insertion order no matter how the filesystem lists its
    directory entries. This matters to callers that treat the first entry
    specially.

    Args:
        root_dir (str or Path): The path to the starting directory.

    Returns:
        dictionary: A dictionary where:
            - Key = filename without extension (string, e.g., 'my_map')
            - Value = Full Path object (Path, e.g., 'C:/Data/2022/my_map.shp')
        The dictionary is empty when no .shp file is found. If several
        files share the same filename, a warning is issued and the path
        that sorts last is the one kept.
    """
    # Local import so this cell does not depend on an edit to the imports cell
    import warnings

    shapefiles = {}

    # sorted() removes the dependency on the OS directory-listing order
    for path in sorted(Path(root_dir).rglob('*.shp')):
        if path.stem in shapefiles:
            # Same stem in two folders: make the overwrite visible
            warnings.warn(
                f"Duplicate shapefile name '{path.stem}': "
                f"{shapefiles[path.stem]} is replaced by {path}",
                stacklevel=2,
            )
        shapefiles[path.stem] = path

    return shapefiles



In [3]:
def create_excel(shapefiles, output_folder='data_miteco_excel'):
    """
    Exports the attribute table of every Shapefile to its own Excel workbook.

    Each Shapefile is read, its 'geometry' column is removed (if present),
    and the remaining columns are written to '<name>.xlsx' inside
    ``output_folder``. A failure on one file is reported on stdout and does
    not stop the remaining files from being processed.

    Args:
        shapefiles (dict): Maps a name (str), used as the workbook filename,
                           to the Shapefile location (Path or str).
        output_folder (str, optional): Directory that receives the workbooks;
                                       created, with parents, if missing.
                                       Defaults to 'data_miteco_excel'.

    Returns:
        None: The function only writes files and prints progress messages.
    """

    target_dir = Path(output_folder)
    target_dir.mkdir(parents=True, exist_ok=True)

    for layer_name, shp_path in shapefiles.items():
        try:
            # Read the layer and strip the geometry in one step, leaving
            # only the tabular attributes
            attributes = gpd.read_file(shp_path).drop(columns='geometry', errors='ignore')

            workbook_path = target_dir / (layer_name + '.xlsx')

            with pd.ExcelWriter(workbook_path, engine='openpyxl') as writer:
                attributes.to_excel(writer, index=False)

            print(f"Successfully saved: {workbook_path}")

        except Exception as err:
            # Best-effort export: report the problem and move on
            print(f"Error processing {layer_name}: {err}")

In [6]:
def extract_data(shapefiles, join_key='natcode'):
    """
    Combines all shapefiles that contain ``join_key`` into a single table.

    The first shapefile (in dictionary order) that has the key column is
    kept in full and becomes the base table. From every later shapefile
    that has the key, only the columns not yet present in the merged table
    are taken, together with the key, and joined with an outer merge.
    Shapefiles without the key column are skipped.

    Args:
        shapefiles (dict): Maps names to shapefile paths readable by
                           ``gpd.read_file``; only the paths are used.
        join_key (str, optional): Column to merge on. Defaults to 'natcode'.

    Returns:
        The merged table, or None when ``shapefiles`` is empty or no
        shapefile contains ``join_key``.
    """
    merged = None

    for _, shp_path in shapefiles.items():
        layer = gpd.read_file(shp_path)

        # Layers without the key cannot be joined
        if join_key not in layer.columns:
            continue

        # The first usable layer is taken as-is
        if merged is None:
            merged = layer
            continue

        # Bring in only columns the merged table does not have yet; the key
        # always comes first because the merge needs it
        new_columns = [
            column
            for column in layer.columns
            if column != join_key and column not in merged.columns
        ]
        incoming = pd.DataFrame(layer)[[join_key] + new_columns]

        merged = merged.merge(incoming, on=join_key, how='outer')

    return merged
        

In [5]:
# Collect every shapefile under the working directory, leaving out the
# merged layer this notebook writes to 'data_miteco_shape' so that a re-run
# does not feed the notebook's own output back in as an input.
shapefiles = {
    name: path
    for name, path in get_shapefiles().items()
    if 'data_miteco_shape' not in path.parts
}

# Export each attribute table to its own Excel workbook
create_excel(shapefiles)

Successfully saved: data_miteco_excel\PctAutoc_2022.xlsx
Successfully saved: data_miteco_excel\CifraPob2023.xlsx
Successfully saved: data_miteco_excel\DensPob2023.xlsx
Successfully saved: data_miteco_excel\EdadMedia2023.xlsx
Successfully saved: data_miteco_excel\NumAfi_2022.xlsx
Successfully saved: data_miteco_excel\NumParados_2022.xlsx
Successfully saved: data_miteco_excel\PctPob65_2022.xlsx
Successfully saved: data_miteco_excel\PctPob16_2021.xlsx
Successfully saved: data_miteco_excel\PctPobExt_2022.xlsx
Successfully saved: data_miteco_excel\RatioMasc_2023.xlsx
Successfully saved: data_miteco_excel\RentaMedia_2020.xlsx
Successfully saved: data_miteco_excel\VarPob2014_2023.xlsx


In [7]:
# Merge all shapefiles that share the default join key ('natcode') into one
# table; extract_data returns None if no shapefile has that column.
gdf = extract_data(shapefiles)

In [9]:
    folder = Path(output_folder)
    folder.mkdir(parents=True, exist_ok=True)
gdf.to_file("data_miteco_shape/data_miteco.shp", driver='ESRI Shapefile', encoding='utf-8')

DataLayerError: Failed to create file data_miteco_shape\data_miteco.shp: No such file or directory