In [3]:
# Merge data of CSV files we are going to use
import pandas as pd
from os.path import exists
from os import makedirs

# Input directory: one "<year>_birthPlaceRegion_sex.csv" file per year
dataPath = '../data/birthPlaceRegion/'
# Output directory for the merged CSV files
mergedPath = '../data/merged_data/'
# exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of a separate exists() test.
makedirs(mergedPath, exist_ok=True)


# Build one DataFrame from the per-year birthPlaceRegion CSV files
def merge_csvs_explicit(data_path=None, first_year=1997, last_year=2025,
                        file_suffix="_birthPlaceRegion_sex.csv"):
    """Read one CSV per year and stack them into a single DataFrame.

    For every year in ``first_year..last_year`` (both inclusive) the file
    ``<data_path>/<year><file_suffix>`` is read. Years whose file does not
    exist are reported and skipped. If a file has a ``Value`` column, the
    placeholder ``'..'`` and any other non-numeric entry are turned into 0
    and the column is converted to a numeric dtype.

    Parameters
    ----------
    data_path : str, optional
        Directory holding the yearly files. When omitted, the notebook-level
        ``dataPath`` variable is used (looked up at call time).
    first_year, last_year : int
        Inclusive year span to load. Defaults cover 1997 through 2025.
    file_suffix : str
        Part of the file name that follows the year.

    Returns
    -------
    pandas.DataFrame or None
        All yearly frames concatenated with a fresh 0..n-1 index, or ``None``
        when not a single file was found.
    """
    from os.path import join  # local import keeps this cell self-contained

    if data_path is None:
        # Backward-compatible default: use the notebook-level setting.
        data_path = dataPath

    dfs = []
    for year in range(first_year, last_year + 1):
        # join() tolerates a data_path with or without a trailing separator.
        file_path = join(data_path, f"{year}{file_suffix}")
        try:
            df = pd.read_csv(file_path, delimiter=',')
        except FileNotFoundError:
            print(f"Warning: File for year {year} not found")
            continue

        if 'Value' in df.columns:
            # '..' marks a missing figure in the source data; map it to 0 first
            # so an otherwise-integer column stays integer, then coerce any
            # remaining non-numeric entries to NaN and zero them as well.
            df['Value'] = df['Value'].replace('..', 0)
            df['Value'] = pd.to_numeric(df['Value'], errors='coerce').fillna(0)
        dfs.append(df)
        print(f"Successfully read {year}")

    if not dfs:
        print("No files were found to merge")
        return None

    df_all_years = pd.concat(dfs, ignore_index=True)
    print(f"Successfully merged {len(dfs)} files")
    return df_all_years


# Concatenate all yearly DataFrames into a single DataFrame
df_combined = merge_csvs_explicit()
if df_combined is None:
    # The merge helper returns None when no yearly file was found; stop here
    # with a clear message instead of an AttributeError on None.to_csv.
    raise FileNotFoundError(
        f"No yearly CSV files found under {dataPath!r}; nothing to save"
    )
# Save the combined DataFrame to a new CSV file
df_combined.to_csv(f"{mergedPath}merged_birthPlaceRegion_sex.csv", index=False)
print("Merged data saved to 'merged_birthPlaceRegion_sex.csv'")



Successfully read 1997
Successfully read 1998
Successfully read 1999
Successfully read 2000
Successfully read 2001
Successfully read 2002
Successfully read 2003
Successfully read 2004
Successfully read 2005
Successfully read 2006
Successfully read 2007
Successfully read 2008
Successfully read 2009
Successfully read 2010
Successfully read 2011
Successfully read 2012
Successfully read 2013
Successfully read 2014
Successfully read 2015
Successfully read 2016
Successfully read 2017
Successfully read 2018
Successfully read 2019
Successfully read 2020
Successfully read 2021
Successfully read 2022
Successfully read 2023
Successfully read 2024
Successfully read 2025
Successfully merged 29 files
Merged data saved to 'merged_birthPlaceRegion_sex.csv'


In [4]:
# Merge the yearly "birthplace: Spain vs. outside" CSV files
# (source dataset: Data_Lloc_naix_(esp_vs_fuera)).

# Output directory for the merged CSV files
mergedPath = '../data/merged_data/'
# Input directory with one CSV per year.
# NOTE: this rebinds the notebook-level dataPath, so any later call to
# merge_csvs_explicit() will look in this directory as well.
dataPath = '../data/birthPlace_spain_v_outside/'
# Build one DataFrame from the per-year birthPlace_spain_v_outside CSV files
def merge_csvs_explicit_esp(data_path=None, first_year=1997, last_year=2025,
                            file_suffix="_birthPlace_spain_v_outside.csv"):
    """Read one "Spain vs. outside" CSV per year and stack them together.

    For every year in ``first_year..last_year`` (both inclusive) the file
    ``<data_path>/<year><file_suffix>`` is read. Years whose file does not
    exist are reported and skipped. If a file has a ``Value`` column, the
    placeholder ``'..'`` and any other non-numeric entry are turned into 0
    and the column is converted to a numeric dtype.

    Parameters
    ----------
    data_path : str, optional
        Directory holding the yearly files. When omitted, the notebook-level
        ``dataPath`` variable is used (looked up at call time).
    first_year, last_year : int
        Inclusive year span to load. Defaults cover 1997 through 2025.
    file_suffix : str
        Part of the file name that follows the year.

    Returns
    -------
    pandas.DataFrame or None
        All yearly frames concatenated with a fresh 0..n-1 index, or ``None``
        when not a single file was found.
    """
    from os.path import join  # local import keeps this cell self-contained

    if data_path is None:
        # Backward-compatible default: use the notebook-level setting.
        data_path = dataPath

    dfs = []
    for year in range(first_year, last_year + 1):
        # join() tolerates a data_path with or without a trailing separator.
        file_path = join(data_path, f"{year}{file_suffix}")
        try:
            df = pd.read_csv(file_path, delimiter=',')
        except FileNotFoundError:
            print(f"Warning: File for year {year} not found")
            continue

        if 'Value' in df.columns:
            # '..' marks a missing figure in the source data; map it to 0 first
            # so an otherwise-integer column stays integer, then coerce any
            # remaining non-numeric entries to NaN and zero them as well.
            df['Value'] = df['Value'].replace('..', 0)
            df['Value'] = pd.to_numeric(df['Value'], errors='coerce').fillna(0)
        dfs.append(df)
        print(f"Successfully read {year}")

    if not dfs:
        print("No files were found to merge")
        return None

    df_all_years = pd.concat(dfs, ignore_index=True)
    print(f"Successfully merged {len(dfs)} files")
    return df_all_years


# Concatenate all yearly DataFrames into a single DataFrame
df_combined = merge_csvs_explicit_esp()
if df_combined is None:
    # The merge helper returns None when no yearly file was found; stop here
    # with a clear message instead of an AttributeError on None.to_csv.
    raise FileNotFoundError(
        f"No yearly CSV files found under {dataPath!r}; nothing to save"
    )
# Save the combined DataFrame to a new CSV file
df_combined.to_csv(f"{mergedPath}merged_birthPlace_spain_v_outside.csv", index=False)
print("Merged data saved to 'merged_birthPlace_spain_v_outside.csv'")


Successfully read 1997
Successfully read 1998
Successfully read 1999
Successfully read 2000
Successfully read 2001
Successfully read 2002
Successfully read 2003
Successfully read 2004
Successfully read 2005
Successfully read 2006
Successfully read 2007
Successfully read 2008
Successfully read 2009
Successfully read 2010
Successfully read 2011
Successfully read 2012
Successfully read 2013
Successfully read 2014
Successfully read 2015
Successfully read 2016
Successfully read 2017
Successfully read 2018
Successfully read 2019
Successfully read 2020
Successfully read 2021
Successfully read 2022
Successfully read 2023
Successfully read 2024
Successfully read 2025
Successfully merged 29 files
Merged data saved to 'merged_birthPlace_spain_v_outside.csv'
