In [1]:
from os import listdir, makedirs
from os.path import exists
import pandas as pd


# Workspace and Mapping Info

In [2]:
# Root folders of the workspace: raw inputs and translated outputs.
data_folder = '../data_original/'
out_folder = '../data/'

# Code table for the coded dimensions. `Codi_Valor` is cast to str so that
# lookups never fail on an int/str mismatch.
codes_df = (
    pd.read_csv(data_folder + 'Codigo_valores/pad_dimensions.csv')
    .astype({'Codi_Valor': str})
)

# One lookup Series per dimension name (`Desc_Dimensio`), indexed by the
# string code and holding the English description (`Desc_Valor_EN`).
mappings = dict(
    (dimension, rows.set_index('Codi_Valor')['Desc_Valor_EN'])
    for dimension, rows in codes_df.groupby('Desc_Dimensio')
)


# 1. Data Lloc Naix Regio

In [3]:
def convert_birthPlaceRegion_sex(file, data_dir, out_dir, mappings):
    """Translate one birth-place-region-by-sex CSV and write it to ``out_dir``.

    The file ``data_dir + file`` is read, the district/neighbourhood code
    columns are dropped, every column that has an entry in ``mappings`` is
    decoded, the column names are translated to English and the reference
    date is reduced to its year. The result is written to
    ``{out_dir}{year}_birthPlaceRegion_sex.csv`` (without the index).

    Parameters
    ----------
    file : str
        Name of the CSV file inside ``data_dir``.
    data_dir : str
        Input folder; it is concatenated with ``file`` as-is, so it must end
        with a path separator.
    out_dir : str
        Output folder, same trailing-separator convention. It must exist.
    mappings : dict
        Maps an original column name to a lookup (anything accepted by
        ``Series.map``, e.g. a Series indexed by the code as a string).

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If ``Codi_Districte``, ``Codi_Barri`` or ``Data_Referencia`` is
        missing from the file.
    ValueError
        If no row carries a parseable reference date, so no year can be
        derived for the output file name.
    """
    df = pd.read_csv(data_dir + file)

    # drop unnecessary columns
    df = df.drop(columns=['Codi_Districte', 'Codi_Barri'])

    # remap values; codes are compared as strings, and a code with no entry
    # in the lookup ends up as NaN
    for col in df.columns:
        if col in mappings:
            df[col] = df[col].astype(str).map(mappings[col])

    # translate column names
    df = df.rename(columns={'Data_Referencia': 'Year_Reference',
                            'Nom_Districte': 'District',
                            'Nom_Barri': 'Neighborhood',
                            'Valor': 'Value',
                            'LLOC_NAIX_REGIO': 'Birth_Place_Region',
                            'SEXE': 'Sex'})

    # Reduce the reference date to its year. Unparseable dates become NaT,
    # which would turn a plain integer column into floats (2020 -> 2020.0);
    # the nullable Int64 dtype keeps the years integral next to missing values.
    parsed_years = pd.to_datetime(df['Year_Reference'], errors='coerce').dt.year
    df['Year_Reference'] = parsed_years.astype('Int64')

    # The file name uses the first *valid* year, so a bad first row can no
    # longer produce names such as 'nan_...' or '2020.0_...'.
    valid_years = df['Year_Reference'].dropna()
    if valid_years.empty:
        raise ValueError(f'{file}: no valid Data_Referencia value to derive the year from')
    year = int(valid_years.iloc[0])

    # save to output
    df.to_csv(f'{out_dir}{year}_birthPlaceRegion_sex.csv', index=False)


In [4]:
# Input folder for the birth-place-region dataset. Only CSV files are
# converted: `listdir` also returns stray entries (e.g. `.ipynb_checkpoints`,
# `.DS_Store`) that `read_csv` cannot handle. Sorting makes the processing
# order reproducible.
data_dir = f'{data_folder}Data_Lloc_naix_regio/'
files = sorted(name for name in listdir(data_dir) if name.lower().endswith('.csv'))

# `exist_ok=True` creates the folder when needed and keeps the cell re-runnable.
out_dir = f'{out_folder}birthPlaceRegion/'
makedirs(out_dir, exist_ok=True)

for file in files:
    convert_birthPlaceRegion_sex(file, data_dir, out_dir, mappings)



# 2. Data Lloc Naix (Spain v Outside)

## AEB? (TODO: confirm what the `AEB` column represents)

In [5]:
def convert_sp(file, data_dir, out_dir, mappings):
    """Translate one birth-place (Spain vs. outside) CSV and write it to ``out_dir``.

    The file ``data_dir + file`` is read, the district/neighbourhood code
    columns are dropped, every column that has an entry in ``mappings`` is
    decoded, the column names are translated to English and the reference
    date is reduced to its year. The result is written to
    ``{out_dir}{year}_birthPlace_spain_v_outside.csv`` (without the index).

    Parameters
    ----------
    file : str
        Name of the CSV file inside ``data_dir``.
    data_dir : str
        Input folder; it is concatenated with ``file`` as-is, so it must end
        with a path separator.
    out_dir : str
        Output folder, same trailing-separator convention. It must exist.
    mappings : dict
        Maps an original column name to a lookup (anything accepted by
        ``Series.map``, e.g. a Series indexed by the code as a string).

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If ``Codi_Districte``, ``Codi_Barri`` or ``Data_Referencia`` is
        missing from the file.
    ValueError
        If no row carries a parseable reference date, so no year can be
        derived for the output file name.
    """
    df = pd.read_csv(data_dir + file)

    # drop unnecessary columns
    df = df.drop(columns=['Codi_Districte', 'Codi_Barri'])

    # remap values; codes are compared as strings, and a code with no entry
    # in the lookup ends up as NaN
    for col in df.columns:
        if col in mappings:
            df[col] = df[col].astype(str).map(mappings[col])

    # translate column names ('AEB' keeps its name)
    df = df.rename(columns={'Data_Referencia': 'Year_Reference',
                            'Nom_Districte': 'District',
                            'Nom_Barri': 'Neighborhood',
                            'AEB': 'AEB',
                            'Seccio_Censal': 'Census_Section',
                            'Valor': 'Value',
                            'LLOC_NAIX': 'Birth_Place',
                            'SEXE': 'Sex'})

    # Reduce the reference date to its year. Unparseable dates become NaT,
    # which would turn a plain integer column into floats (2021 -> 2021.0);
    # the nullable Int64 dtype keeps the years integral next to missing values.
    parsed_years = pd.to_datetime(df['Year_Reference'], errors='coerce').dt.year
    df['Year_Reference'] = parsed_years.astype('Int64')

    # The file name uses the first *valid* year, so a bad first row can no
    # longer produce names such as 'nan_...' or '2021.0_...'.
    valid_years = df['Year_Reference'].dropna()
    if valid_years.empty:
        raise ValueError(f'{file}: no valid Data_Referencia value to derive the year from')
    year = int(valid_years.iloc[0])

    # save to output
    df.to_csv(f'{out_dir}{year}_birthPlace_spain_v_outside.csv', index=False)



In [6]:
# Input folder for the Spain-vs-outside dataset. Only CSV files are
# converted: `listdir` also returns stray entries (e.g. `.ipynb_checkpoints`,
# `.DS_Store`) that `read_csv` cannot handle. Sorting makes the processing
# order reproducible.
data_dir = f'{data_folder}Data_Lloc_naix(esp_vs_fuera)/'
files = sorted(name for name in listdir(data_dir) if name.lower().endswith('.csv'))

# `exist_ok=True` creates the folder when needed and keeps the cell re-runnable.
out_dir = f'{out_folder}birthPlace_spain_v_outside/'
makedirs(out_dir, exist_ok=True)

for file in files:
    convert_sp(file, data_dir, out_dir, mappings)



# 3. Renda

In [7]:
def convert_rent(file, data_dir, out_dir, mappings):
    """Translate one income ("Renda") CSV and write it to ``out_dir``.

    Reads ``data_dir + file``, removes the district/neighbourhood code
    columns, decodes every column that has an entry in ``mappings``, renames
    the columns to English and saves the table as
    ``{out_dir}{year}_rent.csv``, where ``year`` is the value of the first
    row of the ``Any`` column. Returns ``None``.
    """
    column_translation = {'Any': 'Year_Reference',
                          'Nom_Districte': 'District',
                          'Nom_Barri': 'Neighborhood',
                          'Seccio_Censal': 'Census_Section'}

    # load the raw table without the code columns
    table = pd.read_csv(f'{data_dir}{file}').drop(columns=['Codi_Districte', 'Codi_Barri'])

    # decode the columns that have a lookup (codes are compared as strings)
    coded_columns = [name for name in table.columns if name in mappings]
    for name in coded_columns:
        table[name] = table[name].astype(str).map(mappings[name])

    # English column names
    table = table.rename(columns=column_translation)

    # the first row's year names the output file
    first_year = table['Year_Reference'].iloc[0]
    table.to_csv(f'{out_dir}{first_year}_rent.csv', index=False)



In [8]:
# Input folder for the income dataset. Only CSV files are converted:
# `listdir` also returns stray entries (e.g. `.ipynb_checkpoints`,
# `.DS_Store`) that `read_csv` cannot handle. Sorting makes the processing
# order reproducible.
data_dir = f'{data_folder}Renda/'
files = sorted(name for name in listdir(data_dir) if name.lower().endswith('.csv'))

# `exist_ok=True` creates the folder when needed and keeps the cell re-runnable.
out_dir = f'{out_folder}rent/'
makedirs(out_dir, exist_ok=True)

for file in files:
    convert_rent(file, data_dir, out_dir, mappings)
