# Trusted zone
Combines the per-year tables of each data source into a single table, adding a `Year` column taken from each table's name.

In [1]:
# ! pip install duckdb --user --upgrade --quiet
# ! pip install pyarrow --user --upgrade --quiet

In [2]:
import os
import duckdb
import numpy as np
import pandas as pd

In [3]:
def df_to_DBtable(DB,df,table): # repeated
    """
    Creates a persistent table in DuckDB from the DataFrame content.

    Parameters
    ----------
    DB : str
        Database file passed to `duckdb.connect`.
    df : pandas.DataFrame
        Data to store.
    table : str
        Name of the table to create. It is interpolated unquoted into the
        SQL statement, so it must be a valid, trusted identifier.

    Any error raised by DuckDB (for instance by the plain `CREATE TABLE`
    statement) propagates to the caller; the connection is closed either way.
    """
    con = duckdb.connect(DB)
    try:
        # Expose the DataFrame to SQL under the table's name, then
        # materialise it as a real table in the database.
        con.register(table, df)
        con.execute(f'CREATE TABLE {table} AS SELECT * FROM {table}')
    finally:
        # Always release the connection, even when the statement fails.
        con.close()

def DBtable_to_df(table, DB=None):
    """
    Reads the DB `table` and returns its content as a data frame.

    Parameters
    ----------
    table : str
        Name of the table to read. It is interpolated unquoted into the
        SQL statement, so it must be a valid, trusted identifier.
    DB : str, optional
        Database file to read from. When omitted, the file name is derived
        from the table name by keeping only its alphabetic characters and
        appending `.duckdb` (e.g. `abc2018` -> `abc.duckdb`).

    Returns
    -------
    pandas.DataFrame
        All rows and columns of `table`.
    """
    if DB is None:
        # Original convention: the database file is named after the
        # alphabetic part of the table name.
        repo = ''.join(filter(str.isalpha,table))
        DB = f'{repo}.duckdb'
    con = duckdb.connect(DB)
    try:
        return con.execute(f'SELECT * FROM {table}').df()
    finally:
        # Always release the connection, even when the query fails.
        con.close()

def get_tables(DB):
    """
    Gets the names of all the tables from the `DB`.

    Parameters
    ----------
    DB : str
        Database file passed to `duckdb.connect`.

    Returns
    -------
    pandas.Series
        The `table_name` column of `information_schema.tables`.
    """
    con = duckdb.connect(DB)
    try:
        catalog = con.execute('SELECT table_name FROM information_schema.tables').df()
    finally:
        # Always release the connection, even when the query fails.
        con.close()
    return catalog['table_name']

def pre_join(DB):
    """
    Adds a version (year) column to all `DB` tables in order to do the joining.
    Returns all the `DB` tables in data frame format.

    Parameters
    ----------
    DB : str
        Database file whose tables are read.

    Returns
    -------
    list of pandas.DataFrame
        One data frame per table listed by `get_tables(DB)`, each with an
        extra `Year` column holding the digits found in the table name
        (as a string; empty when the name contains no digits).
    """
    tablesDB = get_tables(DB)
    dfs = []
    # Read every table from the database that was actually passed in, over a
    # single connection, instead of guessing a file name from each table name.
    con = duckdb.connect(DB)
    try:
        for table in tablesDB:
            # The year is encoded in the table name: keep only its digits.
            year = ''.join(filter(str.isnumeric,table))
            df = con.execute(f'SELECT * FROM {table}').df()
            df['Year'] = year
            dfs.append(df)
    finally:
        # Always release the connection, even when a query fails.
        con.close()
    return dfs

def trusted_zone(DB,table):
    """
    Stacks every table of `DB` into a single data frame and saves it in
    `DB` as a new table named `table`.

    The per-table data frames come from `pre_join(DB)`; they are
    concatenated row-wise with a fresh index and written with
    `df_to_DBtable`.
    """
    stacked = pd.concat(pre_join(DB), axis=0, ignore_index=True)
    df_to_DBtable(DB, stacked, table)

In [4]:
# Build the joined table for each data source; the database file and the
# resulting table share the source's name.
for source in ('nationalities', 'household'):
    trusted_zone(f'{source}.duckdb', source)

### Test

#### nationalities 

In [5]:
# Load the joined `nationalities` table into a data frame and show it.
con = duckdb.connect('nationalities.duckdb')
view_nationalities = con.execute('SELECT * FROM nationalities').df()
con.close()  # the data frame is already in memory
display(view_nationalities)

Unnamed: 0,Madrid_section,Habitantes,Españoles,Extranjeros,Alemania,Austria,Bélgica,Bulgaria,Chipre,Croacia,...,Papúa_Nueva_Guinea,Timor_Oriental,Vanuatu,Otros_Países_de_Oceanía,Year,Belarús,Mónaco,Santa_Sede,República_Democrática_del_Congo,Samoa
0,0014,65.0,64.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2018,,,,,
1,001401,65.0,64.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2018,,,,,
2,001401001,65.0,64.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2018,,,,,
3,0029,4559.0,3953.0,606.0,8.0,0.0,1.0,13.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2018,,,,,
4,002901,4559.0,3953.0,606.0,8.0,0.0,1.0,13.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2018,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14518,182101,253.0,226.0,27.0,0.0,0.0,1.0,5.0,0.0,0.0,...,0.0,0.0,,0.0,2019,0.0,0.0,0.0,,0.0
14519,182101001,253.0,226.0,27.0,0.0,0.0,1.0,5.0,0.0,0.0,...,0.0,0.0,,0.0,2019,0.0,0.0,0.0,,0.0
14520,1837,1658.0,1413.0,245.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,,0.0,2019,0.0,0.0,0.0,,0.0
14521,183701,1658.0,1413.0,245.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,,0.0,2019,0.0,0.0,0.0,,0.0


#### household

In [6]:
# Load the joined `household` table into a data frame and show it.
con = duckdb.connect('household.duckdb')
view_household = con.execute('SELECT * FROM household').df()
con.close()  # the data frame is already in memory
display(view_household)

Unnamed: 0,section,population,single_women_aged_16_to_64,single_men_aged_16_to_64,single_women_aged_65_or_over,single_men_aged_65_or_over,adult_women_with_one_or_more_minors,adult_men_with_one_or_more_minors,two_adults_from_16_to_64_and_without_minors,two_adults_one_at_least_65_and_without_minors,...,two_adults_over_35_and_one_adult_from_16_to_34_and_two_minors,three_adults_and_0_or_more_minors,two_adults_over_35_and_two_adults_from_16_to_34,two_adults_over_35_and_two_adults_from_16_to_34_and_one_minor,two_adults_over_35_and_two_adults_from_16_to_34_and_two_or_more_minors,four_adults_and_0_or_more_minors,five_adults_and_0_or_more_minors,fifteen_or_more_inhabitants,only_minors,Year
0,Ciudad de Madrid,1307682.0,117998.0,116863.0,126695.0,37538.0,26232.0,5328.0,163183.0,175581.0,...,9283.0,106779.0,51993.0,8131.0,3537.0,48740.0,59406.0,622.0,422.0,2020
1,01. Centro,69187.0,11695.0,13596.0,5057.0,2269.0,815.0,172.0,13563.0,5267.0,...,213.0,4719.0,1123.0,181.0,67.0,1898.0,2153.0,48.0,25.0,2020
2,1001.0,568.0,75.0,93.0,52.0,17.0,6.0,5.0,97.0,74.0,...,2.0,36.0,11.0,3.0,0.0,20.0,20.0,0.0,0.0,2020
3,1002.0,492.0,111.0,99.0,32.0,17.0,4.0,1.0,96.0,30.0,...,0.0,33.0,6.0,1.0,0.0,15.0,14.0,0.0,0.0,2020
4,1003.0,862.0,137.0,164.0,68.0,43.0,7.0,2.0,148.0,80.0,...,3.0,50.0,16.0,7.0,0.0,28.0,37.0,0.0,0.0,2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7390,21029.0,526.0,35.0,33.0,7.0,5.0,21.0,9.0,36.0,24.0,...,11.0,25.0,51.0,8.0,2.0,9.0,16.0,0.0,0.0,2019
7391,21030.0,563.0,33.0,59.0,26.0,8.0,16.0,4.0,70.0,49.0,...,3.0,36.0,43.0,5.0,0.0,12.0,21.0,0.0,0.0,2019
7392,21031.0,948.0,99.0,94.0,14.0,7.0,41.0,8.0,155.0,35.0,...,6.0,29.0,25.0,6.0,0.0,17.0,23.0,0.0,0.0,2019
7393,21032.0,658.0,72.0,67.0,18.0,6.0,33.0,13.0,94.0,25.0,...,3.0,22.0,27.0,7.0,0.0,6.0,9.0,1.0,2.0,2019
