# Trusted zone

In [1]:
import pandas as pd

import os, sys
# Put the directory two levels above the working directory on the import
# path (once), so that the `import utils` below can presumably be resolved
# from there.
module_path = os.path.abspath('../..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import utils

In [2]:

def pre_join(DB):
    """
    Loads every yearly table of `DB` as a data frame tagged with its year.

    A table counts as a yearly version when the last four characters of its
    name are digits (for example a name ending in `2019`). That suffix is
    stored, as a string, in a new `Year` column so the versions can be stacked
    afterwards.

    Tables without a four-digit suffix are skipped. This keeps a re-run of the
    notebook safe: a previously written joined table in the same database
    (e.g. `nationalities`) would otherwise be read back as one more input, get
    a meaningless `Year` value and duplicate every row.

    Parameters:
        DB: database location, handed unchanged to the `utils` helpers.

    Returns:
        A list with one data frame per yearly table, in the order in which
        `utils.get_tables(DB)` reports the tables.
    """
    dfs = []
    for table in utils.get_tables(DB):
        year = table[-4:]
        # Short names make the slice shorter than four characters, hence the
        # explicit length check next to the digit check.
        if len(year) != 4 or not year.isdecimal():
            continue
        df = utils.DBtable_to_df(DB, table)
        df['Year'] = year
        dfs.append(df)
    return dfs

def trusted_zone(DB,table):
    """
    Builds the single joined table `table` inside the database `DB`.

    The data frames produced by `pre_join(DB)` are stacked row-wise under a
    fresh consecutive index; a column that is missing from some of the frames
    is left empty (NaN) for their rows. The stacked frame is then passed to
    `utils.df_to_DBtable` to be stored under the name `table`.
    Nothing is returned.
    """
    yearly_frames = pre_join(DB)
    stacked = pd.concat(yearly_frames, axis=0, ignore_index=True)
    utils.df_to_DBtable(DB, stacked, table)

In [3]:
# Build one joined table per source database: (database file, joined table name).
for db_path, joined_name in (
    ('../nationalities.duckdb', 'nationalities'),
    ('../household.duckdb', 'household'),
):
    trusted_zone(db_path, joined_name)

### Preview of trusted tables

#### nationalities 

In [4]:
# Preview of the joined `nationalities` table, read back from the database
# it was written to by the `trusted_zone` call above.
utils.DBtable_to_df('../nationalities.duckdb','nationalities')

Unnamed: 0,Madrid_section,Españoles,Alemania,Austria,Bélgica,Bulgaria,Chipre,Croacia,Dinamarca,Eslovaquia,...,Vanuatu,Otros_Países_de_Oceanía,Year,República_Democrática_del_Cong,San_Vicente_y_Las_Granadinas,Papúa_Nueva_Guinea,Samoa,Bielorrusia,Surinam,Islas_Marshall
0,001401001,67.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2020,,,,,,,
1,002901001,2008.0,10.0,0.0,0.0,4.0,0.0,0.0,2.0,0.0,...,0.0,0.0,2020,,,,,,,
2,002901002,2065.0,2.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2020,,,,,,,
3,003501001,184.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2020,,,,,,,
4,004001001,2542.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2020,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13243,181601014,1847.0,4.0,0.0,2.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2018,0.0,0.0,0.0,,0.0,0.0,0.0
13244,181601015,2141.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2018,0.0,0.0,0.0,,0.0,0.0,0.0
13245,181601016,1144.0,14.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,2018,0.0,0.0,0.0,,0.0,0.0,0.0
13246,182101001,229.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2018,0.0,0.0,0.0,,0.0,0.0,0.0


#### household

In [5]:
# Preview of the joined `household` table, read back from the database
# it was written to by the `trusted_zone` call above.
utils.DBtable_to_df('../household.duckdb','household')

Unnamed: 0,section,single_women_aged_16_to_64,single_men_aged_16_to_64,single_women_aged_65_or_over,single_men_aged_65_or_over,adult_women_with_one_or_more_minors,adult_men_with_one_or_more_minors,two_adults_from_16_to_64_and_without_minors,two_adults_one_at_least_65_and_without_minors,two_adults_and_one_minor,...,two_adults_over_35_and_one_adult_from_16_to_34_and_two_minors,three_adults_and_0_or_more_minors,two_adults_over_35_and_two_adults_from_16_to_34,two_adults_over_35_and_two_adults_from_16_to_34_and_one_minor,two_adults_over_35_and_two_adults_from_16_to_34_and_two_or_more_minors,four_adults_and_0_or_more_minors,five_adults_and_0_or_more_minors,fifteen_or_more_inhabitants,only_minors,Year
0,1001.0,67.0,88.0,62.0,16.0,9.0,4.0,101.0,69.0,15.0,...,1.0,35.0,11.0,4.0,2.0,13.0,17.0,0.0,1.0,2019
1,1002.0,99.0,89.0,36.0,15.0,5.0,3.0,84.0,29.0,11.0,...,0.0,41.0,11.0,1.0,0.0,13.0,12.0,0.0,0.0,2019
2,1003.0,147.0,151.0,71.0,42.0,7.0,1.0,144.0,75.0,23.0,...,2.0,54.0,16.0,5.0,1.0,20.0,35.0,0.0,2.0,2019
3,1004.0,99.0,95.0,55.0,20.0,9.0,1.0,88.0,71.0,23.0,...,4.0,43.0,14.0,1.0,0.0,15.0,17.0,1.0,0.0,2019
4,1006.0,142.0,174.0,75.0,42.0,12.0,1.0,151.0,80.0,33.0,...,4.0,61.0,13.0,3.0,1.0,12.0,22.0,0.0,0.0,2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7324,21029.0,32.0,29.0,6.0,5.0,20.0,12.0,45.0,19.0,53.0,...,10.0,29.0,49.0,7.0,2.0,8.0,12.0,0.0,0.0,2018
7325,21030.0,37.0,46.0,29.0,11.0,19.0,6.0,68.0,52.0,44.0,...,3.0,39.0,41.0,3.0,0.0,14.0,16.0,0.0,0.0,2018
7326,21031.0,88.0,91.0,15.0,6.0,39.0,9.0,156.0,31.0,153.0,...,6.0,28.0,21.0,6.0,2.0,10.0,16.0,0.0,0.0,2018
7327,21032.0,65.0,71.0,17.0,4.0,27.0,13.0,82.0,23.0,68.0,...,2.0,21.0,30.0,5.0,0.0,7.0,6.0,1.0,1.0,2018
