In [None]:
#| default_exp hier

# hier

> Code for dealing with multiindexed columns

In [None]:
#| export
import pathlib
import itertools

import pandas as pd
import numpy as np
from lxml import etree
import yaml

# import sproc.xml
import sproc.structure

In order to avoid circular dependencies in the resulting *Python* modules, and since it is only needed for testing, `sproc.xml` is imported in a separate (non-exported) cell

In [None]:
import sproc.xml

Directory where the data (*XML* files) are stored

In [None]:
directory = pathlib.Path.cwd().parent / 'samples'
assert directory.exists()
directory

PosixPath('/home/manu/Sync/UC3M/proyectos/2022/nextProcurement/sproc/samples')

A (sample) file in that directory

In [None]:
xml_file = directory / 'PlataformasAgregadasSinMenores_20220104_030016_1.atom'
assert xml_file.exists()
xml_file

PosixPath('/home/manu/Sync/UC3M/proyectos/2022/nextProcurement/sproc/samples/PlataformasAgregadasSinMenores_20220104_030016_1.atom')

*Root* element of the *XML* tree

In [None]:
root = etree.parse(xml_file).getroot()

Entries are extracted

In [None]:
entries = sproc.xml.get_entries(root)
assert len(entries) == 117
entries[:4]

[<Element {http://www.w3.org/2005/Atom}entry>,
 <Element {http://www.w3.org/2005/Atom}entry>,
 <Element {http://www.w3.org/2005/Atom}entry>,
 <Element {http://www.w3.org/2005/Atom}entry>]

The first one is picked out

In [None]:
element = entries[0]
sproc.xml.split_namespace_tag(element.tag)

('http://www.w3.org/2005/Atom', 'entry')

It is turned into a `pd.Series`

In [None]:
element_series = sproc.xml.entry_to_series(element)
element_series[:8]

id                                                                           https://contrataciondelestado.es/sindicacion/P...
summary                                                                      Id licitación: C. 2-2021; Órgano de Contrataci...
title                                                                        L'objecte del contracte és la renovació de tot...
updated                                                                                          2022-01-03T01:11:41.826+01:00
ContractFolderStatus - ContractFolderID                                                                              C. 2-2021
ContractFolderStatus - ContractFolderStatusCode                                                                            ADJ
ContractFolderStatus - LocatedContractingParty - BuyerProfileURIID           https://contractaciopublica.gencat.cat/ecofin_...
ContractFolderStatus - LocatedContractingParty - Party - PartyName - Name                             Ajuntamen

## Series

We can easily iterate the series

In [None]:
next(iter(element_series.items()))

('id',
 'https://contrataciondelestado.es/sindicacion/PlataformasAgregadasSinMenores/8799346')

In [None]:
# every flat label is split into the path (a `tuple`) of nested tags it encodes
index_paths = [
    tuple(label.split(sproc.structure.nested_tags_separator))
    for label in element_series.index
]

# the values, in the same order as the labels above
values = list(element_series)

In [None]:
index_paths[:6]

[('id',),
 ('summary',),
 ('title',),
 ('updated',),
 ('ContractFolderStatus', 'ContractFolderID'),
 ('ContractFolderStatus', 'ContractFolderStatusCode')]

In [None]:
values[:6]

['https://contrataciondelestado.es/sindicacion/PlataformasAgregadasSinMenores/8799346',
 'Id licitación: C. 2-2021; Órgano de Contratación: Ajuntament de Sant Ramon; Importe: 135553.26; Estado: ADJUDICADA',
 "L'objecte del contracte és la renovació de totes les llumeneres que formen la il·luminació existent de tots els carrers i vials del casc urbà de la localitat de Sant Ramon i dels nuclis agregats de La Manresana, Portell, Viver i Gospí",
 '2022-01-03T01:11:41.826+01:00',
 'C. 2-2021',
 'ADJ']

In [None]:
multiindex = pd.MultiIndex.from_tuples(index_paths)
multiindex[:6]

MultiIndex([(                  'id',                        nan, nan, ...),
            (             'summary',                        nan, nan, ...),
            (               'title',                        nan, nan, ...),
            (             'updated',                        nan, nan, ...),
            ('ContractFolderStatus',         'ContractFolderID', nan, ...),
            ('ContractFolderStatus', 'ContractFolderStatusCode', nan, ...)],
           )

In [None]:
multiindexed_series = pd.Series(values, index=multiindex)
multiindexed_series[:6]

id                    NaN                       NaN  NaN  NaN    https://contrataciondelestado.es/sindicacion/P...
summary               NaN                       NaN  NaN  NaN    Id licitación: C. 2-2021; Órgano de Contrataci...
title                 NaN                       NaN  NaN  NaN    L'objecte del contracte és la renovació de tot...
updated               NaN                       NaN  NaN  NaN                        2022-01-03T01:11:41.826+01:00
ContractFolderStatus  ContractFolderID          NaN  NaN  NaN                                            C. 2-2021
                      ContractFolderStatusCode  NaN  NaN  NaN                                                  ADJ
dtype: object

A function that turns the index of a `pd.Series` into a hierarchical (`pd.MultiIndex`) one.

In [None]:
#| export
def flat_series_to_multiindexed_series(
    s: pd.Series # Flat series
) -> pd.Series: # Multi-indexed series
    "Splits every (flat) label of `s` into its nested tags and returns the same values under the resulting `pd.MultiIndex`"

    separator = sproc.structure.nested_tags_separator

    # one `tuple` of nested tags per label, following the order of the original index
    paths = [tuple(label.split(separator)) for label in s.index]

    # the values keep their original order; only the index is replaced
    return pd.Series(list(s), index=pd.MultiIndex.from_tuples(paths))

In [None]:
flat_series_to_multiindexed_series(element_series)[:6]

id                    NaN                       NaN  NaN  NaN    https://contrataciondelestado.es/sindicacion/P...
summary               NaN                       NaN  NaN  NaN    Id licitación: C. 2-2021; Órgano de Contrataci...
title                 NaN                       NaN  NaN  NaN    L'objecte del contracte és la renovació de tot...
updated               NaN                       NaN  NaN  NaN                        2022-01-03T01:11:41.826+01:00
ContractFolderStatus  ContractFolderID          NaN  NaN  NaN                                            C. 2-2021
                      ContractFolderStatusCode  NaN  NaN  NaN                                                  ADJ
dtype: object

## DataFrame

We can concatenate together the `pd.Series` for the different *entries* into a `pd.DataFrame`

In [None]:
pd.concat([flat_series_to_multiindexed_series(sproc.xml.entry_to_series(e)) for e in entries[:4]], axis=1).T

Unnamed: 0_level_0,id,summary,title,updated,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus
Unnamed: 0_level_1,NaN,NaN,NaN,NaN,ContractFolderID,ContractFolderStatusCode,LocatedContractingParty,LocatedContractingParty,LocatedContractingParty,ProcurementProject,...,TenderResult,TenderResult,TenderingProcess,TenderingProcess,TenderingProcess,ValidNoticeInfo,ValidNoticeInfo,ValidNoticeInfo,LegalDocumentReference,LegalDocumentReference
Unnamed: 0_level_2,NaN,NaN,NaN,NaN,NaN,NaN,BuyerProfileURIID,Party,ParentLocatedParty,Name,...,WinningParty,AwardedTenderedProject,ProcedureCode,TenderSubmissionDeadlinePeriod,TenderSubmissionDeadlinePeriod,NoticeTypeCode,AdditionalPublicationStatus,AdditionalPublicationStatus,ID,Attachment
Unnamed: 0_level_3,NaN,NaN,NaN,NaN,NaN,NaN,NaN,PartyName,PartyName,NaN,...,PartyName,LegalMonetaryTotal,NaN,EndDate,EndTime,NaN,PublicationMediaName,AdditionalPublicationDocumentReference,NaN,ExternalReference
Unnamed: 0_level_4,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Name,Name,NaN,...,Name,TaxExclusiveAmount,NaN,NaN,NaN,NaN,NaN,IssueDate,NaN,URI
0,https://contrataciondelestado.es/sindicacion/P...,Id licitación: C. 2-2021; Órgano de Contrataci...,L'objecte del contracte és la renovació de tot...,2022-01-03T01:11:41.826+01:00,C. 2-2021,ADJ,https://contractaciopublica.gencat.cat/ecofin_...,Ajuntament de Sant Ramon,Entitats municipals de Catalunya,L'objecte del contracte és la renovació de tot...,...,"AERONAVAL DE CONSTRUCCIONES I INSTALACIONES , ...",90078.51,9,2021-12-17,14:00:00,"[[DOC_CN, DOC_CAN_ADJ]]","[[Perfil del contratante, Perfil del contratan...","[[2021-11-30, 2022-01-03]]",,
1,https://contrataciondelestado.es/sindicacion/P...,Id licitación: 8128_3/2021; Órgano de Contrata...,Obras de restauración hidromorfológica del río...,2022-01-03T01:00:11.194+01:00,8128_3/2021,PUB,,Pleno del Ayuntamiento,AYUNTAMIENTO DE MONREAL,Obras de restauración hidromorfológica del río...,...,,,1,2022-01-22,23:30:00,DOC_CN,Perfil del contratante,2022-01-03,,
2,https://contrataciondelestado.es/sindicacion/P...,Id licitación: 1000_0005-CP01-2021-000063; Órg...,Contrato del servicio de realización de labore...,2022-01-03T01:00:10.399+01:00,1000_0005-CP01-2021-000063,EV,,El Director General de Comunicación y Relacion...,"Departamento de Presidencia, Igualdad, Función...",Contrato del servicio de realización de labore...,...,,,1,,,DOC_CN,"[[DOUE, Perfil del contratante]]","[[2021-12-01, 2022-01-03]]",,
3,https://contrataciondelestado.es/sindicacion/P...,Id licitación: 1379/2020 4738; Órgano de Contr...,Obres de renovació de l'enllumenat públic a la...,2022-01-03T00:11:40.740+01:00,1379/2020 4738,EV,https://contractaciopublica.gencat.cat/ecofin_...,Ajuntament de Canet de Mar,Entitats municipals de Catalunya,Obres de renovació de l'enllumenat públic a la...,...,,,9,2022-01-02,23:59:00,DOC_CN,Perfil del contratante,2021-12-13,Plec Clausules.pdf,https://contractaciopublica.gencat.cat/ecofin_...


However, there is a problem if the `pd.Series` to be concatenated have a different number of levels

In [None]:
flat_series_to_multiindexed_series(sproc.xml.entry_to_series(entries[15])).index.nlevels

5

In [None]:
flat_series_to_multiindexed_series(sproc.xml.entry_to_series(entries[16])).index.nlevels

6

In [None]:
try:
    # the indexes of entries 15 and 16 have a different number of levels (5 and 6, respectively)...
    pd.concat([flat_series_to_multiindexed_series(sproc.xml.entry_to_series(e)) for e in entries[15:17]], axis=1).T
except ValueError:
    # ...so the concatenation is expected to end up here
    print('Oooooooooops')

Oooooooooops


The function below just converts the `pd.DataFrame` *column-wise*. Columns with different numbers of levels are handled as suggested in [this post](https://stackoverflow.com/a/68940748).

In [None]:
#| export
def flat_df_to_multiindexed_df(
    input_df: pd.DataFrame # Input dataframe
) -> pd.DataFrame: # A column-hierarchical version of the input dataframe
    "Returns a copy of the input in which every (flat) column name has been split into the levels of a `pd.MultiIndex`"
    
    # every field becomes a `tuple`
    fields = [tuple(c.split(sproc.structure.nested_tags_separator)) for c in input_df.columns]
    
    # without columns there is nothing to split (and `pd.MultiIndex.from_tuples` cannot handle an empty list)
    if not fields:
        return input_df.copy()
    
    # the number of levels in the multindex for the columns
    n_levels = max(map(len, fields))
    
    # every tuple is padded with empty strings until it has `n_levels`
    fields = [e + ('',)*(n_levels-len(e)) for e in fields]

    # the data are copied (the input is left untouched)...
    res = input_df.copy()

    # ...and only the labels are replaced: the i-th tuple in `fields` stems from the i-th column of the input
    res.columns = pd.MultiIndex.from_tuples(fields)
    
    return res

We first build the `pd.DataFrame`

In [None]:
df = sproc.xml.to_df(xml_file)
df.head(6)

Unnamed: 0,id,summary,title,updated,ContractFolderStatus - ContractFolderID,ContractFolderStatus - ContractFolderStatusCode,ContractFolderStatus - LocatedContractingParty - BuyerProfileURIID,ContractFolderStatus - LocatedContractingParty - Party - PartyName - Name,ContractFolderStatus - LocatedContractingParty - ParentLocatedParty - PartyName - Name,ContractFolderStatus - ProcurementProject - Name,...,ContractFolderStatus - LegalDocumentReference - Attachment - ExternalReference - URI,ContractFolderStatus - TechnicalDocumentReference - ID,ContractFolderStatus - TechnicalDocumentReference - Attachment - ExternalReference - URI,ContractFolderStatus - ProcurementProject - PlannedPeriod - StartDate,ContractFolderStatus - ProcurementProject - PlannedPeriod - EndDate,ContractFolderStatus - LocatedContractingParty - Party - PartyIdentification - ID,ContractFolderStatus - LocatedContractingParty - ParentLocatedParty - ParentLocatedParty - PartyName - Name,ContractFolderStatus - TenderingProcess - ParticipationRequestReceptionPeriod - EndDate,ContractFolderStatus - TenderingProcess - ParticipationRequestReceptionPeriod - EndTime,ContractFolderStatus - TenderResult - AwardedTenderedProject - ProcurementProjectLotID
0,https://contrataciondelestado.es/sindicacion/P...,Id licitación: C. 2-2021; Órgano de Contrataci...,L'objecte del contracte és la renovació de tot...,2022-01-03T01:11:41.826+01:00,C. 2-2021,ADJ,https://contractaciopublica.gencat.cat/ecofin_...,Ajuntament de Sant Ramon,Entitats municipals de Catalunya,L'objecte del contracte és la renovació de tot...,...,,,,,,,,,,
1,https://contrataciondelestado.es/sindicacion/P...,Id licitación: 8128_3/2021; Órgano de Contrata...,Obras de restauración hidromorfológica del río...,2022-01-03T01:00:11.194+01:00,8128_3/2021,PUB,,Pleno del Ayuntamiento,AYUNTAMIENTO DE MONREAL,Obras de restauración hidromorfológica del río...,...,,,,,,,,,,
2,https://contrataciondelestado.es/sindicacion/P...,Id licitación: 1000_0005-CP01-2021-000063; Órg...,Contrato del servicio de realización de labore...,2022-01-03T01:00:10.399+01:00,1000_0005-CP01-2021-000063,EV,,El Director General de Comunicación y Relacion...,"Departamento de Presidencia, Igualdad, Función...",Contrato del servicio de realización de labore...,...,,,,,,,,,,
3,https://contrataciondelestado.es/sindicacion/P...,Id licitación: 1379/2020 4738; Órgano de Contr...,Obres de renovació de l'enllumenat públic a la...,2022-01-03T00:11:40.740+01:00,1379/2020 4738,EV,https://contractaciopublica.gencat.cat/ecofin_...,Ajuntament de Canet de Mar,Entitats municipals de Catalunya,Obres de renovació de l'enllumenat públic a la...,...,https://contractaciopublica.gencat.cat/ecofin_...,,,,,,,,,
4,https://contrataciondelestado.es/sindicacion/P...,Id licitación: 2021-44; Órgano de Contratación...,Subministre i la instal·lació fotovoltaica en ...,2022-01-03T00:11:40.696+01:00,2021-44,EV,https://contractaciopublica.gencat.cat/ecofin_...,Ajuntament de Valls,Entitats municipals de Catalunya,Subministre i la instal·lació fotovoltaica en ...,...,https://contractaciopublica.gencat.cat/ecofin_...,Enllac plec clausules tecniques.doc,https://contractaciopublica.gencat.cat/ecofin_...,,,,,,,
5,https://contrataciondelestado.es/sindicacion/P...,Id licitación: 2809/2021; Órgano de Contrataci...,Servei de visites i activitats sobre Història ...,2022-01-03T00:11:40.639+01:00,2809/2021,EV,https://contractaciopublica.gencat.cat/ecofin_...,Institut de Cultura de Barcelona,Entitats municipals de Catalunya,Servei de visites i activitats sobre Història ...,...,https://contractaciopublica.gencat.cat/ecofin_...,2809-21 PPT.pdf,https://contractaciopublica.gencat.cat/ecofin_...,,,,,,,


...and, afterwards, a *hierarchical* version thereof

In [None]:
hierarchical_df = flat_df_to_multiindexed_df(df)
hierarchical_df.head()

Unnamed: 0_level_0,id,summary,title,updated,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,ContractFolderID,ContractFolderStatusCode,LocatedContractingParty,LocatedContractingParty,LocatedContractingParty,ProcurementProject,...,LegalDocumentReference,TechnicalDocumentReference,TechnicalDocumentReference,ProcurementProject,ProcurementProject,LocatedContractingParty,LocatedContractingParty,TenderingProcess,TenderingProcess,TenderResult
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,BuyerProfileURIID,Party,ParentLocatedParty,Name,...,Attachment,ID,Attachment,PlannedPeriod,PlannedPeriod,Party,ParentLocatedParty,ParticipationRequestReceptionPeriod,ParticipationRequestReceptionPeriod,AwardedTenderedProject
Unnamed: 0_level_3,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,PartyName,PartyName,Unnamed: 10_level_3,...,ExternalReference,Unnamed: 13_level_3,ExternalReference,StartDate,EndDate,PartyIdentification,ParentLocatedParty,EndDate,EndTime,ProcurementProjectLotID
Unnamed: 0_level_4,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Name,Name,Unnamed: 10_level_4,...,URI,Unnamed: 13_level_4,URI,Unnamed: 15_level_4,Unnamed: 16_level_4,ID,PartyName,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4
Unnamed: 0_level_5,Unnamed: 1_level_5,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5,Unnamed: 8_level_5,Unnamed: 9_level_5,Unnamed: 10_level_5,...,Unnamed: 12_level_5,Unnamed: 13_level_5,Unnamed: 14_level_5,Unnamed: 15_level_5,Unnamed: 16_level_5,Unnamed: 17_level_5,Name,Unnamed: 19_level_5,Unnamed: 20_level_5,Unnamed: 21_level_5
0,https://contrataciondelestado.es/sindicacion/P...,Id licitación: C. 2-2021; Órgano de Contrataci...,L'objecte del contracte és la renovació de tot...,2022-01-03T01:11:41.826+01:00,C. 2-2021,ADJ,https://contractaciopublica.gencat.cat/ecofin_...,Ajuntament de Sant Ramon,Entitats municipals de Catalunya,L'objecte del contracte és la renovació de tot...,...,,,,,,,,,,
1,https://contrataciondelestado.es/sindicacion/P...,Id licitación: 8128_3/2021; Órgano de Contrata...,Obras de restauración hidromorfológica del río...,2022-01-03T01:00:11.194+01:00,8128_3/2021,PUB,,Pleno del Ayuntamiento,AYUNTAMIENTO DE MONREAL,Obras de restauración hidromorfológica del río...,...,,,,,,,,,,
2,https://contrataciondelestado.es/sindicacion/P...,Id licitación: 1000_0005-CP01-2021-000063; Órg...,Contrato del servicio de realización de labore...,2022-01-03T01:00:10.399+01:00,1000_0005-CP01-2021-000063,EV,,El Director General de Comunicación y Relacion...,"Departamento de Presidencia, Igualdad, Función...",Contrato del servicio de realización de labore...,...,,,,,,,,,,
3,https://contrataciondelestado.es/sindicacion/P...,Id licitación: 1379/2020 4738; Órgano de Contr...,Obres de renovació de l'enllumenat públic a la...,2022-01-03T00:11:40.740+01:00,1379/2020 4738,EV,https://contractaciopublica.gencat.cat/ecofin_...,Ajuntament de Canet de Mar,Entitats municipals de Catalunya,Obres de renovació de l'enllumenat públic a la...,...,https://contractaciopublica.gencat.cat/ecofin_...,,,,,,,,,
4,https://contrataciondelestado.es/sindicacion/P...,Id licitación: 2021-44; Órgano de Contratación...,Subministre i la instal·lació fotovoltaica en ...,2022-01-03T00:11:40.696+01:00,2021-44,EV,https://contractaciopublica.gencat.cat/ecofin_...,Ajuntament de Valls,Entitats municipals de Catalunya,Subministre i la instal·lació fotovoltaica en ...,...,https://contractaciopublica.gencat.cat/ecofin_...,Enllac plec clausules tecniques.doc,https://contractaciopublica.gencat.cat/ecofin_...,,,,,,,


*Cross-sections* of the above `pd.DataFrame` can be extracted

In [None]:
hierarchical_df.xs('id', axis='columns')

0      https://contrataciondelestado.es/sindicacion/P...
1      https://contrataciondelestado.es/sindicacion/P...
2      https://contrataciondelestado.es/sindicacion/P...
3      https://contrataciondelestado.es/sindicacion/P...
4      https://contrataciondelestado.es/sindicacion/P...
                             ...                        
112    https://contrataciondelestado.es/sindicacion/P...
113    https://contrataciondelestado.es/sindicacion/P...
114    https://contrataciondelestado.es/sindicacion/P...
115    https://contrataciondelestado.es/sindicacion/P...
116    https://contrataciondelestado.es/sindicacion/P...
Name: id, Length: 117, dtype: object

At deeper levels (notice in the above table *ContractFolderID* is nested inside *ContractFolderStatus*)

In [None]:
hierarchical_df.xs('ContractFolderID', axis=1, level=1)

Unnamed: 0,ContractFolderStatus
,
,
,
,
0,C. 2-2021
1,8128_3/2021
2,1000_0005-CP01-2021-000063
3,1379/2020 4738
4,2021-44
...,...


The latter encompasses most of the data

In [None]:
hierarchical_df.xs('ContractFolderStatus', axis=1)

Unnamed: 0_level_0,ContractFolderID,ContractFolderStatusCode,LocatedContractingParty,LocatedContractingParty,LocatedContractingParty,ProcurementProject,ProcurementProject,ProcurementProject,ProcurementProject,ProcurementProject,...,LegalDocumentReference,TechnicalDocumentReference,TechnicalDocumentReference,ProcurementProject,ProcurementProject,LocatedContractingParty,LocatedContractingParty,TenderingProcess,TenderingProcess,TenderResult
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,BuyerProfileURIID,Party,ParentLocatedParty,Name,TypeCode,BudgetAmount,BudgetAmount,RequiredCommodityClassification,...,Attachment,ID,Attachment,PlannedPeriod,PlannedPeriod,Party,ParentLocatedParty,ParticipationRequestReceptionPeriod,ParticipationRequestReceptionPeriod,AwardedTenderedProject
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,PartyName,PartyName,Unnamed: 6_level_2,Unnamed: 7_level_2,EstimatedOverallContractAmount,TaxExclusiveAmount,ItemClassificationCode,...,ExternalReference,Unnamed: 13_level_2,ExternalReference,StartDate,EndDate,PartyIdentification,ParentLocatedParty,EndDate,EndTime,ProcurementProjectLotID
Unnamed: 0_level_3,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Name,Name,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,...,URI,Unnamed: 13_level_3,URI,Unnamed: 15_level_3,Unnamed: 16_level_3,ID,PartyName,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
Unnamed: 0_level_4,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,...,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Name,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4
0,C. 2-2021,ADJ,https://contractaciopublica.gencat.cat/ecofin_...,Ajuntament de Sant Ramon,Entitats municipals de Catalunya,L'objecte del contracte és la renovació de tot...,1,135553.26,135553.26,34928530,...,,,,,,,,,,
1,8128_3/2021,PUB,,Pleno del Ayuntamiento,AYUNTAMIENTO DE MONREAL,Obras de restauración hidromorfológica del río...,3,81022.72,81022.72,45000000,...,,,,,,,,,,
2,1000_0005-CP01-2021-000063,EV,,El Director General de Comunicación y Relacion...,"Departamento de Presidencia, Igualdad, Función...",Contrato del servicio de realización de labore...,2,239797.5,47959.5,92400000,...,,,,,,,,,,
3,1379/2020 4738,EV,https://contractaciopublica.gencat.cat/ecofin_...,Ajuntament de Canet de Mar,Entitats municipals de Catalunya,Obres de renovació de l'enllumenat públic a la...,3,214781.46,178984.55,45316100,...,https://contractaciopublica.gencat.cat/ecofin_...,,,,,,,,,
4,2021-44,EV,https://contractaciopublica.gencat.cat/ecofin_...,Ajuntament de Valls,Entitats municipals de Catalunya,Subministre i la instal·lació fotovoltaica en ...,1,129247.06,107642.2,9331000,...,https://contractaciopublica.gencat.cat/ecofin_...,Enllac plec clausules tecniques.doc,https://contractaciopublica.gencat.cat/ecofin_...,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,1005_391-2021,PUB,,Dirección General de Cultura-Institución Prínc...,"Departamento de Cultura, Deporte y Juventud",Apoyo a la gestión del patrimonio filmográfico...,2,104025.9,34675.3,92511000,...,,,,,,,,,,
113,8165_3/2021,EV,,Mancomunidad de Servicios Sociales de Base de ...,MANCOMUNIDAD DE SERVICIOS DE HUARTE Y DE ESTER...,Asistencia técnica para la prestación del serv...,2,72000,14400,85000000,...,,,,,,,,,,
114,8113_3/2021,EV,,Subdirector de Gestión y Recursos,Agencia Navarra para la Dependencia,"Contrato de servicios de desinfección, desinse...",2,20305.6,4061.12,90920000,...,,,,2022-01-01,2022-12-31,,,,,
115,8113_01 2021,EV,,Agencia Navarra de Autonomía y Desarrollo de l...,Agencia Navarra para la Dependencia,Contrato del Servicio de Teleasistencia para l...,2,11855331.61,10777574.19,"[[85320000, 32500000]]",...,,,,,,,,,,


## Util

A function to produce a *full* index path from the upper levels. NOTE: it turns out that when loading  a `pd.DataFrame` saved in *parquet* format (using `to_parquet`), `np.nan` (a *float*) in a multiindexed column becomes `nan` (a *string*). This is accounted for in the code below.

In [None]:
#| export
def pad_col_levels(
    df: pd.DataFrame, # Input
    levels: tuple | list, # Individual names to assemble and pad
    denan: bool = False # If `True`, skip `pd.NA`s
    ) -> tuple: # Multiindex column name
    "Builds a multiindex-amenable column name from a sequence of levels."
    
    # if "de-NaN" was requested...
    if denan:
        levels = [e for e in levels if pd.notna(e)]
    
    return tuple(list(levels) + [''] * (df.columns.nlevels - len(levels)))

A `tuple` representing column multiindex is built

In [None]:
col_index = pad_col_levels(hierarchical_df, ['ContractFolderStatus', 'LocatedContractingParty', 'Party', 'PartyName', 'Name'] )
col_index

('ContractFolderStatus',
 'LocatedContractingParty',
 'Party',
 'PartyName',
 'Name',
 '')

*NaN*s among the levels can be dropped by passing `denan=True`

In [None]:
pad_col_levels(hierarchical_df, ['ContractFolderStatus', 'LocatedContractingParty', 'Party', 'PartyName', 'Name', np.nan, np.nan], denan=True)

('ContractFolderStatus',
 'LocatedContractingParty',
 'Party',
 'PartyName',
 'Name',
 '')

It is used to retrieve the corresponding column

In [None]:
hierarchical_df.loc[:, col_index]

0                               Ajuntament de Sant Ramon
1                                 Pleno del Ayuntamiento
2      El Director General de Comunicación y Relacion...
3                             Ajuntament de Canet de Mar
4                                    Ajuntament de Valls
                             ...                        
112    Dirección General de Cultura-Institución Prínc...
113    Mancomunidad de Servicios Sociales de Base de ...
114                    Subdirector de Gestión y Recursos
115    Agencia Navarra de Autonomía y Desarrollo de l...
116    Agencia Pública Empresarial Sanitaria Bajo Gua...
Name: (ContractFolderStatus, LocatedContractingParty, Party, PartyName, Name, ), Length: 117, dtype: object

A function returning the columns containing a given `substring` in their (multi-level) name

In [None]:
#| export
def columns_containing(
    df: pd.DataFrame, # Input
    substring: str # Text to look for in the column names
    ) -> pd.Index: # Columns of `df` whose name contains `substring` at any level
    "Returns the columns of `df` that contain `substring` in (any level of) their name."
    
    def _matches(label) -> bool:
        # a flat (non-multiindexed) label is handled as a single-level one rather than iterated character by character
        levels = label if isinstance(label, tuple) else (label,)
        # non-string levels (e.g., the `np.nan` used for padding) cannot contain the text
        return any(isinstance(e, str) and (substring in e) for e in levels)
    
    # an explicit boolean array is also a valid mask when there are no columns at all
    is_contained = np.array([_matches(c) for c in df.columns], dtype=bool)
    
    return df.columns[is_contained]

In [None]:
columns_containing(hierarchical_df, 'Name')

MultiIndex([('ContractFolderStatus', 'LocatedContractingParty', ...),
            ('ContractFolderStatus', 'LocatedContractingParty', ...),
            ('ContractFolderStatus',      'ProcurementProject', ...),
            ('ContractFolderStatus',            'TenderResult', ...),
            ('ContractFolderStatus',         'ValidNoticeInfo', ...),
            ('ContractFolderStatus', 'LocatedContractingParty', ...)],
           )

A simple function to determine whether the columns of a `pd.DataFrame` are multiindexed.

In [None]:
#| export
def is_column_multiindexed(
    df: pd.DataFrame # Input
    ) -> bool: # Assessment
    "Returns `True` if the columns of the given `pd.DataFrame` are a `pd.MultiIndex`."

    # `isinstance` (rather than an exact type comparison) so that subclasses of `pd.MultiIndex` also qualify
    return isinstance(df.columns, pd.MultiIndex)

In [None]:
is_column_multiindexed(hierarchical_df)

True

In [None]:
is_column_multiindexed(df)

False

### Flattening and renaming columns

The above `pd.DataFrame` with *multiindexed* columns

In [None]:
hier_df = flat_df_to_multiindexed_df(df)
hier_df.head(2)

Unnamed: 0_level_0,id,summary,title,updated,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,ContractFolderID,ContractFolderStatusCode,LocatedContractingParty,LocatedContractingParty,LocatedContractingParty,ProcurementProject,...,LegalDocumentReference,TechnicalDocumentReference,TechnicalDocumentReference,ProcurementProject,ProcurementProject,LocatedContractingParty,LocatedContractingParty,TenderingProcess,TenderingProcess,TenderResult
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,BuyerProfileURIID,Party,ParentLocatedParty,Name,...,Attachment,ID,Attachment,PlannedPeriod,PlannedPeriod,Party,ParentLocatedParty,ParticipationRequestReceptionPeriod,ParticipationRequestReceptionPeriod,AwardedTenderedProject
Unnamed: 0_level_3,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,PartyName,PartyName,Unnamed: 10_level_3,...,ExternalReference,Unnamed: 13_level_3,ExternalReference,StartDate,EndDate,PartyIdentification,ParentLocatedParty,EndDate,EndTime,ProcurementProjectLotID
Unnamed: 0_level_4,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Name,Name,Unnamed: 10_level_4,...,URI,Unnamed: 13_level_4,URI,Unnamed: 15_level_4,Unnamed: 16_level_4,ID,PartyName,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4
Unnamed: 0_level_5,Unnamed: 1_level_5,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5,Unnamed: 8_level_5,Unnamed: 9_level_5,Unnamed: 10_level_5,...,Unnamed: 12_level_5,Unnamed: 13_level_5,Unnamed: 14_level_5,Unnamed: 15_level_5,Unnamed: 16_level_5,Unnamed: 17_level_5,Name,Unnamed: 19_level_5,Unnamed: 20_level_5,Unnamed: 21_level_5
0,https://contrataciondelestado.es/sindicacion/P...,Id licitación: C. 2-2021; Órgano de Contrataci...,L'objecte del contracte és la renovació de tot...,2022-01-03T01:11:41.826+01:00,C. 2-2021,ADJ,https://contractaciopublica.gencat.cat/ecofin_...,Ajuntament de Sant Ramon,Entitats municipals de Catalunya,L'objecte del contracte és la renovació de tot...,...,,,,,,,,,,
1,https://contrataciondelestado.es/sindicacion/P...,Id licitación: 8128_3/2021; Órgano de Contrata...,Obras de restauración hidromorfológica del río...,2022-01-03T01:00:11.194+01:00,8128_3/2021,PUB,,Pleno del Ayuntamiento,AYUNTAMIENTO DE MONREAL,Obras de restauración hidromorfológica del río...,...,,,,,,,,,,


A mapping between columns and *human-readable* fields can be found [here](https://docs.google.com/spreadsheets/d/1vRN_bLWyUp6Hs1ONUSP9m3cfrgjx5bfbD4wzW4gXCu0/edit#gid=0). That mapping was processed in `naming.ipynb` to get the file below.

In [None]:
data_scheme_file = directory / 'PLACE.yaml'
assert data_scheme_file.exists()
data_scheme_file

PosixPath('/home/manu/Sync/UC3M/proyectos/2022/nextProcurement/sproc/samples/PLACE.yaml')

It provides (as a `dict`) a mapping from *human-readable* names to (maybe nested) fields in *Atom* files

In [None]:
with open(data_scheme_file) as yaml_data:
    data_scheme = yaml.load(yaml_data, Loader=yaml.FullLoader)
{k: data_scheme[k] for k in list(data_scheme)[:5]}

{'id': ['id', nan, nan, nan, nan, nan, nan],
 'summary': ['summary', nan, nan, nan, nan, nan, nan],
 'title': ['title', nan, nan, nan, nan, nan, nan],
 'updated': ['updated', nan, nan, nan, nan, nan, nan],
 'Número de Expediente': ['ContractFolderStatus',
  'ContractFolderID',
  nan,
  nan,
  nan,
  nan,
  nan]}

A convenience function, `n2c` (as in "name to column"), to go from *human-readable* name to actual *multiindex* column

In [None]:
def n2c(n):
    "Looks up the levels of the *human-readable* name `n` in `data_scheme` and pads them to match the columns of `hier_df`."

    # both `hier_df` and `data_scheme` are read from the notebook's global namespace at call time
    return pad_col_levels(hier_df, data_scheme[n], denan=True)

The mapping can be exploited to access the columns of the `pd.DataFrame` using more natural names

In [None]:
hier_df[n2c('Número de Expediente')]

0                       C. 2-2021
1                     8128_3/2021
2      1000_0005-CP01-2021-000063
3                  1379/2020 4738
4                         2021-44
                  ...            
112                 1005_391-2021
113                   8165_3/2021
114                   8113_3/2021
115                  8113_01 2021
116                  0001264/2021
Name: (ContractFolderStatus, ContractFolderID, , , , ), Length: 117, dtype: object

A hierarchical column *multiindex* is assumed.

In [None]:
inv_data_scheme = {''.join([e if pd.notna(e) else '' for e in v]): k for k, v in data_scheme.items()}
[k for k in itertools.islice(inv_data_scheme, 4, 6)]

['ContractFolderStatusContractFolderID',
 'ContractFolderStatusContractFolderStatusCode']

In [None]:
('id', '', '', '', '', '') in inv_data_scheme

False

In [None]:
hier_df.columns.nlevels

6

In [None]:
''.join(hier_df.columns[0])

'id'

A tiny function to ensure some data scheme is valid

In [None]:
#| export
def _data_scheme_ok(data_scheme: dict) -> bool:
    
    lengths = []
    
    for l in data_scheme.values():
        
        lengths.append(len(l))
        
        # if not every element is a `str` or `nan`...
        if not np.all([(type(e) == str) or np.isnan(e) for e in l]):
            
            return False
        
    return True
    # return np.unique(lengths).shape[0] == 1 # <-------------------------- TODO: required?

In [None]:
_data_scheme_ok(data_scheme)

True

In [None]:
import copy

A data scheme with the wrong number of elements

In [None]:
invalid_data_scheme = copy.deepcopy(data_scheme)
invalid_data_scheme['id'].append(np.nan)
len(invalid_data_scheme['id']), len(invalid_data_scheme['summary'])

(8, 7)

In [None]:
# the scheme built above is `invalid_data_scheme` (not `inv_data_scheme`);
# the number of levels is not (currently) validated, so the longer entry is still accepted
_data_scheme_ok(invalid_data_scheme)

True

A data scheme with the wrong type

In [None]:
invalid_data_scheme = copy.deepcopy(data_scheme)
invalid_data_scheme['id'].append(2)

In [None]:
# the scheme built above is `invalid_data_scheme` (not `inv_data_scheme`);
# the appended `2` is neither a `str` nor `nan`, so the check is expected to fail (i.e., return `False`)
_data_scheme_ok(invalid_data_scheme)

True

A function to rename every *multi-indexed* column using the provided mapping (`data_scheme`).

In [None]:
#| export
def flatten_columns_names(
    df: pd.DataFrame, # Input
    data_scheme: dict # Every key is a flattened name, and every value a list with the different levels of the multi-index
    , inplace: bool = False # If `True` the input DataFrame is modified
    ) -> pd.DataFrame: # Flat DataFrame (`df` itself if `inplace`, a copy otherwise)
    "Renames every (multi-indexed) column after `data_scheme`, joining the levels of those columns that are not in it."

    assert _data_scheme_ok(data_scheme), 'data scheme is not OK'

    def _levels(label) -> tuple:
        "Non-empty levels (neither `nan` nor `''`) of a column label or of a data scheme entry."

        # a flat label (e.g., a plain `str`) is handled as a single-level one
        parts = label if isinstance(label, (tuple, list)) else (label,)

        return tuple(e for e in parts if pd.notna(e) and e != '')

    # the inverse of the given mapping, keyed by the *tuple* of non-empty levels: unlike the plain concatenation
    # of the levels, a tuple cannot mix up different paths, e.g., ('AB', 'C') and ('A', 'BC')
    inv_data_scheme = {_levels(v): k for k, v in data_scheme.items()}

    new_names = []

    for c in df.columns:

        levels = _levels(c)

        # if the column is found in the inverse mapping...
        if levels in inv_data_scheme:

            # ...the given name is used
            new_names.append(inv_data_scheme[levels])

        # if the column is NOT found in the inverse mapping...
        else:

            # ...the new name is obtained by concatenating the individual (non-empty) components
            new_names.append(sproc.structure.nested_tags_separator.join(map(str, levels)))

    # the renaming is applied either on the input itself or on a copy of it
    res = df if inplace else df.copy()

    res.columns = new_names

    return res

In [None]:
renamed_cols_df = flatten_columns_names(hier_df, data_scheme)
renamed_cols_df.head(2)

Unnamed: 0,id,summary,title,updated,Número de Expediente,Estado,URL perfil de contratante,Nombre,Ubicación orgánica,Objeto del Contrato,...,Pliego de cláusulas administrativas (URI),Pliego de Prescripciones técnicas,Pliego de Prescripciones técnicas (URI),Plazo de Ejecución (Comienzo),Plazo de Ejecución (Fin),ID,ContractFolderStatus - LocatedContractingParty - ParentLocatedParty - ParentLocatedParty - PartyName - Name,Presentación de Solicitudes (Fecha),Presentación de Solicitudes (Hora),Lote
0,https://contrataciondelestado.es/sindicacion/P...,Id licitación: C. 2-2021; Órgano de Contrataci...,L'objecte del contracte és la renovació de tot...,2022-01-03T01:11:41.826+01:00,C. 2-2021,ADJ,https://contractaciopublica.gencat.cat/ecofin_...,Ajuntament de Sant Ramon,Entitats municipals de Catalunya,L'objecte del contracte és la renovació de tot...,...,,,,,,,,,,
1,https://contrataciondelestado.es/sindicacion/P...,Id licitación: 8128_3/2021; Órgano de Contrata...,Obras de restauración hidromorfológica del río...,2022-01-03T01:00:11.194+01:00,8128_3/2021,PUB,,Pleno del Ayuntamiento,AYUNTAMIENTO DE MONREAL,Obras de restauración hidromorfológica del río...,...,,,,,,,,,,


One can now use the *human-readable* column names, e.g.

In [None]:
renamed_cols_df['Número de Expediente']

0                       C. 2-2021
1                     8128_3/2021
2      1000_0005-CP01-2021-000063
3                  1379/2020 4738
4                         2021-44
                  ...            
112                 1005_391-2021
113                   8165_3/2021
114                   8113_3/2021
115                  8113_01 2021
116                  0001264/2021
Name: Número de Expediente, Length: 117, dtype: object

#### Insiders & Minors

The data scheme **for both**

In [None]:
data_scheme_file = directory.parent / 'naming' / 'insiders_minors.yaml'
print(data_scheme_file)
assert data_scheme_file.exists()

/home/manu/Sync/UC3M/proyectos/2022/nextProcurement/sproc/naming/insiders_minors.yaml


In [None]:
with open(data_scheme_file) as yaml_data:
    data_scheme = yaml.load(yaml_data, Loader=yaml.FullLoader)
{k: data_scheme[k] for k in list(data_scheme)[:5]}

{'id': ['id'],
 'summary': ['summary'],
 'title': ['title'],
 'updated': ['updated'],
 'deleted_on': ['deleted_on']}

##### Insiders

In [None]:
insiders_file = directory / 'insiders_sample.parquet'
print(insiders_file)
assert insiders_file.exists()

/home/manu/Sync/UC3M/proyectos/2022/nextProcurement/sproc/samples/insiders_sample.parquet


In [None]:
insiders_df = pd.read_parquet(insiders_file)
insiders_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id,summary,title,updated,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,deleted_on
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,ContractFolderID,ContractFolderStatusCode,LocatedContractingParty,LocatedContractingParty,LocatedContractingParty,LocatedContractingParty,...,TenderingTerms,ProcurementProjectLot,ProcurementProjectLot,LocatedContractingParty,TendererStatus,TenderResult,TenderingTerms,TenderingTerms,LocatedContractingParty,Unnamed: 23_level_1
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,ContractingPartyTypeCode,Party,Party,Party,...,TendererQualificationRequest,TenderingTerms,TenderingTerms,ParentLocatedParty,ProcurementProjectLotID,WinningParty,ProcurementNationalLegislationCode,TendererQualificationRequest,ParentLocatedParty,Unnamed: 23_level_2
Unnamed: 0_level_3,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,PartyIdentification,PartyName,PostalAddress,...,EmployeeQuantityDescription,TendererQualificationRequest,TendererQualificationRequest,ParentLocatedParty,Unnamed: 18_level_3,PartyLegalEntity,Unnamed: 20_level_3,OperatingYearsDescription,ParentLocatedParty,Unnamed: 23_level_3
Unnamed: 0_level_4,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,ID,Name,CityName,...,Unnamed: 14_level_4,SpecificTendererRequirement,SpecificTendererRequirement,ParentLocatedParty,Unnamed: 18_level_4,CompanyTypeCode,Unnamed: 20_level_4,Unnamed: 21_level_4,ParentLocatedParty,Unnamed: 23_level_4
Unnamed: 0_level_5,Unnamed: 1_level_5,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5,Unnamed: 8_level_5,Unnamed: 9_level_5,Unnamed: 10_level_5,Unnamed: 11_level_5,Unnamed: 12_level_5,...,Unnamed: 14_level_5,RequirementTypeCode,Description,ParentLocatedParty,Unnamed: 18_level_5,Unnamed: 19_level_5,Unnamed: 20_level_5,Unnamed: 21_level_5,ParentLocatedParty,Unnamed: 23_level_5
Unnamed: 0_level_6,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,...,Unnamed: 14_level_6,Unnamed: 15_level_6,Unnamed: 16_level_6,ParentLocatedParty,Unnamed: 18_level_6,Unnamed: 19_level_6,Unnamed: 20_level_6,Unnamed: 21_level_6,ParentLocatedParty,Unnamed: 23_level_6
Unnamed: 0_level_7,Unnamed: 1_level_7,Unnamed: 2_level_7,Unnamed: 3_level_7,Unnamed: 4_level_7,Unnamed: 5_level_7,Unnamed: 6_level_7,Unnamed: 7_level_7,Unnamed: 8_level_7,Unnamed: 9_level_7,Unnamed: 10_level_7,Unnamed: 11_level_7,Unnamed: 12_level_7,...,Unnamed: 14_level_7,Unnamed: 15_level_7,Unnamed: 16_level_7,ParentLocatedParty,Unnamed: 18_level_7,Unnamed: 19_level_7,Unnamed: 20_level_7,Unnamed: 21_level_7,ParentLocatedParty,Unnamed: 23_level_7
Unnamed: 0_level_8,Unnamed: 1_level_8,Unnamed: 2_level_8,Unnamed: 3_level_8,Unnamed: 4_level_8,Unnamed: 5_level_8,Unnamed: 6_level_8,Unnamed: 7_level_8,Unnamed: 8_level_8,Unnamed: 9_level_8,Unnamed: 10_level_8,Unnamed: 11_level_8,Unnamed: 12_level_8,...,Unnamed: 14_level_8,Unnamed: 15_level_8,Unnamed: 16_level_8,PartyIdentification,Unnamed: 18_level_8,Unnamed: 19_level_8,Unnamed: 20_level_8,Unnamed: 21_level_8,ParentLocatedParty,Unnamed: 23_level_8
Unnamed: 0_level_9,Unnamed: 1_level_9,Unnamed: 2_level_9,Unnamed: 3_level_9,Unnamed: 4_level_9,Unnamed: 5_level_9,Unnamed: 6_level_9,Unnamed: 7_level_9,Unnamed: 8_level_9,Unnamed: 9_level_9,Unnamed: 10_level_9,Unnamed: 11_level_9,Unnamed: 12_level_9,...,Unnamed: 14_level_9,Unnamed: 15_level_9,Unnamed: 16_level_9,ID,Unnamed: 18_level_9,Unnamed: 19_level_9,Unnamed: 20_level_9,Unnamed: 21_level_9,PartyIdentification,Unnamed: 23_level_9
Unnamed: 0_level_10,Unnamed: 1_level_10,Unnamed: 2_level_10,Unnamed: 3_level_10,Unnamed: 4_level_10,Unnamed: 5_level_10,Unnamed: 6_level_10,Unnamed: 7_level_10,Unnamed: 8_level_10,Unnamed: 9_level_10,Unnamed: 10_level_10,Unnamed: 11_level_10,Unnamed: 12_level_10,...,Unnamed: 14_level_10,Unnamed: 15_level_10,Unnamed: 16_level_10,Unnamed: 17_level_10,Unnamed: 18_level_10,Unnamed: 19_level_10,Unnamed: 20_level_10,Unnamed: 21_level_10,ID,Unnamed: 23_level_10
Unnamed: 0_level_11,Unnamed: 1_level_11,Unnamed: 2_level_11,Unnamed: 3_level_11,Unnamed: 4_level_11,Unnamed: 5_level_11,Unnamed: 6_level_11,Unnamed: 7_level_11,Unnamed: 8_level_11,Unnamed: 9_level_11,Unnamed: 10_level_11,Unnamed: 11_level_11,Unnamed: 12_level_11,...,Unnamed: 14_level_11,Unnamed: 15_level_11,Unnamed: 16_level_11,Unnamed: 17_level_11,Unnamed: 18_level_11,Unnamed: 19_level_11,Unnamed: 20_level_11,Unnamed: 21_level_11,Unnamed: 22_level_11,Unnamed: 23_level_11
zip,file name,entry,Unnamed: 3_level_12,Unnamed: 4_level_12,Unnamed: 5_level_12,Unnamed: 6_level_12,Unnamed: 7_level_12,Unnamed: 8_level_12,Unnamed: 9_level_12,Unnamed: 10_level_12,Unnamed: 11_level_12,Unnamed: 12_level_12,Unnamed: 13_level_12,Unnamed: 14_level_12,Unnamed: 15_level_12,Unnamed: 16_level_12,Unnamed: 17_level_12,Unnamed: 18_level_12,Unnamed: 19_level_12,Unnamed: 20_level_12,Unnamed: 21_level_12,Unnamed: 22_level_12,Unnamed: 23_level_12
licitacionesPerfilesContratanteCompleto3_2018.zip,licitacionesPerfilesContratanteCompleto3_20200522_234632_1.atom,494,https://contrataciondelestado.es/sindicacion/l...,Id licitación: LICT/99/024/2017/0063; Órgano d...,Contratación de la Obra de creación de un nuev...,2017-12-29 12:36:06.402000+00:00,LICT/99/024/2017/0063,RES,5.0,[G28207017],Director Gerente de FREMAP,Majadahonda (Madrid),...,,[nan],[nan],,[nan],[nan],,,,NaT
licitacionesPerfilesContratanteCompleto3_2018.zip,licitacionesPerfilesContratanteCompleto3_20200522_234632_1.atom,489,https://contrataciondelestado.es/sindicacion/l...,Id licitación: 357/17; Órgano de Contratación:...,Suministro de mobiliario Hospital de Sagunt,2017-12-29 12:44:12.934000+00:00,357/17,ADJ,2.0,[S4611001A],Departamento de Salud de Sagunto. Dirección Ec...,Sagunto,...,,[nan],[nan],,[nan],[nan],,,,NaT
licitacionesPerfilesContratanteCompleto3_2018.zip,licitacionesPerfilesContratanteCompleto3_20200522_234632_1.atom,488,https://contrataciondelestado.es/sindicacion/l...,Id licitación: 7.1.23/17; Órgano de Contrataci...,Red separativa de alcantarillado en el barrio ...,2017-12-29 12:45:04.741000+00:00,7.1.23/17,RES,2.0,[S3933002B],"Consejería de Universidades e Investigación, M...",Santander,...,,[nan],[nan],,[nan],[nan],,,,NaT
licitacionesPerfilesContratanteCompleto3_2018.zip,licitacionesPerfilesContratanteCompleto3_20200522_234632_1.atom,486,https://contrataciondelestado.es/sindicacion/l...,Id licitación: 5/2016; Órgano de Contratación:...,Prestación del servicio de vigilancia y limpie...,2017-12-29 12:45:20.094000+00:00,5/2016,RES,3.0,"[L01120271, P1202700I]",Teniente de Alcalde Delegado del Área de Contr...,Benicarló,...,,[nan],[nan],,[nan],[nan],,,,NaT
licitacionesPerfilesContratanteCompleto3_2018.zip,licitacionesPerfilesContratanteCompleto3_20200522_234632_1.atom,481,https://contrataciondelestado.es/sindicacion/l...,Id licitación: 1004217003400; Órgano de Contra...,Servicios de mantenimiento y desarrollo de los...,2017-12-29 12:48:25.547000+00:00,1004217003400,RES,1.0,"[E00116302, S2800643E]",Jefatura de la Sección Económico Financiera de...,Madrid,...,,[nan],[nan],,[nan],[nan],,,,NaT


In [None]:
flatten_columns_names(insiders_df, data_scheme)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id,summary,title,updated,Número de Expediente (Datos Generales del Expediente),Estado (Datos Generales del Expediente),Tipo de Administración (Entidad Adjudicadora),ID (Entidad Adjudicadora),Nombre (Entidad Adjudicadora),Población (Entidad Adjudicadora),...,Empleados (descripción) (Requisitos de Participación),Condiciones de admisión (código) (Requisitos de Participación del Lote),Condiciones de admisión (Requisitos de Participación del Lote),Ubicación orgánica ID (6) (Entidad Adjudicadora Jerarquía),ContractFolderStatus_TendererStatus_ProcurementProjectLotID (Unmatched),El adjudicatario es una UTE (Adjudicatario),ContractFolderStatus_TenderingTerms_ProcurementNationalLegislationCode (Unmatched),Experiencia (descripción) (Requisitos de Participación),Ubicación orgánica ID (7) (Entidad Adjudicadora Jerarquía),deleted_on
zip,file name,entry,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
licitacionesPerfilesContratanteCompleto3_2018.zip,licitacionesPerfilesContratanteCompleto3_20200522_234632_1.atom,494,https://contrataciondelestado.es/sindicacion/l...,Id licitación: LICT/99/024/2017/0063; Órgano d...,Contratación de la Obra de creación de un nuev...,2017-12-29 12:36:06.402000+00:00,LICT/99/024/2017/0063,RES,5.0,[G28207017],Director Gerente de FREMAP,Majadahonda (Madrid),...,,[nan],[nan],,[nan],[nan],,,,NaT
licitacionesPerfilesContratanteCompleto3_2018.zip,licitacionesPerfilesContratanteCompleto3_20200522_234632_1.atom,489,https://contrataciondelestado.es/sindicacion/l...,Id licitación: 357/17; Órgano de Contratación:...,Suministro de mobiliario Hospital de Sagunt,2017-12-29 12:44:12.934000+00:00,357/17,ADJ,2.0,[S4611001A],Departamento de Salud de Sagunto. Dirección Ec...,Sagunto,...,,[nan],[nan],,[nan],[nan],,,,NaT
licitacionesPerfilesContratanteCompleto3_2018.zip,licitacionesPerfilesContratanteCompleto3_20200522_234632_1.atom,488,https://contrataciondelestado.es/sindicacion/l...,Id licitación: 7.1.23/17; Órgano de Contrataci...,Red separativa de alcantarillado en el barrio ...,2017-12-29 12:45:04.741000+00:00,7.1.23/17,RES,2.0,[S3933002B],"Consejería de Universidades e Investigación, M...",Santander,...,,[nan],[nan],,[nan],[nan],,,,NaT
licitacionesPerfilesContratanteCompleto3_2018.zip,licitacionesPerfilesContratanteCompleto3_20200522_234632_1.atom,486,https://contrataciondelestado.es/sindicacion/l...,Id licitación: 5/2016; Órgano de Contratación:...,Prestación del servicio de vigilancia y limpie...,2017-12-29 12:45:20.094000+00:00,5/2016,RES,3.0,"[L01120271, P1202700I]",Teniente de Alcalde Delegado del Área de Contr...,Benicarló,...,,[nan],[nan],,[nan],[nan],,,,NaT
licitacionesPerfilesContratanteCompleto3_2018.zip,licitacionesPerfilesContratanteCompleto3_20200522_234632_1.atom,481,https://contrataciondelestado.es/sindicacion/l...,Id licitación: 1004217003400; Órgano de Contra...,Servicios de mantenimiento y desarrollo de los...,2017-12-29 12:48:25.547000+00:00,1004217003400,RES,1.0,"[E00116302, S2800643E]",Jefatura de la Sección Económico Financiera de...,Madrid,...,,[nan],[nan],,[nan],[nan],,,,NaT


##### Minors

In [None]:
minors_file = directory / 'minors_sample.parquet'
assert minors_file.exists()
print(minors_file)

/home/manu/Sync/UC3M/proyectos/2022/nextProcurement/sproc/samples/minors_sample.parquet


In [None]:
minors_df = pd.read_parquet(minors_file)
minors_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id,summary,title,updated,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,deleted_on
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,ContractFolderID,ContractFolderStatusCode,LocatedContractingParty,LocatedContractingParty,ProcurementProject,ProcurementProject,...,TenderResult,TendererStatus,TenderResult,LocatedContractingParty,LocatedContractingParty,TenderingTerms,TenderingTerms,LocatedContractingParty,LocatedContractingParty,Unnamed: 23_level_1
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Party,Party,Name,TypeCode,...,WinningParty,ProcurementProjectLotID,WinningParty,ParentLocatedParty,ParentLocatedParty,FundingProgramCode,FundingProgram,ParentLocatedParty,ParentLocatedParty,Unnamed: 23_level_2
Unnamed: 0_level_3,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,PartyIdentification,PartyName,Unnamed: 11_level_3,Unnamed: 12_level_3,...,PhysicalLocation,Unnamed: 15_level_3,PartyLegalEntity,ParentLocatedParty,ParentLocatedParty,Unnamed: 19_level_3,Unnamed: 20_level_3,ParentLocatedParty,ParentLocatedParty,Unnamed: 23_level_3
Unnamed: 0_level_4,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,ID,Name,Unnamed: 11_level_4,Unnamed: 12_level_4,...,Address,Unnamed: 15_level_4,CompanyTypeCode,ParentLocatedParty,ParentLocatedParty,Unnamed: 19_level_4,Unnamed: 20_level_4,ParentLocatedParty,ParentLocatedParty,Unnamed: 23_level_4
Unnamed: 0_level_5,Unnamed: 1_level_5,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5,Unnamed: 8_level_5,Unnamed: 9_level_5,Unnamed: 10_level_5,Unnamed: 11_level_5,Unnamed: 12_level_5,...,PostalZone,Unnamed: 15_level_5,Unnamed: 16_level_5,ParentLocatedParty,ParentLocatedParty,Unnamed: 19_level_5,Unnamed: 20_level_5,ParentLocatedParty,ParentLocatedParty,Unnamed: 23_level_5
Unnamed: 0_level_6,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,...,Unnamed: 14_level_6,Unnamed: 15_level_6,Unnamed: 16_level_6,ParentLocatedParty,ParentLocatedParty,Unnamed: 19_level_6,Unnamed: 20_level_6,ParentLocatedParty,ParentLocatedParty,Unnamed: 23_level_6
Unnamed: 0_level_7,Unnamed: 1_level_7,Unnamed: 2_level_7,Unnamed: 3_level_7,Unnamed: 4_level_7,Unnamed: 5_level_7,Unnamed: 6_level_7,Unnamed: 7_level_7,Unnamed: 8_level_7,Unnamed: 9_level_7,Unnamed: 10_level_7,Unnamed: 11_level_7,Unnamed: 12_level_7,...,Unnamed: 14_level_7,Unnamed: 15_level_7,Unnamed: 16_level_7,PartyIdentification,ParentLocatedParty,Unnamed: 19_level_7,Unnamed: 20_level_7,ParentLocatedParty,ParentLocatedParty,Unnamed: 23_level_7
Unnamed: 0_level_8,Unnamed: 1_level_8,Unnamed: 2_level_8,Unnamed: 3_level_8,Unnamed: 4_level_8,Unnamed: 5_level_8,Unnamed: 6_level_8,Unnamed: 7_level_8,Unnamed: 8_level_8,Unnamed: 9_level_8,Unnamed: 10_level_8,Unnamed: 11_level_8,Unnamed: 12_level_8,...,Unnamed: 14_level_8,Unnamed: 15_level_8,Unnamed: 16_level_8,ID,PartyIdentification,Unnamed: 19_level_8,Unnamed: 20_level_8,ParentLocatedParty,ParentLocatedParty,Unnamed: 23_level_8
Unnamed: 0_level_9,Unnamed: 1_level_9,Unnamed: 2_level_9,Unnamed: 3_level_9,Unnamed: 4_level_9,Unnamed: 5_level_9,Unnamed: 6_level_9,Unnamed: 7_level_9,Unnamed: 8_level_9,Unnamed: 9_level_9,Unnamed: 10_level_9,Unnamed: 11_level_9,Unnamed: 12_level_9,...,Unnamed: 14_level_9,Unnamed: 15_level_9,Unnamed: 16_level_9,Unnamed: 17_level_9,ID,Unnamed: 19_level_9,Unnamed: 20_level_9,PartyIdentification,ParentLocatedParty,Unnamed: 23_level_9
Unnamed: 0_level_10,Unnamed: 1_level_10,Unnamed: 2_level_10,Unnamed: 3_level_10,Unnamed: 4_level_10,Unnamed: 5_level_10,Unnamed: 6_level_10,Unnamed: 7_level_10,Unnamed: 8_level_10,Unnamed: 9_level_10,Unnamed: 10_level_10,Unnamed: 11_level_10,Unnamed: 12_level_10,...,Unnamed: 14_level_10,Unnamed: 15_level_10,Unnamed: 16_level_10,Unnamed: 17_level_10,Unnamed: 18_level_10,Unnamed: 19_level_10,Unnamed: 20_level_10,ID,PartyName,Unnamed: 23_level_10
Unnamed: 0_level_11,Unnamed: 1_level_11,Unnamed: 2_level_11,Unnamed: 3_level_11,Unnamed: 4_level_11,Unnamed: 5_level_11,Unnamed: 6_level_11,Unnamed: 7_level_11,Unnamed: 8_level_11,Unnamed: 9_level_11,Unnamed: 10_level_11,Unnamed: 11_level_11,Unnamed: 12_level_11,...,Unnamed: 14_level_11,Unnamed: 15_level_11,Unnamed: 16_level_11,Unnamed: 17_level_11,Unnamed: 18_level_11,Unnamed: 19_level_11,Unnamed: 20_level_11,Unnamed: 21_level_11,Name,Unnamed: 23_level_11
zip,file name,entry,Unnamed: 3_level_12,Unnamed: 4_level_12,Unnamed: 5_level_12,Unnamed: 6_level_12,Unnamed: 7_level_12,Unnamed: 8_level_12,Unnamed: 9_level_12,Unnamed: 10_level_12,Unnamed: 11_level_12,Unnamed: 12_level_12,Unnamed: 13_level_12,Unnamed: 14_level_12,Unnamed: 15_level_12,Unnamed: 16_level_12,Unnamed: 17_level_12,Unnamed: 18_level_12,Unnamed: 19_level_12,Unnamed: 20_level_12,Unnamed: 21_level_12,Unnamed: 22_level_12,Unnamed: 23_level_12
contratosMenoresPerfilesContratantes_2018.zip,contratosMenoresPerfilesContratantes_20190225_140722_12.atom,499,https://contrataciondelestado.es/sindicacion/d...,Id licitación: 000103/2017-1069; Órgano de Con...,Reforma de elementos de ventilación exterior d...,2018-01-02 07:41:12.989000+00:00,000103/2017-1069,RES,[L01300275],Junta de Gobierno del Ayuntamiento de Molina d...,Reforma de elementos de ventilación exterior d...,2.0,...,[nan],[nan],,,,[nan],,,,NaT
contratosMenoresPerfilesContratantes_2018.zip,contratosMenoresPerfilesContratantes_20190225_140722_12.atom,498,https://contrataciondelestado.es/sindicacion/d...,Id licitación: 29-2017-II; Órgano de Contratac...,Servicios de calibrado y certificado de dos de...,2018-01-02 07:48:40.056000+00:00,29-2017-II,RES,[E04803403],Presidencia de la Confederación Hidrográfica d...,Servicios de calibrado y certificado de dos de...,2.0,...,[nan],[nan],,,,[nan],,,,NaT
contratosMenoresPerfilesContratantes_2018.zip,contratosMenoresPerfilesContratantes_20190225_140722_12.atom,497,https://contrataciondelestado.es/sindicacion/d...,Id licitación: 013-07-2018; Órgano de Contrata...,"Patrocinio menor proyecto "" Activitats C.I.N.E""",2018-01-02 07:52:03.763000+00:00,013-07-2018,RES,[A04013514],Agencia de Turismo de las Illes Balears,"Patrocinio menor proyecto "" Activitats C.I.N.E""",8.0,...,[nan],[nan],,,,[nan],,,,NaT
contratosMenoresPerfilesContratantes_2018.zip,contratosMenoresPerfilesContratantes_20190225_140722_12.atom,496,https://contrataciondelestado.es/sindicacion/d...,Id licitación: CON/2017/51; Órgano de Contrata...,Redacción EPIA - Legalización antena emisora d...,2018-01-02 07:53:43.525000+00:00,CON/2017/51,RES,[L01330117],Alcaldía del Ayuntamiento de Cangas del Narcea,Redacción EPIA - Legalización antena emisora d...,2.0,...,[nan],[nan],,,,[nan],,,,NaT
contratosMenoresPerfilesContratantes_2018.zip,contratosMenoresPerfilesContratantes_20190225_140722_12.atom,495,https://contrataciondelestado.es/sindicacion/d...,Id licitación: 000047/2017-1069; Órgano de Con...,Obras de reparación del Centro de Información ...,2018-01-02 08:14:33.726000+00:00,000047/2017-1069,RES,[L01300275],Junta de Gobierno del Ayuntamiento de Molina d...,Obras de reparación del Centro de Información ...,3.0,...,[nan],[nan],,,,[nan],,,,NaT


In [None]:
flatten_columns_names(minors_df, data_scheme)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id,summary,title,updated,Número de Expediente (Datos Generales del Expediente),Estado (Datos Generales del Expediente),ID (Entidad Adjudicadora),Nombre (Entidad Adjudicadora),Objeto del Contrato (Datos Generales del Expediente),Tipo de Contrato (Datos Generales del Expediente),...,Código Postal (Adjudicatario),ContractFolderStatus_TendererStatus_ProcurementProjectLotID (Unmatched),El adjudicatario es una UTE (Adjudicatario),Ubicación orgánica ID (5) (Entidad Adjudicadora Jerarquía),Ubicación orgánica ID (6) (Entidad Adjudicadora Jerarquía),Descripción de Programas de Financiación (Condiciones de Licitación),Programas de Financiación (Condiciones de Licitación),Ubicación orgánica ID (7) (Entidad Adjudicadora Jerarquía),Ubicación orgánica (8) (Entidad Adjudicadora Jerarquía),deleted_on
zip,file name,entry,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
contratosMenoresPerfilesContratantes_2018.zip,contratosMenoresPerfilesContratantes_20190225_140722_12.atom,499,https://contrataciondelestado.es/sindicacion/d...,Id licitación: 000103/2017-1069; Órgano de Con...,Reforma de elementos de ventilación exterior d...,2018-01-02 07:41:12.989000+00:00,000103/2017-1069,RES,[L01300275],Junta de Gobierno del Ayuntamiento de Molina d...,Reforma de elementos de ventilación exterior d...,2.0,...,[nan],[nan],,,,[nan],,,,NaT
contratosMenoresPerfilesContratantes_2018.zip,contratosMenoresPerfilesContratantes_20190225_140722_12.atom,498,https://contrataciondelestado.es/sindicacion/d...,Id licitación: 29-2017-II; Órgano de Contratac...,Servicios de calibrado y certificado de dos de...,2018-01-02 07:48:40.056000+00:00,29-2017-II,RES,[E04803403],Presidencia de la Confederación Hidrográfica d...,Servicios de calibrado y certificado de dos de...,2.0,...,[nan],[nan],,,,[nan],,,,NaT
contratosMenoresPerfilesContratantes_2018.zip,contratosMenoresPerfilesContratantes_20190225_140722_12.atom,497,https://contrataciondelestado.es/sindicacion/d...,Id licitación: 013-07-2018; Órgano de Contrata...,"Patrocinio menor proyecto "" Activitats C.I.N.E""",2018-01-02 07:52:03.763000+00:00,013-07-2018,RES,[A04013514],Agencia de Turismo de las Illes Balears,"Patrocinio menor proyecto "" Activitats C.I.N.E""",8.0,...,[nan],[nan],,,,[nan],,,,NaT
contratosMenoresPerfilesContratantes_2018.zip,contratosMenoresPerfilesContratantes_20190225_140722_12.atom,496,https://contrataciondelestado.es/sindicacion/d...,Id licitación: CON/2017/51; Órgano de Contrata...,Redacción EPIA - Legalización antena emisora d...,2018-01-02 07:53:43.525000+00:00,CON/2017/51,RES,[L01330117],Alcaldía del Ayuntamiento de Cangas del Narcea,Redacción EPIA - Legalización antena emisora d...,2.0,...,[nan],[nan],,,,[nan],,,,NaT
contratosMenoresPerfilesContratantes_2018.zip,contratosMenoresPerfilesContratantes_20190225_140722_12.atom,495,https://contrataciondelestado.es/sindicacion/d...,Id licitación: 000047/2017-1069; Órgano de Con...,Obras de reparación del Centro de Información ...,2018-01-02 08:14:33.726000+00:00,000047/2017-1069,RES,[L01300275],Junta de Gobierno del Ayuntamiento de Molina d...,Obras de reparación del Centro de Información ...,3.0,...,[nan],[nan],,,,[nan],,,,NaT


In [None]:
#| hide
from nbdev.doclinks import nbdev_export

In [None]:
#| hide
nbdev_export('30_hierarchical.ipynb')