In [None]:
import pathlib
import itertools

import pandas as pd
from IPython.display import display
import yaml

import dlsproc.bundle
import dlsproc.hier
import dlsproc.postprocess
import dlsproc.assemble

So that *all* the columns of a `pd.DataFrame` are shown:

In [None]:
# `None` lifts the limit on the number of columns pandas displays
pd.options.display.max_columns = None

# Parameters

The directory containing downloaded data:

In [None]:
# the data is looked up in a `data` directory that sits next to the current working directory
data_directory = pathlib.Path.cwd().parent / 'data'
# `is_dir` (rather than `exists`) also rejects a regular file that happens to be named `data`
assert data_directory.is_dir(), f'data directory "{data_directory}" not found'
print(data_directory)

/home/manu/dlsproc/data


Directory in which the data is to be saved

In [None]:
# alternative: write next to the notebook instead
# output_directory = pathlib.Path.cwd()
output_directory = data_directory / 'agregados'
# the directory is not created here: it must already be there (and be an actual directory)
assert output_directory.is_dir(), f'output directory "{output_directory}" not found'
print(output_directory)

/home/manu/dlsproc/data/agregados


The name of the file **without** extension

In [None]:
# extension-less stem; every saved file is derived from it through `with_suffix`
output_file = output_directory / '2018-2021'
# the bare stem is never written to disk, so checking it would always report `False`;
# the pickle is what an earlier run of this notebook would have left behind
pickle_file = output_file.with_suffix('.pickle')
print(f'{pickle_file} (existing? {pickle_file.exists()})')

/home/manu/dlsproc/data/agregados/2018-2021 (existing? False)


# Processing

> Licitaciones publicadas en la Plataforma mediante mecanismos de agregación, excluyendo los contratos menores

The directory for *outsiders* data

In [None]:
outsiders_directory = data_directory / 'agregados'
# alternative: a (smaller) sample of the data
# outsiders_directory = pathlib.Path.cwd().parent / 'samples' / 'yearly'
# `is_dir` (rather than `exists`) also rejects a regular file with that name
assert outsiders_directory.is_dir(), f'outsiders directory "{outsiders_directory}" not found'
print(outsiders_directory)

/home/manu/dlsproc/data/agregados


We infer whether we are working on a sample or the full dataset (useful later on)

In [None]:
# sample data is read from `.../samples/<some directory>`, so the name of the parent directory
# tells a sample apart from the full dataset (`parent.name` is simply '' when there is no parent
# component, whereas indexing `parts[-2]` would raise)
working_on_a_sample = outsiders_directory.parent.name == 'samples'
working_on_a_sample

False

The list of files to be processed. They were downloaded from [this page](https://www.hacienda.gob.es/es-ES/GobiernoAbierto/Datos%20Abiertos/Paginas/LicitacionesAgregacion.aspx). To fetch one of them, one could use something like
```
wget https://contrataciondelsectorpublico.gob.es/sindicacion/sindicacion_1044/PlataformasAgregadasSinMenores_2018.zip
```

In [None]:
# one zip file per year, 2018 through 2021 (both included)
zip_files = [f'PlataformasAgregadasSinMenores_{year}.zip' for year in range(2018, 2022)]

Accumulators for the data itself (contracts) and records of deleted entries

In [None]:
# both accumulators start out empty (`None` means "nothing processed yet")
res_df = res_deleted_series = None

Every file is processed in a loop

In [None]:
%%time
for filename in zip_files:
    
    print(f'Processing "{filename}"')
    
    # above file names are relative to a directory
    f = outsiders_directory / filename
    
    assert f.exists()
    
    # data is read from the above *zip* file, and concatenated into a single `pd.DataFrame`...
    yearly_df = dlsproc.bundle.read_zip(f, concatenate=True)
    
    # ...and re-structured with multiindexed columns
    yearly_df = dlsproc.hier.flat_df_to_multiindexed_df(yearly_df)
    
    # # the same contract might show up more than once due to updates...but only the last one is kept
    # last_update_only_df = dlsproc.postprocess.keep_updates_only(yearly_df)
    
    # the same zip file also contains information (at the beginning) about deleted entries
    deleted_series = dlsproc.bundle.read_deleted_zip(f)
    
    # if this is NOT the first iteration...
    if res_df is not None:
    
        # ...the new data is stacked
        # res_df = dlsproc.assemble.stack(res_df, last_update_only_df)
        res_df = dlsproc.assemble.stack(res_df, yearly_df)
        res_deleted_series = pd.concat((res_deleted_series, deleted_series), axis=0)
        
    # ...if this is the first iteration
    else:
        
        # ...the new data is set as the accumulated result
        # res_df = last_update_only_df
        res_df = yearly_df
        res_deleted_series = deleted_series

Processing "PlataformasAgregadasSinMenores_2018.zip"
Processing "PlataformasAgregadasSinMenores_2019.zip"
Processing "PlataformasAgregadasSinMenores_2020.zip"
Processing "PlataformasAgregadasSinMenores_2021.zip"
CPU times: user 7min 40s, sys: 1.99 s, total: 7min 42s
Wall time: 7min 42s


In [None]:
# a peek at the first two rows of the accumulated data
res_df.head(n=2)

Unnamed: 0_level_0,Unnamed: 1_level_0,id,summary,title,updated,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,ContractFolderID,ContractFolderStatusCode,LocatedContractingParty,ProcurementProject,ProcurementProject,ProcurementProject,ProcurementProject,ProcurementProject,ProcurementProject,ProcurementProject,TenderResult,TenderResult,TenderResult,TenderResult,TenderResult,TenderingProcess,TenderingProcess,TenderingProcess,LegalDocumentReference,LegalDocumentReference,ValidNoticeInfo,ValidNoticeInfo,ValidNoticeInfo,LocatedContractingParty,TenderingProcess,TenderingProcess,TechnicalDocumentReference,TechnicalDocumentReference,LocatedContractingParty,ProcurementProject,ProcurementProject,LocatedContractingParty,TenderResult,LocatedContractingParty,TenderingProcess,LocatedContractingParty,TenderingProcess,LocatedContractingParty
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Party,Name,TypeCode,BudgetAmount,BudgetAmount,RequiredCommodityClassification,RealizedLocation,PlannedPeriod,ResultCode,ReceivedTenderQuantity,WinningParty,WinningParty,AwardedTenderedProject,ProcedureCode,TenderSubmissionDeadlinePeriod,TenderSubmissionDeadlinePeriod,ID,Attachment,NoticeTypeCode,AdditionalPublicationStatus,AdditionalPublicationStatus,ParentLocatedParty,ParticipationRequestReceptionPeriod,ParticipationRequestReceptionPeriod,ID,Attachment,ParentLocatedParty,PlannedPeriod,PlannedPeriod,Party,AwardedTenderedProject,ParentLocatedParty,TenderSubmissionDeadlinePeriod,ParentLocatedParty,TenderSubmissionDeadlinePeriod,BuyerProfileURIID
Unnamed: 0_level_3,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,PartyName,Unnamed: 9_level_3,Unnamed: 10_level_3,EstimatedOverallContractAmount,TaxExclusiveAmount,ItemClassificationCode,CountrySubentityCode,DurationMeasure,Unnamed: 16_level_3,Unnamed: 17_level_3,PartyIdentification,PartyName,LegalMonetaryTotal,Unnamed: 21_level_3,EndDate,EndTime,Unnamed: 24_level_3,ExternalReference,Unnamed: 26_level_3,PublicationMediaName,AdditionalPublicationDocumentReference,PartyName,EndDate,EndTime,Unnamed: 32_level_3,ExternalReference,ParentLocatedParty,StartDate,EndDate,PartyIdentification,ProcurementProjectLotID,ParentLocatedParty,Unnamed: 40_level_3,ParentLocatedParty,Description,Unnamed: 43_level_3
Unnamed: 0_level_4,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Name,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,ID,Name,TaxExclusiveAmount,Unnamed: 21_level_4,Unnamed: 22_level_4,Unnamed: 23_level_4,Unnamed: 24_level_4,URI,Unnamed: 26_level_4,Unnamed: 27_level_4,IssueDate,Name,Unnamed: 30_level_4,Unnamed: 31_level_4,Unnamed: 32_level_4,URI,PartyName,Unnamed: 35_level_4,Unnamed: 36_level_4,ID,Unnamed: 38_level_4,ParentLocatedParty,Unnamed: 40_level_4,ParentLocatedParty,Unnamed: 42_level_4,Unnamed: 43_level_4
Unnamed: 0_level_5,Unnamed: 1_level_5,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5,Unnamed: 8_level_5,Unnamed: 9_level_5,Unnamed: 10_level_5,Unnamed: 11_level_5,Unnamed: 12_level_5,Unnamed: 13_level_5,Unnamed: 14_level_5,Unnamed: 15_level_5,Unnamed: 16_level_5,Unnamed: 17_level_5,Unnamed: 18_level_5,Unnamed: 19_level_5,Unnamed: 20_level_5,Unnamed: 21_level_5,Unnamed: 22_level_5,Unnamed: 23_level_5,Unnamed: 24_level_5,Unnamed: 25_level_5,Unnamed: 26_level_5,Unnamed: 27_level_5,Unnamed: 28_level_5,Unnamed: 29_level_5,Unnamed: 30_level_5,Unnamed: 31_level_5,Unnamed: 32_level_5,Unnamed: 33_level_5,Name,Unnamed: 35_level_5,Unnamed: 36_level_5,Unnamed: 37_level_5,Unnamed: 38_level_5,PartyName,Unnamed: 40_level_5,ParentLocatedParty,Unnamed: 42_level_5,Unnamed: 43_level_5
Unnamed: 0_level_6,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6,Unnamed: 15_level_6,Unnamed: 16_level_6,Unnamed: 17_level_6,Unnamed: 18_level_6,Unnamed: 19_level_6,Unnamed: 20_level_6,Unnamed: 21_level_6,Unnamed: 22_level_6,Unnamed: 23_level_6,Unnamed: 24_level_6,Unnamed: 25_level_6,Unnamed: 26_level_6,Unnamed: 27_level_6,Unnamed: 28_level_6,Unnamed: 29_level_6,Unnamed: 30_level_6,Unnamed: 31_level_6,Unnamed: 32_level_6,Unnamed: 33_level_6,Unnamed: 34_level_6,Unnamed: 35_level_6,Unnamed: 36_level_6,Unnamed: 37_level_6,Unnamed: 38_level_6,Name,Unnamed: 40_level_6,PartyName,Unnamed: 42_level_6,Unnamed: 43_level_6
Unnamed: 0_level_7,Unnamed: 1_level_7,Unnamed: 2_level_7,Unnamed: 3_level_7,Unnamed: 4_level_7,Unnamed: 5_level_7,Unnamed: 6_level_7,Unnamed: 7_level_7,Unnamed: 8_level_7,Unnamed: 9_level_7,Unnamed: 10_level_7,Unnamed: 11_level_7,Unnamed: 12_level_7,Unnamed: 13_level_7,Unnamed: 14_level_7,Unnamed: 15_level_7,Unnamed: 16_level_7,Unnamed: 17_level_7,Unnamed: 18_level_7,Unnamed: 19_level_7,Unnamed: 20_level_7,Unnamed: 21_level_7,Unnamed: 22_level_7,Unnamed: 23_level_7,Unnamed: 24_level_7,Unnamed: 25_level_7,Unnamed: 26_level_7,Unnamed: 27_level_7,Unnamed: 28_level_7,Unnamed: 29_level_7,Unnamed: 30_level_7,Unnamed: 31_level_7,Unnamed: 32_level_7,Unnamed: 33_level_7,Unnamed: 34_level_7,Unnamed: 35_level_7,Unnamed: 36_level_7,Unnamed: 37_level_7,Unnamed: 38_level_7,Unnamed: 39_level_7,Unnamed: 40_level_7,Name,Unnamed: 42_level_7,Unnamed: 43_level_7
file name,entry,Unnamed: 2_level_8,Unnamed: 3_level_8,Unnamed: 4_level_8,Unnamed: 5_level_8,Unnamed: 6_level_8,Unnamed: 7_level_8,Unnamed: 8_level_8,Unnamed: 9_level_8,Unnamed: 10_level_8,Unnamed: 11_level_8,Unnamed: 12_level_8,Unnamed: 13_level_8,Unnamed: 14_level_8,Unnamed: 15_level_8,Unnamed: 16_level_8,Unnamed: 17_level_8,Unnamed: 18_level_8,Unnamed: 19_level_8,Unnamed: 20_level_8,Unnamed: 21_level_8,Unnamed: 22_level_8,Unnamed: 23_level_8,Unnamed: 24_level_8,Unnamed: 25_level_8,Unnamed: 26_level_8,Unnamed: 27_level_8,Unnamed: 28_level_8,Unnamed: 29_level_8,Unnamed: 30_level_8,Unnamed: 31_level_8,Unnamed: 32_level_8,Unnamed: 33_level_8,Unnamed: 34_level_8,Unnamed: 35_level_8,Unnamed: 36_level_8,Unnamed: 37_level_8,Unnamed: 38_level_8,Unnamed: 39_level_8,Unnamed: 40_level_8,Unnamed: 41_level_8,Unnamed: 42_level_8,Unnamed: 43_level_8
PlataformasAgregadasSinMenores.atom,0,https://contrataciondelestado.es/sindicacion/P...,Id licitación: OBR-18-058; Órgano de Contratac...,Acondicionamiento de caminos en Rañadoiro (TINEO),2019-01-02 15:18:17.649000+00:00,OBR-18-058,ADJ,Consejería de Desarrollo Rural y Recursos Natu...,Acondicionamiento de cuatro caminos que sirven...,3.0,142080.76,142080.76,45222000,ES120,4.0,8.0,19.0,B33211475,"FRANCISCO SUAREZ CONTRATAS, S L",103278.0,9.0,2018-11-08,12:00:00,Resolucion_Aprobacion_PCAP_OBR-18-058.pdf,http://www.asturias.es/Proveedores/FICHEROS/ES...,"[DOC_CN, DOC_CAN_ADJ]",[Plataforma de Contratación del Sector Público...,"[2018-10-18, 2018-10-18, [2018-12-26, 2018-12-...",,,,,,,,,,,,2018-11-08 12:00:00+00:00,,,
PlataformasAgregadasSinMenores.atom,1,https://contrataciondelestado.es/sindicacion/P...,ID licitación: 1279185566111;Órgano de Contrat...,servicio de asistencia en materia de comunicac...,2019-01-02 15:11:35.118000+00:00,2018-058894,PUB,Junta de Gobierno de la Villa de Bilbao,servicio de asistencia en materia de comunicac...,2.0,41998.2,20999.1,79416200,ES213,12.0,,,,,,1.0,2019-01-17,13:00:00,2018-058894_PCAP.pdf,http://www.bilbao.eus/cs/Satellite?blobcol=url...,DOC_CN,Perfil Contratante,2019-01-02,Ayuntamiento de Bilbao,2019-01-17,13:00:00,2018-058894_PPT.pdf,http://www.bilbao.eus/cs/Satellite?blobcol=url...,,,,,,,2019-01-17 13:00:00+00:00,,,


How many rows and columns?

In [None]:
# (number of rows, number of columns)
len(res_df.index), len(res_df.columns)

(724706, 42)

However, some contracts show up more than once, and only the last update is to be kept

In [None]:
# out of all the entries for the same contract, only the last update is kept
res_last_update_only_df = dlsproc.postprocess.keep_updates_only(
    res_df
)
# (number of rows, number of columns) after the filtering
len(res_last_update_only_df.index), len(res_last_update_only_df.columns)

(181904, 42)

The *deleted series* might contain duplicates. We build a new `pd.Series` dropping duplicates by keeping only the last one.

In [None]:
# the library helper is used in place of an earlier inline sort/groupby/tail one-liner
deduplicated_deleted_series = dlsproc.postprocess.deduplicate_deleted_series(
    res_deleted_series
)
deduplicated_deleted_series

file name                                              id                                                                                 
PlataformasAgregadasSinMenores_20210121_030014_1.atom  https://contrataciondelestado.es/sindicacion/PlataformasAgregadasSinMenores/1954167   2021-01-19 23:11:19.614000+00:00
                                                       https://contrataciondelestado.es/sindicacion/PlataformasAgregadasSinMenores/1954168   2021-01-19 23:11:18.304000+00:00
                                                       https://contrataciondelestado.es/sindicacion/PlataformasAgregadasSinMenores/1954169   2021-01-19 23:11:19.821000+00:00
                                                       https://contrataciondelestado.es/sindicacion/PlataformasAgregadasSinMenores/1954170   2021-01-19 23:11:18.345000+00:00
                                                       https://contrataciondelestado.es/sindicacion/PlataformasAgregadasSinMenores/1954171   2021-01-19 23:11:18.5530

In [None]:
# contracts (last update only) are combined with the records of deleted entries;
# the result carries an extra `deleted_on` column
stateful_df = dlsproc.assemble.merge_deleted(
    res_last_update_only_df,
    deduplicated_deleted_series,
)
stateful_df

Unnamed: 0_level_0,Unnamed: 1_level_0,id,summary,title,updated,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,deleted_on
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,ContractFolderID,ContractFolderStatusCode,LocatedContractingParty,ProcurementProject,ProcurementProject,ProcurementProject,ProcurementProject,ProcurementProject,ProcurementProject,ProcurementProject,TenderResult,TenderResult,TenderResult,TenderResult,TenderResult,TenderingProcess,TenderingProcess,TenderingProcess,LegalDocumentReference,LegalDocumentReference,ValidNoticeInfo,ValidNoticeInfo,ValidNoticeInfo,LocatedContractingParty,TenderingProcess,TenderingProcess,TechnicalDocumentReference,TechnicalDocumentReference,LocatedContractingParty,ProcurementProject,ProcurementProject,LocatedContractingParty,TenderResult,LocatedContractingParty,TenderingProcess,LocatedContractingParty,TenderingProcess,LocatedContractingParty,Unnamed: 44_level_1
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Party,Name,TypeCode,BudgetAmount,BudgetAmount,RequiredCommodityClassification,RealizedLocation,PlannedPeriod,ResultCode,ReceivedTenderQuantity,WinningParty,WinningParty,AwardedTenderedProject,ProcedureCode,TenderSubmissionDeadlinePeriod,TenderSubmissionDeadlinePeriod,ID,Attachment,NoticeTypeCode,AdditionalPublicationStatus,AdditionalPublicationStatus,ParentLocatedParty,ParticipationRequestReceptionPeriod,ParticipationRequestReceptionPeriod,ID,Attachment,ParentLocatedParty,PlannedPeriod,PlannedPeriod,Party,AwardedTenderedProject,ParentLocatedParty,TenderSubmissionDeadlinePeriod,ParentLocatedParty,TenderSubmissionDeadlinePeriod,BuyerProfileURIID,Unnamed: 44_level_2
Unnamed: 0_level_3,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,PartyName,Unnamed: 9_level_3,Unnamed: 10_level_3,EstimatedOverallContractAmount,TaxExclusiveAmount,ItemClassificationCode,CountrySubentityCode,DurationMeasure,Unnamed: 16_level_3,Unnamed: 17_level_3,PartyIdentification,PartyName,LegalMonetaryTotal,Unnamed: 21_level_3,EndDate,EndTime,Unnamed: 24_level_3,ExternalReference,Unnamed: 26_level_3,PublicationMediaName,AdditionalPublicationDocumentReference,PartyName,EndDate,EndTime,Unnamed: 32_level_3,ExternalReference,ParentLocatedParty,StartDate,EndDate,PartyIdentification,ProcurementProjectLotID,ParentLocatedParty,Unnamed: 40_level_3,ParentLocatedParty,Description,Unnamed: 43_level_3,Unnamed: 44_level_3
Unnamed: 0_level_4,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Name,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,ID,Name,TaxExclusiveAmount,Unnamed: 21_level_4,Unnamed: 22_level_4,Unnamed: 23_level_4,Unnamed: 24_level_4,URI,Unnamed: 26_level_4,Unnamed: 27_level_4,IssueDate,Name,Unnamed: 30_level_4,Unnamed: 31_level_4,Unnamed: 32_level_4,URI,PartyName,Unnamed: 35_level_4,Unnamed: 36_level_4,ID,Unnamed: 38_level_4,ParentLocatedParty,Unnamed: 40_level_4,ParentLocatedParty,Unnamed: 42_level_4,Unnamed: 43_level_4,Unnamed: 44_level_4
Unnamed: 0_level_5,Unnamed: 1_level_5,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5,Unnamed: 8_level_5,Unnamed: 9_level_5,Unnamed: 10_level_5,Unnamed: 11_level_5,Unnamed: 12_level_5,Unnamed: 13_level_5,Unnamed: 14_level_5,Unnamed: 15_level_5,Unnamed: 16_level_5,Unnamed: 17_level_5,Unnamed: 18_level_5,Unnamed: 19_level_5,Unnamed: 20_level_5,Unnamed: 21_level_5,Unnamed: 22_level_5,Unnamed: 23_level_5,Unnamed: 24_level_5,Unnamed: 25_level_5,Unnamed: 26_level_5,Unnamed: 27_level_5,Unnamed: 28_level_5,Unnamed: 29_level_5,Unnamed: 30_level_5,Unnamed: 31_level_5,Unnamed: 32_level_5,Unnamed: 33_level_5,Name,Unnamed: 35_level_5,Unnamed: 36_level_5,Unnamed: 37_level_5,Unnamed: 38_level_5,PartyName,Unnamed: 40_level_5,ParentLocatedParty,Unnamed: 42_level_5,Unnamed: 43_level_5,Unnamed: 44_level_5
Unnamed: 0_level_6,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6,Unnamed: 15_level_6,Unnamed: 16_level_6,Unnamed: 17_level_6,Unnamed: 18_level_6,Unnamed: 19_level_6,Unnamed: 20_level_6,Unnamed: 21_level_6,Unnamed: 22_level_6,Unnamed: 23_level_6,Unnamed: 24_level_6,Unnamed: 25_level_6,Unnamed: 26_level_6,Unnamed: 27_level_6,Unnamed: 28_level_6,Unnamed: 29_level_6,Unnamed: 30_level_6,Unnamed: 31_level_6,Unnamed: 32_level_6,Unnamed: 33_level_6,Unnamed: 34_level_6,Unnamed: 35_level_6,Unnamed: 36_level_6,Unnamed: 37_level_6,Unnamed: 38_level_6,Name,Unnamed: 40_level_6,PartyName,Unnamed: 42_level_6,Unnamed: 43_level_6,Unnamed: 44_level_6
Unnamed: 0_level_7,Unnamed: 1_level_7,Unnamed: 2_level_7,Unnamed: 3_level_7,Unnamed: 4_level_7,Unnamed: 5_level_7,Unnamed: 6_level_7,Unnamed: 7_level_7,Unnamed: 8_level_7,Unnamed: 9_level_7,Unnamed: 10_level_7,Unnamed: 11_level_7,Unnamed: 12_level_7,Unnamed: 13_level_7,Unnamed: 14_level_7,Unnamed: 15_level_7,Unnamed: 16_level_7,Unnamed: 17_level_7,Unnamed: 18_level_7,Unnamed: 19_level_7,Unnamed: 20_level_7,Unnamed: 21_level_7,Unnamed: 22_level_7,Unnamed: 23_level_7,Unnamed: 24_level_7,Unnamed: 25_level_7,Unnamed: 26_level_7,Unnamed: 27_level_7,Unnamed: 28_level_7,Unnamed: 29_level_7,Unnamed: 30_level_7,Unnamed: 31_level_7,Unnamed: 32_level_7,Unnamed: 33_level_7,Unnamed: 34_level_7,Unnamed: 35_level_7,Unnamed: 36_level_7,Unnamed: 37_level_7,Unnamed: 38_level_7,Unnamed: 39_level_7,Unnamed: 40_level_7,Name,Unnamed: 42_level_7,Unnamed: 43_level_7,Unnamed: 44_level_7
file name,entry,Unnamed: 2_level_8,Unnamed: 3_level_8,Unnamed: 4_level_8,Unnamed: 5_level_8,Unnamed: 6_level_8,Unnamed: 7_level_8,Unnamed: 8_level_8,Unnamed: 9_level_8,Unnamed: 10_level_8,Unnamed: 11_level_8,Unnamed: 12_level_8,Unnamed: 13_level_8,Unnamed: 14_level_8,Unnamed: 15_level_8,Unnamed: 16_level_8,Unnamed: 17_level_8,Unnamed: 18_level_8,Unnamed: 19_level_8,Unnamed: 20_level_8,Unnamed: 21_level_8,Unnamed: 22_level_8,Unnamed: 23_level_8,Unnamed: 24_level_8,Unnamed: 25_level_8,Unnamed: 26_level_8,Unnamed: 27_level_8,Unnamed: 28_level_8,Unnamed: 29_level_8,Unnamed: 30_level_8,Unnamed: 31_level_8,Unnamed: 32_level_8,Unnamed: 33_level_8,Unnamed: 34_level_8,Unnamed: 35_level_8,Unnamed: 36_level_8,Unnamed: 37_level_8,Unnamed: 38_level_8,Unnamed: 39_level_8,Unnamed: 40_level_8,Unnamed: 41_level_8,Unnamed: 42_level_8,Unnamed: 43_level_8,Unnamed: 44_level_8
PlataformasAgregadasSinMenores_20180217_180137_1.atom,453,https://contrataciondelestado.es/sindicacion/P...,"Expediente: 1284/17, Entidad: Diputación Provi...",Refuerzo de Firme en la VP 3001 Renedo de Esgu...,2018-01-02 08:01:52.024000+00:00,1284/17,RES,Diputación Provincial de Valladolid,Refuerzo de Firme en la VP 3001 Renedo de Esgu...,3.0,89917.95,89917.95,45233142,ES418,3.0,8,14,A47082185,CONSTRUCCIONES HERMANOS SASTRE S.A.,60690.08,1.0,2017-11-02,23:59:00,,,DOC_FORM,Publicación del anuncio de formalización en un...,2018-01-02,,2017-11-02,23:59:00,,,,,,L02000047,1,,2017-11-02 23:59:00+00:00,,,,NaT
PlataformasAgregadasSinMenores_20180217_180137_1.atom,452,https://contrataciondelestado.es/sindicacion/P...,"Expediente: 1282/17, Entidad: Diputación Provi...",Refuerzo de Firme en la VP 6603 Mota del Marqu...,2018-01-02 08:02:24.833000+00:00,1282/17,RES,Diputación Provincial de Valladolid,Refuerzo de Firme en la VP 6603 Mota del Marqu...,3.0,175708.46,175708.46,45233142,ES418,3.0,8,13,A49012792,CONTRATAS Y OBRAS SAN GREGORIO S.A.,118919.49,1.0,2017-11-02,23:59:00,,,DOC_FORM,Publicación del anuncio de formalización en un...,2018-01-02,,2017-11-02,23:59:00,,,,,,L02000047,1,,2017-11-02 23:59:00+00:00,,,,NaT
PlataformasAgregadasSinMenores_20180217_180137_1.atom,451,https://contrataciondelestado.es/sindicacion/P...,"Expediente: 1281/17, Entidad: Diputación Provi...",Refuerzo de firme en la VP 4013 Melgar de Arri...,2018-01-02 08:02:51.744000+00:00,1281/17,RES,Diputación Provincial de Valladolid,Refuerzo de firme en la VP 4013 Melgar de Arri...,3.0,229259.52,229259.52,45233142,ES418,3.0,8,11,B49160567,EXFAMEX S.L.,178478.55,1.0,2017-11-02,23:59:00,,,DOC_FORM,Publicación del anuncio de formalización en un...,2018-01-02,,2017-11-02,23:59:00,,,,,,L02000047,1,,2017-11-02 23:59:00+00:00,,,,NaT
PlataformasAgregadasSinMenores_20180217_180137_1.atom,448,https://contrataciondelestado.es/sindicacion/P...,Id licitación: 2017017; Órgano de Contratació...,Desarrollo del programa de intervención socioe...,2018-01-02 09:25:52.396000+00:00,2017017,RES,Alcalde del Ayuntamiento de Eibar,Desarrollo del programa de intervención socioe...,2.0,704145.00,361100.00,85310000,ES212,2.0,9,2,U75190488,"LAHAGIN UNION TEMPORAL DE EMPRESAS, LEY 18/1982",339794,1.0,2017-10-13,19:00:00,05 PLIEGO ADMINISTRATIVO.doc,https://apps.euskadi.eus/w32-1084/es/v79aWar/c...,"[DOC_CN, DOC_CAN_ADJ, DOC_FORM]","[BOLHIS, Perfil del contratante, Perfil del co...","[2017-09-27, 2017-11-27, 2018-01-02]",Ayuntamiento de Eibar,,,02 PLEGU TEKNIKOA.doc,https://apps.euskadi.eus/w32-1084/es/v79aWar/c...,Ayuntamiento de Eibar,2018-01-01,,,,,2017-10-13 19:00:00+00:00,,,,NaT
PlataformasAgregadasSinMenores_20180217_180137_1.atom,447,https://contrataciondelestado.es/sindicacion/P...,Id licitación: B2017002; Órgano de Contrataci...,STAND DE EUSKADI EN FITUR Y SUS POSIBLES ADAP...,2018-01-02 09:25:52.501000+00:00,B2017002,RES,Dirección general de BASQUETOUR,"Diseño, construcción en régimen de alquiler, t...",2.0,1150000.00,175000.00,39154100,ES300,,9,6,U95902052,"EL ESQUEMA DE LAS COSAS, S.L. Y ESCENOSET, S.L...",172500,1.0,2017-10-23,14:00:00,B2017002_Clausulas.pdf,https://apps.euskadi.eus/w32-1084/es/v79aWar/c...,"[DOC_CN, DOC_CAN_ADJ, DOC_FORM]","[DOUE, Perfil del contratante, Perfil del cont...","[2017-09-15, 2017-12-05, 2018-01-02]","BASQUETOUR Agencia Vasca de Turismo, S.A.",,,B2017002_BTecnicas.pdf,https://apps.euskadi.eus/w32-1084/es/v79aWar/c...,"BASQUETOUR, S.A.",2017-12-01,2018-12-31,,,,2017-10-23 14:00:00+00:00,,,,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PlataformasAgregadasSinMenores.atom,4,https://contrataciondelestado.es/sindicacion/P...,Id licitación: 8044_30146-0-21; Órgano de Cont...,Renovación de la cubierta de edificio docente,2022-01-01 00:00:09.070000+00:00,8044_30146-0-21,EV,AYUNTAMIENTO DE BAKAIKU,Renovación de la cubierta de edificio docente,3.0,84988.62,84988.62,45200000,ES220,3.0,,,,,,1.0,,,,,DOC_CN,Perfil del contratante,"[2021-11-24, 2021-12-10, 2021-12-16, 2021-12-31]",AYUNTAMIENTO DE BAKAIKU,,,,,,,,,,,NaT,,,,NaT
PlataformasAgregadasSinMenores.atom,3,https://contrataciondelestado.es/sindicacion/P...,Id licitación: 3069_1/2021; Órgano de Contrata...,Construcción de depósito regulador de 100m3 en...,2022-01-01 00:00:16.761000+00:00,3069_1/2021,EV,CONCEJO DE GALBARRA,Construcción de depósito regulador de 100m3 en...,3.0,119270.16,119270.16,"[45231300, 45213260, 45112000, 44611600]",ES220,3.0,,,,,,999.0,,,,,DOC_CN,"[DOUE, Perfil del contratante]","[2021-12-16, 2021-12-16, 2021-12-16, 2021-12-1...",Concejo de Galbarra,,,,,,,,,,,NaT,,,,NaT
PlataformasAgregadasSinMenores.atom,2,https://contrataciondelestado.es/sindicacion/P...,Id licitación: 3091_21/2021; Órgano de Contrat...,Contratación de obra de línea subterránea en A...,2022-01-01 00:00:17.200000+00:00,3091_21/2021,EV,ALCALDESA-PRESIDENTA DEL AYUNTAMIENTO DE BUÑUEL,Contratación de obra de línea subterránea en A...,3.0,137467.23,137467.23,45231400,ES220,105.0,,,,,,1.0,,,,,DOC_CN,Perfil del contratante,"[2021-12-17, 2021-12-31]",Ayuntamiento de Buñuel,,,,,,,,,,,NaT,,,,NaT
PlataformasAgregadasSinMenores.atom,1,https://contrataciondelestado.es/sindicacion/P...,Id licitación: 3051_CONT_SERVICIOS/2021/40; Ór...,REDACCIÓN DEL PLAN ESTRATÉGICO DE ACCESIBILIDA...,2022-01-01 00:00:17.597000+00:00,3051_CONT_SERVICIOS/2021/40,PUB,Junta de Gobierno Local,Redacción Del Plan Estratégico De Accesibilida...,2.0,50000.00,45454.55,71241000,ES220,10.0,,,,,,1.0,2022-02-21,10:00:00,,,DOC_CN,Perfil del contratante,2022-01-01,Ayuntamiento de Pamplona,,,,,,,,,,,2022-02-21 10:00:00+00:00,,,,NaT


The number of filled-in rows for column `deleted_on` should match the number of `id`s in `deduplicated_deleted_series` that show up in `stateful_df`

In [None]:
# level 1 of the index of the deleted series holds the `id`s; only those also present
# in `stateful_df` are expected to have `deleted_on` filled in
assert stateful_df['deleted_on'].notna().sum() == len(
    set(stateful_df['id']).intersection(deduplicated_deleted_series.index.get_level_values(1))
)

Saving to *pickle*

In [None]:
# nothing is written when working on a sample
if not working_on_a_sample:
    pd.to_pickle(stateful_df, output_file.with_suffix('.pickle'))

Saving to *parquet*

In [None]:
# the data is first converted into something that can be written as parquet
parquet_df = dlsproc.assemble.parquet_amenable(
    stateful_df
)
parquet_df.head(n=2)

Unnamed: 0_level_0,Unnamed: 1_level_0,id,summary,title,updated,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,ContractFolderStatus,deleted_on
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,ContractFolderID,ContractFolderStatusCode,LocatedContractingParty,ProcurementProject,ProcurementProject,ProcurementProject,ProcurementProject,ProcurementProject,ProcurementProject,ProcurementProject,TenderResult,TenderResult,TenderResult,TenderResult,TenderResult,TenderingProcess,TenderingProcess,TenderingProcess,LegalDocumentReference,LegalDocumentReference,ValidNoticeInfo,ValidNoticeInfo,ValidNoticeInfo,LocatedContractingParty,TenderingProcess,TenderingProcess,TechnicalDocumentReference,TechnicalDocumentReference,LocatedContractingParty,ProcurementProject,ProcurementProject,LocatedContractingParty,TenderResult,LocatedContractingParty,TenderingProcess,LocatedContractingParty,TenderingProcess,LocatedContractingParty,Unnamed: 44_level_1
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Party,Name,TypeCode,BudgetAmount,BudgetAmount,RequiredCommodityClassification,RealizedLocation,PlannedPeriod,ResultCode,ReceivedTenderQuantity,WinningParty,WinningParty,AwardedTenderedProject,ProcedureCode,TenderSubmissionDeadlinePeriod,TenderSubmissionDeadlinePeriod,ID,Attachment,NoticeTypeCode,AdditionalPublicationStatus,AdditionalPublicationStatus,ParentLocatedParty,ParticipationRequestReceptionPeriod,ParticipationRequestReceptionPeriod,ID,Attachment,ParentLocatedParty,PlannedPeriod,PlannedPeriod,Party,AwardedTenderedProject,ParentLocatedParty,TenderSubmissionDeadlinePeriod,ParentLocatedParty,TenderSubmissionDeadlinePeriod,BuyerProfileURIID,Unnamed: 44_level_2
Unnamed: 0_level_3,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,PartyName,Unnamed: 9_level_3,Unnamed: 10_level_3,EstimatedOverallContractAmount,TaxExclusiveAmount,ItemClassificationCode,CountrySubentityCode,DurationMeasure,Unnamed: 16_level_3,Unnamed: 17_level_3,PartyIdentification,PartyName,LegalMonetaryTotal,Unnamed: 21_level_3,EndDate,EndTime,Unnamed: 24_level_3,ExternalReference,Unnamed: 26_level_3,PublicationMediaName,AdditionalPublicationDocumentReference,PartyName,EndDate,EndTime,Unnamed: 32_level_3,ExternalReference,ParentLocatedParty,StartDate,EndDate,PartyIdentification,ProcurementProjectLotID,ParentLocatedParty,Unnamed: 40_level_3,ParentLocatedParty,Description,Unnamed: 43_level_3,Unnamed: 44_level_3
Unnamed: 0_level_4,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Name,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,ID,Name,TaxExclusiveAmount,Unnamed: 21_level_4,Unnamed: 22_level_4,Unnamed: 23_level_4,Unnamed: 24_level_4,URI,Unnamed: 26_level_4,Unnamed: 27_level_4,IssueDate,Name,Unnamed: 30_level_4,Unnamed: 31_level_4,Unnamed: 32_level_4,URI,PartyName,Unnamed: 35_level_4,Unnamed: 36_level_4,ID,Unnamed: 38_level_4,ParentLocatedParty,Unnamed: 40_level_4,ParentLocatedParty,Unnamed: 42_level_4,Unnamed: 43_level_4,Unnamed: 44_level_4
Unnamed: 0_level_5,Unnamed: 1_level_5,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5,Unnamed: 8_level_5,Unnamed: 9_level_5,Unnamed: 10_level_5,Unnamed: 11_level_5,Unnamed: 12_level_5,Unnamed: 13_level_5,Unnamed: 14_level_5,Unnamed: 15_level_5,Unnamed: 16_level_5,Unnamed: 17_level_5,Unnamed: 18_level_5,Unnamed: 19_level_5,Unnamed: 20_level_5,Unnamed: 21_level_5,Unnamed: 22_level_5,Unnamed: 23_level_5,Unnamed: 24_level_5,Unnamed: 25_level_5,Unnamed: 26_level_5,Unnamed: 27_level_5,Unnamed: 28_level_5,Unnamed: 29_level_5,Unnamed: 30_level_5,Unnamed: 31_level_5,Unnamed: 32_level_5,Unnamed: 33_level_5,Name,Unnamed: 35_level_5,Unnamed: 36_level_5,Unnamed: 37_level_5,Unnamed: 38_level_5,PartyName,Unnamed: 40_level_5,ParentLocatedParty,Unnamed: 42_level_5,Unnamed: 43_level_5,Unnamed: 44_level_5
Unnamed: 0_level_6,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6,Unnamed: 15_level_6,Unnamed: 16_level_6,Unnamed: 17_level_6,Unnamed: 18_level_6,Unnamed: 19_level_6,Unnamed: 20_level_6,Unnamed: 21_level_6,Unnamed: 22_level_6,Unnamed: 23_level_6,Unnamed: 24_level_6,Unnamed: 25_level_6,Unnamed: 26_level_6,Unnamed: 27_level_6,Unnamed: 28_level_6,Unnamed: 29_level_6,Unnamed: 30_level_6,Unnamed: 31_level_6,Unnamed: 32_level_6,Unnamed: 33_level_6,Unnamed: 34_level_6,Unnamed: 35_level_6,Unnamed: 36_level_6,Unnamed: 37_level_6,Unnamed: 38_level_6,Name,Unnamed: 40_level_6,PartyName,Unnamed: 42_level_6,Unnamed: 43_level_6,Unnamed: 44_level_6
Unnamed: 0_level_7,Unnamed: 1_level_7,Unnamed: 2_level_7,Unnamed: 3_level_7,Unnamed: 4_level_7,Unnamed: 5_level_7,Unnamed: 6_level_7,Unnamed: 7_level_7,Unnamed: 8_level_7,Unnamed: 9_level_7,Unnamed: 10_level_7,Unnamed: 11_level_7,Unnamed: 12_level_7,Unnamed: 13_level_7,Unnamed: 14_level_7,Unnamed: 15_level_7,Unnamed: 16_level_7,Unnamed: 17_level_7,Unnamed: 18_level_7,Unnamed: 19_level_7,Unnamed: 20_level_7,Unnamed: 21_level_7,Unnamed: 22_level_7,Unnamed: 23_level_7,Unnamed: 24_level_7,Unnamed: 25_level_7,Unnamed: 26_level_7,Unnamed: 27_level_7,Unnamed: 28_level_7,Unnamed: 29_level_7,Unnamed: 30_level_7,Unnamed: 31_level_7,Unnamed: 32_level_7,Unnamed: 33_level_7,Unnamed: 34_level_7,Unnamed: 35_level_7,Unnamed: 36_level_7,Unnamed: 37_level_7,Unnamed: 38_level_7,Unnamed: 39_level_7,Unnamed: 40_level_7,Name,Unnamed: 42_level_7,Unnamed: 43_level_7,Unnamed: 44_level_7
file name,entry,Unnamed: 2_level_8,Unnamed: 3_level_8,Unnamed: 4_level_8,Unnamed: 5_level_8,Unnamed: 6_level_8,Unnamed: 7_level_8,Unnamed: 8_level_8,Unnamed: 9_level_8,Unnamed: 10_level_8,Unnamed: 11_level_8,Unnamed: 12_level_8,Unnamed: 13_level_8,Unnamed: 14_level_8,Unnamed: 15_level_8,Unnamed: 16_level_8,Unnamed: 17_level_8,Unnamed: 18_level_8,Unnamed: 19_level_8,Unnamed: 20_level_8,Unnamed: 21_level_8,Unnamed: 22_level_8,Unnamed: 23_level_8,Unnamed: 24_level_8,Unnamed: 25_level_8,Unnamed: 26_level_8,Unnamed: 27_level_8,Unnamed: 28_level_8,Unnamed: 29_level_8,Unnamed: 30_level_8,Unnamed: 31_level_8,Unnamed: 32_level_8,Unnamed: 33_level_8,Unnamed: 34_level_8,Unnamed: 35_level_8,Unnamed: 36_level_8,Unnamed: 37_level_8,Unnamed: 38_level_8,Unnamed: 39_level_8,Unnamed: 40_level_8,Unnamed: 41_level_8,Unnamed: 42_level_8,Unnamed: 43_level_8,Unnamed: 44_level_8
PlataformasAgregadasSinMenores_20180217_180137_1.atom,453,https://contrataciondelestado.es/sindicacion/P...,"Expediente: 1284/17, Entidad: Diputación Provi...",Refuerzo de Firme en la VP 3001 Renedo de Esgu...,2018-01-02 08:01:52.024000+00:00,1284/17,RES,Diputación Provincial de Valladolid,Refuerzo de Firme en la VP 3001 Renedo de Esgu...,3.0,89917.95,89917.95,[45233142.0],ES418,3.0,[8.0],[14.0],[A47082185],[CONSTRUCCIONES HERMANOS SASTRE S.A.],[60690.08],1.0,2017-11-02,23:59:00,,,[DOC_FORM],[Publicación del anuncio de formalización en u...,[2018-01-02],,2017-11-02,23:59:00,,,,,,L02000047,[1.0],,2017-11-02 23:59:00+00:00,,,,NaT
PlataformasAgregadasSinMenores_20180217_180137_1.atom,452,https://contrataciondelestado.es/sindicacion/P...,"Expediente: 1282/17, Entidad: Diputación Provi...",Refuerzo de Firme en la VP 6603 Mota del Marqu...,2018-01-02 08:02:24.833000+00:00,1282/17,RES,Diputación Provincial de Valladolid,Refuerzo de Firme en la VP 6603 Mota del Marqu...,3.0,175708.46,175708.46,[45233142.0],ES418,3.0,[8.0],[13.0],[A49012792],[CONTRATAS Y OBRAS SAN GREGORIO S.A.],[118919.49],1.0,2017-11-02,23:59:00,,,[DOC_FORM],[Publicación del anuncio de formalización en u...,[2018-01-02],,2017-11-02,23:59:00,,,,,,L02000047,[1.0],,2017-11-02 23:59:00+00:00,,,,NaT


In [None]:
# The aggregated parquet file is only written for full-dataset runs;
# when working on a sample this step is skipped.
if not working_on_a_sample:
    parquet_path = output_file.with_suffix('.parquet')
    parquet_df.to_parquet(parquet_path)

## Making a sample

If we are working with sample files, a *parquet* sample file is created from the first 10 and the last 10 rows.

In [None]:
if working_on_a_sample:
    # Keep the first and the last `n_edge` rows. Both windows are clamped to the
    # frame length so that a frame shorter than 2 * n_edge rows neither raises an
    # out-of-bounds IndexError nor yields the same row twice; for frames with at
    # least 2 * n_edge rows the selection is the first 10 followed by the last 10.
    n_edge = 10
    n_rows = len(parquet_df)
    head_positions = range(min(n_edge, n_rows))
    # The tail starts where the head ends at the earliest, which avoids overlap.
    tail_positions = range(max(n_rows - n_edge, len(head_positions)), n_rows)
    sample_df = parquet_df.iloc[[*head_positions, *tail_positions]]
    sample_df.to_parquet(outsiders_directory.parent / '2018-2021_20samples.parquet')

In [None]:
# stateful_df = pd.read_pickle(output_file.with_suffix('.pickle'))
# stateful_df.shape