In [None]:
# default_exp core

# core

> Main functionality.

The main functionality is provided by the functions defined below (e.g., `process_atom`). The `cli_*` functions are just *wrappers* that parse command-line arguments and pass them on.

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
# export
import sys
import argparse
import pathlib

import dlsproc.extend

## Functionality

In [None]:
# export
def process_atom(input_file: str | pathlib.Path):
    
    print(input_file)

A function to parse command-line arguments and pass them to `process_atom` above.

In [None]:
# export
def cli_process_atom() -> None:
    
    parser = argparse.ArgumentParser(description='Process atom file')

    parser.add_argument('input_file', type=argparse.FileType('r'), help='XML file')

    command_line_arguments = parser.parse_args()
    
    process_atom(command_line_arguments.input_file.name)

A command-line wrapper that extends an existing *parquet* file with new data from a *zip* file, writing the result to a new output file (which must not already exist).

In [None]:
# export
def cli_extend_parquet_with_zip(args: list = None) -> None:
    
    parser = argparse.ArgumentParser(description='Extend existing parquet file with data from a given zip')

    parser.add_argument('history_file', type=argparse.FileType('r'), help='Parquet file')
    parser.add_argument('zip_file', type=argparse.FileType('r'), help='Zip file')
    parser.add_argument('output_file', help='Output file')

    command_line_arguments = parser.parse_args(args)
    
    history_file = pathlib.Path(command_line_arguments.history_file.name)
    zip_file = pathlib.Path(command_line_arguments.zip_file.name)
    
    output_file = pathlib.Path(command_line_arguments.output_file)
    assert not output_file.exists()
    
    dlsproc.extend.parquet_with_zip(history_file, zip_file, output_file)
    
    return command_line_arguments

In [None]:
# args = ['reports/2018-2021.parquet', 'data/agregados/PlataformasAgregadasSinMenores_202201.zip', 'extended.parquet']
# command_line_arguments = cli_extend_parquet_with_zip(args)

In [None]:
%ls

00_core.ipynb          CONTRIBUTING.md     [0m[01;32mmake_conda_environment.sh[0m*
05_structure.ipynb     [01;34mdata[0m/               Makefile
10_xml.ipynb           [01;34mdlsproc[0m/            MANIFEST.in
15_postprocess.ipynb   [01;34mdlsproc.egg-info[0m/   README.md
20_bundle.ipynb        docker-compose.yml  [01;34mreports[0m/
30_hierarchical.ipynb  [01;34mdocs[0m/               [01;34msamples[0m/
40_io.ipynb            [01;32mdownload.sh[0m*        [00;32msettings.ini[0m
50_parse.ipynb         extended2.parquet   setup.py
60_assemble.ipynb      extended.parquet    [01;34mwiki[0m/
70_extend.ipynb        index.ipynb
changelog              LICENSE


In [None]:
# args = ['extended.parquet', 'data/agregados/PlataformasAgregadasSinMenores_202202.zip', 'extended2.parquet']
# cli_extend_parquet_with_zip(args)

In [None]:
%ls

00_core.ipynb          CONTRIBUTING.md     [0m[01;32mmake_conda_environment.sh[0m*
05_structure.ipynb     [01;34mdata[0m/               Makefile
10_xml.ipynb           [01;34mdlsproc[0m/            MANIFEST.in
15_postprocess.ipynb   [01;34mdlsproc.egg-info[0m/   README.md
20_bundle.ipynb        docker-compose.yml  [01;34mreports[0m/
30_hierarchical.ipynb  [01;34mdocs[0m/               [01;34msamples[0m/
40_io.ipynb            [01;32mdownload.sh[0m*        [00;32msettings.ini[0m
50_parse.ipynb         extended2.parquet   setup.py
60_assemble.ipynb      extended.parquet    [01;34mwiki[0m/
70_extend.ipynb        index.ipynb
changelog              LICENSE


In [None]:
# hide
# Run nbdev's notebook-to-script conversion on this notebook; presumably this
# writes the cells flagged `# export` to the module named by `default_exp`.
import nbdev.export
nbdev.export.notebook2script('00_core.ipynb')

Converted 00_core.ipynb.
