## Modules to use

In [None]:
import os
import requests
import uuid

## Functions

In [None]:
# Template filling

def template_url(idx, sub_id,):
    """ Build the URL from which the PDF is retrieved.

    Parameters
    ----------
    idx : str
        Identifier; it is converted with ``int()`` and zero-padded
        to a width of 10 characters
    sub_id : str
        Code inserted verbatim as the value of ``kind``

    Returns
    -------
    url : str
        String, URL to be downloaded
    """
    # Inputs are not validated here: an idx that int() cannot parse
    # makes the conversion below raise.
    pieces = [
        "http://some.com/",
        "id={0:010d}".format(int(idx)),
        "&kind={0}".format(sub_id),
    ]
    return "".join(pieces)

In [None]:
# Retrieve target file

def get_file(url,
             output_fnm=None,
             output_dir=None):
    """ Download the file behind ``url`` and save it to disk.

    The ``Content-Type`` header of the response is inspected before
    anything is written: the download is only accepted when that
    header contains "pdf" (case-insensitive).

    Parameters
    ----------
    url : str
        url to be retrieved
    output_fnm : str
        filename for the output. When None, a random UUID4 followed
        by ".pdf" is used
    output_dir : str
        directory for the output file. When None, the current working
        directory is used. The directory is not created if missing

    Returns
    -------
    boolean
        True once the file has been written

    Raises
    ------
    RuntimeError
        If the response carries no ``Content-Type`` header, or the
        header does not mention "pdf"
    """
    # Define request to the webpage
    req = requests.get(url, allow_redirects=True)
    # Get the filetype that is generated by the URL. The header may be
    # absent, in which case .get() returns None; fall back to an empty
    # string so the check below reports "not a PDF" instead of failing
    # with an AttributeError.
    content_type = req.headers.get('content-type') or ""
    # If the filetype is not PDF, fail with an explicit exception
    # (a bare ``raise`` has nothing to re-raise at this point)
    if "pdf" not in content_type.lower():
        print("File is not a PDF. Error.")
        raise RuntimeError(
            "Expected a PDF from {0}, got content-type {1!r}".format(
                url, content_type))
    print("File is a PDF")
    # If no output filename was given, generate a random uuid
    if output_fnm is None:
        output_fnm = str(uuid.uuid4()) + ".pdf"
    # If no output directory was given, save the file in the current
    # working directory. Nonexistent directories are not checked for
    # or created.
    if output_dir is None:
        output_dir = os.getcwd()
    # Write out the file, b stands for binary. The with-statement
    # closes the file on exit, so no explicit close() is needed.
    out_tmp = os.path.join(output_dir, output_fnm)
    with open(out_tmp, "wb") as f:
        f.write(req.content)
    print("File {0} was successfully retrieved".format(out_tmp))
    return True

In [None]:
# Run the steps

def main(login_page,
         login_config,
         id01,
         id02,
         url_generator,
         file_retriever,
         fname=None, fdir=None):
    """ Main function to run the different steps: build the target
    URL, post the login form, and retrieve the file. Note that a
    dictionary, strings, and functions are all given as arguments.

    Parameters
    ----------
    login_page : str
        top URL where to validate credentials
    login_config : dict
        Dictionary containing the target form, user, and
        password to be filled up. It is posted as form data.
    id01 : str
        In this example, each set of data contains a unique
        identifier
    id02 : str
        Unique identifier, attached to id01
    url_generator : obj
        function to generate the url; called as
        ``url_generator(id01, id02)``
    file_retriever : obj
        function to retrieve the file; called as
        ``file_retriever(url, output_fnm=fname, output_dir=fdir)``
    fname : str
        filename for the output
    fdir : str
        directory for the output

    Returns
    -------
    boolean
        True when all steps have run
    """
    # Generate target URL, calling the template URL generator
    # function
    url_target = url_generator(id01, id02)
    # Open a persistent session for the web login. Using it as a
    # context manager guarantees the session is closed when the
    # block exits, even if the login or the retrieval raises.
    #
    # Important: maybe it is not necessary to create the Session()
    # each time, and instead see if the port is already open
    #
    with requests.Session() as s:
        s.post(login_page, data=login_config)
        # Retrieve the file through the function supplied by the
        # caller, rather than a hard-coded one.
        # NOTE(review): the session is not handed to the retriever.
        # get_file in this file issues its own requests.get, which
        # does not share this session's cookies -- confirm whether
        # the download actually needs the authenticated session.
        file_retriever(url_target, output_fnm=fname, output_dir=fdir)
    # Print message before finishing
    print("Main function completed")
    return True

## Main block

In [None]:
if __name__ == "__main__":
    # Identifiers of the document to fetch, and the name of the
    # file it is saved as
    id_level1 = "0875998"
    id_level2 = "repoFilesExample"
    output_pdf = "catalogue.pdf"

    # Login endpoint and the form fields posted to it
    login_web = "http://some.com/login_page"
    login_data = dict(id="form1", usuario="myself", password="sunnyDay")

    # Run the whole pipeline; the URL builder and the downloader are
    # handed over as plain function objects
    main(login_page=login_web,
         login_config=login_data,
         id01=id_level1,
         id02=id_level2,
         url_generator=template_url,
         file_retriever=get_file,
         fname=output_pdf)