In [1]:
# 1. Settings and Preparations

# Importing libraries that will be used
import os
from datetime import datetime

import openpyxl
import pandas as pd
from pandas.io.formats import excel

# Library-wide options:
#   - switch off pandas' chained-assignment warning;
#   - clear the header style pandas would otherwise apply to the header row of exported sheets.
pd.set_option('chained_assignment', None)
excel.ExcelFormatter.header_style = None

# Location of the consolidated workbook on disk - fill this in before running the notebook.
# Example: D:\Desafio_ISOps\Dados_consolidados.xlsx
PATH_TO_CONSOLIDATED_DATA_FILE = ''

# Load the three source sheets in one call. Because sheet_name is a list, read_excel
# returns a dict that maps each sheet name to its DataFrame.
CONSOLIDATED_DATA = pd.read_excel(
    PATH_TO_CONSOLIDATED_DATA_FILE,
    sheet_name=['Engenharia', 'Marketing', 'Sales'],
)

# Empty placeholder for the combined table; it is reassigned in the aggregation cell.
aggregated_data = pd.DataFrame()

In [2]:
# 2. Aggregate the data into a single Dataframe/Table

# Stack the rows of every listed sheet on top of each other in one DataFrame.
# In SQL terms this is closest to a UNION ALL of the sheets: rows are appended one
# sheet after another, nothing is matched on keys.
PAGES_TO_MERGE = ['Engenharia', 'Marketing', 'Sales']

# ignore_index=True discards the per-sheet row labels and numbers the combined rows 0..n-1.
aggregated_data = pd.concat(
    [CONSOLIDATED_DATA[page] for page in PAGES_TO_MERGE],
    ignore_index=True,
)

In [3]:
# 3. Perform transformations on columns with date type values to standardize them in the ISO8601 format

# Names of the columns that are presumably expected to hold dates.
# NOTE(review): this list is not referenced anywhere else in the visible cells - the
# conversion loop in this cell walks every column of aggregated_data instead.
# Confirm whether the loop was meant to be restricted to these columns.
COLUMNS_TO_FORMAT_AS_DATE = ['Data_Candidatura', 'Data_Movimentacao']

def check_cell_data_type(cell):
    """Reduce a date-like cell to its calendar date; hand every other cell back unchanged.

    The function is written to be applied cell by cell (e.g. through ``Series.apply``),
    so ``cell`` is a single scalar value, not a whole column.

    Parameters
    ----------
    cell : object
        One cell value of any type.

    Returns
    -------
    datetime.date
        When ``cell`` is a ``datetime.datetime`` (this includes ``pd.Timestamp``) or a
        NumPy ``datetime64`` scalar that holds an actual point in time. The time-of-day
        part is dropped.
    pd.NA
        When ``cell`` is a missing datetime (``pd.NaT`` or ``np.datetime64('NaT')``).
    object
        ``cell`` itself for anything else: text, numbers, ``None``, NaN, or a value
        that already is a plain ``datetime.date``.
    """
    # Scalars are recognised by their type. pd.Timestamp and pd.NaT are both
    # subclasses of datetime.datetime, so one isinstance check covers all three.
    is_python_datetime = isinstance(cell, datetime)
    # NumPy datetime64 scalars are not datetime instances but do expose a dtype;
    # values without a dtype attribute yield None here, which tests False.
    is_numpy_datetime = pd.api.types.is_datetime64_any_dtype(getattr(cell, "dtype", None))

    if not (is_python_datetime or is_numpy_datetime):
        return cell

    # The missing-value check must come before .date(): NaT does not support it.
    if pd.isna(cell):
        return pd.NA

    if is_python_datetime:
        return cell.date()
    # datetime64 scalars have no .date(); go through Timestamp to get one.
    return pd.Timestamp(cell).date()

# Pass every column through check_cell_data_type one cell at a time and store the
# result back under the same column name.
for column in aggregated_data.columns:
    aggregated_data[column] = aggregated_data[column].apply(check_cell_data_type)

In [4]:
# 4. Write the aggregated table back into the consolidated workbook as the sheet 'União'.
#
# The writer targets PATH_TO_CONSOLIDATED_DATA_FILE - the workbook the sheets were read
# from - instead of a hard-coded file name. Append mode (mode='a') opens an existing
# workbook, so a bare file name only works when the notebook's working directory
# happens to be the folder that contains it.
# if_sheet_exists='replace' makes the cell re-runnable: an existing 'União' sheet is
# replaced rather than raising, and the other sheets are kept.
# date_format sets the Excel number format used for date cells.
with pd.ExcelWriter(
    PATH_TO_CONSOLIDATED_DATA_FILE,
    engine='openpyxl',
    mode='a',
    if_sheet_exists='replace',
    date_format='YYYY-MM-DD',
) as writer:
    aggregated_data.to_excel(writer, index=False, sheet_name='União')