In [232]:
# coding=utf-8

import os, sys
import openpyxl

"""
DATA CONFIG
"""
# Zero-based positions of the payload columns inside a worksheet row
# (merge() indexes the list of cell values with them).
INDEX = 0
NAME = 1
SIZE = 6
AMOUNT = 11
MATERIAL = 12
UNITS = 17
MATERIAL_AMOUNT = 19
STANDART = 21
COMMENT = 24

# All payload column positions, left to right.
PAYLOAD_DATA_INDEXES = [
    INDEX,
    NAME,
    SIZE,
    AMOUNT,
    MATERIAL,
    UNITS,
    MATERIAL_AMOUNT,
    STANDART,
    COMMENT,
]

# Title of the sheet whose data starts lower than on the other sheets,
# followed by the first data row number for that sheet and for the rest.
FISRT_LIST = 'Лист1'
FISRT_LIST_FIRST_DATA_ROW = 22
OTHER_LISTS_FIRST_DATA_ROW = 2

# Cell content that stands for "same value as in the previous row".
REPEAT_SYMBOL = '——ıı——'

"""
SCRIPT CONFIG
"""
# Directory part of the launched script's path; default place to scan.
DEFAULT_DIR_PATH = os.path.dirname(sys.argv[0])
DEFUALT_RESULT_FILE_NAME = 'output.xls'
# By default the results file sits in that same directory.
DEFAULT_RESULT_FILE_PATH = os.path.join(
    DEFAULT_DIR_PATH,
    DEFUALT_RESULT_FILE_NAME,
)

"""
LOGIC
"""    
def get_files(dir_path=DEFAULT_DIR_PATH):
    """
    Returns list of excel file paths found in a given directory
    and all of its subdirectories.

    A file is selected when its extension is '.xls' or '.xlsx'
    (compared case-insensitively). Files whose name starts with '~$'
    are skipped: Excel leaves such lock files next to an open workbook
    and they are not real workbooks. Every selected path is printed.

    Directories and files are visited in sorted order so that repeated
    runs over the same tree return the paths in the same order.

    NOTE(review): openpyxl documents support for .xlsx/.xlsm/.xltx/.xltm
    only, so '.xls' files returned here will presumably fail to load in
    process() - confirm whether '.xls' should stay in the list.
    """
    excel_extensions = ('.xls', '.xlsx')
    result = []
    for path, subdirs, files in os.walk(dir_path):
        # in-place sort also fixes the order in which os.walk descends
        subdirs.sort()
        for file in sorted(files):
            if file.startswith('~$'):
                continue
            file_extension = os.path.splitext(file)[1]
            if file_extension.lower() in excel_extensions:
                file_path = os.path.join(path, file)
                print(file_path)
                result.append(file_path)

    # DEBUG PRINTS
    print('________________________________________________')
    print(' ')
    return result

def merge(rows):
    """
    Collapse worksheet rows into a dict keyed by item name.

    Each row is an iterable of cell objects exposing a `.value`
    attribute. The rows are processed in three steps:

    1. Cell values are extracted into plain lists. Rows shorter than
       the right-most payload column are padded with None so that every
       payload index can be read.
    2. Every cell equal to REPEAT_SYMBOL is replaced with the value at
       the same position in the previous row. The first row has no
       previous row, so a repeat symbol there is left untouched.
    3. For every row with a non-empty name a record is stored under
       that name. Only the first row seen for a name is kept; rows with
       a name that is already present are ignored until a merge policy
       is implemented.

    Rows that have a name are also printed (payload columns only).

    Returns a dict mapping name -> dict with the keys 'name', 'size',
    'amount', 'material', 'units', 'material_amount', 'standart' and
    'comment'.
    """
    # every row must be indexable up to the right-most payload column
    min_width = max(PAYLOAD_DATA_INDEXES) + 1

    # clean data from openpyxl wrappers
    plain_rows = []
    for row in rows:
        plain_row = [item.value for item in row]
        # a negative multiplier yields an empty list, so long rows stay as is
        plain_row.extend([None] * (min_width - len(plain_row)))
        plain_rows.append(plain_row)

    # handle repeat symbols
    # Rows are resolved top to bottom, so a chain of repeat symbols
    # picks up the already resolved value from the row above.
    previous_row = None
    for plain_row in plain_rows:
        if previous_row is not None:
            for value_index, value in enumerate(plain_row):
                # rows may differ in length; skip columns the previous row lacks
                if value == REPEAT_SYMBOL and value_index < len(previous_row):
                    plain_row[value_index] = previous_row[value_index]
        previous_row = plain_row

    # DEBUG PRINT
    for row in plain_rows:
        if row[NAME] is not None:
            print([r for i, r in enumerate(row) if i in PAYLOAD_DATA_INDEXES])

    # generate output dictionary
    output = {}
    for row in plain_rows:
        name = row[NAME]
        if name is not None and name not in output:
            output[name] = {
                'name': name,
                'size': row[SIZE],
                'amount': row[AMOUNT],
                'material': row[MATERIAL],
                'units': row[UNITS],
                'material_amount': row[MATERIAL_AMOUNT],
                'standart': row[STANDART],
                'comment': row[COMMENT]
            }
        else:
            # APPLY MERGE POLITICS HERE
            pass
    return output

def build_results_file(results_dict, results_file_path=DEFAULT_RESULT_FILE_PATH):
    """
    Build an excel file from the results dict at the given path.

    Placeholder: nothing is written yet. Both arguments are ignored
    and None is returned.
    """
    return None

def process(dir_path=DEFAULT_DIR_PATH, result_file_path=DEFAULT_RESULT_FILE_PATH):
    """
    Load every excel file found under dir_path and merge its data rows.

    For each sheet of each workbook the rows starting at the sheet's
    first data row are collected: FISRT_LIST_FIRST_DATA_ROW for the
    sheet titled FISRT_LIST, OTHER_LISTS_FIRST_DATA_ROW for any other
    sheet. All collected rows are then passed to merge().

    result_file_path is accepted but not used yet, because writing the
    results file is not implemented.

    Any exception raised while processing is caught and printed; the
    function always returns None.
    """
    try:
        files = get_files(dir_path)
        if not files:
            print('No files to process')
            return

        rows_to_process = []
        for file in files:
            workbook = openpyxl.load_workbook(filename=file)
            for sheet in workbook:
                # Compare titles instead of looking the sheet up with
                # workbook[FISRT_LIST]: the lookup raises KeyError for
                # a workbook that has no sheet with that title.
                if sheet.title == FISRT_LIST:
                    first_data_row = FISRT_LIST_FIRST_DATA_ROW
                else:
                    first_data_row = OTHER_LISTS_FIRST_DATA_ROW

                for row in sheet:
                    # all cells of a row share one row number
                    if row[0].row >= first_data_row:
                        rows_to_process.append(row)

        # TODO: pass the merged dict and result_file_path to
        # build_results_file() and report success once it is implemented.
        merge(rows_to_process)

    except Exception as ex:
        print('Error while processing')
        print(ex)


In [None]:
# Folder scanned for workbooks in this run.
dir_path = '/home/sancau/Desktop/kub'
process(dir_path=dir_path)