In [None]:
import os
import re
import numpy as np
import pandas as pd

In [None]:
def find_sortindex(directory=r'C:\Users\entrup\workspace\PLX_methods_worksheets\trunk\__productstatus__'):
    """Collect id, sort index and category names of all worksheet modules below ``directory``.

    Every ``*.py`` file found by walking ``directory`` is scanned line by line
    for the first ``WORKSHEET_ID = "..."``, ``wsd.sort_index = <digits>`` and
    ``wsd.category = {...}`` assignment. Each assignment has to fit on a
    single line to be recognised.

    Files without a ``WORKSHEET_ID`` are recorded as skipped. Files with an id
    but without a sort index or category are reported via ``print`` and left
    out of the result.

    Args:
        directory: Root directory that is searched recursively.

    Returns:
        A tuple ``(ws_info_list, languages, skipped_files)``:

        * ``ws_info_list`` -- list of dicts with the keys ``'id'``, ``'name'``
          (file name without extension), ``'path'``, ``'sortindex'`` (int) and
          ``'category'`` (dict mapping language key to category name).
        * ``languages`` -- set of all language keys that were encountered.
        * ``skipped_files`` -- file names of ``.py`` files without an id.
    """
    # Raw strings: the patterns are unchanged, but ``\s``/``\w``/``\d`` are no
    # longer invalid escape sequences of a normal string literal.
    pattern_id = re.compile(r'^\s*WORKSHEET_ID\s*=\s*["\']([\w\d_]+)["\']')
    pattern_sortindex = re.compile(r'^\s*wsd\.sort_index\s*=\s*(\d+)')
    pattern_category = re.compile(r'^\s*wsd\.category\s*=\s*\{(.*)\}')
    ws_info_list = []
    skipped_files = []
    languages = set()

    for root, _dirs, files in os.walk(directory):
        for file_ in files:
            file_name, ext = os.path.splitext(file_)
            if ext != '.py':
                continue
            full_path = os.path.join(root, file_)
            ws_id, sortindex, category = _scan_worksheet(
                full_path, pattern_id, pattern_sortindex, pattern_category)

            if not ws_id:
                # No worksheet id: remember the file and carry on with the
                # next one. (A ``break`` here would silently drop every
                # remaining file of the current directory.)
                skipped_files.append(file_)
                continue
            if not sortindex:
                print("{}: missing sort_index.".format(file_name))
            elif not category:
                print("{}: missing category.".format(file_name))
            else:
                ws_info_list.append({
                    'id': ws_id,
                    'name': file_name,
                    'path': full_path,
                    'sortindex': int(sortindex),
                    'category': _parse_category(category, languages),
                })
    return ws_info_list, languages, skipped_files


def _scan_worksheet(full_path, pattern_id, pattern_sortindex, pattern_category):
    """Return the first ``(ws_id, sortindex, category)`` texts found in the file ('' if absent)."""
    ws_id = sortindex = category = ''
    need_id = need_sortindex = need_category = True
    # ``with`` guarantees the handle is closed, also when the scan stops early.
    with open(full_path) as handle:
        for line in handle:
            # The patterns are anchored with ``^``, so at most one match per line.
            if need_id:
                match = pattern_id.match(line)
                if match:
                    ws_id = match.group(1)
                    need_id = False
            if need_sortindex:
                match = pattern_sortindex.match(line)
                if match:
                    sortindex = match.group(1)
                    need_sortindex = False
            if need_category:
                match = pattern_category.match(line)
                if match:
                    category = match.group(1)
                    need_category = False
            if not (need_id or need_sortindex or need_category):
                break
    return ws_id, sortindex, category


def _unquote(token):
    """Strip whitespace, one leading ``u`` string prefix and all double quotes from ``token``."""
    token = token.strip()
    # Only drop a *leading* ``u``; a blanket replace of ``u"`` would also eat
    # the last letter of values ending in "u" (``u"Menu"`` -> ``Men``).
    if token.startswith('u"'):
        token = token[1:]
    return token.replace('"', '').strip()


def _parse_category(category, languages):
    """Convert the text between the braces of ``wsd.category`` into a dict; adds keys to ``languages``."""
    cat_dict = {}
    for cat in category.split(','):
        if not cat.strip():
            # Empty fragment, e.g. produced by a trailing comma.
            continue
        # Split on the first colon only so that names may contain colons.
        raw_key, raw_name = cat.split(':', 1)
        # An empty key is treated as English.
        key = _unquote(raw_key) or 'en'
        name = _unquote(raw_name)
        languages.add(key)
        if key in cat_dict:
            # NOTE(review): duplicate handling kept exactly as before. It looks
            # like it is meant to park the unlabeled default name under
            # 'other' when an explicit 'en' entry exists as well -- confirm.
            if key not in cat:
                cat_dict['other'] = name
            else:
                cat_dict['other'] = cat_dict['en']
        cat_dict[key] = name
    return cat_dict

In [None]:
# Scan the default worksheet directory and report the language keys that occur.
data, languages, skipped_files = find_sortindex()
print(
    "\nThere are categories in {} languages: {}".format(
        len(languages), ", ".join(sorted(languages))
    )
)

In [None]:
# Give every worksheet a German category name: if 'de' is missing, fall back
# to the 'other' entry when there is one, otherwise to the English name.
for entry in data:
    names = entry['category']
    if 'de' in names:
        continue
    fallback_key = 'other' if 'other' in names else 'en'
    names['de'] = names[fallback_key]

In [None]:
# Copy each non-empty category name to a top-level key per language so that it
# becomes its own DataFrame column. English/German names that are not pure
# ASCII are printed together with the worksheet id and removed again.
for item in data:
    for lang in languages:
        name = item['category'].get(lang, False)
        if not name:
            continue
        item[lang] = name
        try:
            # Works on Python 2 byte strings (implicit ASCII decode) and on
            # Python 3 text alike; both failures derive from UnicodeError.
            name.encode('ascii')
        except UnicodeError:
            if lang in ('en', 'de'):
                # print() call instead of the Python 2-only print statement;
                # the output ("<name> <id>") is the same.
                print("{} {}".format(name, item['id']))
                del item[lang]
print("Found {} plugins.".format(len(data)))

In [None]:
# Build the overview table: fixed columns first, then one column per language
# (alphabetical), then path and the raw category dict; rows ordered by sort index.
cols = ['name', 'id', 'sortindex']
cols.extend(sorted(languages))
cols.extend(['path', 'category'])
df = pd.DataFrame.from_dict(data)[cols].sort_values(by='sortindex')
df.head()

In [None]:
# Display the complete table sorted by sort index (not only the first rows).
df

In [None]:
# Export the English/German overview to an Excel workbook with three sheets:
# the table as-is, sorted by English name, and sorted by German name.
cols_export = ['name', 'id', 'sortindex', 'en', 'de', 'path']
df_export = df[cols_export]
# The context manager saves and closes the workbook even if writing a sheet
# fails; the former explicit ExcelWriter.save() call was removed in pandas 2.0.
with pd.ExcelWriter(r'C:\Users\entrup\Documents\Jupyter-Output\SortIndex-Export.xlsx', engine='xlsxwriter') as writer:
    df_export.to_excel(writer, sheet_name=u'sort_index')
    df_export.sort_values(by=['en', 'sortindex']).to_excel(writer, sheet_name=u'en')
    df_export.sort_values(by=['de', 'sortindex']).to_excel(writer, sheet_name=u'de')