In [84]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import colors as mcolors
import math
from typing import List, Tuple
import parse

In [85]:
# matplotlib colors
colors = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS)

In [86]:
def shape_string(string: str) -> str:
    """ shape string (remove space)
    
    Args:
        string: string you want to shaping
    
    Returns:
        shaped(space removed) string
    """
    string.replace(" ", "")
    return string

In [87]:
def get_dataframe(book_name:str) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """get dataframe from excel file.

    Args:
        book_name: name of excel file.

    Returns:
        node dataframe, color dataframe.
    """    
    EXL = pd.ExcelFile(book_name)
    df_color = EXL.parse("color")
    df_node = EXL.parse("node")
    return df_node, df_color

In [88]:
def is_color(string: str) -> bool:
    """ Return the string passed is whether or not intended to represent the color.
    
    "string indicate color" means 
    RGB(%d, %d, %d), or
    matplotlib color.
    if you want add rule, add rule to this function.

    Args:
        string: string data to judge indecating color or not.

    Returns:
        the string passed is whether or not intended to represent the color.
        true: it's color.
        false: it isn't color.
    """
    string = shape_string(string)
    return ((parse.parse("RGB({:d},{:d},{:d})", string) is not None) or (string in colors))

In [89]:
# RGB や matplotlib colorsにhitした時にcolor codeを返す(format: #ABCDEF)
def get_code(string: str) -> str:
    """ RGB(%d,%d,%d) or matplotlib color(e.g. red, blue ...) -> RGB(%02X, %02x, %02X)(e.g. #AABBCC)
    
    from decimanl numbered RGB or matplotlib color to hex numbered RGB
    if you want to add rule, add convert method in this function
    
    Args:
        string: string data to convert to hex number
        
    Returns:
        RGB data written in hex number
    
    Raise:
        if string is not indicate color, raise ValueError
    """
    
    if not is_color(string):
        raise ValueError("color code must be matplotlib colors or RGB(%d, %d, %d)")
    # RGB(%d, %d, %d) style
    if "RGB" in string:
        a = parse.parse("RGB({:d},{:d},{:d})", string)
        return '#%02X%02X%02X' % (a[0], a[1], a[2])
    # matplotlib color style
    elif string in colors:
        return colors[string]

In [90]:
def is_selector(string: str, selector_method_dict) -> bool:
    """ Return the string passed is whether or not "selector".
    
    selector is a function that determines nodes to color.
    
    Args:
        string: String for determining whether it is a selector
        selector_method_dict: selector:method dict
    
    Returns:
        is string selector?
    """
    
    # RGB() is not selector(color)
    if is_color(string): return False
    func = parse.parse("{}({})", string)
    # string is func
    if(func is not None):
        name = func[0]
        args = func[1].split(",")
        print("string:", string)
        print("name:", name)
        print("args:", args)
        return name in selector_method_dict
    return False

In [91]:
def get_method_value(string: str) -> Tuple[str, List[float]]:
    """ selector XXXXXX(value) => XXXXXX, [float(value)].
    
    separate function name and argument.
    
    Args:
        string: string data to separate.
    
    Returns:
        function name, argument value.
    """
    func = parse.parse("{}({})", string)
    name = func[0]
    args = func[1].split(",")
    return name, args

In [92]:
def get_selector_color_dicts(df_color: pd.DataFrame, selector_method_dict):
    """ make selector_color_dict from df_color(excel file).
    
    selector_color_dict means "Which range shows which color".
    TODO: %で割合にする仕様を追加する 
    10 RGB(255,0,0) 20 TOP3 RGB(0,255,0) ...
    => [{selector: equation, color: colormap or color or [color] }]
    selector: MINMAX ... MIN <= x <= MAX 's color is "color"Args:
    selector: TOPx ... TOP x 's color is "color"    df_color: color data readed from excel file.
        selector_method_dict: (selector: method(function)) dict.
    
    Returns:
        selector_color_dict: Which range shows which color
    """
    
    # first, Interpret each line
    # [("color", colorcode) or ("selector", {"method":method, value:value}) or ("number", value)]
    selector_number_color_dict = []
    # get first column name
    column_name = df_color.columns[0]

    for i in range(df_color[column_name].size):
        row = df_color[column_name][i]
        row = str(row)
        # color
        if is_color(row):
            selector_number_color_dict.append(("color", get_code(row)))
        # selector
        elif is_selector(row, selector_method_dict):
            method, values = get_method_value(row)
            selector_number_color_dict.append(("selector", {"method": method, "values": values}))
        elif row.isdigit():
            n = float(row)
            selector_number_color_dict.append(("number" , n))

    # make MINMAX method from Color sandwiched between numbers
    selector_color_dicts = [];
    for i in range(len(selector_number_color_dict)):
        if(selector_number_color_dict[i][0] == "color"):
            # default number is nan
            # prv: preview row
            prv = math.nan
            # nxt: next row
            nxt = math.nan
            # sel: selector
            sel = ""

            # if prev is selector, this color's selector is this
            if(1 <= i and selector_number_color_dict[i-1][0] == "selector"):
                sel = selector_number_color_dict[i-1][1]
            # elif prev is number this color's selector is MINMAX
            if(1 <= i and selector_number_color_dict[i-1][0] == "number"):
                prv = selector_number_color_dict[i-1][1]
            # if next is number this color's selecor is MINMAX
            if(i < len(selector_number_color_dict)-1 and selector_number_color_dict[i+1][0] == "number"):
                nxt = selector_number_color_dict[i+1][1]

            # if this row means selector, add as it is
            if(sel != ""): selector_color_dicts.append({"selector": sel, "color" : selector_number_color_dict[i][1]})
            # else it means MINMAX selector
            else: selector_color_dicts.append({"selector": {"method": "MINMAX", "values": [prv, nxt]}, "color" : selector_number_color_dict[i][1]})
    
    return selector_color_dicts

In [93]:
def get_name_color_dict(df_node: pd.DataFrame, column_name, selector_method_dict, selector_color_dicts):
    """ make correspondence dict of which name indicates which color.
    
    Args:
        df_node: node dataframe
        column_name: 
        selector_method_dict: selector:method dict
        selector_color_dicts: [selector:color] dict
    
    Returns:
        correspondence dict of which name indicates which color.
    """
    name_color_dict = {}
    for dic in selector_color_dicts:
        method = dic["selector"]["method"]
        values = dic["selector"]["values"]
        color = dic["color"]
        if(method in selector_method_dict):
            selected_row = selector_method_dict[method](df_node, column_name, values)
            print(selected_row)

        if "名前" in df_node.columns:
            for n in selected_row["名前"]:
                name_color_dict[n] = color
            
    return name_color_dict

In [94]:
def to_diag(output_filename:str, edge_filename:str, df_node:pd.DataFrame, name_color_dict) -> None:
    """ make diag data from df_node, name_color_dict
    
    Args:
        output_filename: output filename
        edge_filename: edge filename(aaa -> bbb; bbb->ccc ...)
        df_node: node dataframe (from excel file)
        name_color_dict: dict data maked by get_name_color_dict
    """
    # make label for output blockdiag file
    df_node["label"] = df_node["名前"] + " \\n "
    for a in df_node:
        if(a == "名前" or a == "label"): continue
        df_node["label"] += str(a) + ": " + df_node[a].astype(str) + " \\n "

    # Cut off the last 4 characters (eliminate the last newline)
    df_node["label"] = df_node["label"].map(lambda x:x[:-4])
    
    # Variable for deleting the last}
    last_bracket_pos = 0
    i = 0
    output = []
    for line in open(edge_filename):
        output.append(line)
        if('}' in line):
            last_bracket_pos = i
        i += 1

    # remove last "}"
    output = output[:last_bracket_pos]

    output.append('\n')

    # node information 
    for key, row in df_node.iterrows():
        output.append(row["名前"] + "[label=\"" + row["label"] + "\", color=\"" + name_color_dict[row["名前"]] +"\"];\n");

    # add last "}"
    output.append('}')

    f = open(output_filename, 'w')
    f.writelines(output)
    f.close()

In [95]:
# method names

def get_top(df_node, column_name, values):
    selected_row = df_node.copy();
    value = int(values[0])
    selected_row = df_node.copy()
    selected_row = selected_row.nlargest(value, column_name)
    return selected_row

def min_max(df_node, column_name, values):
    selected_row = df_node.copy();
    if(values[0] is not math.nan): selected_row = selected_row[values[0] <= selected_row[column_name]]
    if(values[1] is not math.nan): selected_row = selected_row[values[1] > selected_row[column_name]]
    return selected_row
            
default_selector_method_dict = {
    "TOP": get_top,
    "MINMAX": min_max,
}

In [96]:
if __name__ == '__main__':
    # read data
    df_node, df_color = get_dataframe("data.xlsx")
    # selector:color dict
    selector_color_dicts = get_selector_color_dicts(df_color, default_selector_method_dict)
    # row name:color dict
    name_color_dict = get_name_color_dict(df_node,df_color.columns[0],default_selector_method_dict, selector_color_dicts)
    # output file
    to_diag("out.diag", "map.diag", df_node, name_color_dict)

string: TOP(3)
name: TOP
args: ['3']
             名前    paramA        paramB  paramC
0         A_top  7.397979  43069.578089      10
2       日本語test  5.600000  66741.381375       4
11       E_PAGE  4.600000  44121.264531      30
12  branch_PAGE  9.900000  29226.761041      12
              名前  paramA        paramB  paramC
1          B_TOP    15.0  99409.703087       2
3    それぞれの項目_TOP    13.0  85414.501155     100
4          C_TOP    14.5  95504.455222       3
5  C_detail_page    11.7  82347.414753     124
6       にホN語test    11.8  86705.031812      30
7      list_view    16.7  56451.003900      24
8         D_PAGE    11.0  51592.526965       5
9    D_NEXT_PAGE    15.3  10575.319214      50
             名前  paramA       paramB  paramC
10  E_NEXT_PAGE    51.5  79581.66396      24
             名前  paramA        paramB  paramC
10  E_NEXT_PAGE    51.5  79581.663960      24
7     list_view    16.7  56451.003900      24
9   D_NEXT_PAGE    15.3  10575.319214      50
