In [22]:
import re
from enum import Enum, auto
from typing import List
from functools import reduce
from dataclasses import dataclass
import abc
import pandas as pd
import numpy as np

In [23]:

# abstract base class
class TransformationStrategy():

    @abc.abstractclassmethod
    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        pass

    @abc.abstractclassmethod
    def get_code(self, df_name: str) -> str:
        pass

    def __str__(self) -> str:
        return f"{self.__class__.__name__}"


# load and save
class LoadExcelStrategy(TransformationStrategy):

    def __init__(self, file_name: str, sheet_name:str|int = 0, index:str|None = None):
        self.file_name = file_name
        self.sheet_name = sheet_name
        self.index = index

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        return pd.read_excel(self.file_name, sheet_name=self.sheet_name, index_col=self.index)

    def get_code(self, df_name: str) -> str:
        return f"""
#here we read the data provided by the {self.file_name} from the sheet {self.sheet_name if type(self.sheet_name) != int else "number "+str(self.sheet_name)} and set the index to {self.index}
{df_name} = pd.read_excel("{self.file_name}", sheet_name = {self.sheet_name if type(self.sheet_name) == int else "'"+self.sheet_name+"'"}{f', index_col = "'+self.index+'"' if self.index is not None else ""})
"""


class SaveExcelStrategy(TransformationStrategy):

    def __init__(self, file_name: str, sheet_name:str = "Sheet1", impl_bool_3:bool = False):
        self.file_name = file_name
        self.sheet_name = sheet_name
        self.index = impl_bool_3

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        df.to_excel(self.file_name, index=self.index, sheet_name=self.sheet_name)
        return df

    def get_code(self, df_name: str) -> str:
        return f"""
#here we save the data from {df_name} to the the {self.file_name} into the  sheet {self.sheet_name} and {"use" if self.index else "dont use"} the index
{df_name}.to_excel("{self.file_name}", sheet_name = "{self.sheet_name}", index = {self.index})
"""



# views
class SelectRowsStrategy(TransformationStrategy):

    def __init__(self, query: str):
        self.query = query
        self.engines = [{"engine": "numexpr"}, {"engine": "python"}]

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        for e in self.engines:
            try:
                df = df.query(self.query, **e)
                return df
            except Exception:
                print(f"{e} didn't work on quering {self.eval_string} trying next engine")
        return pd.DataFrame(columns=df.columns)

    def get_code(self, df_name: str) -> str:
        return f"""#Here we try to query the Expression {self.query} with diffrent engines
for engine in {self.engines}:
    try:
        {df_name} = {df_name}.query('{self.query}', **engine)
    except Exception:
        print(engine, "failed to query", '{self.query}', "trying next")
"""


class SelectColumnStrategy(TransformationStrategy):

    def __init__(self, column: List[str]) -> None:
        self.cols = column

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        return df[self.cols]

    def get_code(self, df_name: str) -> str:
        return f"#we get a subset of the dataframe with these columns {self.cols}\n{df_name} = {df_name}[{self.cols}]\n"


class RenameStrategy(TransformationStrategy):

    def __init__(self, from_col: str, to_col: str) -> None:
        self.mapper = {from_col: to_col}

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        return df.rename(columns=self.mapper)

    def get_code(self, df_name: str) -> str:
        return f'#rename column from-> to {self.mapper}\n{df_name}={df_name}.rename(columns={self.mapper})\n'


class deleteDataStrategy(TransformationStrategy):

    def __init__(self, impl_bool_1:bool = False) -> None:
        self.keep = impl_bool_1

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        if self.keep:
            return df[0:0]
        return pd.DataFrame()

    def get_code(self, df_name: str) -> str:
        return f"#delete all data keep structure\n{df_name}[0:0]" if self.keep else f"#delete dataframe completly by creating a new one\n{df_name} = pd.DataFrame()"


# updates row based
class addRowStrategy(TransformationStrategy):

    def __init__(self, values:List) -> None:
        if type(values[0]) == dict:
            self.order = False
            self.tba = {k:[v] for d in values for k,v in d.items()}
        else:
            self.order = True
            self.tba = values
        

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        if self.order:
            df.loc[len(df.index)] = self.tba
        else:
            row_df = pd.DataFrame(data=self.tba)
            df = df.append(row_df, ignore_index=True)
        return df

    def get_code(self, df_name: str) -> str:
        if self.order:
            return f"#add row based on args order\n{df_name}.loc[len({df_name}.index)] = {self.tba}"
        return f"#add row based on keywords by creating a temp df\ntemp_df = pd.DataFrame(data = {self.tba})\n{df_name} = {df_name}.append(temp_df, ignore_index = True)"


class deleteRowStrategy(TransformationStrategy):

    def __init__(self, values:List) -> None:
        self.engines = [{"engine": "numexpr"}, {"engine": "python"}]   
        if type(values[0]) == dict:
            self.order = False
            delist = {k:v for d in values for k,v in d.items()}
            self.tbd = " and ".join([f"""(`{key}` == {entry if type(entry) is not str else '"'+entry+'"' })""" for key, entry in delist.items()])
        else:
            self.order = True
            self.tbd = values

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        if self.order:
            self.tbd = " and ".join(f"""(`{key}` == {entry if type(entry) is not str else '"'+entry+'"' })""" for key, entry in dict(zip(df.columns, self.tbd)).items())
        return SelectRowsStrategy(f" not ({self.tbd})").transform(df)

    def get_code(self, df_name: str) -> str:
        comment = "# delete row by filtering the inverse of the provided values\n"
        if self.order:
            filter_part =f"""deli = '"'
filter = " and ".join(f"(`{{key}}` == {{entry if type(entry) is not str else deli+entry+deli}})" for key, entry in dict(zip({df_name}.columns, {self.tbd})).items())
"""
        else:
            filter_part = f"filter = '{self.tbd}'\n"
        query_part = f"""for engine in {self.engines}:
    try:
        {df_name} = {df_name}.query(f'not ({{filter}})', **engine)
    except Exception:
        print(engine, "failed to query", f'not ({{filter}})', "trying next")
"""
        return comment+filter_part+query_part


class changeRowStrategy(TransformationStrategy):

    def __init__(self, org_value: List, new_value: List) -> None:
        self.cur = deleteRowStrategy(org_value)
        self.new_v = addRowStrategy(new_value)

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        df = self.cur.transform(df)
        return self.new_v.transform(df)

    def get_code(self, df_name: str) -> str:
        return f"""#updating values by first deleting and then adding
#deleting
{self.cur.get_code(df_name)}
#adding
{self.new_v.get_code(df_name)}
"""


# updates on cols
class deleteColumnStrategy(TransformationStrategy):

    def __init__(self, column: str) -> None:
        self.tbd = column

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        return df.drop(self.tbd, axis=1)

    def get_code(self, df_name: str) -> str:
        return f"#delete column\n{df_name} = {df_name}.drop('{self.tbd}', axis = 1)"


class setColumnStrategy(TransformationStrategy):

    def __init__(self, column: str, value:str) -> None:
        self.expr = f"`{column}` = {value}"
        self.engines = [{}, {"engine": "python"}]

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        for engine in self.engines:
            try:
                df = df.eval(self.expr, **engine)
                return df
            except Exception:
                #prop logging of some sort 
                pass
        raise Exception("Transformation failed: not a valid value provided")

    def get_code(self, df_name: str) -> str:
        return f"""#Here we try to evaluate the Expression {self.expr} with diffrent engines
for engine in {self.engines}:
    try:
        {df_name} = {df_name}.eval('{self.expr}', **engine)
    except Exception:
        print(engine, "failed to evaluate", '{self.expr}', "trying next")
"""

class addColumnStrategy(TransformationStrategy):
    def __init__(self, column: str, value:str) -> None:
        self.setter = setColumnStrategy(column, value)
        self.column = column

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        if self.column in df.columns:
            raise Exception(f"Transformation failed: Column {self.column} already in dataframe")
        return self.setter.transform(df)

    def get_code(self, df_name: str) -> str:
        return self.setter.get_code(df_name)

class changeColumnStrategy(TransformationStrategy):
    def __init__(self, column: str, value:str) -> None:
        self.setter = setColumnStrategy(column, value)
        self.column = column

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        if self.column not in df.columns:
            raise Exception(f"Transformation failed: Column {self.column} not in dataframe")
        return self.setter.transform(df)

    def get_code(self, df_name: str) -> str:
        return self.setter.get_code(df_name)

# special functions
class DoNothingStrategy(TransformationStrategy):
    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        return df

    def get_code(self, df_name: str) -> str:
        return f"# fyi here was called a Do Nothing call on {df_name}\n"

class SetIndexStrategy(TransformationStrategy):

    def __init__(self, column:str):
        self.column = column

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        return df.set_index(self.column)

    def get_code(self, df_name: str) -> str:
        return f"# set index to {self.column} in {df_name}\n{df_name}.set_index('{self.column}', inplace = True)"

class ResetIndexStrategy(TransformationStrategy):

    def __init__(self, impl_bool_1:bool = False):
        self.drop= not impl_bool_1

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        return df.reset_index(drop=self.drop)

    def get_code(self, df_name: str) -> str:
        return f"# rest index to in {df_name}\n{df_name}.reset_index(drop = {self.drop}, inplace = True)"








#special special functions
class dotStrategy(TransformationStrategy):
    def __init__(self, func_string: str):
        self.func_string = func_string

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        exec_string = f'edf = df{self.func_string}'
        loc = {}
        exec(exec_string, locals(), loc)
        return loc["edf"]

    def get_code(self, df_name: str) -> str:
        return f"#Here is a call Happing using the dotStrategy aka pure python code beaware\n{df_name} = {df_name}{self.func_string}\n"


In [24]:
class DataTypes(Enum):
    INT = "-?\d+"
    FLOAT ="-?\d+\.\d+"
    STRING ='"[^"]+"'
    BOOL = "True|False"
    NONE = "None"
    EXPR = ".+"
    ATTR = "`[^`]+`"
    ATTRMAP = """(?:`[^`]+`)=(?:-?\d+\.\d+|-?\d+|"[^"]+"|True|False|None)"""

@dataclass(frozen=True)
class StrategyFunction():
    strategy:type[TransformationStrategy]
    definition:str
    groups: List


class FunctionBuilder():

    def build_function(self, strategy:type[TransformationStrategy], needed:dict,*args) -> StrategyFunction:
        """providing a strategy , and definition of a function , build a text based function with a regex to find it 
            and an instruction set to filter the string for parameters (look at needed for more info)
        Args:
            strategy (type[TransformationStrategy]): definition of strategy on which this func should be used on
            needed (dict): {
                text:str --> a text with or without marked spots to place regex for parameter finding e.g. "this is a function {parameter}" -- required
                types:dict a dictonary that maps paramter(defined in text) to the enum DataTypes which the parameter should have -- optional
                repeating: dict maps parameters to boolean if there are of an variable lenght -- optional
                implicite: Bool a boolean parameter can be representet as text itself instead of a parameter --optional
            }
            *args if you want to extend the call with optional parameters --> same as needed

        Returns:
            StrategyFunction: dataclass with all the things needed combined
        """
        whole_function, group_mapper, counter = self.unpacker(needed, 1)
        for arg in args:
            apendy = self.unpacker(arg, counter, True)
            whole_function += apendy[0]
            group_mapper.extend(apendy[1])
            counter = apendy[2]

        
        return StrategyFunction(strategy, whole_function, group_mapper)

    def unpacker(self, function_part:dict, counter:int, optional:bool = False) -> tuple:
        func_regex = function_part.get("text")

        if function_part.get("implicite", False):
            if not optional:
                return (func_regex, [
                    {
                "types":DataTypes.BOOL,
                "group":None,
                "repeating":False,
                "needed": not optional,
                "implicite":True,
                "name":None
            }
                ], counter)
            
            return (f"( {func_regex})?", [{
                "types":DataTypes.BOOL,
                "group":counter,
                "repeating":False,
                "needed": not optional,
                "implicite":True,
                "name":f"impl_bool_{counter}"
            }],counter+1)
        types = function_part.get("types", dict())
        repeating = function_part.get("repeating", dict())
        parameter_reg = {key:self.combination_builder(el, repeating.get(key, False)) for key, el in types.items()}
        if optional:
            func_regex = f"(?: {func_regex})?"
        func_regex = func_regex.format(**parameter_reg)
        group_mapper = list()
        for key, type in types.items():
            group_mapper.append({
                "types":type,
                "group":counter,
                "repeating":repeating.get(key,False),
                "needed": not optional,
                "implicite":False,
                "name":key
            })
            counter += 1
        
        return (func_regex, group_mapper, counter)

    def combination_builder(self, types:tuple, repeating:bool = False)->str:
        comb = "|".join(t.value for t in types)
        if repeating:
            return f"((?:(?:{comb})(?:, )?)+)"
        return f"({comb})"


In [25]:
f = FunctionBuilder()
testinput = [
    {"text":"load data from excel file {file_name}", "types":{"file_name":(DataTypes.STRING,)}},
    {"text":"from the sheet {sheet_name}", "types":{"sheet_name":(DataTypes.STRING,DataTypes.INT)}},
    {"text":"and set the index to the column {index}", "types":{"index":(DataTypes.ATTR,DataTypes.NONE)}},
]
taskfunc = f.build_function(LoadExcelStrategy, *testinput)
print(taskfunc)

StrategyFunction(strategy=<class '__main__.LoadExcelStrategy'>, definition='load data from excel file ("[^"]+")(?: from the sheet ("[^"]+"|-?\\d+))?(?: and set the index to the column (`[^`]+`|None))?', groups=[{'types': (<DataTypes.STRING: '"[^"]+"'>,), 'group': 1, 'repeating': False, 'needed': True, 'implicite': False, 'name': 'file_name'}, {'types': (<DataTypes.STRING: '"[^"]+"'>, <DataTypes.INT: '-?\\d+'>), 'group': 2, 'repeating': False, 'needed': False, 'implicite': False, 'name': 'sheet_name'}, {'types': (<DataTypes.ATTR: '`[^`]+`'>, <DataTypes.NONE: 'None'>), 'group': 3, 'repeating': False, 'needed': False, 'implicite': False, 'name': 'index'}])


In [26]:
class ParameterHandler():

    def repeatingHelper(self, types:tuple):
        return f"({'|'.join(t.value for t in types)})"
    
    def typeHandler(self, type_str:str, datatype:DataTypes):
        match datatype:
            case DataTypes.ATTR|DataTypes.STRING:
                return type_str[1:-1]
            case DataTypes.FLOAT:
                return float(type_str)
            case DataTypes.INT:
                return int(type_str)
            case DataTypes.BOOL:
                return type_str == "True"
            case DataTypes.NONE:
                return None
            case DataTypes.EXPR:
                return type_str
            case DataTypes.ATTRMAP:
                s = type_str.split("=")
                # print(s)
                return {self.typeHandler(s[0], DataTypes.ATTR): self.determineType(s[1], (DataTypes.FLOAT, DataTypes.INT, DataTypes.STRING, DataTypes.NONE, DataTypes.BOOL))}
    
    def determineType(self, match_str:str, types:tuple):
        for t in types:
            if re.fullmatch(t.value, match_str) is not None:
                return self.typeHandler(match_str,t)
    
    def handleRepeating(self, match_str, types:tuple):
        finder = self.repeatingHelper(types)
        return [self.determineType(t, types) for t in re.findall(finder, f"{match_str}, ")]

    def handleInputs(self, matched_str:re.Match, group_def:List[dict]) -> dict:
        res = {"needed":list()}
        for definition in  group_def:
            match definition:
                case {"needed":True, "implicite":True}:
                    res["needed"].append(True)

                case {"needed":False, "implicite":True, "group": group, "name":name}:
                    par = True if matched_str.group(group) else False
                    res[name] = par

                case {"needed":False, "implicite":False, "group": group, "repeating":False, "name":name, "types":types}:
                    if par := matched_str.group(group):
                        res[name] = self.determineType(par, types) 

                case {"needed":True, "implicite":False, "group": group, "repeating":False, "types":types}:
                    res["needed"].append(self.determineType(matched_str.group(group), types))

                case {"needed":False, "implicite":False, "group": group, "repeating":True, "name":name, "types":types}:
                    if par := matched_str.group(group):
                        res[name] = self.handleRepeating(par, types)

                case {"needed":True, "implicite":False, "group": group, "repeating":True, "types":types}:
                    res["needed"].append(self.handleRepeating(matched_str.group(group), types))
                        
                case _:
                    print(f"no match for {definition}")
        return res

        

In [27]:
re.findall(DataTypes.ATTRMAP.value, """`a`=1, `a`=2, `a`=4, `a`="string", `a`=True, `a`=None, `a`=-1.2""")

['`a`=1', '`a`=2', '`a`=4', '`a`="string"', '`a`=True', '`a`=None', '`a`=-1.2']

In [28]:

test_str = [""""this is a test,", 2.2, True, None, 2""",""""lass mal schauen", "was geht,,,,)", 2.12, None"""]

def value_split(str_list):
    r = ParameterHandler().repeatingHelper((DataTypes.FLOAT, DataTypes.INT, DataTypes.STRING, DataTypes.NONE, DataTypes.BOOL))
    return list(re.findall(r,str_list))

for test in test_str:
    for i in value_split(test):print(i)


"this is a test,"
2.2
True
None
2
"lass mal schauen"
"was geht,,,,)"
2.12
None


In [29]:

test_str = 'load data from excel file "test.xlsx" from the sheet "test" and set the index to the column `col 1`'   


print(taskfunc.definition)
print()
print(taskfunc.groups)
print("result")
m = re.fullmatch(taskfunc.definition, test_str)
print(m)
print("group tests")
for par in taskfunc.groups:
    print(m.group(par.get("group")))

print(ParameterHandler().handleInputs(m, taskfunc.groups))

load data from excel file ("[^"]+")(?: from the sheet ("[^"]+"|-?\d+))?(?: and set the index to the column (`[^`]+`|None))?

[{'types': (<DataTypes.STRING: '"[^"]+"'>,), 'group': 1, 'repeating': False, 'needed': True, 'implicite': False, 'name': 'file_name'}, {'types': (<DataTypes.STRING: '"[^"]+"'>, <DataTypes.INT: '-?\\d+'>), 'group': 2, 'repeating': False, 'needed': False, 'implicite': False, 'name': 'sheet_name'}, {'types': (<DataTypes.ATTR: '`[^`]+`'>, <DataTypes.NONE: 'None'>), 'group': 3, 'repeating': False, 'needed': False, 'implicite': False, 'name': 'index'}]
result
<re.Match object; span=(0, 99), match='load data from excel file "test.xlsx" from the sh>
group tests
"test.xlsx"
"test"
`col 1`
{'needed': ['test.xlsx'], 'sheet_name': 'test', 'index': 'col 1'}


In [30]:
print(f.build_function(changeRowStrategy,*[
    {"text":"change row from {org_value} to {new_value}", 
    "types":{"org_value":(DataTypes.FLOAT, DataTypes.INT, DataTypes.STRING, DataTypes.BOOL, DataTypes.NONE), "new_value":(DataTypes.FLOAT, DataTypes.INT, DataTypes.STRING, DataTypes.BOOL, DataTypes.NONE)}, 
    "repeating":{"org_value":True, "new_value":True}
    },
    ]))

StrategyFunction(strategy=<class '__main__.changeRowStrategy'>, definition='change row from ((?:(?:-?\\d+\\.\\d+|-?\\d+|"[^"]+"|True|False|None)(?:, )?)+) to ((?:(?:-?\\d+\\.\\d+|-?\\d+|"[^"]+"|True|False|None)(?:, )?)+)', groups=[{'types': (<DataTypes.FLOAT: '-?\\d+\\.\\d+'>, <DataTypes.INT: '-?\\d+'>, <DataTypes.STRING: '"[^"]+"'>, <DataTypes.BOOL: 'True|False'>, <DataTypes.NONE: 'None'>), 'group': 1, 'repeating': True, 'needed': True, 'implicite': False, 'name': 'org_value'}, {'types': (<DataTypes.FLOAT: '-?\\d+\\.\\d+'>, <DataTypes.INT: '-?\\d+'>, <DataTypes.STRING: '"[^"]+"'>, <DataTypes.BOOL: 'True|False'>, <DataTypes.NONE: 'None'>), 'group': 2, 'repeating': True, 'needed': True, 'implicite': False, 'name': 'new_value'}])


In [31]:
attrTest = {
    "text":"change row from {org_value} to {new_value}",
    "types":{
        "org_value":(DataTypes.ATTRMAP,),
        "new_value":(DataTypes.ATTRMAP,)},
    "repeating":{"org_value":True, "new_value":True}}
att = f.build_function(changeRowStrategy,attrTest)
print(att.definition)
print(att.groups)


testAttr = ['change row from `a`=1, `b`=2, `c`=4 to `a`=1, `b`=2, `c`=4', 'change row from `a`=1, `a`=2, `a`=4, `a`="string", `a`=True, `a`=None, `a`=-1.2 to `a`=1, `a`=2, `a`=4, `a`="string", `a`=True, `a`=None, `ab`="-1.2"']

for testattr in testAttr:
    if m := re.match(att.definition, testattr):
        print(type(m))
        print(ParameterHandler().handleInputs(m,att.groups))
    


change row from ((?:(?:(?:`[^`]+`)=(?:-?\d+\.\d+|-?\d+|"[^"]+"|True|False|None))(?:, )?)+) to ((?:(?:(?:`[^`]+`)=(?:-?\d+\.\d+|-?\d+|"[^"]+"|True|False|None))(?:, )?)+)
[{'types': (<DataTypes.ATTRMAP: '(?:`[^`]+`)=(?:-?\\d+\\.\\d+|-?\\d+|"[^"]+"|True|False|None)'>,), 'group': 1, 'repeating': True, 'needed': True, 'implicite': False, 'name': 'org_value'}, {'types': (<DataTypes.ATTRMAP: '(?:`[^`]+`)=(?:-?\\d+\\.\\d+|-?\\d+|"[^"]+"|True|False|None)'>,), 'group': 2, 'repeating': True, 'needed': True, 'implicite': False, 'name': 'new_value'}]
<class 're.Match'>
{'needed': [[{'a': 1}, {'b': 2}, {'c': 4}], [{'a': 1}, {'b': 2}, {'c': 4}]]}
<class 're.Match'>
{'needed': [[{'a': 1}, {'a': 2}, {'a': 4}, {'a': 'string'}, {'a': True}, {'a': None}, {'a': -1.2}], [{'a': 1}, {'a': 2}, {'a': 4}, {'a': 'string'}, {'a': True}, {'a': None}, {'ab': '-1.2'}]]}


In [32]:
print(f.build_function( deleteDataStrategy,
    {
        "text": "delete the data",
    },
    {
        "text": "but keep the columns",
        "implicite":True
    },
))

StrategyFunction(strategy=<class '__main__.deleteDataStrategy'>, definition='delete the data( but keep the columns)?', groups=[{'types': <DataTypes.BOOL: 'True|False'>, 'group': 1, 'repeating': False, 'needed': False, 'implicite': True, 'name': 'impl_bool_1'}])


In [33]:
print(f.build_function(setColumnStrategy,
    {
        "text": "set column {column} to {expr}",
        "types":{
            "column":(DataTypes.ATTR,),
            "expr":(DataTypes.EXPR,)
        },
    },
))

StrategyFunction(strategy=<class '__main__.setColumnStrategy'>, definition='set column (`[^`]+`) to (.+)', groups=[{'types': (<DataTypes.ATTR: '`[^`]+`'>,), 'group': 1, 'repeating': False, 'needed': True, 'implicite': False, 'name': 'column'}, {'types': (<DataTypes.EXPR: '.+'>,), 'group': 2, 'repeating': False, 'needed': True, 'implicite': False, 'name': 'expr'}])


In [34]:
theListofFuncs = [
    (
        LoadExcelStrategy,
        {
            "text":"load data from excel file {file_name}",
            "types":{"file_name":(DataTypes.STRING,)}
        },
        {
            "text":"from the sheet {sheet_name}",
             "types":{"sheet_name":(DataTypes.STRING,DataTypes.INT)}
        },
        {
            "text":"and set the index to the column {index}", 
            "types":{"index":(DataTypes.ATTR,DataTypes.NONE)}
        },
    ),
    (
        SaveExcelStrategy,
        {
            "text":"save data to excel file {file_name}",
            "types":{"file_name":(DataTypes.STRING,)}
        },
        {
            "text":"in to the sheet {sheet_name}",
             "types":{"sheet_name":(DataTypes.STRING,)}
        },
        {
            "text":"and also keep the index", 
            "implicite":True
        },
    ),
    (
        SelectRowsStrategy,
        {
            "text":"select the rows where {query}",
            "types":{"query":(DataTypes.EXPR,)}
        },
    ),
    (
        SelectColumnStrategy,
        {
            "text":"select the columns named {column}",
            "types":{"column":(DataTypes.ATTR,)},
            "repeating":{"column":True},
        },
    ),
    (
        RenameStrategy,
        {
            "text":"rename column from {from_col} to {to_col}",
            "types":{"from_col":(DataTypes.ATTR,), "to_col":(DataTypes.ATTR,)},
        },
    ),
    (
        deleteDataStrategy,
        {
            "text":"delete the data",
        },
        {
            "text":"but keep the columns",
            "implicite":True
        },
    ),
    (
        addRowStrategy,
        {
            "text":"add row with the values {value}",
            "types":{"value":(DataTypes.FLOAT, DataTypes.INT, DataTypes.STRING, DataTypes.NONE, DataTypes.BOOL)},
            "repeating":{"value":True}
        },
    ),
    (
        addRowStrategy,
        {
            "text":"add row with the values {value}",
            "types":{"value":(DataTypes.ATTRMAP,)},
            "repeating":{"value":True}
        },
    ),
    (
        deleteRowStrategy,
        {
            "text":"delete row with the values {value}",
            "types":{"value":(DataTypes.FLOAT, DataTypes.INT, DataTypes.STRING, DataTypes.NONE, DataTypes.BOOL)},
            "repeating":{"value":True}
        },
    ),
    (
        deleteRowStrategy,
        {
            "text":"delete row with the values {value}",
            "types":{"value":(DataTypes.ATTRMAP,)},
            "repeating":{"value":True}
        },
    ),
    (
        changeRowStrategy,
        {
            "text":"change row from {org_value} to {new_value}",
            "types":
                {
                    "org_value":(DataTypes.FLOAT, DataTypes.INT, DataTypes.STRING, DataTypes.NONE, DataTypes.BOOL),
                    "new_value":(DataTypes.FLOAT, DataTypes.INT, DataTypes.STRING, DataTypes.NONE, DataTypes.BOOL),
                },
            "repeating":
                {
                    "org_value":True,
                    "new_value":True,
                }
        },
    ),
    (
        changeRowStrategy,
        {
            "text":"change row from {org_value} to {new_value}",
            "types":
                {
                    "org_value":(DataTypes.ATTRMAP,),
                    "new_value":(DataTypes.ATTRMAP,),
                },
            "repeating":
                {
                    "org_value":True,
                    "new_value":True,
                }
        },
    ),
    (
        addColumnStrategy,
        {
            "text":"add column {column} with the value {value}",
            "types":
                {
                    "column":(DataTypes.ATTR,),
                    "value":(DataTypes.EXPR,),
                },
        },
    ),
    (
        changeColumnStrategy,
        {
            "text":"change column {column} to the value {value}",
            "types":
                {
                    "column":(DataTypes.ATTR,),
                    "value":(DataTypes.EXPR,),
                },
        },
    ),
    (
        setColumnStrategy,
        {
            "text":"set column {column} to the value {value}",
            "types":
                {
                    "column":(DataTypes.ATTR,),
                    "value":(DataTypes.EXPR,),
                },
        },
    ),
    (
        deleteColumnStrategy,
        {
            "text":"delete column {column}",
            "types":
                {
                    "column":(DataTypes.ATTR,),
                },
        },
    ),
    (
        DoNothingStrategy,
        {
            "text":"do nothing",
        },
    ),
    (
        SetIndexStrategy,
        {
            "text":"set index to {column}",
            "types":{"column":(DataTypes.ATTR,)}
        },
    ),
    (
        ResetIndexStrategy,
        {
            "text":"reset the index",
        },
        {
            "text":"and keep the old one as a column",
            "implicite":True
        },
    ),
]

In [35]:
print(len(theListofFuncs))
buildedFunc = [f.build_function(*des) for des in theListofFuncs]

19


In [36]:
class TSF():
    def __init__(self, additional_funcs:List[StrategyFunction]|None =None) -> None:
        self.parameter_handler:ParameterHandler = ParameterHandler()
        
        self.functions:List[StrategyFunction] = []
        
        if additional_funcs:
            self.functions.extend(additional_funcs)
    
    def get_strategy(self, prov_string:str) -> TransformationStrategy:
        for taskfunc in self.functions:
            if m := re.match(taskfunc.definition, prov_string):
                parameter = self.parameter_handler.handleInputs(m, taskfunc.groups)
                needed_parameter = parameter.pop("needed", list())
                return taskfunc.strategy(*needed_parameter, **parameter)
        if prov_string.startswith("."):
            return dotStrategy(prov_string)
        return None


In [37]:
factory = TSF(additional_funcs=buildedFunc)
testCalls =[
    'load data from excel file "test.xlsx"',
    'load data from excel file "test.xlsx" from the sheet "testo"',
    'load data from excel file "test.xlsx" from the sheet 12 and set the index to the column `Col_1`',
    'load data from excel file "test.xlsx" and set the index to the column `Col_1`',
    'set column `abra kadabra` to the value "test" + " " + "strings"',
    'change column `abra kadabra` to the value "test" + " " + "strings"', 
    'add row with the values `a`=12, `b`="Hund", `c`=-3.1413',
    'delete row with the values `a`=12, `b`="Hund", `c`=-3.1413',
    'delete row with the values 12, "Hund", -3.1413',
    'change row from `a`=12, `b`="Hund", `c`=-3.1413 to `a`=21, `b`="Katze", `c`=3.1413' ,
]
for text in testCalls:
    print(factory.get_strategy(text).get_code("fake"))


#here we read the data provided by the test.xlsx from the sheet number 0 and set the index to None
fake = pd.read_excel("test.xlsx", sheet_name = 0)


#here we read the data provided by the test.xlsx from the sheet testo and set the index to None
fake = pd.read_excel("test.xlsx", sheet_name = 'testo')


#here we read the data provided by the test.xlsx from the sheet number 12 and set the index to Col_1
fake = pd.read_excel("test.xlsx", sheet_name = 12, index_col = "Col_1")


#here we read the data provided by the test.xlsx from the sheet number 0 and set the index to Col_1
fake = pd.read_excel("test.xlsx", sheet_name = 0, index_col = "Col_1")

#Here we try to evaluate the Expression `abra kadabra` = "test" + " " + "strings" with diffrent engines
for engine in [{}, {'engine': 'python'}]:
    try:
        fake = fake.eval('`abra kadabra` = "test" + " " + "strings"', **engine)
    except Exception:
        print(engine, "failed to evaluate", '`abra kadabra` = "test" + " " + "strings"', "

In [39]:
# from BPMN import BPMNEngine
# engine = BPMNEngine("inkltest.bpmn", transform_factory=factory)
# engine.run()

2022-05-11 21:48:12,202 - BPMN.logger - BPMN_Parser.py - INFO - started parsing file:inkltest.bpmn
2022-05-11 21:48:12,204 - BPMN.logger - BPMN_Parser.py - INFO - Parsing completed starting with checks
2022-05-11 21:48:12,205 - BPMN.logger - BPMN_Parser.py - INFO - Checking startevent
2022-05-11 21:48:12,205 - BPMN.logger - BPMN_Parser.py - INFO - Checking startevent finished sucessfully
2022-05-11 21:48:12,206 - BPMN.logger - BPMN_Parser.py - INFO - Checking endevent/s
2022-05-11 21:48:12,206 - BPMN.logger - BPMN_Parser.py - INFO - Checking Endevent/s finished sucessfully
2022-05-11 21:48:12,207 - BPMN.logger - BPMN_Parser.py - INFO - Checking task/s
2022-05-11 21:48:12,208 - BPMN.logger - BPMN_Parser.py - INFO - Checking task/s finished successfully
2022-05-11 21:48:12,209 - BPMN.logger - BPMN_Parser.py - INFO - Checking bpmn:exclusiveGateway/s
2022-05-11 21:48:12,209 - BPMN.logger - BPMN_Parser.py - INFO - no bpmn:exclusiveGateway found checking successfully
2022-05-11 21:48:12,210 

In [4]:
import heapq
l = [1]
heapq.heappop(l)

1