In [1]:
from Synthesizer import synthesize

In [2]:
import pandas as pd

DF = pd.DataFrame
S = pd.Series


class WrongPathError(Exception):
    pass


class TableManager:
    def __init__(self, df: DF):
        self.df = df
    
    def _repr_html_(self):
        return self.df._repr_html_()
    
    def __getitem__(self, *args, **kwargs) -> DF | S:
        return self.df.__getitem__(*args, **kwargs)
    
    @property
    def _columns(self) -> list:
        return self.df.columns
    
    def _len(self, x_axis: bool) -> int:
        return self.df.shape[1 if x_axis else 0]

    def _check_relative_index(self, index: int, x_axis: bool) -> None:

        if abs(index) > self._len(x_axis) / 2:
            raise WrongPathError("Index is absolute. Should use relative index.")
    
    def _check_inbound_index(self, index: int, x_axis: bool) -> None:
        total_len = self._len(x_axis)

        if index >= total_len or index < -total_len:
            raise WrongPathError("Index is out of bound.")
    
    def _copy_modify(self, function) -> DF:
        return TableManager(function(self.df))
    
    def _copy_getitem(self, key) -> DF:
        return self._copy_modify(lambda table: table.__getitem__(key))
    
    def _copy_loc(self, key1, key2 = None) -> DF:
        if key2 is None:
            return self._copy_modify(lambda table: table.loc[key1])
        else:
            return self._copy_modify(lambda table: table.loc[key1, key2])
    
    def _to_absolute_index(self, index: int, x_axis: bool) -> int:
        return index if index >= 0 else self._len(x_axis) + index
    
    def _check_valid_start_end(self, start: int, end: int, x_axis: bool) -> None:
        start = self._to_absolute_index(start, x_axis)
        end = self._to_absolute_index(end, x_axis)

        if start >= end:
            raise WrongPathError("Start index is greater than or equal to end index.")
        
        self._check_inbound_index(start, x_axis)
        self._check_inbound_index(end-1, x_axis)

    def reorder(self, start: int, end: int, move: int, x_axis: bool) -> DF:
        self._check_relative_index(start, x_axis)
        self._check_relative_index(end, x_axis)
        
        self._check_valid_start_end(start, end, x_axis)
        
        if move == 0:
            raise WrongPathError("Move is 0. Should be an effective move.")
        elif move > 0:
            self._check_inbound_index(end-1+move, x_axis)
        else:
            self._check_inbound_index(start+move, x_axis)

        return self._x_reorder(start, end, move) if x_axis else self._y_reorder(start, end, move)
    
    def _x_reorder(self, start: int, end: int, move: int) -> DF:
        cols = list(self._columns)
        cols_to_move = cols[start:end]
        del cols[start:end]
        for i, col in enumerate(cols_to_move):
            cols.insert(start + move + i, col)
        return self._copy_getitem(cols)
    
    def _y_reorder(self, start: int, end: int, move: int) -> DF:
        idx = list(self.df.index)
        idx_to_move = idx[start:end]
        del idx[start:end]
        for i, index in enumerate(idx_to_move):
            idx.insert(start + move + i, index)
        return self._copy_loc(idx)


In [3]:
import pandas as pd
import random
import string

def random_value():
    """Return either a float from ``random.random()`` or a single ASCII
    letter, picked with equal probability."""
    number = random.random()
    letter = random.choice(string.ascii_letters)
    return random.choice([number, letter])

# Two 5x5 example tables of random cells; they differ only in column labels
# (and in the random values drawn).
df1 = pd.DataFrame(
    [[random_value() for _col in range(5)] for _row in range(5)],
    columns=list('ABCDE'),
)
df2 = pd.DataFrame(
    [[random_value() for _col in range(5)] for _row in range(5)],
    columns=list('VWXYZ'),
)

In [4]:
# Wrap each example DataFrame in a TableManager.
input1, input2 = TableManager(df1), TableManager(df2)

In [5]:
# Apply the same two-step row reorder to each example input.
output1, output2 = (
    table.reorder(0, 1, -2, False).reorder(1, -1, -1, False)
    for table in (input1, input2)
)

In [6]:
# Display the first example input (rendered through TableManager._repr_html_).
input1

Unnamed: 0,A,B,C,D,E
0,0.032137,U,0.617287,0.133527,0.885909
1,0.394776,0.305478,T,0.234029,C
2,b,Z,0.957231,0.955747,0.602328
3,0.259893,0.730682,0.115726,0.523233,0.435344
4,0.789845,0.994741,0.042293,u,0.659959


In [7]:
# Display the first example output: the rows of input1 after both reorders.
output1

Unnamed: 0,A,B,C,D,E
2,b,Z,0.957231,0.955747,0.602328
0,0.032137,U,0.617287,0.133527,0.885909
3,0.259893,0.730682,0.115726,0.523233,0.435344
1,0.394776,0.305478,T,0.234029,C
4,0.789845,0.994741,0.042293,u,0.659959


In [12]:
# Call the external `synthesize` with the two (input, output) example pairs
# built above and a single variable name, 'x'.
# NOTE(review): presumably this searches the grammar file for programs over
# `x` that map each input to its output; the meaning of metric,
# metric_parameter, tactic, max_height and statistics is defined by
# `synthesize`, which is not visible here — confirm against its documentation.
programs = synthesize(
    input_output_simple_list=[
        (input1, output1),
        (input2, output2)
    ],
    variable_names=['x'],
    search_space_file='utils/grammars/TableManagerGrammar.txt',
    metric='VectorMetric',
    metric_parameter='cdist_hamming',
    tactic='height',
    max_height=2,
    statistics=True
)

The synthesizer searched 0 programs up to height #2.


In [3]:
# Display whatever `synthesize` returned.
programs

['x[0:-1:None]']