In [14]:
from src.table import Table, Column
from src.column_transformations import Transformation
from src.settings import SQLType
from src.utils import randstr
from src.relmap import Relmap
import random 



In [15]:

graph = Relmap()
graph.generate_random_seeds()
seed = graph.get_all_tables('seed')[0]
seed

qbucXskHesofamjj

In [16]:

related_columns: list[Column] = [col for col, col_meta in graph[seed].items() if col_meta.get('label') == 'makes_up']
related_columns

[qbucXskHesofamjj.rwWkAMSKZqpqUbKi,
 qbucXskHesofamjj.kqhNvdTpJngTeIgK,
 qbucXskHesofamjj.mpotBNOFUFAecwjr]

In [17]:
new_table = Table(
        randstr(),
        'model',
    )
graph.add_node(
    new_table
)

In [18]:

related_columns_classified = dict()
for column in related_columns:
    if not column.is_pk:
        related_columns_classified.setdefault(column.type, []).append(column)
related_columns_classified

{<SQLType.STRING: 'string'>: [qbucXskHesofamjj.kqhNvdTpJngTeIgK],
 <SQLType.FLOAT: 'float'>: [qbucXskHesofamjj.mpotBNOFUFAecwjr]}

In [19]:
for transformation in Transformation.__subclasses__():
    pass 

In [20]:
chance_to_keep_column = 0.9
columns_to_stay = [col for col in related_columns if not col.is_pk and random.random()<=chance_to_keep_column]
columns_to_stay

[qbucXskHesofamjj.kqhNvdTpJngTeIgK, qbucXskHesofamjj.mpotBNOFUFAecwjr]

In [21]:
transformation_number_coeff = 2
transformation_number = random.randint(0, round(transformation_number_coeff*len(related_columns)))
transformation_number

4

In [22]:
# for _ in range(transformation_number):


def select_transformation(related_columns_classified: dict[SQLType, list[Column]], 
                          type_: SQLType, 
                          lower_col_number:int=1, 
                          upper_col_number:int|None=None) -> tuple[Transformation, int]:
    
    if upper_col_number is None: 
        upper_col_number = len(related_columns_classified[type_])
    affected_columns = random.randint(lower_col_number, upper_col_number)

    potential_transformations = []
    min_arity = 999
    max_arity = -1

    for t in Transformation.__subclasses__():
        if type_ in t.main_type_bounds:
            if t.arity[0] > affected_columns or t.arity[1] != 0 and t.arity[1] < affected_columns:
                min_arity = min(min_arity, t.arity[0])
                max_arity = max(max_arity, t.arity[1])
            else:
                potential_transformations.append(t)
    
    if len(potential_transformations) == 0:
        return select_transformation(related_columns_classified, type_, min_arity, max_arity)
    else:
        return random.choice(potential_transformations), affected_columns

transformations: list[dict[str, list[Column]|Transformation]] = []

for _ in range(transformation_number):
    
    transformation, affected_columns = select_transformation(
        related_columns_classified,
        random.choice([type_ for type_ in related_columns_classified])
        )
    transformations.append({
        'transformation': transformation,
        'columns': random.choices(related_columns, k=affected_columns),
        'new_column_name': randstr()
    })

transformations


[{'transformation': src.column_transformations.tr_Length,
  'columns': [qbucXskHesofamjj.mpotBNOFUFAecwjr],
  'new_column_name': 'BgcnNulRwjIaMYuM'},
 {'transformation': src.column_transformations.tr_Least,
  'columns': [qbucXskHesofamjj.rwWkAMSKZqpqUbKi,
   qbucXskHesofamjj.rwWkAMSKZqpqUbKi],
  'new_column_name': 'LeWFwnrYXypmTFKs'},
 {'transformation': src.column_transformations.tr_Least,
  'columns': [qbucXskHesofamjj.rwWkAMSKZqpqUbKi,
   qbucXskHesofamjj.mpotBNOFUFAecwjr],
  'new_column_name': 'SEUeAEaOpfAybXsV'},
 {'transformation': src.column_transformations.tr_Greatest,
  'columns': [qbucXskHesofamjj.kqhNvdTpJngTeIgK,
   qbucXskHesofamjj.mpotBNOFUFAecwjr],
  'new_column_name': 'ZVRMuBjuqUZuJIeG'}]

In [23]:
graph.nodes

NodeView((qbucXskHesofamjj, qbucXskHesofamjj.rwWkAMSKZqpqUbKi, qbucXskHesofamjj.kqhNvdTpJngTeIgK, qbucXskHesofamjj.mpotBNOFUFAecwjr, PlBWfxTVlwFqVACx))

In [24]:

# new_table_name
for transformation_metadata in transformations:
    transformation_instance = transformation_metadata['transformation']()
    graph.add_node(transformation_instance)
    for column in transformation_metadata['columns']:
        graph.add_edge(column, transformation_instance, label='goes_to')
        graph.add_edge(transformation_instance, column, label='argument')
    resulting_column = Column(
        name=randstr(),
        type = transformation_metadata['transformation'].return_type,
        table_name=new_table.name,
    )
    graph.add_node(resulting_column)
    # new_col - transformation
    graph.add_edge(resulting_column, transformation_instance, label='made_of')
    graph.add_edge(transformation_instance, resulting_column, label='makes')
    # new_col - new_table
    graph.connect_table_and_column(new_table, resulting_column)
        


In [25]:
graph[new_table]

AtlasView({PlBWfxTVlwFqVACx.JlDwmHvfCRFPAuJQ: {'label': 'makes_up'}, PlBWfxTVlwFqVACx.nKzgNHjjTOLLCPMQ: {'label': 'makes_up'}, PlBWfxTVlwFqVACx.kbPBRXFihLTYgaFk: {'label': 'makes_up'}, PlBWfxTVlwFqVACx.wPQUIczEwVnYgzYS: {'label': 'makes_up'}})