In [47]:
from src.table import Table, Column
from src.column_transformations import Transformation
from src.settings import SQLType
from src.utils import randstr
import random 


# Generate the seed object and turn it into a graph; the bare `graph` expression
# makes the notebook display it (the captured output shows a networkx Graph).
# NOTE(review): the meaning of the positional arguments (1, 6, 6) is not visible
# in this notebook — confirm against Table.generate_seed.
seed = Table.generate_seed(1, 6, 6)
graph = seed.represent_as_graph()
graph 

<networkx.classes.graph.Graph at 0x2f1ca303d90>

In [48]:
# Neighbours of the seed node whose connecting edge carries the label 'makes_up'.
related_columns: list[Column] = [
    neighbour
    for neighbour, edge_attributes in graph[seed].items()
    if edge_attributes.get('label') == 'makes_up'
]
related_columns

[BxwFTblkvLBDzoGp.GCyjomFAiMCKeMjt,
 BxwFTblkvLBDzoGp.NJVdjPTachPWpGhQ,
 BxwFTblkvLBDzoGp.lzMPbCQqcurbqeKk,
 BxwFTblkvLBDzoGp.ZgrojptCqOMFVEwG,
 BxwFTblkvLBDzoGp.DLFUuygTBVCUMuPp,
 BxwFTblkvLBDzoGp.SDDZTBnOeABEuYCs,
 BxwFTblkvLBDzoGp.ttMlMitbDCgFYXwo]

In [None]:
# Add a new table node with a random name to the graph.
# The third Table argument is an empty list (presumably its columns — confirm
# against the Table constructor).
new_table_name = randstr()
new_table = Table(new_table_name, 'model', [])
graph.add_node(new_table)

In [None]:



# Group the related columns by their `type`, leaving out primary-key columns.
related_columns_classified = {}
for column in related_columns:
    if column.is_pk:
        continue
    same_type_columns = related_columns_classified.setdefault(column.type, [])
    same_type_columns.append(column)
related_columns_classified

{<SQLType.INT: 'int'>: [BxwFTblkvLBDzoGp.NJVdjPTachPWpGhQ],
 <SQLType.STRING: 'string'>: [BxwFTblkvLBDzoGp.lzMPbCQqcurbqeKk],
 <SQLType.FLOAT: 'float'>: [BxwFTblkvLBDzoGp.ZgrojptCqOMFVEwG,
  BxwFTblkvLBDzoGp.DLFUuygTBVCUMuPp,
  BxwFTblkvLBDzoGp.SDDZTBnOeABEuYCs,
  BxwFTblkvLBDzoGp.ttMlMitbDCgFYXwo]}

In [50]:
# Placeholder loop: walks the direct subclasses of Transformation without doing
# any work. Its only effect is leaving the global name `transformation` bound to
# the last subclass visited (if there is one).
for transformation in Transformation.__subclasses__():
    pass 

In [51]:
# Probability with which each non-primary-key column is kept.
chance_to_keep_column = 0.9
# The random draw happens only for columns that passed the primary-key check.
columns_to_stay = [
    candidate
    for candidate in related_columns
    if not candidate.is_pk
    if random.random() <= chance_to_keep_column
]
columns_to_stay

[BxwFTblkvLBDzoGp.NJVdjPTachPWpGhQ,
 BxwFTblkvLBDzoGp.lzMPbCQqcurbqeKk,
 BxwFTblkvLBDzoGp.ZgrojptCqOMFVEwG,
 BxwFTblkvLBDzoGp.DLFUuygTBVCUMuPp,
 BxwFTblkvLBDzoGp.SDDZTBnOeABEuYCs]

In [52]:
# Multiplier that turns the number of related columns into the maximum number
# of transformations to generate.
transformation_number_coeff = 2
# `related_columns` is not filtered by `is_pk` here, so every related column counts.
max_transformation_number = round(transformation_number_coeff * len(related_columns))
# Inclusive on both ends, so zero transformations is a possible outcome.
transformation_number = random.randint(0, max_transformation_number)
transformation_number

7

In [55]:
# for _ in range(transformation_number):


def select_transformation(related_columns_classified: dict[SQLType, list[Column]],
                          type_: SQLType,
                          lower_col_number: int = 1,
                          upper_col_number: int | None = None) -> tuple[type[Transformation], int]:
    """Pick a random transformation class for ``type_`` and a column count it accepts.

    A column count is drawn uniformly from ``[lower_col_number, upper_col_number]``.
    Every direct subclass of ``Transformation`` whose ``main_type_bounds`` contains
    ``type_`` and whose ``arity`` admits that count is a candidate. ``arity`` is read
    as ``(minimum, maximum)``, where a maximum of ``0`` means "no upper limit".
    If no candidate fits the drawn count, the draw is repeated over the range spanned
    by the arities of the rejected transformations, so the returned count may exceed
    the number of columns available for ``type_``.

    Args:
        related_columns_classified: Columns grouped by SQL type. It is only used to
            derive the default upper bound.
        type_: The SQL type the transformation has to support.
        lower_col_number: Smallest column count to draw (inclusive).
        upper_col_number: Largest column count to draw (inclusive). Defaults to the
            number of columns registered for ``type_``.

    Returns:
        A ``(transformation_class, affected_columns)`` tuple.

    Raises:
        KeyError: ``upper_col_number`` is omitted and ``type_`` is not a key of
            ``related_columns_classified``.
        ValueError: No transformation supports ``type_``, or the range of column
            counts is empty.
    """
    if upper_col_number is None:
        upper_col_number = len(related_columns_classified[type_])

    # __subclasses__() lists direct subclasses only; deeper descendants are not seen.
    applicable = [
        candidate
        for candidate in Transformation.__subclasses__()
        if type_ in candidate.main_type_bounds
    ]
    if not applicable:
        raise ValueError(f"no transformation supports columns of type {type_!r}")

    # Iterative retry: the original recursed here without any depth limit.
    while True:
        if lower_col_number > upper_col_number:
            raise ValueError(
                f"empty column-count range [{lower_col_number}, {upper_col_number}] "
                f"for type {type_!r}"
            )
        affected_columns = random.randint(lower_col_number, upper_col_number)

        potential_transformations = []
        retry_lower = None
        retry_upper = None

        for candidate in applicable:
            arity_min, arity_max = candidate.arity[0], candidate.arity[1]
            too_few = arity_min > affected_columns
            too_many = arity_max != 0 and arity_max < affected_columns
            if not (too_few or too_many):
                potential_transformations.append(candidate)
                continue

            # An unlimited maximum (0) must not shrink the retry range to
            # [arity_min, 0]; cap it at the current upper bound, but never below
            # the transformation's own minimum.
            effective_max = arity_max if arity_max != 0 else max(arity_min, upper_col_number)
            retry_lower = arity_min if retry_lower is None else min(retry_lower, arity_min)
            retry_upper = effective_max if retry_upper is None else max(retry_upper, effective_max)

        if potential_transformations:
            return random.choice(potential_transformations), affected_columns

        # Nothing fit, so every applicable transformation was rejected and both
        # retry bounds are set. Redraw over the span of their arities.
        lower_col_number, upper_col_number = retry_lower, retry_upper

# One record per planned transformation: the transformation picked, the columns
# it takes as arguments, and the name for the column it produces.
transformations: list[dict[str, list[Column]|Transformation]] = []

for _ in range(transformation_number):
    available_types = list(related_columns_classified)
    transformation, affected_columns = select_transformation(
        related_columns_classified,
        random.choice(available_types),
    )
    # NOTE(review): arguments are drawn with replacement from *all* related
    # columns, not only those of the type chosen above, so a record can mix
    # types, repeat a column, or include a primary key — confirm this is intended.
    argument_columns = random.choices(related_columns, k=affected_columns)
    record = {
        'transformation': transformation,
        'columns': argument_columns,
        'new_column_name': randstr(),
    }
    transformations.append(record)

transformations


[{'transformation': src.column_transformations.tr_Least,
  'columns': [BxwFTblkvLBDzoGp.NJVdjPTachPWpGhQ,
   BxwFTblkvLBDzoGp.DLFUuygTBVCUMuPp],
  'new_column_name': 'ygvbYMhhhQFVVstl'},
 {'transformation': src.column_transformations.tr_Least,
  'columns': [BxwFTblkvLBDzoGp.SDDZTBnOeABEuYCs,
   BxwFTblkvLBDzoGp.ttMlMitbDCgFYXwo],
  'new_column_name': 'vtQEkGmlSRRkcLzb'},
 {'transformation': src.column_transformations.tr_Least,
  'columns': [BxwFTblkvLBDzoGp.ZgrojptCqOMFVEwG,
   BxwFTblkvLBDzoGp.SDDZTBnOeABEuYCs],
  'new_column_name': 'uTceLzCEtfBShkiP'},
 {'transformation': src.column_transformations.tr_Greatest,
  'columns': [BxwFTblkvLBDzoGp.ZgrojptCqOMFVEwG,
   BxwFTblkvLBDzoGp.lzMPbCQqcurbqeKk],
  'new_column_name': 'yffaasxFXygOiUDJ'},
 {'transformation': src.column_transformations.tr_Least,
  'columns': [BxwFTblkvLBDzoGp.ZgrojptCqOMFVEwG,
   BxwFTblkvLBDzoGp.SDDZTBnOeABEuYCs],
  'new_column_name': 'yLoMbgpbAOJvKQKI'},
 {'transformation': src.column_transformations.tr_Greatest,
  

In [58]:
# Display the nodes currently held by the graph.
graph.nodes

NodeView((BxwFTblkvLBDzoGp, BxwFTblkvLBDzoGp.GCyjomFAiMCKeMjt, BxwFTblkvLBDzoGp.NJVdjPTachPWpGhQ, BxwFTblkvLBDzoGp.lzMPbCQqcurbqeKk, BxwFTblkvLBDzoGp.ZgrojptCqOMFVEwG, BxwFTblkvLBDzoGp.DLFUuygTBVCUMuPp, BxwFTblkvLBDzoGp.SDDZTBnOeABEuYCs, BxwFTblkvLBDzoGp.ttMlMitbDCgFYXwo))

In [None]:


# Turn every planned transformation into a graph node connected to its argument
# columns, and build the Column object that the transformation produces.
for transformation_metadata in transformations:
    # Look the class up by its string key. The original indexed the record with
    # the leaked loop variable `transformation`, which is not a key of the record.
    transformation_cls = transformation_metadata['transformation']
    transformation_instance = transformation_cls()
    graph.add_node(transformation_instance)
    for column in transformation_metadata['columns']:
        # NOTE(review): the graph displayed at the top of the notebook is an
        # undirected networkx Graph, where both calls address the same edge and
        # the second label replaces the first. Use a directed graph or a single
        # label if both directions are meant to be kept.
        graph.add_edge(column, transformation_instance, label='goes_to')
        graph.add_edge(transformation_instance, column, label='argument')
    resulting_column = Column(
        # Reuse the name generated when the record was planned rather than
        # drawing a new random one that the record does not know about.
        name=transformation_metadata['new_column_name'],
        type=transformation_cls.return_type,
        table_name=new_table_name,
    )
    # TODO: the original cell ended with the unfinished, syntactically invalid
    # expression `graph[transformation[]]`. `resulting_column` is not attached to
    # the graph yet — presumably that was the intended next step.