In [1]:
# @title Import Libraries
import pandas as pd
import time
from realtabformer import REaLTabFormer
from transformers import GPT2Config
from src.data_processing import csv_data_split

In [2]:
# Load the breast-cancer CSV through the project helper, which returns three
# objects that are unpacked here as the train, test and sample splits.
train_data, test_data, sample_data = csv_data_split("../data/breast-cancer-wisconsin.csv")
# Bare last expression: renders the training split as the cell output.
train_data

Unnamed: 0,ID,CT,UCSi,UCSh,Madh,SECS,BN,BC,NN,Mi,Class
198,1017061,1,1,1,1,0,1,3,1,1,0
359,501111,5,1,1,0,0,1,0,1,1,0
481,1181567,1,1,1,1,1,1,1,1,1,0
125,1177007,3,1,1,1,0,1,3,1,1,0
598,1016631,0,3,1,1,0,1,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...
485,1001565,6,1,1,1,0,1,3,1,1,0
260,301107,10,8,8,0,3,1,8,7,8,1
364,657753,3,1,1,1,3,1,0,0,1,0
623,1077790,5,1,1,3,0,1,1,1,1,0


In [3]:
# Empty table that fit_and_track() appends one (model name, elapsed seconds)
# row to per fitted model.
results = pd.DataFrame(columns=["Model", "Time (s)"])
# Display the (initially empty) table.
results

Unnamed: 0,Model,Time (s)


In [19]:
def fit_and_track(
    model,
    data,
    model_name,
    num_bootstrap=20,
    target_col="Class",
    save_dir="../models",
    results_df=None,
):
    """Fit ``model`` on ``data``, record the training time, and save the model.

    Parameters
    ----------
    model
        Object exposing ``fit(data, num_bootstrap=..., target_col=...)`` and
        ``save(path)``.
    data
        Training data forwarded unchanged to ``model.fit``.
    model_name : str
        Label stored in the timing table and used as the sub-directory name
        under ``save_dir``.
    num_bootstrap : int, default 20
        Forwarded to ``model.fit``.
    target_col : str, default "Class"
        Forwarded to ``model.fit``.
    save_dir : str, default "../models"
        Directory under which the model is saved as ``{save_dir}/{model_name}``.
    results_df : pandas.DataFrame, optional
        Table that receives the ``[model_name, elapsed_seconds]`` row. When
        omitted, the notebook-level ``results`` frame is used.

    Returns
    -------
    None
        The timing row is appended in place and the model is written to disk.
    """
    if results_df is None:
        # Fall back to the notebook-global timing table.
        results_df = results

    # perf_counter() is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()
    model.fit(data, num_bootstrap=num_bootstrap, target_col=target_col)
    elapsed_time = time.perf_counter() - start_time

    print(f"Model: {model_name}")

    # Append a new row at the next integer label.
    results_df.loc[len(results_df)] = [
        model_name,
        elapsed_time,
    ]

    model.save(f"{save_dir}/{model_name}")

In [20]:
# GPT-2 architecture used by the "small" model: 4 layers, 8 heads, 512-wide
# embeddings.
config_small = GPT2Config(
    n_embd=512,
    n_layer=4,
    n_head=8
)

# The original cell referenced `config_large` without ever defining it, which
# raises NameError on a fresh kernel.
# NOTE(review): these dimensions are a placeholder chosen to keep the same
# per-head width as config_small (n_embd / n_head = 64) — confirm the intended
# "large" architecture.
config_large = GPT2Config(
    n_embd=1024,
    n_layer=12,
    n_head=16
)

# Small variant: custom config_small architecture.
rtf_model_small = REaLTabFormer(
    model_type="tabular",
    tabular_config=config_small,
    epochs=50,
    batch_size=8,
    mask_rate=0.1
)

# Regular variant: no tabular_config passed, so the library's own default
# architecture is used.
rtf_model_reg = REaLTabFormer(
    model_type="tabular",
    epochs=50,
    batch_size=8,
    mask_rate=0.1
)

# Large variant: custom config_large architecture. Unlike the other two, no
# batch_size is passed and mask_rate is 0.15.
rtf_model_large = REaLTabFormer(
    model_type="tabular",
    tabular_config=config_large,
    epochs=50,
    mask_rate=0.15
)

# Fit models and track performance
# fit_and_track(rtf_model_small, train_data, "rtf_small_test")
# fit_and_track(rtf_model_reg, train_data, "rtf_regular")
# fit_and_track(rtf_model_large, train_data, "rtf_large")


# Display the results
# results



In [21]:
# Throwaway tabular model that reuses config_small and trains for a single
# epoch, with mask_rate=0.15.
rtf = REaLTabFormer(
    model_type="tabular",
    tabular_config=config_small,
    epochs=1,
    mask_rate=0.15
)



In [22]:
# Quick fit on the sample split with a single bootstrap round. No target_col
# is passed here, unlike in fit_and_track().
rtf.fit(sample_data,num_bootstrap=1)

Computing the sensitivity threshold...
Using parallel computation!!!




Bootstrap round:   0%|          | 0/1 [00:00<?, ?it/s]

Sensitivity threshold summary:
count    1.000000
mean    -0.000556
std           NaN
min     -0.000556
25%     -0.000556
50%     -0.000556
75%     -0.000556
max     -0.000556
dtype: float64
Sensitivity threshold: -0.0005555555555555565 qt_max: 0.05


Map:   0%|          | 0/273 [00:00<?, ? examples/s]

Step,Training Loss


  0%|          | 0/135 [00:00<?, ?it/s]

Generated 0 invalid samples out of total 256 samples generated. Sampling efficiency is: 100.0000%
Critic round: 5,                     sensitivity_threshold: -0.0005555555555555565,                         val_sensitivity: -0.020407407407407412,                             val_sensitivities: [-0.01611111111111111, -0.025, -0.025, -0.025, -0.025, -0.025, -0.01611111111111111, -0.025, -0.025, -0.011666666666666667, -0.018333333333333333, -0.025, -0.025, 0.001666666666666667, -0.020555555555555556]


<realtabformer.rtf_trainer.ResumableTrainer at 0x12cdb1ad0>

In [23]:
# Save the fitted model's artefacts under the relative directory "maybe/".
# NOTE(review): this differs from the "../models/" location used by
# fit_and_track(); looks like a scratch path — confirm before relying on it.
rtf.save("maybe/")

Copying artefacts from: best-disc-model
Copying artefacts from: mean-best-disc-model
Copying artefacts from: not-best-disc-model
Copying artefacts from: last-epoch-model


In [25]:
# Walk the transformer blocks of the fitted model and report each block's
# attention-head count.
# NOTE(review): the recorded output shows 4 heads per layer, while config_small
# sets n_head=8 — possibly stale kernel state; re-run from a fresh kernel.
for i, block in enumerate(rtf.model.transformer.h):
    num_heads = block.attn.num_heads
    print(f"Layer {i} has {num_heads} attention heads.")

Layer 0 has 4 attention heads.
Layer 1 has 4 attention heads.
Layer 2 has 4 attention heads.
Layer 3 has 4 attention heads.


In [27]:
def print_tensor(model):
    """Print the name and size of every two-dimensional parameter of ``model``.

    Parameters whose ``dim()`` is not 2 (for example bias vectors) are skipped.
    """
    matrices = (
        (label, weight)
        for label, weight in model.named_parameters()
        if weight.dim() == 2
    )
    for label, weight in matrices:
        print(label, weight.size())
# List the 2-D weight matrices of the fitted model.
print_tensor(rtf.model)

transformer.wte.weight torch.Size([154, 512])
transformer.wpe.weight torch.Size([1024, 512])
transformer.h.0.attn.c_attn.weight torch.Size([512, 1536])
transformer.h.0.attn.c_proj.weight torch.Size([512, 512])
transformer.h.0.mlp.c_fc.weight torch.Size([512, 2048])
transformer.h.0.mlp.c_proj.weight torch.Size([2048, 512])
transformer.h.1.attn.c_attn.weight torch.Size([512, 1536])
transformer.h.1.attn.c_proj.weight torch.Size([512, 512])
transformer.h.1.mlp.c_fc.weight torch.Size([512, 2048])
transformer.h.1.mlp.c_proj.weight torch.Size([2048, 512])
transformer.h.2.attn.c_attn.weight torch.Size([512, 1536])
transformer.h.2.attn.c_proj.weight torch.Size([512, 512])
transformer.h.2.mlp.c_fc.weight torch.Size([512, 2048])
transformer.h.2.mlp.c_proj.weight torch.Size([2048, 512])
transformer.h.3.attn.c_attn.weight torch.Size([512, 1536])
transformer.h.3.attn.c_proj.weight torch.Size([512, 512])
transformer.h.3.mlp.c_fc.weight torch.Size([512, 2048])
transformer.h.3.mlp.c_proj.weight torch.S

In [28]:
# Ratio of c_attn's second dimension (1536) to the embedding width (512), both
# taken from the parameter shapes printed above.
# NOTE(review): presumably the factor of 3 reflects a fused query/key/value
# projection — confirm against the GPT-2 implementation.
1536/512

3.0