In [1]:
import sys
import os
import numpy as np

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
from vae_dataset_generator import initial_population
from utils import load_data
from tree_encoder import JakowskiEncoder

<h1> VAE dataset generator</h1>

<b>initial_population</b> genera il dataset per allenare il VAE.
<p>I parametri sono:</p>
<ul>
    <li><b>X:</b> features utilizzate per allenare i DTs
    <li><b>y:</b> target utilizzato per allenare i DTs
    <li><b>max_depth:</b> profondità massima dei DTs
    <li><b>train_size:</b> dimensioni del training set
    <li><b>test_size:</b> dimensioni del test set
    <li><b>mode:</b> se "RF" i DTs vengono generatri da un RandomForest; se "DT" i DTs vengono generati da un ensemble di DecisionTree ("RF" default)
    <li><b>encoder:</b> JakowskiEncoder
    <li><b>same_depth:</b> se True, i DTs avranno la medesima profondità massima, altrimenti diversa (significativo per mode uguale a "DT")
</ul>

In [4]:
dataset = "iris"
depth = 4

# Load data
X_train, X_val, X_test, y_train, y_val, y_test = load_data(dataset)

n_features_in = X_train.shape[1]
n_classes = len(np.unique(y_train))

# Linear encoding
encoder = JakowskiEncoder(n_features=n_features_in, n_classes=n_classes)

# VAE dataset
#[train, test, train_nodes, train_leaves, train_depths, encoding_time]
result = initial_population(
                            X=X_train,
                            y=y_train, 
                            mode="RF",
                            max_depth=depth,
                            train_size=10000, 
                            test_size=5000, 
                            encoder=encoder
                           )

In [5]:
result[0] # vae train

array([[[ 3.        ,  3.        ,  3.        , ..., -1.        ,
         -1.        ,  0.        ],
        [ 0.28070176,  0.28070176,  0.65789473, ...,  2.        ,
          3.        ,  0.        ]],

       [[ 4.        ,  4.        ,  4.        , ..., -1.        ,
         -1.        ,  0.        ],
        [ 0.29166666,  0.29166666,  0.6875    , ...,  3.        ,
          3.        ,  0.        ]],

       [[ 3.        ,  3.        ,  1.        , ..., -1.        ,
         -1.        ,  0.        ],
        [ 0.28070176,  0.28070176,  0.5138889 , ...,  3.        ,
          3.        ,  0.        ]],

       ...,

       [[ 4.        ,  4.        ,  3.        , ..., -1.        ,
         -1.        ,  0.        ],
        [ 0.25      ,  0.25      ,  0.71052635, ...,  3.        ,
          3.        ,  0.        ]],

       [[ 1.        ,  2.        ,  1.        , ..., -1.        ,
         -1.        ,  0.        ],
        [ 0.3472222 ,  0.35416666,  0.5694444 , ...,  3.     

In [6]:
# Profondità degli alberi del training set del VAE
result[4]

[4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 3,
 4,
 4,
 4,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 4,
 3,
 4,
 4,
 4,
 4,
 3,
 4,
 2,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 4,
 4,
 4,
 4,
 3,
 3,
 4,
 3,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 4,
 3,
 4,
 4,
 4,
 4,
 3,
 4,
 4,
 3,
 4,
 4,
 3,
 4,
 4,
 4,
 2,
 4,
 4,
 4,
 4,
 3,
 4,
 4,
 4,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 2,
 4,
 3,
 4,
 4,
 2,
 4,
 4,
 4,
 3,
 4,
 3,
 3,
 2,
 4,
 4,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 2,
 4,
 3,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 2,
