In [1]:

from dynapipe.utilis_func import pipeline_splitting_rule, update_parameters,reset_parameters
# Reset the stored hyper-parameter grids, then narrow each classifier's grid
# to a single candidate per hyper-parameter.
reset_parameters()

# Keys are the estimator names handed to update_parameters; insertion order
# (and keyword order inside each grid) is the order the updates are applied.
cls_grid_overrides = {
    "mlp": dict(hidden_layer_sizes=[10], activation=["relu"], learning_rate=["constant"], solver=["sgd"]),
    "svm": dict(C=[0.1], kernel=["linear"]),
    "ada": dict(n_estimators=[50], learning_rate=[1]),
    "rf": dict(n_estimators=[50], max_depth=[2]),
    "gb": dict(n_estimators=[50], max_depth=[2], learning_rate=[1]),
    "xgb": dict(n_estimators=[50], max_depth=[2], learning_rate=[1]),
}
for estimator, grid in cls_grid_overrides.items():
    update_parameters(mode="cls", estimator_name=estimator, **grid)

from dynapipe.autoPipe import autoPipe
import pandas as pd
from dynapipe.funcPP import PPtools
from dynapipe.autoPP import dynaPreprocessing

from dynapipe.autoFS import dynaFS_clf
from dynapipe.autoCV import evaluate_model,dynaClassifier,dynaRegressor
# Load the input table from a CSV path that is relative to the current working directory.
df = pd.read_csv('./data/preprocessing/breast_cancer.csv')
# Preprocessing options handed to dynaPreprocessing; every value is a list of
# candidates for that option.
custom_parameters = dict(
    scaler=["None", "standard"],
    # threshold number of category dimensions
    encode_band=[10],
    # encoders for low-dimension categorical columns
    low_encode=["onehot", "label"],
    # encoders for high-dimension categorical columns
    high_encode=["frequency", "mean"],
    winsorizer=[(0.05, 0.05)],
    sparsity=[0.75],
    cols=[50],
)

Done with the parameters reset.
Previous Parameters are: {'hidden_layer_sizes': [10, 50, 100], 'activation': ['identity', 'relu', 'tanh', 'logistic'], 'learning_rate': ['constant', 'invscaling', 'adaptive'], 'solver': ['lbfgs', 'sgd', 'adam']}
Current Parameters are updated as: {'hidden_layer_sizes': [10], 'activation': ['relu'], 'learning_rate': ['constant'], 'solver': ['sgd']}
Done with the parameters update.
Previous Parameters are: {'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 'C': [0.1, 1, 10]}
Current Parameters are updated as: {'C': [0.1], 'kernel': ['linear']}
Done with the parameters update.
Previous Parameters are: {'n_estimators': [50, 100, 150], 'learning_rate': [0.1, 1, 10, 100]}
Current Parameters are updated as: {'n_estimators': [50], 'learning_rate': [1]}
Done with the parameters update.
Previous Parameters are: {'n_estimators': [5, 50, 250], 'max_depth': [2, 4, 8, 16, 32]}
Current Parameters are updated as: {'n_estimators': [50], 'max_depth': [2]}
Done with the para

In [2]:
# Assemble the flow as an ordered list of (step name, step object) pairs.
pipe = autoPipe(
    [
        (
            "autoPP",
            dynaPreprocessing(
                custom_parameters=custom_parameters,
                label_col='diagnosis',
                model_type="cls",
            ),
        ),
        (
            "datasets_splitting",
            pipeline_splitting_rule(val_size=0.2, test_size=0.2, random_state=13),
        ),
        (
            "autoFS",
            dynaFS_clf(
                fs_num=5,
                random_state=13,
                cv=5,
                in_pipeline=True,
                input_from_file=False,
            ),
        ),
        (
            "autoCV",
            dynaClassifier(
                random_state=13,
                cv_num=5,
                in_pipeline=True,
                input_from_file=False,
            ),
        ),
        ("model_evaluate", evaluate_model(model_type="cls")),
    ]
)

In [3]:
from IPython.core.interactiveshell import InteractiveShell
# Have IPython display the value of every top-level expression in a cell,
# not only the last one.
InteractiveShell.ast_node_interactivity = "all"
# Render DataFrames without truncating columns, rows, or cell contents.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# None is the supported "unlimited width" value; the former -1 sentinel was
# deprecated in pandas 1.0 and is rejected by later releases.
pd.set_option('display.max_colwidth', None)

# Run the assembled pipeline on df; fit returns five objects, unpacked here in order.
DICT_PREPROCESSING,DICT_FEATURE_SELECTION,DICT_MODELS_EVALUATION,DICT_DATA,dyna_report= pipe.fit(df)

Now in Progress - autoFS & autoCV Iteration: Estimate about 0.0 minutes left  [####################] 100.0%
The top 5 Models with Best Performance Metrics:
      Dataset Model_Name  \
29  Dataset_4  svm         
34  Dataset_4  xgb         
27  Dataset_3  xgb         
36  Dataset_5  svm         
50  Dataset_7  svm         

                                                                         Best_Parameters  \
29  [('C', 0.1), ('kernel', 'linear')]                                                     
34  [('learning_rate', 1), ('max_depth', 2), ('n_estimators', 50), ('random_state', 13)]   
27  [('learning_rate', 1), ('max_depth', 2), ('n_estimators', 50), ('random_state', 13)]   
36  [('C', 0.1), ('kernel', 'linear')]                                                     
50  [('C', 0.1), ('kernel', 'linear')]                                                     

    Accuracy  Precision  Recall  Latency  
29  0.930     0.889      0.96    3.0      
34  0.912     0.955      0.84    2.8

In [4]:

# Display the model evaluation entry stored under the key 'Dataset_0'.
DICT_MODELS_EVALUATION['Dataset_0']

Unnamed: 0,Model_Name,Accuracy,Precision,Recall,Latency,Best_Parameters,Dataset
0,lgr,0.842,0.833,0.8,5.7,"[('C', 10), ('random_state', 13)]",Dataset_0
0,svm,0.842,0.808,0.84,0.0,"[('C', 0.1), ('kernel', 'linear')]",Dataset_0
0,mlp,0.561,0.0,0.0,3.1,"[('activation', 'relu'), ('hidden_layer_sizes', (10,)), ('learning_rate', 'constant'), ('random_state', 13), ('solver', 'sgd')]",Dataset_0
0,rf,0.877,0.821,0.92,12.9,"[('max_depth', 2), ('n_estimators', 50), ('random_state', 13)]",Dataset_0
0,ada,0.86,0.84,0.84,20.1,"[('learning_rate', 1), ('n_estimators', 50), ('random_state', 13)]",Dataset_0
0,gb,0.842,0.833,0.8,4.0,"[('learning_rate', 1), ('max_depth', 2), ('n_estimators', 50), ('random_state', 13)]",Dataset_0
0,xgb,0.86,0.815,0.88,3.4,"[('learning_rate', 1), ('max_depth', 2), ('n_estimators', 50), ('random_state', 13)]",Dataset_0


In [5]:

# Display the report object returned as the fifth value of pipe.fit(df).
dyna_report

Unnamed: 0,Dataset,Model_Name,Best_Parameters,Accuracy,Precision,Recall,Latency
1,Dataset_0,svm,"[('C', 0.1), ('kernel', 'linear')]",0.93,0.889,0.96,3.0
6,Dataset_0,xgb,"[('learning_rate', 1), ('max_depth', 2), ('n_estimators', 50), ('random_state', 13)]",0.912,0.955,0.84,2.0
13,Dataset_1,xgb,"[('learning_rate', 1), ('max_depth', 2), ('n_estimators', 50), ('random_state', 13)]",0.912,0.955,0.84,2.0
27,Dataset_3,xgb,"[('learning_rate', 1), ('max_depth', 2), ('n_estimators', 50), ('random_state', 13)]",0.912,0.955,0.84,2.0
41,Dataset_5,xgb,"[('learning_rate', 1), ('max_depth', 2), ('n_estimators', 50), ('random_state', 13)]",0.912,0.955,0.84,2.0
50,Dataset_7,svm,"[('C', 0.1), ('kernel', 'linear')]",0.912,0.917,0.88,3.0
26,Dataset_3,gb,"[('learning_rate', 1), ('max_depth', 2), ('n_estimators', 50), ('random_state', 13)]",0.895,0.913,0.84,2.0
40,Dataset_5,gb,"[('learning_rate', 1), ('max_depth', 2), ('n_estimators', 50), ('random_state', 13)]",0.895,0.913,0.84,2.0
5,Dataset_0,gb,"[('learning_rate', 1), ('max_depth', 2), ('n_estimators', 50), ('random_state', 13)]",0.895,0.913,0.84,3.0
7,Dataset_1,lgr,"[('C', 0.1), ('random_state', 13)]",0.895,0.913,0.84,3.0


In [20]:
# Preview the first 10 rows of the "X" frame under 'DICT_Test' for 'Dataset_0'
# (presumably the held-out test features — confirm against the pipeline).
DICT_DATA['Dataset_0']['DICT_Test']["X"].head(10)

Unnamed: 0,concavity_mean,perimeter_mean,radius_mean,concave points_mean,texture_mean
264,0.09061,111.6,17.19,0.06527,22.07
231,0.01633,71.76,11.32,0.006588,26.6
197,0.1103,117.4,18.08,0.05778,21.84
172,0.2032,102.5,15.46,0.1097,13.04
54,0.05253,97.26,15.1,0.03334,22.02
33,0.1657,127.9,19.27,0.07593,26.47
68,0.2508,60.73,9.72,0.04375,17.33
237,0.09042,132.5,20.48,0.06022,21.46
51,0.01857,87.21,13.64,0.01723,16.34
196,0.1385,90.63,13.77,0.06526,22.29


In [10]:
# Display the preprocessing entry recorded under the key 'Dataset_0'.
DICT_PREPROCESSING['Dataset_0']

"winsor_0-Scaler_None-- Encoded Features:['diagnosis', 'Size_3', 'area_mean', 'compactness_mean', 'concave points_mean', 'concavity_mean', 'fractal_dimension_mean', 'perimeter_mean', 'radius_mean', 'smoothness_mean', 'symmetry_mean', 'texture_mean', 'onehot_Age_20-29', 'onehot_Age_30-39', 'onehot_Age_40-49', 'onehot_Age_50-59', 'onehot_Age_60-69', 'onehot_Age_70-79', 'Label_Position_1', 'onehot_Position_2_NaN', 'onehot_Position_2_central', 'onehot_Position_2_left_low', 'onehot_Position_2_left_up', 'onehot_Position_2_right_low', 'onehot_Position_2_right_up', 'Frequency_Size_1', 'onehot_Size_2_0-2', 'onehot_Size_2_12-14', 'onehot_Size_2_15-17', 'onehot_Size_2_24-26', 'onehot_Size_2_3-5', 'onehot_Size_2_6-8', 'onehot_Size_2_9-11', 'Label_Treatment', 'onehot_Type_1_ge40', 'onehot_Type_1_lt40', 'onehot_Type_1_premeno', 'onehot_Type_2_NaN', 'onehot_Type_2_no', 'onehot_Type_2_yes', 'Label_Type_3']"

In [5]:
# NOTE(review): `autoFlow` is not assigned in any cell visible here, so this
# lookup appears to rely on leftover kernel state and would raise NameError
# after Restart & Run All — confirm where it is defined.
autoFlow['Dataset_9']

Unnamed: 0,Model_Name,Accuracy,Precision,Recall,Latency,Best_Parameters,Dataset
0,lgr,0.877,0.875,0.84,4.0,"[('learning_rate', 1), ('max_depth', 2), ('n_e...",Dataset_9
0,svm,0.877,0.875,0.84,4.0,"[('learning_rate', 1), ('max_depth', 2), ('n_e...",Dataset_9
0,mlp,0.439,0.439,1.0,4.0,"[('learning_rate', 1), ('max_depth', 2), ('n_e...",Dataset_9
0,rf,0.93,0.92,0.92,12.1,"[('learning_rate', 1), ('max_depth', 2), ('n_e...",Dataset_9
0,ada,0.86,0.84,0.84,16.7,"[('learning_rate', 1), ('max_depth', 2), ('n_e...",Dataset_9
0,gb,0.86,0.84,0.84,4.0,"[('learning_rate', 1), ('max_depth', 2), ('n_e...",Dataset_9
0,xgb,0.895,0.852,0.92,2.0,"[('learning_rate', 1), ('max_depth', 2), ('n_e...",Dataset_9


In [21]:
# Sample parameter mapping, viewed as a list of (name, value) pairs.
a = dict(learning_rate=1, max_depth=2, n_estimators=50, random_state=13)
lis = a.items()
list(lis)

[('learning_rate', 1),
 ('max_depth', 2),
 ('n_estimators', 50),
 ('random_state', 13)]

In [23]:
# Store the (name, value) pairs from `lis` as their string representation.
# NOTE(review): `total_flow` is not created in any cell visible here — confirm where it is defined.
total_flow['performance'] = str(list(lis))