In [1]:
import subprocess
import pandas as pd
import numpy as np
import requests
import json
import os
import paho.mqtt.client as client
from pathlib import Path
from tqdm import tqdm
import time
import traceback

In [2]:
# --- Backend HTTP endpoints -------------------------------------------------
# NOTE: "execure_url" (sic) is the name run_phase() reads; keep the spelling.
execure_url = "http://0.0.0.0:5000/execute"
fetch_url = "http://0.0.0.0:5000/fetch"

# --- Filesystem locations ---------------------------------------------------
iot_app_path = "/home/asim/takshshila/IOT/IoTApp/app.py"
script_directory = "/home/asim/takshshila/IOT/Datasets/setup-stuff/gateway_and_dataset"
data_directory = "/home/asim/takshshila/IOT/Datasets/DATASETS_CLASSIFICATION/BINARY_PROBLEMS"
result_dir = "/home/asim/takshshila/IOT/Datasets/setup-stuff/gateway_and_dataset/Results_Classification"

# --- Experiment settings (copied into the phase request bodies) -------------
algorithm = "classification"

net_sizes = [4, 5]
# Alternatives tried previously:
#net_sizes = [3, 4, 5]
#net_sizes = [3,4,5,6,7]
# net_sizes = [6,7]
number_of_shuffles = 4
convergence_threshold = 0.05
num_chunks = 5  # size of chunks 50*
data_passes = 400  # number of chunks
chunk_passes = 30  # epochs
learning_rates = [0.1, 0.05, 0.075, 0.008]
# Other learning-rate sets tried previously:
#[0.1, 0.075, 0.009] #[0.05, 0.002,0.0005]#[0.1, 0.075, 0.008]#[0.01, 0.075 ,0.002, 0.0005]#[0.002, 0.005, 0.0008] #[0.1, 0.05, 0.075]
partition_size = 100

# Default session id placed in the request bodies until a prereq run replaces it.
sessionID = "06f0483a-987c-11ed-97d7-ebb1648f0903"
# Earlier session ids: "5ec46db2-92b3-11ed-aaca-51e78ce51e2f", "30929878-9043-11ed-a725-8326e5fa41d3"


In [3]:
# Request body for the "prereq" phase; it is posted through run_phase().
prereqObj = dict(phase="prereq", mode="mqtt", algorithm=algorithm)

In [4]:
# Request body for "phase0". The "session_id" entry is overwritten with the id
# returned by the prereq phase before the body is sent.
phase0Obj = dict(
    phase="phase0",
    num_chunks=num_chunks,
    mode="mqtt",
    algorithm=algorithm,
    session_id=sessionID,
)



In [5]:
# Request body for "phase1", assembled from the configuration constants.
# "session_id" (and, in the classification run, "num_classes") are replaced at
# run time with the values returned by the prereq phase.
phase1Obj = dict(
    phase="phase1",
    data_passes=data_passes,
    net_sizes=net_sizes,
    chunk_passes=chunk_passes,
    num_chunks=num_chunks,
    num_classes=2,
    learning_rates=learning_rates,
    number_of_shuffles=number_of_shuffles,
    num_feature_spaces=1,
    neigh_rate=0.8,
    train_test_split=1,
    phase3_passes=1,
    top_ranks=2,
    partition_size=partition_size,
    neuron_init_criteria="farthest_point",
    algorithm=algorithm,
    mode="mqtt",
    session_id=sessionID,
)

In [6]:
# Request body for "phase2", assembled from the configuration constants.
# "session_id" is replaced at run time with the id returned by the prereq phase.
phase2Obj = {
    "phase": "phase2",
    "data_passes": data_passes,
    "net_sizes": net_sizes,
    "chunk_passes": chunk_passes,
    "num_chunks": num_chunks,
    "learning_rates": learning_rates,
    "convergence_threshold": convergence_threshold,
    "num_feature_spaces": 1,
    "neigh_rate": 0.8,
    "train_test_split": 1,
    "phase3_passes": 1,
    "top_ranks": 100,
    # Use the shared constant (as phase1Obj does) instead of a second hard-coded
    # 100, so changing partition_size cannot leave the phases out of sync.
    "partition_size": partition_size,
    "neuron_init_criteria": "farthest_point",
    "mode": "mqtt",
    "algorithm": algorithm,
    "session_id": sessionID,
}

In [7]:
# Request body for "phase3", assembled from the configuration constants.
# "session_id" is replaced at run time with the id returned by the prereq phase.
phase3Obj = {
    "phase": "phase3",
    "data_passes": data_passes,
    "net_sizes": net_sizes,
    "chunk_passes": chunk_passes,
    "learning_rates": learning_rates,
    "num_chunks": num_chunks,
    "num_feature_spaces": 1,
    "neigh_rate": 0.8,
    "train_test_split": 1,
    "phase3_passes": 1,
    "top_ranks": 2,
    # Use the shared constant (as phase1Obj does) instead of a second hard-coded
    # 100, so changing partition_size cannot leave the phases out of sync.
    "partition_size": partition_size,
    "mode": "mqtt",
    "algorithm": algorithm,
    "session_id": sessionID,
}

In [8]:
def run_phase(algorithm, body):
    """POST ``body`` as JSON to the execute endpoint and return the decoded reply.

    ``algorithm`` is part of the signature but is not read by this function;
    the request content comes entirely from ``body``.
    """
    return requests.post(execure_url, json=body).json()
    

In [9]:
def fetch_results(algorithm, phase, session_id):
    """Ask the fetch endpoint for the results of ``phase`` in ``session_id``.

    Returns the decoded JSON reply of the POST request.
    """
    query = dict(phase=phase, algorithm=algorithm, session_id=session_id)
    response = requests.post(fetch_url, json=query)
    return response.json()

In [10]:
def process_results(data_dir, dataset, seed, short, sep=','):
    """Run one classification experiment and collect its phase-2 results.

    Launches three child processes (``gateway_simulation.py`` and
    ``sonar_stream_V2.py`` from ``script_directory``, plus the backend app at
    ``iot_app_path``), then drives the prereq, phase0, phase1 and phase2
    requests through ``run_phase`` and finally fetches the "phase2" results.

    The module-level request dicts (``phase0Obj``, ``phase1Obj``,
    ``phase2Obj``) are updated in place with the session id and class count
    returned by the prereq phase.

    Args:
        data_dir: Not used by this function; kept so existing calls still work.
        dataset: Dataset name; passed to the streaming script and stored in
            the result.
        seed: Seed; passed (as a string) to the streaming script and stored in
            the result.
        short: Passed to the streaming script as its third argument.
        sep: Passed to the streaming script as its fourth argument.

    Returns:
        A dict of result fields when the fetch reply reports success,
        otherwise an empty dict (also when any step raised an ``Exception``,
        whose traceback is printed).
    """
    # Every handle starts as None so the cleanup in ``finally`` is safe even
    # when one of the launches fails (previously ``backend_proc`` could be
    # unbound in the error path).
    gateway_proc = proc = backend_proc = None
    details = {}
    try:
        gateway_proc = subprocess.Popen(['python', os.path.join(script_directory, 'gateway_simulation.py')])
        proc = subprocess.Popen(['python', os.path.join(script_directory, 'sonar_stream_V2.py'), dataset, str(seed), short, sep])
        backend_proc = subprocess.Popen(['python3', iot_app_path])

        time.sleep(10)
        print("Dataset ### ", dataset)
        print("Prereq phase:::: ")
        prereqResult = run_phase(algorithm, prereqObj)

        print("Prereq result:::: ", prereqResult)
        session_id = prereqResult['body']['session-id']
        numClasses = prereqResult['body']['num_classes']
        time.sleep(10)

        print("Phase 0:::: ")
        phase0Obj['session_id'] = session_id
        run_phase(algorithm, phase0Obj)
        time.sleep(10)

        print("Phase 1:::: ")
        phase1Obj['session_id'] = session_id
        phase1Obj['num_classes'] = numClasses
        run_phase(algorithm, phase1Obj)
        time.sleep(10)

        print("Phase 2:::: ")
        phase2Obj['session_id'] = session_id
        phase2Obj['num_classes'] = numClasses
        run_phase(algorithm, phase2Obj)
        time.sleep(10)

        print("Fetching Results :::: ")
        phase2Result = fetch_results(algorithm, "phase2", session_id)
        if phase2Result['success']:
            body = phase2Result['body']
            # Built in a single expression so a missing key leaves ``details``
            # empty rather than half-filled.
            details = {
                'dataset': dataset,
                'seed': seed,
                'session_Id': session_id,
                'precision': body['precision'],
                'recalls': body['recalls'],
                'f1scores': body['f1scores'],
                'accuracies': body['accuracies'],
                'total_kernel_time': body['kernel_time'],
                'distance_base_map': body['distance_base_map'],
                'radius_map': body['radius_map'],
                'active_centers_dominant_class_count': body['active_centers_dominant_class_count'],
                'active_centers_per_class': body['active_centers_per_class'],
                'active_centers_total_count': body['active_centers_total_count'],
                'active_centers_dominant_class': body['active_centers_dominant_class'],
                'neuron_centers': body['neuron_centers'],
                'feature_order': body['feature_order'],
            }

        time.sleep(10)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so Ctrl+C / kernel
        # interrupt still stops the sweep (after the cleanup below).
        traceback.print_exc()
    finally:
        # Single cleanup path for success, failure and interruption.
        for child in (backend_proc, proc, gateway_proc):
            if child is not None:
                child.kill()

    return details
    

In [None]:
#dataset_dir =r"/home/asim/takshshila/Datasets/DATASETS_CLASSIFICATION/BINARY_PROBLEMS"

# Columns (and their order) written to the per-dataset CSV.
df_column_order = ['dataset','seed','session_Id','feature_order','total_kernel_time','precision','recalls','f1scores','accuracies','distance_base_map','radius_map','active_centers_per_class','active_centers_total_count','active_centers_dominant_class','neuron_centers']

datasets = ["Adult", "Breast Cancer", "Credit Screening", "Ionosphere", "Liver Disorder", "Pima Indian", "Sonar"]
shorts = ["AD", "BC", "CR", "IO", "LD", "PI", "SN"]
header = [False, False, False, False, False, False, False]
seeds = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# datasets = ["Adult"]
# shorts = ["AD"]
# header = [False]
# seeds = [1]

# Filename components. These were previously read from variables that are only
# assigned further down the notebook, so this cell failed with NameError on a
# fresh kernel.
netsizeString = "_".join(str(val) for val in net_sizes)
# The split that is actually sent to the backend in the phase request bodies.
split_label = phase1Obj["train_test_split"]

for dataset, short in zip(datasets, shorts):
    filepath = Path(result_dir, dataset, "classification_{}_{}_{}_{}_{}_{}_{}_{}.csv".format(dataset, data_passes,chunk_passes,netsizeString, split_label, num_chunks,convergence_threshold, str(learning_rates)))
    filepath.parent.mkdir(parents=True, exist_ok=True)

    records = []  # one dict per successful seed of this dataset
    for seed in tqdm(seeds):
        details = process_results(data_directory, dataset, seed, short)
        if details:
            records.append(details)
        else:
            # process_results returns {} when a run failed; skip it instead of
            # letting the column selection below raise.
            print("No results for dataset", dataset, "seed", seed, "- skipped")

        # Rewrite the CSV after every seed so finished runs survive a later
        # failure. DataFrame.append no longer exists in pandas >= 2.0, so the
        # frame is rebuilt from the collected records; reindex() keeps the
        # column order and tolerates an empty record list.
        df = pd.DataFrame(records).reindex(columns=df_column_order)
        df.to_csv(filepath, index=False)
    

  0%|                                                    | 0/10 [00:00<?, ?it/s]

sonar connected
 * Serving Flask app "app" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://0.0.0.0:5000/ (Press CTRL+C to quit)


Dataset ###  Adult
Prereq phase:::: 
connecting to broker
Subscribing to all topics
adding topic to latest_elements 


127.0.0.1 - - [18/Mar/2023 08:44:21] "[37mPOST /execute HTTP/1.1[0m" 200 -


Prereq result::::  {'success': True, 'body': {'session-id': 'bd19fce2-c5a3-11ed-acbb-679ecd411b4c', 'num_classes': 2}}
Phase 0:::: 


127.0.0.1 - - [18/Mar/2023 08:44:41] "[37mPOST /execute HTTP/1.1[0m" 200 -


Phase 1:::: 
API called: /execute


Class: PhaseInjector
Operation: Phase execution injection
Phase map: {'phase': 'prereq', 'mode': 'mqtt', 'algorithm': 'classification'}


Class: PrereqPhase
Operation: Prereq phase construction
Phase configs: {'phase': 'prereq', 'mode': 'mqtt', 'algorithm': 'classification'}


Class: PrereqPhase
Operation: Execution


Operation: Data stream injection
Mode: mqtt


Class: MQTT
Operation: Constructing MQTT client and fetching data
Fetch configs: {'mode': 'mqtt', 'client_name': 'ZCU1', 'client_topic': 'from/zcu', 'subscription': 'from/gateway', 'message': 'send_train_data', 'num_chunks': 2}


Class: MQTTMeta
Operation: MQTT client construction
Client topic: from/zcu
Subscription Topic: from/gateway


Connected to MQTT Broker! send_train_data
API called: /execute


Class: PhaseInjector
Operation: Phase execution injection
Phase map: {'phase': 'phase0', 'num_chunks': 5, 'mode': 'mqtt', 'algorithm': 'classification', 'session_id': 'bd19fce2-c5a3-11ed-acbb-6

Class: Mediator
Operation: Configuring and executing Mediator


Class: DataNormalization
Operation: Configuring and executing chunk normalization


Class: DataNormalizationMeta
Operation: Data normalization object construction


Operation: Data stream injection
Mode: mqtt


Class: MQTT
Operation: Constructing MQTT client and fetching data
Fetch configs: {'operation': 'fetch_data', 'normalizer_task': 'normalize', 'algorithm': 'classification', 'feature_normalizer': MinMaxScaler(), 'target_normalizer': None, 'mode': 'mqtt', 'client_name': 'ZCU', 'client_topic': 'from/zcu', 'subscription': 'from/gateway', 'message': 'send_train_data', 'num_chunks': 5, 'target_to_id': {' <=50K': 0, ' >50K': 1}}


Class: DataNormalizationMeta
Operation: normalizing chunk


Class: NeuronTrainingMeta
Operation: Neuron training kernel execution


Class: Mediator
Operation: Configuring and executing Mediator


Class: DataNormalization
Operation: Configuring and executing chunk normalization


Class: DataNormali

## Test Running only phase 3

In [None]:
def process_results(data_dir, dataset, seed, sessionID, split):
    """Run prereq through phase 3 for one dataset/seed and collect the phase-3 results.

    Launches ``gateway_simulation.py`` and ``ECG_Stream_V2.py`` from
    ``data_dir`` as child processes, drives the prereq, phase0, phase1, phase2
    and phase3 requests through ``run_phase`` and then fetches the "phase3"
    results. The backend app is not started here (that launch is commented
    out), so it presumably has to be running already.

    NOTE: this definition replaces the earlier ``process_results`` in this
    notebook and has a different signature.

    The module-level request dicts (``phase0Obj`` ... ``phase3Obj``) are
    updated in place with the session id returned by the prereq phase.

    Args:
        data_dir: Directory containing the two scripts that are launched.
        dataset: Dataset name; passed to the streaming script and stored in
            the result.
        seed: Seed; passed (as a string) to the streaming script and stored in
            the result.
        sessionID: Ignored on input; it is overwritten with the session id
            returned by the prereq phase.
        split: Passed unchanged to the streaming script as a command-line
            argument, so it must be a string.

    Returns:
        A dict of result fields when the fetch reply reports success,
        otherwise an empty dict (also when any step raised an ``Exception``,
        whose traceback is printed).
    """
    proc, gateway_proc, details = None, None, {}
    try:
        gateway_proc = subprocess.Popen(['python', os.path.join(data_dir, 'gateway_simulation.py')])
        proc = subprocess.Popen(['python', os.path.join(data_dir, 'ECG_Stream_V2.py'), dataset, str(seed), split])

#         backend_proc = subprocess.Popen(['python3', iot_app_path])

        time.sleep(10)

        print("Prereq phase:::: ")
        prereqResult = run_phase(algorithm, prereqObj)
        print("Prereq result:::: ", prereqResult)
        sessionID = prereqResult['body']['session-id']
        time.sleep(10)

        # Each phase gets the fresh session id, is executed, and is followed
        # by the same 10 s pause as before.
        for label, phase_body in (
            ("Phase 0:::: ", phase0Obj),
            ("Phase 1:::: ", phase1Obj),
            ("Phase 2:::: ", phase2Obj),
            ("Phase 3:::: ", phase3Obj),
        ):
            print(label)
            phase_body['session_id'] = sessionID
            run_phase(algorithm, phase_body)
            time.sleep(10)

        print("Fetching Results :::: ", sessionID)
        phase3Result = fetch_results(algorithm, "phase3", sessionID)
        if phase3Result['success']:
            body = phase3Result['body']
            # NOTE(review): other cells in this notebook read the kernel time
            # from body['kernel_time']; this one reads 'total_kernel_time'.
            # Confirm which key the backend actually returns.
            details = {
                'dataset': dataset,
                'seed': seed,
                'session_Id': sessionID,
                'train_rmse': body['train_error'],
                'valid_rmse': body['validation_error'],
                'test_rmse': body['test_error'],
                'best_score': body['best_score'],
                'scores': body['scores'],
                'val_rmses': body['val_rmses'],
                'train_rmses': body['train_rmses'],
                'best_val_lr': body['best_val_lr'],
                'best_fs_num_features': body['best_fs_num_features'],
                'best_model_active_linear_weights': body['best_model_active_linear_weights'],
                'best_model_active_gaussian_weights': body['best_model_active_gaussian_weights'],
                'bound_hitting_gaussian_weights': body['bound_hitting_gaussian_weights'],
                'neuron_centers': body['neuron_centers'],
                'neuron_activity': body['neuron_activity'],
                'radius_map': body['radius_map'],
                'targets': body['targets'],
                'predictions': body['predictions'],
                'total_kernel_time': body['total_kernel_time'],
                'total_host_time': body['total_host_time'],
                'phase1_kernel_time': body['phase1_kernel_time'],
                'phase1_host_time': body['phase1_host_time'],
                'phase2_kernel_time': body['phase2_kernel_time'],
                'phase2_host_time': body['phase2_host_time'],
                'phase3_kernel_time': body['phase3_kernel_time'],
                'phase3_host_time': body['phase3_host_time'],
                'best_features': body['best_feature_list'],
                'best_feature_names': body['best_features_names'],
            }

        time.sleep(20)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so Ctrl+C / kernel
        # interrupt still stops the sweep (after the cleanup below).
        traceback.print_exc()
    finally:
        # Single cleanup path for success, failure and interruption.
        for child in (proc, gateway_proc):
            if child is not None:
                child.kill()

    return details


In [None]:
# Columns (and their order) written to the CSV.
df_column_order = ['dataset','seed','session_Id','total_kernel_time','total_host_time','best_features','best_feature_names','train_rmse','valid_rmse','test_rmse','best_score','best_val_lr','best_fs_num_features','best_model_active_linear_weights','best_model_active_gaussian_weights','bound_hitting_gaussian_weights','scores','val_rmses','train_rmses','phase1_host_time','phase2_kernel_time','phase2_host_time','phase3_kernel_time','phase3_host_time','phase1_kernel_time', 'neuron_centers', 'neuron_activity', 'radius_map', 'targets', 'predictions']

datasets = ["Facebook_data"] #,"dengue_features"
# sessionIDs = ["8233474a-9bf6-11ed-bedf-c31d2beceb91","78cfaabc-9bf7-11ed-bedf-c31d2beceb91","71599a3a-9bf8-11ed-bedf-c31d2beceb91","6b3a526a-9bf9-11ed-bedf-c31d2beceb91",
#               "62183e12-9bfa-11ed-bedf-c31d2beceb91","6745e76c-9bfb-11ed-bedf-c31d2beceb91","676b4bdc-9bfc-11ed-bedf-c31d2beceb91","5f363ae8-9bfd-11ed-bedf-c31d2beceb91",
#               "5bcc2eca-9bfe-11ed-bedf-c31d2beceb91","5c3392bc-9bff-11ed-bedf-c31d2beceb91"]

sessionIDs = ["62183e12-9bfa-11ed-bedf-c31d2beceb91"]

seeds = [1, 50, 100, 150,200, 250, 300, 350, 400, 450]
# seeds = [200]
netsizeString = "_".join([str(val) for val in net_sizes])
split = "0.9"

# Results accumulate across all datasets, as before.
records = []
df = pd.DataFrame(columns=df_column_order)
for dataset in datasets:
    filepath = Path(result_dir, dataset, "Test_{}_{}_{}_{}_{}_.csv".format(dataset, data_passes,chunk_passes,netsizeString, split))
    filepath.parent.mkdir(parents=True, exist_ok=True)

    # zip() stops at the shorter list: with a single entry in sessionIDs only
    # the first seed is run. list() lets tqdm show a total.
    for seed, session in tqdm(list(zip(seeds, sessionIDs))):
        result = process_results(data_directory, dataset, seed, session, split)
        if result:
            records.append(result)
        else:
            # process_results returns {} when a run failed; skip it instead of
            # letting the column selection below raise.
            print("No results for dataset", dataset, "seed", seed, "- skipped")

        # DataFrame.append no longer exists in pandas >= 2.0, so the frame is
        # rebuilt from the collected records; reindex() keeps the column order
        # and tolerates an empty record list.
        df = pd.DataFrame(records).reindex(columns=df_column_order)
        df.to_csv(filepath, index=False)

In [None]:
# Fetch the phase-3 results of the current session. The call previously passed
# the literal string "sessionId" rather than the sessionID variable.
phase3Result = fetch_results(algorithm, "phase3", sessionID)

## Testing

In [None]:
def storingPhase3(sessionId, dataset, seed):
    """Fetch the stored phase-3 results of ``sessionId`` as a flat record.

    Args:
        sessionId: Session whose "phase3" results are requested through
            ``fetch_results``.
        dataset: Dataset name stored in the record.
        seed: Seed stored in the record.

    Returns:
        A dict of result fields when the fetch reply reports success,
        otherwise an empty dict (previously ``None`` was returned implicitly).
    """
    phase3Result = fetch_results(algorithm, "phase3", sessionId)
    if not phase3Result['success']:
        return {}

    body = phase3Result['body']
    return {
        'dataset': dataset,
        'seed': seed,
        # 'session' is the key this function has always returned; 'session_Id'
        # is the name the df_column_order lists in this notebook select, so
        # the id is exposed under both.
        'session': sessionId,
        'session_Id': sessionId,
        'train_rmse': body['train_error'],
        'valid_rmse': body['validation_error'],
        'test_rmse': body['test_error'],
        'best_score': body['best_score'],
        'scores': body['scores'],
        'val_rmses': body['val_rmses'],
        'train_rmses': body['train_rmses'],
        'best_val_lr': body['best_val_lr'],
        'best_fs_num_features': body['best_fs_num_features'],
        'best_model_active_linear_weights': body['best_model_active_linear_weights'],
        'best_model_active_gaussian_weights': body['best_model_active_gaussian_weights'],
        'bound_hitting_gaussian_weights': body['bound_hitting_gaussian_weights'],
        'neuron_centers': body['neuron_centers'],
        'neuron_activity': body['neuron_activity'],
        'radius_map': body['radius_map'],
        'targets': body['targets'],
        'predictions': body['predictions'],
        'total_kernel_time': body['kernel_time'],
        'total_host_time': body['total_host_time'],
        'phase1_kernel_time': body['phase1_kernel_time'],
        'phase1_host_time': body['phase1_host_time'],
        'phase2_kernel_time': body['phase2_kernel_time'],
        'phase2_host_time': body['phase2_host_time'],
        'phase3_kernel_time': body['phase3_kernel_time'],
        'phase3_host_time': body['phase3_host_time'],
        'best_feature_order': body['best_feature_order'],
        'best_feature_list': body['best_feature_list'],
    }
    

In [None]:
# Columns (and their order) written to the CSV.
df_column_order = ['dataset','seed','session_Id','total_kernel_time','total_host_time','best_feature_order','best_feature_list','train_rmse','valid_rmse','test_rmse','best_score','best_val_lr','best_fs_num_features','best_model_active_linear_weights','best_model_active_gaussian_weights','bound_hitting_gaussian_weights','scores','val_rmses','train_rmses','phase1_host_time','phase2_kernel_time','phase2_host_time','phase3_kernel_time','phase3_host_time','phase1_kernel_time', 'neuron_centers', 'neuron_activity', 'radius_map', 'targets', 'predictions']

dataset = "Insurance"
seeds = [50, 100]
sessionIds = ["140c95c6-9789-11ed-893b-4f1e32c0f7e9", "2f522840-978a-11ed-893b-4f1e32c0f7e9"]

netsizeString = "_".join([str(val) for val in net_sizes])
filepath = Path(result_dir, dataset, "{}_{}_.csv".format(dataset, netsizeString))
filepath.parent.mkdir(parents=True, exist_ok=True)

records = []
for seed, sessionId in zip(seeds, sessionIds):
    details = storingPhase3(sessionId, dataset, seed)
    if details:
        record = dict(details)
        # Accept either spelling of the session key so the 'session_Id'
        # column is always filled.
        record.setdefault('session_Id', record.get('session'))
        records.append(record)
    else:
        # An unsuccessful fetch yields no record; skip it rather than crash.
        print("No phase3 results for session", sessionId, "- skipped")

    # DataFrame.append no longer exists in pandas >= 2.0, so the frame is
    # rebuilt from the collected records; reindex() keeps the column order and
    # tolerates an empty record list.
    df = pd.DataFrame(records).reindex(columns=df_column_order)
    df.to_csv(filepath, index=False)

In [None]:
# Manual step: run the prereq phase and keep the session id from its reply
# in the module-level sessionID used by the cells below.
prereqResult = run_phase(algorithm, prereqObj)
print("Prereq result:: ",prereqResult)
sessionID = prereqResult['body']['session-id']

In [None]:
# Manual step: run phase 0 for the current sessionID, then pause for 10 s.
print("Phase 0 ")
phase0Obj['session_id'] = sessionID
phase0Result = run_phase(algorithm, phase0Obj)
time.sleep(10)

In [None]:
# Display the session id currently in use.
sessionID

In [None]:
# Manual step: run phase 1 for the current sessionID, then pause for 10 s.
print("Phase :1")
phase1Obj['session_id'] = sessionID
phase1Result =run_phase(algorithm, phase1Obj)
time.sleep(10)

In [None]:
# Manual step: run phase 2 for the current sessionID, then pause for 10 s.
print("Phase 2 ")
phase2Obj['session_id'] = sessionID
phase2Result =run_phase(algorithm, phase2Obj)
time.sleep(10)

In [None]:
# prereqResult = run_phase(algorithm, prereqObj)
# print("Prereq result:: ",prereqResult)
# sessionID = prereqResult['body']['session-id']
# time.sleep(10)
# print("Phase 0 ")
# phase0Obj['session_id'] = sessionID
# phase0Result = run_phase(algorithm, phase0Obj)
# time.sleep(10)

# print("Phase :1")
# phase1Obj['session_id'] = sessionID
# phase1Result =run_phase(algorithm, phase1Obj)
# time.sleep(10)
#sessionID="04228570-92cb-11ed-8131-97f9b566c791"

# print("Phase 2 ")
# phase2Obj['session_id'] = sessionID
# phase2Result =run_phase(algorithm, phase2Obj)
# time.sleep(10)

# print("Phase 3 ")
# phase3Obj['session_id'] = sessionID
# phase3Result = run_phase(algorithm, phase3Obj)
# time.sleep(10)

# print("Training Finished !!!")
# phase3Result=fetch_results(algorithm, "phase3", sessionID)

### Fetch Testing

In [None]:
# Fetch the phase-3 results of the current session and save them as a one-row
# CSV. The dataset name and seed are fixed here; the filename previously used
# `dataset` and `s`, which are only set by other cells (and `s` not before
# this one).
fetch_dataset = 'Instant_Liking'
fetch_seed = 50  # dummy

phase3Result = fetch_results(algorithm, "phase3", sessionID)
if phase3Result['success']:
    phase3_body = phase3Result['body']
    details = {
        'dataset': fetch_dataset,
        'seed': fetch_seed,
        # Stored as 'session_Id' (was 'session') so it matches the column
        # list below; selecting 'session_Id' used to raise KeyError.
        'session_Id': sessionID,
        'train_rmse': phase3_body['train_error'],
        'valid_rmse': phase3_body['validation_error'],
        'test_rmse': phase3_body['test_error'],
        'best_score': phase3_body['best_score'],
        'scores': phase3_body['scores'],
        'val_rmses': phase3_body['val_rmses'],
        'train_rmses': phase3_body['train_rmses'],
        'best_val_lr': phase3_body['best_val_lr'],
        'best_fs_num_features': phase3_body['best_fs_num_features'],
        'best_model_active_linear_weights': phase3_body['best_model_active_linear_weights'],
        'best_model_active_gaussian_weights': phase3_body['best_model_active_gaussian_weights'],
        'bound_hitting_gaussian_weights': phase3_body['bound_hitting_gaussian_weights'],
        'neuron_centers': phase3_body['neuron_centers'],
        'neuron_activity': phase3_body['neuron_activity'],
        'radius_map': phase3_body['radius_map'],
        'targets': phase3_body['targets'],
        'predictions': phase3_body['predictions'],
        'total_kernel_time': phase3_body['kernel_time'],
        'total_host_time': phase3_body['total_host_time'],
        'phase1_kernel_time': phase3_body['phase1_kernel_time'],
        'phase1_host_time': phase3_body['phase1_host_time'],
        'phase2_kernel_time': phase3_body['phase2_kernel_time'],
        'phase2_host_time': phase3_body['phase2_host_time'],
        'phase3_kernel_time': phase3_body['phase3_kernel_time'],
        'phase3_host_time': phase3_body['phase3_host_time'],
        'best_feature_order': phase3_body['best_feature_order'],
        'best_feature_list': phase3_body['best_feature_list'],
    }

    df_column_order = ['dataset','seed','session_Id','total_kernel_time','total_host_time','best_feature_order','best_feature_list','train_rmse','valid_rmse','test_rmse','best_score','best_val_lr','best_fs_num_features','best_model_active_linear_weights','best_model_active_gaussian_weights','bound_hitting_gaussian_weights','scores','val_rmses','train_rmses','phase1_host_time','phase2_kernel_time','phase2_host_time','phase3_kernel_time','phase3_host_time','phase1_kernel_time', 'neuron_centers', 'neuron_activity', 'radius_map', 'targets', 'predictions']
    netsizeString = "_".join([str(val) for val in net_sizes])

    # DataFrame.append no longer exists in pandas >= 2.0; build the one-row
    # frame directly from the record.
    df = pd.DataFrame([details])
    filepath = Path(result_dir, fetch_dataset, "Epochs_{}_seed_{}_{}_.csv".format(fetch_dataset, fetch_seed, netsizeString))
    filepath.parent.mkdir(parents=True, exist_ok=True)
    df[df_column_order].to_csv(filepath, index=False)


In [None]:
# Inspect which fields the last fetched phase-3 reply contains.
phase3Result['body'].keys()

In [None]:
# Columns (and their order) written to the CSV.
df_column_order = ['dataset','seed','session_Id','total_kernel_time','total_host_time','best_feature_order','best_feature_list','train_rmse','valid_rmse','test_rmse','best_score','best_val_lr','best_fs_num_features','best_model_active_linear_weights','best_model_active_gaussian_weights','bound_hitting_gaussian_weights','scores','val_rmses','train_rmses','phase1_host_time','phase2_kernel_time','phase2_host_time','phase3_kernel_time','phase3_host_time','phase1_kernel_time', 'neuron_centers', 'neuron_activity', 'radius_map', 'targets', 'predictions']

# Save the `details` record produced by an earlier fetch cell as a one-row CSV.
dataset ="Instant_Liking"
# Filename label only. It used to be assigned to `net_sizes`, which replaced
# the configuration list with a string for every cell run afterwards.
netsize_label = str(4)
seed = str(50)

record = dict(details)
# Accept either spelling of the session key so the 'session_Id' column
# selected below always exists.
record.setdefault('session_Id', record.get('session'))

# DataFrame.append no longer exists in pandas >= 2.0; build the one-row frame
# directly from the record.
df = pd.DataFrame([record])
filepath = Path(result_dir, dataset, "Epochs_{}_seed_{}_{}_.csv".format(dataset, seed, netsize_label))
# Without this, to_csv fails when the dataset's result directory does not exist yet.
filepath.parent.mkdir(parents=True, exist_ok=True)
df[df_column_order].to_csv(filepath, index=False)

In [None]:
# Columns (and their order) written to the CSV.
df_column_order = ['dataset','seed','session_Id','total_kernel_time','total_host_time','best_feature_order','best_feature_list','train_rmse','valid_rmse','test_rmse','best_score','best_val_lr','best_fs_num_features','best_model_active_linear_weights','best_model_active_gaussian_weights','bound_hitting_gaussian_weights','scores','val_rmses','train_rmses','phase1_host_time','phase2_kernel_time','phase2_host_time','phase3_kernel_time','phase3_host_time','phase1_kernel_time', 'neuron_centers', 'neuron_activity', 'radius_map', 'targets', 'predictions']

# Save the `details` record produced by an earlier fetch cell as a one-row CSV.
dataset ="XOR_noiseless"
# Filename label only. It used to be assigned to `seeds`, replacing the seed
# list with an int.
seed_label = 34

record = dict(details)
# Accept either spelling of the session key so the 'session_Id' column
# selected below always exists.
record.setdefault('session_Id', record.get('session'))

# DataFrame.append no longer exists in pandas >= 2.0; build the one-row frame
# directly from the record.
df = pd.DataFrame([record])
filepath = Path(result_dir, dataset, "Epochs_{}_seed_{}_.csv".format(dataset, seed_label))
filepath.parent.mkdir(parents=True, exist_ok=True)
# df.to_csv(filepath, index=False)
df[df_column_order].to_csv(filepath, index=False)

In [None]:
# Display the last fetched phase-3 reply in full.
phase3Result

#### Actual Training

In [None]:

#df_column_order = ['dataset','seed','kernel_time','train_rmse','valid_rmse','test_rmse','best_score','best_val_lr','best_fs_num_features','best_model_active_linear_weights','best_model_active_gaussian_weights','bound_hitting_gaussian_weights','scores','val_rmses','train_rmses', 'neuron_centers', 'neuron_activity', 'radius_map', 'targets', 'predictions']
# Columns (and their order) written to the CSV. The feature columns are named
# 'best_features' / 'best_feature_names' because those are the keys returned by
# the phase-3 process_results(data_dir, dataset, seed, sessionID, split) called
# below; the previous names ('best_feature_order' / 'best_feature_list') are
# not in its result.
df_column_order = ['dataset','seed','session_Id','total_kernel_time','total_host_time','best_features','best_feature_names','train_rmse','valid_rmse','test_rmse','best_score','best_val_lr','best_fs_num_features','best_model_active_linear_weights','best_model_active_gaussian_weights','bound_hitting_gaussian_weights','scores','val_rmses','train_rmses','phase1_host_time','phase2_kernel_time','phase2_host_time','phase3_kernel_time','phase3_host_time','phase1_kernel_time', 'neuron_centers', 'neuron_activity', 'radius_map', 'targets', 'predictions']

# dataset, s = "Facebook", 50
#     # "Telecom_data", # -> done
#     # "yearMSD_new", # -> done
#     # "arrhythmia", # -> done
# #     # "Big_mart_sales",
#     # "blogData", # -> done
#     # "communities", # -> done
#     # "dengue_features", # -> done
#     # "ECG0_p02",
#     # "ENERGY_DATA_COMPLETE" # -> done
# ]
# seeds = [50]
datasets = ["House_Price_Adv_Regression", "Telecom_data", "Insurance","OnlineNewsPopularity"]
seeds = [50,100]
netsizeString = "_".join([str(val) for val in net_sizes])
# Train/test split label handed to the streaming script; same value as in the
# phase-3 test cell. NOTE(review): confirm this is the split intended here.
split = "0.9"

# Results accumulate across all datasets and seeds, as before, so each file
# holds every run finished so far.
records = []
df = pd.DataFrame(columns=df_column_order)
for dataset in datasets:
    for s in tqdm(seeds):
        # The call used to pass four arguments (a hard-coded seed of 50 and
        # the DataFrame), which does not match the five-parameter signature.
        # The session argument is a placeholder: process_results overwrites it
        # with the id returned by the prereq phase.
        result = process_results(data_directory, dataset, s, None, split)
        if result:
            records.append(result)
        else:
            # process_results returns {} when a run failed; skip it instead of
            # letting the column selection below raise.
            print("No results for dataset", dataset, "seed", s, "- skipped")

        # DataFrame.append no longer exists in pandas >= 2.0, so the frame is
        # rebuilt from the collected records; reindex() keeps the column order
        # and tolerates an empty record list.
        df = pd.DataFrame(records).reindex(columns=df_column_order)
        filepath = Path(result_dir, dataset, "Epochs_{}_seed_{}_{}_.csv".format(dataset, s, netsizeString))
        filepath.parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(filepath, index=False)

In [None]:
df_column_order = ['dataset','seed','session_Id','total_kernel_time','total_host_time','best_feature_order','best_feature_list','train_rmse','valid_rmse','test_rmse','best_score','best_val_lr','best_fs_num_features','best_model_active_linear_weights','best_model_active_gaussian_weights','bound_hitting_gaussian_weights','scores','val_rmses','train_rmses','phase1_host_time','phase2_kernel_time','phase2_host_time','phase3_kernel_time','phase3_host_time','phase1_kernel_time', 'neuron_centers', 'neuron_activity', 'radius_map', 'targets', 'predictions']
# datasets = [
# #     "Facebook_data", # -> done
#     "Features_TestSet", # -> done
#     "House_Price_Adv_Regression", # -> done
#     "Instant_Liking",
#     "Insurance", # -> done
# #     # "Isolet" need to redo cuz of some error in RF,
#     # "new_data_trans", # -> done
#     # "OnlineNewsPopularity", # -> done
#     # "ParkinsonData", # -> done
#     # "Sberbank_Russian_Housing_Market", # -> done
#     # "slice_localization_data", # -> done
#     # "Telecom_data", # -> done
#     # "yearMSD_new", # -> done
#     # "arrhythmia", # -> done
# #     # "Big_mart_sales",
#     # "blogData", # -> done
#     # "communities", # -> done
#     # "dengue_features", # -> done
#     # "ECG0_p02",
#     # "ENERGY_DATA_COMPLETE" # -> done
# ]
# seeds = [50]
datasets = ["House_Price_Adv_Regression", "Instant_Liking", "Insurance","OnlineNewsPopularity"]
seeds = [50,100]
# Previously iterated over `net`, which is not defined anywhere in this
# notebook (NameError); build the label from net_sizes like the other cells.
netsizeString = "_".join([str(val) for val in net_sizes])
# Train/test split label handed to the streaming script; same value as in the
# phase-3 test cell. NOTE(review): confirm this is the split intended here.
split = "0.9"

# Results accumulate across all datasets and seeds, as before, so each file
# holds every run finished so far. All returned columns are written.
records = []
df = pd.DataFrame()
for dataset in datasets:
    for s in tqdm(seeds):
        # The call used to pass four arguments (a hard-coded seed of 50 and
        # the DataFrame), which does not match the five-parameter
        # process_results(data_dir, dataset, seed, sessionID, split). The
        # session argument is a placeholder: process_results overwrites it
        # with the id returned by the prereq phase.
        result = process_results(data_directory, dataset, s, None, split)
        if result:
            records.append(result)
        else:
            # process_results returns {} when a run failed; skip that run.
            print("No results for dataset", dataset, "seed", s, "- skipped")

        # DataFrame.append no longer exists in pandas >= 2.0, so the frame is
        # rebuilt from the collected records.
        df = pd.DataFrame(records)
        filepath = Path(result_dir, dataset, "Epochs_{}_seed_{}_.csv".format(dataset, s ))
        filepath.parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(filepath, index=False)
        #df[df_column_order].to_csv(filepath, index=False)
        #print(result)