In [1]:
# Modify sys.path
# Make the parent of the current working directory importable; presumably this
# is the project root that holds the `config` and `src` packages imported below.
import os
import sys

project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
# Only append when missing so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

In [None]:
import numpy as np
import pandas as pd
import pickle
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.neural_network import MLPRegressor
import random
import config.config as config
from src.data_processing import read_arff, preprocess_data
from src.utils import generate_random_attribute_combinations
from src.evaluation import create_surrogate_model_dataset

In [3]:
# Load dataset: read the ARFF file under ../data and run the project's
# preprocessing step on it.
DATA_PATH = os.path.join('..', 'data', config.DATASET_NAME)

dataset = read_arff(DATA_PATH)
df_dict = preprocess_data(dataset)

# Both entries unpack into six objects: presumably features (X) and targets (Y)
# for the train / validation / test splits - confirm against preprocess_data.
(
    train_X_timeseries, train_Y_timeseries,
    val_X_timeseries, val_Y_timeseries,
    test_X_timeseries, test_Y_timeseries,
) = df_dict['timeseries']
(
    train_X, train_Y,
    val_X, val_Y,
    test_X, test_Y,
) = df_dict['normalized']

# Creation of the dataset for the surrogate model

In [None]:
# Draw config.N_RANDOM_COMBINATIONS random attribute combinations over
# config.N_ATTRIB attributes ...
random_combinations = generate_random_attribute_combinations(
    config.N_ATTRIB,
    config.N_RANDOM_COMBINATIONS,
)
# ... and hand them, together with the normalized splits, to the helper that
# builds the surrogate model's dataset.
surrogate_dataset = create_surrogate_model_dataset(
    random_combinations,
    train_X, train_Y,
    val_X, val_Y,
    test_X, test_Y,
)

In [None]:
# Save dataset
surrogate_dataset_path = f'../variables/{config.DATASET_SAVE_NAME}-surrogate-dataset.pickle'
# Create the target directory up front so the save cannot fail (and discard the
# dataset that was just computed) when ../variables does not exist yet.
os.makedirs(os.path.dirname(surrogate_dataset_path), exist_ok=True)
surrogate_dataset.to_pickle(surrogate_dataset_path)

# Creation of the surrogate model

In [7]:
# Load surrogate dataset
# The file is written with `.to_pickle`, so read it back with the matching
# pandas reader instead of the raw pickle module.
# NOTE: unpickling executes code stored in the file - only load files produced
# by this project.
surrogate_dataset = pd.read_pickle(f'../variables/{config.DATASET_SAVE_NAME}-surrogate-dataset.pickle')

In [8]:
# Features: turn every entry of the 'Attributes' column into a single-row array,
# then stack them into a (number of rows, config.N_ATTRIB) matrix.
attribute_rows = [attributes.reshape(1, -1) for attributes in surrogate_dataset['Attributes']]
n_rows = len(surrogate_dataset)
surrogate_dataset_X = np.asanyarray(attribute_rows).reshape(n_rows, config.N_ATTRIB)

# Targets: the 'H Val' column as a NumPy array.
surrogate_dataset_Y = surrogate_dataset['H Val'].to_numpy()

## Random Forest

In [9]:
# Train one RandomForestRegressor per seed and pickle every fitted model.
rf_model_prefix = f'../models/{config.DATASET_SAVE_NAME}-surrogate-RF-'
# Ensure the output directory exists before fitting, so a missing ../models
# folder cannot throw away a finished fit at save time.
os.makedirs(os.path.dirname(rf_model_prefix), exist_ok=True)

for seed in range(config.N_SEEDS):
    surrogate = RandomForestRegressor(random_state=seed)
    surrogate.fit(surrogate_dataset_X, surrogate_dataset_Y)

    # Save surrogate model (kept as a one-element list, the layout already on disk)
    with open(f'{rf_model_prefix}{seed}.pickle', 'wb') as f:
        pickle.dump([surrogate], f)

## SGDR

In [11]:
# Train one SGDRegressor per seed and pickle every fitted model.
sgdr_model_prefix = f'../models/{config.DATASET_SAVE_NAME}-surrogate-SGDR-'
# Ensure the output directory exists before fitting, so a missing ../models
# folder cannot throw away a finished fit at save time.
os.makedirs(os.path.dirname(sgdr_model_prefix), exist_ok=True)

for seed in range(config.N_SEEDS):
    # Seeds the stdlib RNG as well as the estimator.
    # NOTE(review): the estimator already receives random_state=seed; confirm
    # whether anything else relies on the stdlib `random` seed.
    random.seed(seed)
    surrogate = SGDRegressor(random_state=seed)
    surrogate.fit(surrogate_dataset_X, surrogate_dataset_Y)

    # Save surrogate model (kept as a one-element list, the layout already on disk)
    with open(f'{sgdr_model_prefix}{seed}.pickle', 'wb') as f:
        pickle.dump([surrogate], f)

## MLP

In [13]:
# Train one MLPRegressor per seed and pickle every fitted model.
mlp_model_prefix = f'../models/{config.DATASET_SAVE_NAME}-surrogate-MLP-'
# Ensure the output directory exists before fitting, so a missing ../models
# folder cannot throw away a finished fit at save time.
os.makedirs(os.path.dirname(mlp_model_prefix), exist_ok=True)

for seed in range(config.N_SEEDS):
    # Seeds the stdlib RNG as well as the estimator.
    # NOTE(review): the estimator already receives random_state=seed; confirm
    # whether anything else relies on the stdlib `random` seed.
    random.seed(seed)
    surrogate = MLPRegressor(random_state=seed)
    surrogate.fit(surrogate_dataset_X, surrogate_dataset_Y)

    # Save surrogate model (kept as a one-element list, the layout already on disk)
    with open(f'{mlp_model_prefix}{seed}.pickle', 'wb') as f:
        pickle.dump([surrogate], f)