In [66]:
%load_ext autoreload
%autoreload 2
import os
import pandas as pd
import pickle
from utils.simulator import FdaSimulator

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [67]:
# Relative folders the notebook reads from ("data/input") and writes to ("data/output")
input_data_path, output_data_path = (
    os.path.join("data", subfolder) for subfolder in ("input", "output")
)

In [68]:
# Load the scenario table from the input folder; the simulation loop below
# iterates over its rows.
df_scenarios = pd.read_csv(
    filepath_or_buffer=os.path.join(input_data_path, "scenarios_test.csv"),
)

In [69]:
# Global settings shared by every call to the simulator
num_simulations = 10          # number of datasets drawn per scenario
n_basis_simulated_data = 5    # forwarded to simulate() under the same name
sd_x_serie = 0.01             # forwarded to simulate() as `sd_x`

# `cnt` is a common scale factor; alpha_p and beta_p are forwarded to
# simulate() as `alpha_param` and `beta_param`.
cnt = 30
alpha_p, beta_p = cnt * 1, cnt * 3

positions = [0.15, 0.35, 0.55, 0.85]

# Forwarded to simulate() as the Brownian intercept / slope
intercept_brownian, slope_brownian = 0, 1

In [70]:
# Build the simulator with its default configuration
fda_simulator = FdaSimulator()

# One column label per abscissa point exposed by the simulator: "t_<point>"
times = [f"t_{point!s}" for point in fda_simulator.abscissa_points]

In [71]:
# For every scenario row, draw `num_simulations` datasets with the simulator
# and persist each draw (covariates, target and `col_indexes_bct`) to disk.
# NOTE(review): no random seed is set in the visible cells — confirm whether
# FdaSimulator seeds itself if the generated files must be reproducible.
for _, scenario in df_scenarios.iterrows():
    # NOTE(review): iterrows() yields each row as a Series, so column dtypes
    # are not guaranteed to be preserved (e.g. an integer `sample_size` may
    # arrive as a float) — confirm simulate() accepts the values as read.
    scenario_id = scenario["scenario_id"]
    type_covariate = scenario["type_covariate"]
    type_transformation = scenario["type_transformation"]
    eta = scenario["eta"]
    sample_size = scenario["sample_size"]

    # One output folder per scenario.
    # NOTE(review): the prefix is spelled "senario_" (sic). It is kept as-is
    # because files may already exist under that name — confirm before renaming.
    output_dir = os.path.join(output_data_path, f"senario_{scenario_id}")
    # exist_ok=True makes the creation idempotent and removes the
    # check-then-create race of os.path.exists() followed by os.makedirs().
    os.makedirs(output_dir, exist_ok=True)

    for i_sim in range(num_simulations):
        # Simulate one dataset. Only X, target and col_indexes_bct are written
        # to disk; phi_X, epsilon and beta_data are returned but not persisted.
        X, phi_X, epsilon, beta_data, col_indexes_bct, target = fda_simulator.simulate(
            type_covariate=type_covariate,
            type_transformation=type_transformation,
            sample_size=sample_size,
            eta=eta,
            # Series representation
            n_basis_simulated_data=n_basis_simulated_data,
            sd_x=sd_x_serie,
            # Beta parameters
            alpha_param=alpha_p,
            beta_param=beta_p,
            # Brownian parameters
            intercept_brownian=intercept_brownian,
            slope_brownian=slope_brownian,
            positions=positions
        )

        # Wrap X and the target in pandas objects so they can be written as CSV
        df_X = pd.DataFrame(
            data=X,
            columns=times
        )

        df_target = pd.DataFrame(
            data=target,
            columns=["target"]
        )

        # Store the data: two CSV files plus one pickle per simulation index
        X_file = os.path.join(output_dir, f"X_sim_{i_sim}.csv")
        target_file = os.path.join(output_dir, f"target_sim_{i_sim}.csv")
        col_indexes_bct_file = os.path.join(output_dir, f"col_indexes_bct_{i_sim}.pkl")
        df_X.to_csv(X_file, index=False)
        df_target.to_csv(target_file, index=False)
        with open(col_indexes_bct_file, 'wb') as f:
            pickle.dump(col_indexes_bct, f)