In [1]:
def get_features_and_outputs_from_simulations(output_name, timestamp, test_size=0.1, random_state=42):
    """
    Reads the simulation parameter map and fetches the corresponding output for the specified timestamp
    (per the original author, the timeseries is read from Cassandra).
    Splits the data into train/test sets and returns a c3.Dataset for each one of them.

    The output is obtained by evaluating a simple metric, the average of `output_name`,
    on c3.SimulationSample for the ids of the fetched c3.SimulationModelParameters,
    with both `start` and `end` set to `timestamp`.

    Inputs:
    - str output_name: name of the variable in SimulationModelOutput under investigation
    - str timestamp: timestamp at which to fetch the timeseries, e.g. "2017-08-19T09:00:00.000"
    - test_size: forwarded to sklearn's train_test_split; defaults to 0.1
    - random_state: forwarded to sklearn's train_test_split so that the split is reproducible; defaults to 42

    Returns:
    - c3.Dataset X_train: train set of features
    - c3.Dataset X_test: test set of features
    - c3.Dataset y_train: train set of outputs
    - c3.Dataset y_test: test set of outputs

    """
    import pandas as pd
    from sklearn.model_selection import train_test_split

    # fetch simulation parameters and turn them into a DataFrame
    parameters = c3.SimulationModelParameters.fetch().objs
    df = pd.DataFrame(parameters.toJson())
    simulation_ids = df['id']

    # NOTE(review): the first 5 columns are assumed to be non-feature fields
    # (e.g. id, meta, version, ensemble, ...) - TODO confirm against the toJson() layout
    n_leading_non_feature_columns = 5
    X = df[df.columns[n_leading_non_feature_columns:]]

    # define simple metric
    metric_name = "Average_" + output_name + "_SimulationSample"
    metric_descr = "Calculates average of " + output_name + " for a given set of SimulationSample"
    metric_expr = "avg(avg(normalized.data." + output_name + "))"
    metric = c3.SimpleMetric(
        id=metric_name,
        name=metric_name,
        description=metric_descr,
        srcType="SimulationSample",
        path="output",  # the timeseries is in the output field of SimSam
        expression=metric_expr,
    )

    # define metric spec; start == end, so only the requested timestamp is evaluated
    spec = c3.EvalMetricsSpec(
        ids=simulation_ids,
        expressions=[metric_name],
        start=timestamp,
        end=timestamp,
        interval="SECOND",
    )

    # evaluate metric, cast it to pandas
    evalMetricsResult = c3.SimulationSample.evalMetricsWithMetadata(
        spec=spec,
        overrideMetrics=[metric],
    )
    y = c3.EvalMetricsResult.toPandas(result=evalMetricsResult)

    # split into train/test sets
    # NOTE(review): train_test_split pairs the rows of X and y by position; this assumes
    # the metric result has one row per id, in the same order as X - TODO confirm
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # cast into c3 Datasets
    return (
        c3.Dataset.fromPython(X_train),
        c3.Dataset.fromPython(X_test),
        c3.Dataset.fromPython(y_train),
        c3.Dataset.fromPython(y_test),
    )

In [1]:
# Fetch SimulationModelParameters with no filter arguments and keep the `objs` field of the fetch result.
parameters = c3.SimulationModelParameters.fetch().objs

In [7]:
# Show the fetched collection as the cell output.
parameters

c3.Arry<SimulationModelParameters>([c3.SimulationModelParameters(
  id='EnsNo_1_SimNo_0',
  meta=c3.Meta(
         tenantTagId=151,
         tenant='dev',
         tag='tc02d',
         created=datetime.datetime(2021, 10, 15, 18, 7, 43, tzinfo=datetime.timezone.utc),
         createdBy='provisioner',
         updated=datetime.datetime(2021, 10, 15, 18, 7, 43, tzinfo=datetime.timezone.utc),
         updatedBy='provisioner',
         timestamp=datetime.datetime(2021, 10, 15, 18, 7, 43, tzinfo=datetime.timezone.utc),
         fetchInclude='[]',
         fetchType='SimulationModelParameters'),
  version=1,
  ensemble=c3.SimulationEnsemble(id='1'),
  acure_bl_nuc=0.5,
  acure_ait_width=0.65,
  acure_cloud_ph=0.396,
  acure_carb_ff_ems=1.0,
  acure_carb_ff_ems_eur=0.5,
  acure_carb_ff_ems_nam=0.5,
  acure_carb_ff_ems_chi=0.5,
  acure_carb_ff_ems_asi=0.5,
  acure_carb_ff_ems_mar=0.5,
  acure_carb_ff_ems_r=0.5,
  acure_carb_bb_ems=1.0,
  acure_carb_bb_ems_sam=0.5,
  acure_carb_bb_ems_naf=0.5,


In [8]:
# NOTE(review): this call fails with the TypeError shown in this cell's output
# ("__init__() takes 1 positional argument but 2 were given"), i.e. c3.Tensor does
# not accept `parameters` as a positional argument. Presumably the data has to be
# passed another way (e.g. by keyword) - TODO confirm against the c3.Tensor API.
tensor = c3.Tensor(parameters)

TypeError: __init__() takes 1 positional argument but 2 were given