In [None]:
import cloudberry.api as cb

In [None]:
# Define Cloudberry configuration; the same config object is passed to every API client below
cb_port = 9000
cb_config = cb.CloudberryConfig(f'http://localhost:{cb_port}')

# Initialize selected API(s)
cb_data = cb.Data(cb_config)
cb_analytics = cb.Analytics(cb_config)
cb_anomalies = cb.Anomalies(cb_config)
cb_buckets = cb.Buckets(cb_config)
# Metadata client and the three sub-APIs obtained from it (experiment / configuration / computation)
cb_meta = cb.Metadata(cb_config)
cb_meta_experiment = cb_meta.experiment_api()
cb_meta_experiment_configuration = cb_meta.experiment_configuration_api()
cb_meta_experiment_computation = cb_meta.experiment_computation_api()
cb_query = cb.Query(cb_config)
# File uploaders: one for AGE log files, one for CSV files
cb_uploader = cb.AgeFileUploader(cb_config)
cb_csv_uploader = cb.CsvFileUploader(cb_config)
cb_api_configuration = cb.ApiConfiguration(cb_config)
# NOTE(review): 'cb_deleteion' is a misspelling of "deletion"; the deletion cells at the end of
# the notebook use this exact name, so any rename has to be done there as well
cb_deleteion = cb.Deletion(cb_config)

In [None]:
# cb.Data

import math
import datetime

def get_current_timestamp():
    """Return the current Unix time as a whole number of seconds.

    A timezone-aware UTC datetime is used on purpose: ``.timestamp()`` treats a
    naive datetime (such as the result of ``utcnow()``) as local time, which
    shifts the value by the machine's UTC offset on any non-UTC host.

    Returns:
        int: seconds since the epoch, truncated towards zero.
    """
    return math.trunc(datetime.datetime.now(datetime.timezone.utc).timestamp())

#computation = cb_meta_experiment_computation.find_all()[0]

current_time = get_current_timestamp()

# Four demo points, one second apart; each (origin, version) pair appears exactly once
data_points = [
    cb.DataPoint(time=current_time + offset, fields={'version': version}, tags={'origin': origin, 'type': 'api_demo'})
    for offset, (origin, version) in enumerate([
        ('jupyter_notebook', 0.1),
        ('jupyter_notebook', 0.2),
        ('notebook', 0.1),
        ('notebook', 0.2),
    ])
]
# Optional extra point bound to a computation (disabled; needs `computation` to be defined first):
#data_points.append(cb.DataPoint(time=current_time + 4, fields={'version': 0.7}, tags={'another': 'notebook'}, computation=computation))

# Store the points explicitly and print what save_data returned
result = cb_data.save_data(data_points)
print(result)

# Read points back: filter by tag value, by field value, then by both at once
by_origin = cb.DataFilters(tags={'origin': 'notebook'})
result = cb_data.get_data(by_origin)
print(len(result.data))

by_version = cb.DataFilters(fields={'version': 0.1})
result = cb_data.get_data(by_version)
print(len(result.data))

by_origin_and_version = cb.DataFilters(tags={'origin': 'notebook'}, fields={'version': 0.1})
result = cb_data.get_data(by_origin_and_version)
print(len(result.data), result.data)

# *New* - get points with computation param (disabled; needs `computation`)
#result1 = cb_data.get_data(cb.DataFilters(tags={'another': 'notebook'}, computation=computation))
#result2 = cb_data.get_data(cb.DataFilters(tags={'another': 'notebook', 'computationId': computation.computation_id_hex}))
#print("Queries are equal: " + str(result1.data == result2.data))

# *New* - filter on the presence of tags, whatever their values
with_origin = cb.DataFilters(tags_presence=['origin'])
result = cb_data.get_data(with_origin)
print(len(result.data))

with_origin_and_halo = cb.DataFilters(tags_presence=['origin', 'halo'])
result = cb_data.get_data(with_origin_and_halo)
print(len(result.data))


# Delete data points
# result = cb_data.get_data(cb.DataFilters(tags={'type': 'api_demo'}))
# print('before: ', len(result.data))

# result = cb_data.delete_data(cb.DataFilters(tags={'type': 'api_demo'}))
# print(result)

# result = cb_data.get_data(cb.DataFilters(tags={'type': 'api_demo'}))
# print('after: ', len(result.data))

In [None]:
# cb.Buckets

bucket_name = 'wiadro_0'

# Bucket names before anything is changed
buckets_names = cb_buckets.get_buckets_names()
print(buckets_names)

# Create the bucket, then delete it; after each step print the step's result and the bucket names
for bucket_action in (cb_buckets.create_bucket, cb_buckets.delete_bucket):
    result = bucket_action(bucket_name)
    print(result)

    buckets_names = cb_buckets.get_buckets_names()
    print(buckets_names)

In [None]:
# cb.Query

# Raw query text sent to query_series: keeps the _value/_time columns of the AVERAGE_FITNESS field
flux_query = """
from(bucket:"cloudberry_logs")
|> range(start: 0)
|> filter(fn: (r) => r._field == "AVERAGE_FITNESS")
|> keep(columns: ["_value", "_time"])
"""
result = cb_query.query_series(flux_query)

# Last expression of the cell, so the notebook displays it
result.as_data_frame

In [None]:
# cb.AgeFileUploader

# Accumulates the computations returned by the AGE log uploads in this notebook
computations = []

age_log_file_path = './data/emas-20190412T120536.log'
age_log_experiment_name = 'EMAS_AGE'
# Header-key mapping handed to AgeUploadDetails
age_log_headers_keys = {"[WH]": "[W]", "[SH]": "[S]", "[BH]": "[B]"}

age_upload_details = cb.AgeUploadDetails(headers_keys=age_log_headers_keys)
computation = cb_uploader.upload_file(
    file_path=age_log_file_path,
    experiment_name=age_log_experiment_name,
    details=age_upload_details,
)
computations.append(computation)

In [None]:
# Display the list of computations collected so far
computations

In [None]:
# cb.Analytics

# Upload more data to Cloudberry
# NOTE(review): the last path is the same log file that the cb.AgeFileUploader cell already uploaded
emas_files = [
    "./data/emas-20190411T232808.log",
    "./data/emas-20190411T234810.log",
    "./data/emas-20190412T000813.log",
    "./data/emas-20190412T120536.log",
]
for emas_file in emas_files:
    # Fresh upload details per file, same experiment name and header mapping as before
    upload_details = cb.AgeUploadDetails(headers_keys=age_log_headers_keys)
    computation = cb_uploader.upload_file(
        file_path=emas_file,
        experiment_name=age_log_experiment_name,
        details=upload_details,
    )
    computations.append(computation)

In [None]:
# Field names reused by the analytics cells that follow
best_fitness_field = 'BEST_SOLUTION_SO_FAR'
fitness_field = 'AVERAGE_FITNESS'

# Use case 1. Compare an explicit list of computations
series = cb_analytics.compare_computations(computations=computations, field_name=fitness_field)

cb.DataSeriesPlots.compare(
    series=series,
    x_field='_time',
    y_field=fitness_field,
    title='Average fitness comparison',
)

In [None]:
# With new cloudberry.plots module
import cloudberry.plots as cbp
# Start from the library's default plot properties and customise them for the fitness plot
plot_properties = cbp.PlotProperties.default()
plot_properties.default_series_kind = cbp.PlotSeriesKind.LINE
# Title spelling fixed: it previously read 'comparision'
plot_properties.title = f'{fitness_field} comparison'
plot_properties.x_axis_name = 'Time'
plot_properties.y_axis_name = fitness_field

# Converters to plots model
def to_plot_series(data_series, y_field, x_field="_time"):
    """Wrap one data series in a ``cbp.PlotSeries``.

    The plot series takes its name from ``data_series.series_name`` and its
    data from ``data_series.as_data_frame``. ``x_field`` and ``y_field`` are
    passed through unchanged and ``y_err_field`` is left as ``None``.
    """
    plot_series_kwargs = {
        'name': data_series.series_name,
        'data': data_series.as_data_frame,
        'x_field': x_field,
        'y_field': y_field,
        'y_err_field': None,
    }
    return cbp.PlotSeries(**plot_series_kwargs)

def to_plot_series_multi(data_series_multi, y_field, x_field="_time"):
    """Convert every series in ``data_series_multi`` with ``to_plot_series``.

    Returns a list, in iteration order, where each element was built with the
    same ``y_field`` and ``x_field``.
    """
    return [to_plot_series(single_series, y_field, x_field) for single_series in data_series_multi]

In [None]:
plot_series = to_plot_series_multi(series, fitness_field)
builder = cbp.PlotBuilder(plot_properties)

# The series named 'AVG' goes through add_avg_series; every other series is added with add_series
for s in plot_series:
    add_to_builder = builder.add_avg_series if s.name == 'AVG' else builder.add_series
    add_to_builder(s)

In [None]:
# Adding trend lines
# 1) a KNN10 trend attached to the first plotted series
trend_series = plot_series[0].name
knn_trend = cbp.PlotlyTrendLine(trend_series, cbp.PlotlyTrendLineKind.KNN10)
builder.add_trend(f'{trend_series} - trend', knn_trend)

# 2) two constant lines attached to the 'AVG' series
for bound_label, bound_value in (('Lower bound', 4.55), ('Upper bound', 4.65)):
    builder.add_trend(bound_label, cbp.PlotlyTrendLine('AVG', cbp.PlotlyTrendLineKind.CONST, bound_value))

builder.plot()

In [None]:
# Exporting plot to file
# The legend is switched off on the shared properties object before the plot is built again
plot_properties.show_legend = False
plot_to_export = builder.plot()

# PNG is the default format
png_exporter = cbp.PlotlyExporter(plot_to_export)
png_exporter.write_image("demo_plot.png", scale=4)

# Other formats are supported through image_format
svg_exporter = cbp.PlotlyExporter(plot_to_export)
svg_exporter.write_image("demo_plot.svg", scale=4, image_format="svg")

In [None]:
# Display the first element of `series` as a data frame
series[0].as_data_frame

In [None]:
# Ask for the 5 best computations (goal MAX, kind FINAL_VALUE) of the first configuration
# returned by find_all(), judged on the best-fitness field
first_configuration = cb_meta_experiment_configuration.find_all()[0]
best_n_serie = cb_analytics.best_n_computations_for_configuration(
    n=5,
    field_name=best_fitness_field,
    configuration=first_configuration,
    goal=cb.OptimizationGoal.MAX,
    kind=cb.OptimizationKind.FINAL_VALUE,
)

In [None]:
# Display the first element of `best_n_serie` as a data frame
best_n_serie[0].as_data_frame

In [None]:
# Same comparison as for the average fitness, this time on the best-solution field
series = cb_analytics.compare_computations(computations=computations, field_name=best_fitness_field)

cb.DataSeriesPlots.compare(
    series=series,
    x_field='_time',
    y_field=best_fitness_field,
    title='Best solution so far comparison',
)

In [None]:
# Use case 2. Compare all computations for a given configuration
# The configuration used is the first one found for the AGE log experiment name
matching_configurations = cb_meta_experiment_configuration.find_by_experiment_name(age_log_experiment_name)
configuration = matching_configurations[0]

series = cb_analytics.compare_computations_for_configuration(
    configuration=configuration,
    field_name=fitness_field,
)

cb.DataSeriesPlots.compare(
    series=series,
    x_field='_time',
    y_field=fitness_field,
    title='Average fitness comparison',
)

In [None]:
# Another feature: append data from CSV
# - the configuration must be passed explicitly because the CSV does not contain the experiment configuration
age_csv_file_path = './data/emas_1.csv'

csv_upload_details = cb.CsvUploadDetails(
    tags_names=['WORKPLACE_ID'],
    configuration=configuration,
    computation=None,  # None for new computation
)
new_computation = cb_csv_uploader.upload_file(
    file_path=age_csv_file_path,
    experiment_name=age_log_experiment_name,
    details=csv_upload_details,
)

In [None]:
# Use case 3. Compare N configurations
# Here: at most the first three configurations found for the AGE log experiment name
experiment_configurations = cb_meta_experiment_configuration.find_by_experiment_name(age_log_experiment_name)
configurations = experiment_configurations[:3]

series = cb_analytics.compare_configurations(configurations=configurations, field_name=fitness_field)

cb.DataSeriesPlots.compare(
    series=series,
    x_field='_time',
    y_field=fitness_field,
    title='Average fitness comparison',
)

In [None]:
# Use case 4. Compare all configurations for experiment
# Only the experiment name is given; no explicit configuration list is passed
series = cb_analytics.compare_configurations_for_experiment(
    experiment_name=age_log_experiment_name, field_name=fitness_field
)

cb.DataSeriesPlots.compare(
    series=series,
    x_field='_time',
    y_field=fitness_field,
    title='Average fitness comparison',
)

In [None]:
# Use case 5.1 Get best computations (optimization) by their final values
# n=2, goal MAX, kind FINAL_VALUE
series = cb_analytics.best_n_computations(
    n=2, field_name='BEST_SOLUTION_SO_FAR', goal=cb.OptimizationGoal.MAX, kind=cb.OptimizationKind.FINAL_VALUE
)

cb.DataSeriesPlots.compare(
    series=series,
    x_field='_time',
    y_field='BEST_SOLUTION_SO_FAR',
    title='Best final solution',
)


In [None]:
# Use case 5.2 Get best computations (optimization) by convergence (area under curve / integral)
# n=2, goal MAX, kind AREA_UNDER_CURVE
series = cb_analytics.best_n_computations(
    n=2, field_name='AVERAGE_FITNESS', goal=cb.OptimizationGoal.MAX, kind=cb.OptimizationKind.AREA_UNDER_CURVE
)

cb.DataSeriesPlots.compare(
    series=series,
    x_field='_time',
    y_field='AVERAGE_FITNESS',
    title='Max fitness (AUC)',
)


In [None]:
# Get mean and stddev for given computations
# The aggregation is requested with interval=10 and time_unit=DAYS
field_name = 'AVERAGE_FITNESS'

series = cb_analytics.avg_and_stddev_for_computations(
    computations=computations, field_name=field_name, interval=10, time_unit=cb.TimeUnit.DAYS
)

In [None]:
# Display `series` as a data frame
series.as_data_frame

In [None]:
# cb.DataSeriesPlots.compare(series=[series],
#                            x_field='_time',
#                            y_field='AVG',
#                            yerr_field='STDDEV',
#                            title='Average + Standard Deviation')

In [None]:
# (Issue #40) Use case 6.1. Get the computations whose values reached the given thresholds (over or under)
# Criteria mode can be one of the following: ANY, AVERAGE, FINAL
average_thresholds = cb.Thresholds(lower=4.6045)
series = cb_analytics.thresholds_exceeding_computations(
    field_name='AVERAGE_FITNESS',
    criteria_mode=cb.CriteriaMode.AVERAGE,
    thresholds=average_thresholds,
)

# Name of the first returned series
first_series = series[0]
print(first_series.series_name)

cb.DataSeriesPlots.compare(
    series=series,
    x_field='_time',
    y_field='AVERAGE_FITNESS',
    title='Series with max fitness over threshold',
)

In [None]:
# Use case 6.2. Analyze computations for possible anomalies.
reports = cb_anomalies.get_reports(field_name='AVERAGE_FITNESS', computations=computations)

# Dump the attributes of the first report
first_report = reports[0]
print(vars(first_report))

In [None]:
# CRUD operations on metadata (experiments, experiment configurations, experiment computations)

In [None]:
# experiment

In [None]:
# Find the experiment with this name, or create it with the given parameters
created_experiment = cb_meta_experiment.find_or_create(
    'experiment name',
    parameters={'important': 'very'},
)
print(created_experiment)

In [None]:
# Rename the experiment and pass an extra parameter, with override_params=False
new_name = 'updated namee'
cb_meta_experiment.update(
    experiment=created_experiment,
    name=new_name,
    parameters={'color': 'red'},
    override_params=False,
)

In [None]:
# Display what find_all() returns for experiments (presumably every stored experiment)
cb_meta_experiment.find_all()

In [None]:
# Look the experiment up under the name assigned by the update above
cb_meta_experiment.find_by_name(new_name)

In [None]:
# experiment configuration

In [None]:
# Find or create a configuration for the experiment created earlier
configuration_file_name = "configuration file name"
created_configuration = cb_meta_experiment_configuration.find_or_create(
    created_experiment,
    configuration_file_name,
    {"param1": "value"},
)
print(created_configuration)

In [None]:
# Look configurations up by the configuration file name used when creating one above
cb_meta_experiment_configuration.find_by_configuration_file_name(configuration_file_name)

In [None]:
# Look configurations up by experiment name, using the experiment's updated name
cb_meta_experiment_configuration.find_by_experiment_name(new_name)

In [None]:
# Change the configuration's file name and pass an extra parameter, with override_params=False
new_file_name = "new config file name"
cb_meta_experiment_configuration.update(
    configuration=created_configuration,
    configuration_file_name=new_file_name,
    parameters={"additionalParam": True},
    override_params=False,
)

In [None]:
# experiment computation

In [None]:
# Create a computation for the configuration created earlier
cb_meta_experiment_computation.create(created_configuration)

In [None]:
# Look computations up by the configuration created earlier
cb_meta_experiment_computation.find_by_configuration(created_configuration)

In [None]:
# (#47) - ApiConfiguration API

property_key = cb.ApiPropertiesIndex.OVERRIDDEN_DEFAULT_BUCKET_NAME

# First set the property, then delete it; after each change read the property back and print it
property_changes = (
    lambda: cb_api_configuration.set_property(property_key, "cloudberry_logs2"),
    lambda: cb_api_configuration.delete_property(property_key),
)
for apply_change in property_changes:
    apply_change()
    property_value = cb_api_configuration.get_property(property_key)
    print(property_value)


In [None]:
# At most the first five configurations returned by find_all()
configs = cb_meta_experiment_configuration.find_all()[:5]

# Positional arguments, in order: 2, the field name, the configurations, the goal, the kind
best_configurations = cb_analytics.best_n_configurations(
    2, 'BEST_SOLUTION_SO_FAR', configs, cb.OptimizationGoal.MIN, cb.OptimizationKind.AREA_UNDER_CURVE
)

In [None]:
# Thresholds per configuration - compute the mean series for each configuration and check
# whether overall it exceeds the given threshold(s)
field = 'AVERAGE_FITNESS'
configuration_thresholds = cb.Thresholds(upper=2.6)
series = cb_analytics.thresholds_exceeding_configurations(
    field_name=field,
    criteria_mode=cb.CriteriaMode.FINAL,
    thresholds=configuration_thresholds,
    configurations=configs,
)

In [None]:
# Plot the series currently held in `series` for the selected field
cb.DataSeriesPlots.compare(
    series=series,
    x_field='_time',
    y_field=field,
    title='Series with max fitness over threshold',
)

In [None]:
# NOTE(review): this cell repeats the plot of the preceding cell verbatim
cb.DataSeriesPlots.compare(
    series=series,
    x_field='_time',
    y_field=field,
    title='Series with max fitness over threshold',
)

In [None]:
# NOTE(review): this cell repeats the plot of the preceding cell verbatim
cb.DataSeriesPlots.compare(
    series=series,
    x_field='_time',
    y_field=field,
    title='Series with max fitness over threshold',
)

In [None]:
# NOTE(review): this cell repeats the plot of the preceding cell verbatim
cb.DataSeriesPlots.compare(
    series=series,
    x_field='_time',
    y_field=field,
    title='Series with max fitness over threshold',
)

In [None]:
# Delete data from both the metadata store and Influx

In [None]:
# Delete the first computation returned by find_all()
all_computations = cb_meta_experiment_computation.find_all()
some_comp = all_computations[0]
cb_deleteion.delete_computations([some_comp])

In [None]:
# Delete the first configuration returned by find_all()
all_configurations = cb_meta_experiment_configuration.find_all()
some_config = all_configurations[0]
cb_deleteion.delete_configurations([some_config])

In [None]:
# Delete the first experiment returned by find_all()
all_experiments = cb_meta_experiment.find_all()
some_exp = all_experiments[0]
cb_deleteion.delete_experiments([some_exp])