In [None]:
import time
from src.method_selector import MlMethodSelector, ClassicMethodSelector
from src.measurement_provider import MeasurementProvider
from src.serializer import SenMLCBORSerializer, SenMLJSONSerializer
from src.signal_generator import SignalGenerator
from sys import getsizeof
from src.metric import FeatureMetricEnum, SimilarityMetricEnum
from src.data_type import Measurement
import pandas as pd
import matplotlib as plt
from src.metric import SimilarityMetricEnum
# Notebook-wide display settings.
# NOTE: `plt` is bound to the top-level `matplotlib` package by the import above
# (not `matplotlib.pyplot`); this sets the "figure.dpi" rcParam on that package.
plt.rcParams["figure.dpi"] = 100
# None for "display.max_columns" tells pandas not to truncate wide frames when displaying them.
pd.set_option('display.max_columns', None)

In [None]:
def _payload_size(payload):
  """Return the size in bytes of a serialized payload.

  `sys.getsizeof` reports the in-memory footprint of the Python object
  (object header included), which is not the number of bytes that would be
  transmitted or stored. Bytes-like payloads are therefore measured with
  `len()` and text payloads by their UTF-8 encoded length. Any other type
  falls back to `getsizeof`, which was the previous behaviour.
  """
  if isinstance(payload, (bytes, bytearray)):
    return len(payload)
  if isinstance(payload, memoryview):
    return payload.nbytes
  if isinstance(payload, str):
    return len(payload.encode('utf-8'))
  return getsizeof(payload)


def _serialized_size_stats(serializer, label, dataset, compressed_data, metrics):
  """Serialize both datasets with `serializer` and return the four size stats for `label`."""
  original = _payload_size(serializer.serialize(dataset, '/72/', '1/2', metrics))
  compressed = _payload_size(serializer.serialize(compressed_data, '/72/', '1/2', metrics))
  saved = original - compressed
  return {
    f'senML_{label}_size_original': original,
    f'senML_{label}_size_compressed': compressed,
    f'senML_{label}_size_diff': saved,
    # An empty original payload would otherwise raise ZeroDivisionError.
    f'senML_{label}_size_ratio': saved / original if original else 0.0,
  }


def compute_sizes(dataset, compressed_data, metrics):
  """Compare serialized sizes of the original and the compressed data.

  Both inputs are serialized with SenMLCBORSerializer and SenMLJSONSerializer
  (base name '/72/', name '1/2', plus `metrics`), and the resulting payload
  sizes are compared.

  Args:
    dataset: the original measurements, passed to the serializers as-is.
    compressed_data: the compressed measurements, passed to the serializers as-is.
    metrics: forwarded unchanged as the last argument of `serialize`.

  Returns:
    A dict with, for each of the `cbor` and `json` formats, the keys
    `senML_<fmt>_size_original`, `senML_<fmt>_size_compressed`,
    `senML_<fmt>_size_diff` (original - compressed) and
    `senML_<fmt>_size_ratio` (diff / original, or 0.0 when the original
    payload is empty).
  """
  return {
    **_serialized_size_stats(SenMLCBORSerializer, 'cbor', dataset, compressed_data, metrics),
    **_serialized_size_stats(SenMLJSONSerializer, 'json', dataset, compressed_data, metrics),
  }

def current_milis():
  """Return the current `time.time()` reading converted from seconds to milliseconds (float)."""
  seconds_now = time.time()
  return seconds_now * 1000

In [None]:
# Shared provider used by the rest of the notebook to build measurement lists.
measurement_provider = MeasurementProvider()
# dataset1 is what the ML selector is given via set_measurements() before training.
dataset1 = measurement_provider.get_random3()
# dataset2 wraps the measurements parsed from 'stock1.json' in a one-element list.
# NOTE(review): dataset2 is not referenced by any other cell visible here — confirm it is still needed.
dataset2 = [measurement_provider.json_to_measurements('stock1.json')]

In [None]:
# Create the ML-based selector, feed it dataset1 and train it.
ml_method_selector = MlMethodSelector()
# Disabled alternative: configure the selector with an explicit list of similarity metrics.
# ml_method_selector.use_default_strategy([
#   SimilarityMetricEnum.arithmetic_average,
#   SimilarityMetricEnum.median,
#   SimilarityMetricEnum.covariance,
# ])
ml_method_selector.set_measurements(dataset1)
# train() returns a value that is only printed here for inspection.
score = ml_method_selector.train()
print(score)

# Baseline selector the ML selector is benchmarked against in the next cell.
classic_method_selector = ClassicMethodSelector()

In [None]:
# Single source of truth for the column order. It drives the CSV header, each
# printed CSV row and the `results` dict (and therefore the DataFrame column
# order), so the three can no longer drift apart.
RESULT_COLUMNS = [
  "datasize",
  "ml_time",
  "classic_time",
  "ml_compressed_size",
  "classic_compressed_size",
  "ml_compression_ratio",
  "classic_compression_ratio",
  "ml_method",
  "classic_method",
  "ml_score",
  "classic_score",
  "ml_cbor_original_size",
  "ml_cbor_compressed_size",
  "ml_cbor_compressratio",
  "classic_cbor_original_size",
  "classic_cbor_compressed_size",
  "classic_cbor_compressratio",
  "ml_json_original_size",
  "ml_json_compressed_size",
  "ml_json_compressratio",
  "classic_json_original_size",
  "classic_json_compressed_size",
  "classic_json_compressratio",
]


def _selector_columns(prefix, elapsed_ms, stats, score, sizes):
  """Map one selector's outputs onto its `<prefix>_*` result columns.

  Args:
    prefix: 'ml' or 'classic'; prepended to every column name.
    elapsed_ms: duration of the compression call in milliseconds.
    stats: mapping returned by the selector; the keys 'compressed_size',
      'compression_rate' and 'method_name' are read.
    score: similarity score of the compressed data against the original.
    sizes: dict returned by compute_sizes().

  Returns:
    Dict of column name -> value for this selector.
  """
  return {
    f"{prefix}_time": elapsed_ms,
    f"{prefix}_compressed_size": stats['compressed_size'],
    f"{prefix}_compression_ratio": stats['compression_rate'],
    f"{prefix}_method": stats['method_name'],
    f"{prefix}_score": score,
    f"{prefix}_cbor_original_size": sizes['senML_cbor_size_original'],
    f"{prefix}_cbor_compressed_size": sizes['senML_cbor_size_compressed'],
    f"{prefix}_cbor_compressratio": sizes['senML_cbor_size_ratio'],
    f"{prefix}_json_original_size": sizes['senML_json_size_original'],
    f"{prefix}_json_compressed_size": sizes['senML_json_size_compressed'],
    f"{prefix}_json_compressratio": sizes['senML_json_size_ratio'],
  }


# CSV header, followed by one CSV line per data size printed inside the loop.
print(",".join(RESULT_COLUMNS))

results = {column: [] for column in RESULT_COLUMNS}

for datasize in range(100, 1001, 100):
  # Alternative test signal (peaks in both directions on top of a sine):
  # measurements = measurement_provider.to_measurements(SignalGenerator(0, datasize).with_peaks(3).with_peaks(3, direction=-1).sin(0.2, 0.2))
  measurements = measurement_provider.to_measurements(SignalGenerator(0, datasize).linear(2).sin(0.2, 0.2))

  row = {"datasize": datasize}

  # --- ML selector: only the compress call itself is timed ---
  time_start = current_milis()
  compressed_data, stats, metrics = ml_method_selector.compress_with_best(measurements)
  time_end = current_milis()
  serialized_data_stats = compute_sizes(measurements, compressed_data, metrics)
  # The ML selector does not return a score, so it is computed separately
  # with the classic selector's default similarity strategy.
  metrics_score = ClassicMethodSelector().compute_similarity_with_default_strategy(measurements, compressed_data)
  row.update(_selector_columns("ml", time_end - time_start, stats, metrics_score, serialized_data_stats))

  # --- Classic selector: returns its own score alongside the compressed data ---
  time_start = current_milis()
  compressed_data, stats, metrics, metrics_score = classic_method_selector.compress_with_best_default_strategy(measurements)
  time_end = current_milis()
  serialized_data_stats = compute_sizes(measurements, compressed_data, metrics)
  row.update(_selector_columns("classic", time_end - time_start, stats, metrics_score, serialized_data_stats))

  # Commit the row only after both selectors succeeded, so every column in
  # `results` always has the same length even if an iteration raises.
  for column in RESULT_COLUMNS:
    results[column].append(row[column])

  print(",".join(f"{row[column]}" for column in RESULT_COLUMNS))

In [None]:
# Turn the per-column lists collected by the benchmark loop into a DataFrame
# (one row per data size) and display it as the cell output.
df = pd.DataFrame.from_dict(results)
df

In [None]:
# One figure per comparison, all plotted against the input size.
# Compression time of the two selectors (milliseconds, as measured with current_milis()).
df.plot(x='datasize', y=['ml_time', 'classic_time'], grid=True)
# Compression ratio reported by each selector; ylim=0 presumably pins only the lower bound at 0 — TODO confirm.
df.plot(x='datasize', y=['ml_compression_ratio', 'classic_compression_ratio'], grid=True, ylim=0)
# Compressed size reported by each selector.
df.plot(x='datasize', y=['ml_compressed_size', 'classic_compressed_size'], grid=True, ylim=0)
# Similarity score of each selector's output.
# NOTE(review): the upper bound 18 is hard-coded — confirm it matches the score scale.
df.plot(x='datasize', y=['ml_score', 'classic_score'], grid=True, ylim=(0, 18))
# Serialized (SenML CBOR / JSON) size of the compressed data for both selectors.
df.plot(x='datasize', y=['ml_cbor_compressed_size', 'classic_cbor_compressed_size', 'ml_json_compressed_size', 'classic_json_compressed_size'], grid=True, ylim=0)