In [None]:
import pod5
import numpy as np
import matplotlib.pyplot as plt
import pgnano.stats_analysis.primitives as pgnprim
import pgnano.stats_analysis.models as pgnmodels
from pgnano.stats_analysis.map_reduce_stats import abstract_mapper, run_mapper, abstract_reducer, run_mapper_reducer, ReducerReportResult, MapperReportResult
import pgnano.stats_analysis.jupyter_data_preparation as pgndata
import os
from pgnano.constants.constants import stats_analysis_root_path
from typing import List
import statistics as st
import multiprocessing as mp
from collections import namedtuple
from tqdm.auto import tqdm
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from pgnano.stats_analysis.abstract_plotter import abstract_plot
from pgnano.stats_analysis.models import SklearnModel
from pgnano.stats_analysis.parameter_search import linear_parameter_search

In [None]:
# Prepare the data for the P10_4_1 pore type. flatten_sample_data returns two
# collections; every cell below reads them as signal_data and chunked_data.
# NOTE(review): the meaning of the second argument (100) is not visible in this
# notebook — presumably a sample/file limit; confirm in jupyter_data_preparation.
signal_data, chunked_data = pgndata.flatten_sample_data(pgnprim.PGPoreType.P10_4_1, 100)

In [None]:
# Run CppModel through run_mapper_reducer over signal_data and print the returned report.
report = run_mapper_reducer(pgnmodels.CppModel(), signal_data)
print(report)

In [None]:
# Run EvenOddTimestepCodingModel through run_mapper_reducer over signal_data and print the returned report.
report = run_mapper_reducer(pgnmodels.EvenOddTimestepCodingModel(), signal_data)
print(report)

In [None]:
# Run EvenOddDerivativeCodingModel through run_mapper_reducer over signal_data and print the returned report.
report = run_mapper_reducer(pgnmodels.EvenOddDerivativeCodingModel(), signal_data)
print(report)

In [None]:
# Run StabilitySeparatorModel through run_mapper_reducer over signal_data and print the returned report.
report = run_mapper_reducer(pgnmodels.StabilitySeparatorModel(), signal_data)
print(report)

In [7]:
# Run a SklearnModel wrapping scikit-learn's LinearRegression through
# run_mapper_reducer over signal_data and print the returned report.
report = run_mapper_reducer(pgnmodels.SklearnModel(LinearRegression()), signal_data)
print(report)

In [None]:
# Run a SklearnModel wrapping a small MLP (three hidden layers of 3 units, ReLU,
# up to 2000 iterations) through run_mapper_reducer over signal_data and print
# the returned report.
# random_state is pinned because MLPRegressor's weight initialisation and batch
# sampling are random; without it the report changes on every run.
report = run_mapper_reducer(
    pgnmodels.SklearnModel(
        MLPRegressor(hidden_layer_sizes=[3,3,3], activation='relu', max_iter=2000, random_state=0)
    ),
    signal_data,
)
print(report)

In [None]:
# Run DerivativeContextModel through run_mapper_reducer over signal_data and print the returned report.
report = run_mapper_reducer(pgnmodels.DerivativeContextModel(), signal_data)
print(report)

In [None]:
# Run DerivativeContextModel2, constructed with the single-element list [3],
# through run_mapper_reducer over signal_data and print the returned report.
# NOTE(review): the meaning of the list argument is not visible here — confirm in pgnmodels.
report = run_mapper_reducer(pgnmodels.DerivativeContextModel2([3]), signal_data)
print(report)

In [None]:
# Run DerivativeContextModel3 through run_mapper_reducer over signal_data and print the returned report.
report = run_mapper_reducer(pgnmodels.DerivativeContextModel3(), signal_data)
print(report)

In [None]:
# Run DerivativeContextModel4, constructed with [3], through run_mapper_reducer
# over signal_data and print the returned report.
# NOTE(review): the meaning of the list argument is not visible here — confirm in pgnmodels.
report = run_mapper_reducer(pgnmodels.DerivativeContextModel4([3]), signal_data)
print(report)

In [None]:
# Run DerivativeContextModel4([3]) through run_mapper_reducer, this time over
# chunked_data rather than signal_data, and print the returned report.
report = run_mapper_reducer(pgnmodels.DerivativeContextModel4([3]), chunked_data)
print(report)

In [None]:
# Run DerivativeContextModel4, constructed with [10, 20], through
# run_mapper_reducer over signal_data and print the returned report.
report = run_mapper_reducer(pgnmodels.DerivativeContextModel4([10, 20]), signal_data)
print(report)

In [None]:
# Run DerivativeContextModel4, constructed with [3, 5, 10], through
# run_mapper_reducer over signal_data and print the returned report.
report = run_mapper_reducer(pgnmodels.DerivativeContextModel4([3,5,10]), signal_data)
print(report)

In [None]:
# Run DerivativeContextModel4, constructed with [3, 5, 10, 20], through
# run_mapper_reducer over signal_data and print the returned report.
report = run_mapper_reducer(pgnmodels.DerivativeContextModel4([3,5,10,20]), signal_data)
print(report)

In [None]:
# Run DerivativeContextModel4, constructed with [1, 2, 3, 5, 10, 20, 30], through
# run_mapper_reducer over signal_data and print the returned report.
report = run_mapper_reducer(pgnmodels.DerivativeContextModel4([1,2,3,5,10,20,30]), signal_data)
print(report)

In [None]:
# Hand a list of candidate parameter lists, together with signal_data, to
# linear_parameter_search and print whatever it returns.
# NOTE(review): the candidates have the same shape as the lists passed to the
# DerivativeContextModel* constructors elsewhere in this notebook; which model the
# search actually instantiates is not visible here — confirm in parameter_search.
res = linear_parameter_search([
    [1],
    [2],
    [3],
    [5],
    [10],
    [3,5,10],
    [1,2,3],
    [3,5,10,20],
    [1,2,3,5,10,20,30]
], signal_data)
print(res)

In [None]:
# Run RunCodingModel through run_mapper_reducer over signal_data and print the returned report.
report = run_mapper_reducer(pgnmodels.RunCodingModel(), signal_data)
print(report)

In [None]:
# Run CppModel through run_mapper_reducer over chunked_data (instead of
# signal_data) and print the returned report.
report = run_mapper_reducer(pgnmodels.CppModel(), chunked_data)
# Disabled check: would count entries of `res` whose compression_ratio exceeds 1.
#print(len(list(filter(lambda x: x.compression_ratio > 1, res))))
print(report)

In [None]:
def mapper_nibble(x):
    """Build the per-signal report entry for the nibble error model.

    Args:
        x: Indexable pair; item 0 is the signal's index, item 1 is the signal
            (anything supporting ``len`` and accepted by ``get_code_lenght``).

    Returns:
        A ``MapperReportResult`` built positionally from: the index, the
        ``NibbleSimpleErrorModel`` code length, the number of samples, the raw
        size (16 bits per sample), code length divided by raw size, whether the
        code length is smaller than the raw size, and the ``VbzModel`` code
        length for the same signal.

    Raises:
        ZeroDivisionError: if the signal is empty (raw size is 0).
    """
    samples = x[1]
    sample_index = x[0]

    # Fresh model instances are created on every call.
    nibble_model = pgnmodels.NibbleSimpleErrorModel()
    vbz_reference = pgnmodels.VbzModel()

    n_samples = len(samples)
    nibble_code_len = nibble_model.get_code_lenght(samples)
    raw_bits = n_samples * 16

    return MapperReportResult(
        sample_index,
        nibble_code_len,
        n_samples,
        raw_bits,
        nibble_code_len / raw_bits,
        nibble_code_len < raw_bits,
        vbz_reference.get_code_lenght(samples),
    )


# Map every (index, signal) pair through mapper_nibble using a pool with one
# worker per CPU; imap keeps the results in input order and tqdm shows progress.
with mp.Pool(os.cpu_count()) as p:
    mapped = p.imap(mapper_nibble, enumerate(signal_data))
    res = list(tqdm(mapped, total=len(signal_data)))

# Number of signals whose code is larger than the raw representation.
print(sum(1 for entry in res if entry.compression_ratio > 1))

# Totals that feed more than one field of the report.
total_code_len = sum(entry.code_len for entry in res)
better_count = sum(1 if entry.is_better_compressed else 0 for entry in res)

report = ReducerReportResult(
    # Macro average: mean of the per-signal ratios.
    macro_avg_ratio=st.mean(entry.compression_ratio for entry in res),
    # Micro average: total coded size over total raw size.
    micro_avg_ratio=total_code_len / sum(entry.uncompressed_bits for entry in res),
    number_better_compressed=better_count,
    total_number=len(res),
    percentage_better_compressed=100 * (better_count / len(res)),
    # Same total coded size, measured against the VBZ total instead of the raw total.
    against_vbz_micro_avg_ratio=total_code_len / sum(entry.vbz_compressed_bits for entry in res),
)
print(report)

In [None]:
# Window of the first signal to inspect: `size` samples beginning at `start`.
start=2250
size=250
limit=start + size
one_signal = signal_data[0]

# Previous-sample predictor: every sample is predicted by the one before it
# (0 for the very first sample).
# Samples are converted with int() — as the delta cell further down does — so the
# subtraction below is done on Python ints and cannot wrap around if the signal
# holds fixed-width (e.g. 16-bit) values.
prev = 0
predicted_signal = []
for signal in one_signal:
    predicted_signal.append(prev)
    prev = int(signal)
#plt.plot(one_signal[start:limit])
#plt.plot(predicted_signal[start:limit])

# Absolute prediction error over the window.
abs_error = [
    abs(int(actual) - predicted)
    for actual, predicted in zip(one_signal[start:limit], predicted_signal[start:limit])
]
plt.plot(abs_error)


In [None]:
# Pass a LinearRegression-backed SklearnModel, the first signal, and a window
# (start index 2250, size 50) to abstract_plot.
# NOTE(review): presumably abstract_plot draws the model's prediction against the
# signal over that window — confirm in abstract_plotter.
start=2250
size=50
model = SklearnModel(LinearRegression())
one_signal = signal_data[0]
abstract_plot(model, one_signal, start, size)

In [None]:
# Pass an MLP-backed SklearnModel (three hidden layers of 3 units, ReLU, up to
# 2000 iterations), the first signal, and a window (start index 2250, size 250)
# to abstract_plot.
start=2250
size=250
# random_state is pinned because MLPRegressor's weight initialisation and batch
# sampling are random; without it the figure changes on every run.
model = SklearnModel(MLPRegressor(hidden_layer_sizes=[3,3,3], activation='relu', max_iter=2000, random_state=0))
one_signal = signal_data[0]
abstract_plot(model, one_signal, start, size)

In [None]:
# First-order differences of one_signal (the first sample is differenced against 0).
prev_val = 0
mapped_signal = []
for x in one_signal:
    current = int(x)
    mapped_signal.append(current - prev_val)
    prev_val = current

# Keep deltas 1..249 (the first one is dropped) and fold signed values onto the
# non-negative integers: d >= 0 -> 2d, d < 0 -> -2d - 1.
mapped_signal = [
    2 * delta if delta >= 0 else -2 * delta - 1
    for delta in mapped_signal[1:250]
]
plt.plot(mapped_signal)
print(f"mean: {st.mean(mapped_signal)}")
print(f"stddev: {st.stdev(mapped_signal)}")

In [None]:
# Split the first signal with the byte-wise helper (two results) and the
# nibble-wise helper (four results); the following cells plot each result.
# NOTE(review): the "_hg" suffix presumably stands for histogram — confirm in pgnprim.
low_hg, high_hg = pgnprim.split_by_bytes_hg(signal_data[0])
hg1, hg2, hg3, hg4 = pgnprim.split_by_nibbles_hg(signal_data[0])

In [None]:
# Plot low_hg: its data against its indexes.
low_x = low_hg.get_indexes()
low_y = low_hg.get_data()

plt.plot(low_x, low_y)

In [None]:
# Plot high_hg: its data against its indexes.
high_x = high_hg.get_indexes()
high_y = high_hg.get_data()

plt.plot(high_x, high_y)

In [None]:
# Plot hg1 (first result of split_by_nibbles_hg): its data against its indexes.
nibble1_x = hg1.get_indexes()
nibble1_y = hg1.get_data()

plt.plot(nibble1_x, nibble1_y)

In [None]:
# Plot hg2 (second result of split_by_nibbles_hg): its data against its indexes.
nibble2_x = hg2.get_indexes()
nibble2_y = hg2.get_data()

plt.plot(nibble2_x, nibble2_y)

In [None]:
# Plot hg3 (third result of split_by_nibbles_hg): its data against its indexes.
nibble3_x = hg3.get_indexes()
nibble3_y = hg3.get_data()

plt.plot(nibble3_x, nibble3_y)

In [None]:
# Plot hg4 (fourth result of split_by_nibbles_hg): its data against its indexes.
nibble4_x = hg4.get_indexes()
nibble4_y = hg4.get_data()

plt.plot(nibble4_x, nibble4_y)

In [None]:
# Estimate code length; the value of the last expression is shown as the cell output.
# NOTE(review): every other cell reaches CppModel through pgnmodels; confirm that
# pgnprim also exposes it, otherwise this cell fails on a fresh kernel.
# NOTE(review): d is a slice of signal_data (i.e. several signals), whereas
# get_code_lenght is given a single signal in mapper_nibble — confirm which is intended.
d = signal_data[:1000]
# Disabled check: would count the elements of d that are below 1 << 12.
#print(len(list(filter(lambda x: x < 1 << 12,d))))
model = pgnprim.CppModel()
model.get_code_lenght(d)