In [1]:
import json
from random import randint, sample
import pandas as pd
import numpy as np
import pickle
np.set_printoptions(suppress=True)

pd.set_option('display.max_rows', 150)

with open('../../data/nonwear-check/R/results/annotations.json', 'r') as f:
    annotations = json.load(f)

record_annotation_index = {}
for record_annotation in annotations['record_annotations']:
    if record_annotation['id'] not in record_annotation_index:
        record_annotation_index[record_annotation['id']] = {}
    id = record_annotation['id']
    record_annotation.pop('id')
    record_annotation_index[id].update(record_annotation)

segment_annotation_index = {}
for segment_annotation in annotations['segment_annotations']:
    if segment_annotation['id'] not in segment_annotation_index:
        segment_annotation_index[segment_annotation['id']] = {}
    id = segment_annotation['id']
    segment_annotation.pop('id')
    segment_annotation_index[id].update(segment_annotation)
# segment_annotation_index

In [2]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.utils import shuffle
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

import xgboost
import treelite

# from nni.feature_engineering.gradient_selector import FeatureGradientSelector

# import sys
# sys.path.append("/data/workspace/NonWear-Check")

In [3]:
df_features = pd.read_csv('../../data/nonwear-check/R/results/features_ppg-g_scale-ar.csv', index_col=None)
# df_features = df_features.astype(np.float32)
# df_features

In [4]:
for x in df_features.columns:
    if 'ppg__fft' in x:
        print(x)

ppg__fft_aggregated__aggtype_"centroid"
ppg__fft_aggregated__aggtype_"kurtosis"
ppg__fft_aggregated__aggtype_"skew"
ppg__fft_aggregated__aggtype_"variance"
ppg__fft_coefficient__coeff_0__attr_"abs"
ppg__fft_coefficient__coeff_0__attr_"angle"
ppg__fft_coefficient__coeff_0__attr_"imag"
ppg__fft_coefficient__coeff_0__attr_"real"
ppg__fft_coefficient__coeff_10__attr_"abs"
ppg__fft_coefficient__coeff_10__attr_"angle"
ppg__fft_coefficient__coeff_10__attr_"imag"
ppg__fft_coefficient__coeff_10__attr_"real"
ppg__fft_coefficient__coeff_11__attr_"abs"
ppg__fft_coefficient__coeff_11__attr_"angle"
ppg__fft_coefficient__coeff_11__attr_"imag"
ppg__fft_coefficient__coeff_11__attr_"real"
ppg__fft_coefficient__coeff_12__attr_"abs"
ppg__fft_coefficient__coeff_12__attr_"angle"
ppg__fft_coefficient__coeff_12__attr_"imag"
ppg__fft_coefficient__coeff_12__attr_"real"
ppg__fft_coefficient__coeff_13__attr_"abs"
ppg__fft_coefficient__coeff_13__attr_"angle"
ppg__fft_coefficient__coeff_13__attr_"imag"
ppg__fft_coe

In [5]:
df_objects = pd.read_csv('../../data/nonwear-check/R/results/objects_ppg-g_scale.csv', index_col=None)
# df_objects

In [6]:
from tsfresh.feature_selection.significance_tests import target_binary_feature_real_test
from tsfresh.feature_selection.relevance import calculate_relevance_table

X_columns2select = [c for c in df_features.columns if c[:3] == 'ppg']
relevance_table = calculate_relevance_table(df_features.loc[:, X_columns2select], df_features['wear_category_id'])
relevance_table.to_csv('../../data/nonwear-check/R/results/calculate_relevance_table.csv', index=None)

In [7]:
relevance_table_selected = relevance_table[relevance_table['p_value'] == 0]['feature']
# relevance_table_selected

In [9]:
X_columns = [
    'ppg__kurtosis',
    'ppg__absolute_sum_of_changes',
    'ppg__autocorrelation__lag_1',
    'ppg__autocorrelation__lag_2',
    'ppg__autocorrelation__lag_3',
    'ppg__autocorrelation__lag_4',
    'ppg__autocorrelation__lag_5',
    'ppg__autocorrelation__lag_6',
    'ppg__spkt_welch_density__coeff_2',
    'ppg__spkt_welch_density__coeff_5',
    'ppg__spkt_welch_density__coeff_8',
    'ppg__ar_0',
    'ppg__ar_1',
    'ppg__ar_2',
    'ppg__ratio_value_number_to_time_series_length',
    'ppg__sum_of_reoccurring_data_points',
    'ppg__percentage_of_reoccurring_values_to_all_values',
    'ppg__iqrs',
    'ppg__change_quantiles__f_agg_"var"__isabs_False__qh_0.2__ql_0.0',
    'ppg__change_quantiles__f_agg_"var"__isabs_False__qh_0.4__ql_0.0',
    'ppg__change_quantiles__f_agg_"var"__isabs_False__qh_0.8__ql_0.0',
    'ppg__change_quantiles__f_agg_"var"__isabs_False__qh_1.0__ql_0.0',
    'ppg__change_quantiles__f_agg_"var"__isabs_False__qh_1.0__ql_0.2',
    'ppg__change_quantiles__f_agg_"var"__isabs_False__qh_0.6__ql_0.4',
    'ppg__change_quantiles__f_agg_"var"__isabs_False__qh_0.8__ql_0.4',
    'ppg__change_quantiles__f_agg_"var"__isabs_False__qh_1.0__ql_0.4',
    'ppg__change_quantiles__f_agg_"var"__isabs_False__qh_1.0__ql_0.8',
    'ppg__change_quantiles__f_agg_"mean"__isabs_True__qh_0.2__ql_0.0',
    'ppg__change_quantiles__f_agg_"mean"__isabs_True__qh_0.4__ql_0.0',
    'ppg__change_quantiles__f_agg_"mean"__isabs_True__qh_0.8__ql_0.0',
    'ppg__change_quantiles__f_agg_"mean"__isabs_True__qh_0.4__ql_0.2',
    'ppg__change_quantiles__f_agg_"mean"__isabs_True__qh_0.6__ql_0.2',
    'ppg__change_quantiles__f_agg_"mean"__isabs_True__qh_0.6__ql_0.4',
    'ppg__change_quantiles__f_agg_"mean"__isabs_True__qh_0.8__ql_0.4',
    'ppg__change_quantiles__f_agg_"mean"__isabs_True__qh_1.0__ql_0.6']

print(len(X_columns))

y_column = 'wear_category_id'

# with open('/data/workspace/data/nonwear-check/R/results/X_columns_g.pickle', 'wb') as f:
#     pickle.dump(X_columns, f)
X_columns

35


['ppg__kurtosis',
 'ppg__absolute_sum_of_changes',
 'ppg__autocorrelation__lag_1',
 'ppg__autocorrelation__lag_2',
 'ppg__autocorrelation__lag_3',
 'ppg__autocorrelation__lag_4',
 'ppg__autocorrelation__lag_5',
 'ppg__autocorrelation__lag_6',
 'ppg__spkt_welch_density__coeff_2',
 'ppg__spkt_welch_density__coeff_5',
 'ppg__spkt_welch_density__coeff_8',
 'ppg__ar_0',
 'ppg__ar_1',
 'ppg__ar_2',
 'ppg__ratio_value_number_to_time_series_length',
 'ppg__sum_of_reoccurring_data_points',
 'ppg__percentage_of_reoccurring_values_to_all_values',
 'ppg__iqrs',
 'ppg__change_quantiles__f_agg_"var"__isabs_False__qh_0.2__ql_0.0',
 'ppg__change_quantiles__f_agg_"var"__isabs_False__qh_0.4__ql_0.0',
 'ppg__change_quantiles__f_agg_"var"__isabs_False__qh_0.8__ql_0.0',
 'ppg__change_quantiles__f_agg_"var"__isabs_False__qh_1.0__ql_0.0',
 'ppg__change_quantiles__f_agg_"var"__isabs_False__qh_1.0__ql_0.2',
 'ppg__change_quantiles__f_agg_"var"__isabs_False__qh_0.6__ql_0.4',
 'ppg__change_quantiles__f_agg_"var"

In [29]:
# X_columns_g  = pickle.load(open('../../data/nonwear-check/R/results/X_columns_g.pickle', 'rb'))
# X_columns_ir = pickle.load(open('../../data/nonwear-check/R/results/X_columns_ir.pickle', 'rb'))

# X_columns = X_columns_g.copy()
# for c in X_columns_ir:
#     if c not in X_columns_g:
#         X_columns.append(c)
# X_columns

In [10]:
params = {'max_depth': 3, 'objective':'binary:logistic', 'n_estimators': 1}

segment_ids = df_features['segment_id'].unique()
probs_list = []
preds_list = []
y_test_list = []
segment_score = {}
for segment_id in segment_ids:
    test_index = df_features['segment_id'] == segment_id
    X_train, y_train = df_features.loc[~test_index, X_columns].values, df_features.loc[~test_index, y_column].values
    X_test,  y_test  = df_features.loc[test_index, X_columns].values, df_features.loc[test_index, y_column].values

    D_train = xgboost.DMatrix(X_train, label=y_train)
    D_test = xgboost.DMatrix(X_test, label=y_test)

    bst = xgboost.train(params, D_train, 25, [(D_train, 'train')], verbose_eval=False)

    probs = bst.predict(D_test)
    preds = np.array(probs) > 0.5
    probs_list.extend(probs)
    preds_list.extend(preds)
    y_test_list.extend(list(y_test))

    print(segment_id, accuracy_score(y_test, preds))
    segment_score[segment_id] = {'accuracy_score': accuracy_score(y_test, preds), 'error_position_in_segment': np.where(y_test != preds)[0] * 64}

Parameters: { n_estimators } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


1586616076 0.9615384615384616
Parameters: { n_estimators } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


1586699587 0.984375
Parameters: { n_estimators } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


1586616942 0.9923076923076923
Parameters: { n_estimators } might not be used.

  This 

In [12]:
len(df_features[df_features['segment_id'] == 1586616968])

130

In [12]:
for segment_id, value in segment_score.items():
    if value['accuracy_score'] <= 0.9:
        record_id = segment_annotation_index[int(segment_id)]['record_id']
        print(record_annotation_index[record_id])
        print(value['accuracy_score'])
        print(value['error_position_in_segment'])
        print("========================================================\n")

accuracy_score(y_test_list, np.array(probs_list) > 0.5)


{'name': '20200411-yuwenhan-002.csv', 'creation_date': '2020/4/11'}
0.7615384615384615
[  64  128  320  384  512 1472 1536 1792 1856 2048 2112 2240 2368 2432
 2624 2752 2816 3392 4032 4096 4864 4928 5056 5120 5376 5440 5504 5568
 5888 7360 7808]

{'name': '20200411_blackglass_001.csv', 'creation_date': '2020/4/11'}
0.8990825688073395
[ 768  832 1024 1088 1728 3328 3392 3456 3968 4032 4096]

{'name': '20200412_red_lowlight_001.csv', 'creation_date': '2020/4/12'}
0.7384615384615385
[ 128 1280 1600 1664 1728 1984 2816 2880 3072 3456 3520 3584 3776 3840
 4224 4288 4416 4480 4544 4608 4800 4864 4928 5248 5312 5376 5696 5888
 6400 6656 7296 7360 7872 7936]

{'name': '20200412_phoneshanning_003.csv', 'creation_date': '2020/4/12'}
0.6692307692307692
[   0   64  448  512  640 1408 2240 2304 2752 2816 2880 2944 3072 3136
 3264 3648 3712 3776 4160 4224 4544 4864 4928 4992 5056 5440 5504 5632
 5952 6272 6400 6528 6592 6656 6720 6784 6976 7488 7552 7680 7872 7936
 8000]

{'name': '20200410_bluesurf

0.9764950907468015

In [13]:
params = {'max_depth': 3, 'objective':'binary:logistic', 'n_estimators': 1}

X = df_features[X_columns].values
y = df_features[y_column].values

D = xgboost.DMatrix(X, label=y)

bst = xgboost.train(params, D, 25, [(D, 'train')])

probs = bst.predict(D)
preds = np.array(probs) > 0.5
accuracy = accuracy_score(y, preds)
print("overall accuracy: ", accuracy)
print(np.sum(y == 0))
print(np.sum(y == 1))
tn, fp, fn, tp = confusion_matrix(y, preds).ravel()
print(tn, fp, fn, tp)

Parameters: { n_estimators } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[0]	train-error:0.02003
[1]	train-error:0.01994
[2]	train-error:0.01745
[3]	train-error:0.01765
[4]	train-error:0.01716
[5]	train-error:0.01597
[6]	train-error:0.01468
[7]	train-error:0.01240
[8]	train-error:0.01200
[9]	train-error:0.01190
[10]	train-error:0.01101
[11]	train-error:0.00962
[12]	train-error:0.00942
[13]	train-error:0.00843
[14]	train-error:0.00793
[15]	train-error:0.00774
[16]	train-error:0.00724
[17]	train-error:0.00734
[18]	train-error:0.00635
[19]	train-error:0.00585
[20]	train-error:0.00605
[21]	train-error:0.00575
[22]	train-error:0.00565
[23]	train-error:0.00526
[24]	train-error:0.00545
overall accuracy:  0.9945452742239412
6078
4005
6066 12 43 3962


In [14]:
model = treelite.Model.from_xgboost(bst)

toolchain = 'gcc'
platform = 'unix'

model.export_srcpkg(platform=platform, toolchain=toolchain,
                    pkgpath='../../data/nonwear-check/R/results/model_G_20200420-001.zip', libname='mymodel.so',
                    verbose=True)

[22:18:27] /io/treelite/src/frontend/xgboost.cc:359: Global bias of the model: 0.5
[22:18:27] /io/treelite/src/frontend/xgboost.cc:397: gbm_param_.num_feature = 0
[22:18:27] /io/treelite/src/frontend/xgboost.cc:398: gbm_param_.num_output_group = 0
[22:18:27] /io/treelite/src/compiler/ast_native.cc:22: Using ASTNativeCompiler
[22:18:27] /io/treelite/src/compiler/ast/split.cc:10: Parallel compilation disabled; all member trees will be dumped to a single source file. This may increase compilation time and memory usage.
[22:18:27] /io/treelite/src/c_api/c_api.cc:297: Code generation finished. Writing code to files...
[22:18:27] /io/treelite/src/c_api/c_api.cc:314: Writing file recipe.json...
[22:18:27] /io/treelite/src/c_api/c_api.cc:314: Writing file main.c...
[22:18:27] /io/treelite/src/c_api/c_api.cc:314: Writing file header.h...


In [None]:
record = pd.read_csv('/data/workspace/data/佩戴检测/EA301476695E_Data_2cm_blackdesk_1.csv', header=None, index_col=None)
g_value = record[record[0] == 1].values[::2, 1]
ir_value = record[record[0] == 4].values[::2, 1]

In [None]:
print(g_value.shape, ir_value.shape)

In [None]:
np.set_printoptions(suppress=True)
np.savetxt('/data/workspace/data/g_value.txt', g_value, fmt="%d,")
np.savetxt('/data/workspace/data/ir_value.txt', ir_value, fmt="%d,")

## 验证c和仿真一致性

In [25]:
ppg = df_objects[df_objects['id'] == 6785].ppg.values
list(ppg)

[12855,
 12854,
 12853,
 12856,
 12854,
 12851,
 12854,
 12854,
 12853,
 12861,
 12855,
 12859,
 12858,
 12859,
 12861,
 12858,
 12860,
 12860,
 12859,
 12860,
 12859,
 12860,
 12858,
 12859,
 12859,
 12860,
 12858,
 12858,
 12857,
 12857,
 12857,
 12857,
 12858,
 12857,
 12854,
 12854,
 12856,
 12856,
 12854,
 12852,
 12855,
 12854,
 12856,
 12854,
 12853,
 12857,
 12854,
 12854,
 12853,
 12853,
 12856,
 12854,
 12858,
 12859,
 12859,
 12858,
 12859,
 12861,
 12860,
 12861,
 12860,
 12859,
 12865,
 12864]

In [26]:
for x in X_columns:
    print(df_features[df_features['id'] == 6785][x])

6785   -0.339289
Name: ppg__kurtosis, dtype: float64
6785    105.0
Name: ppg__absolute_sum_of_changes, dtype: float64
6785    0.66588
Name: ppg__autocorrelation__lag_1, dtype: float64
6785    0.606097
Name: ppg__autocorrelation__lag_2, dtype: float64
6785    0.579926
Name: ppg__autocorrelation__lag_3, dtype: float64
6785    0.476099
Name: ppg__autocorrelation__lag_4, dtype: float64
6785    0.486186
Name: ppg__autocorrelation__lag_5, dtype: float64
6785    0.275259
Name: ppg__autocorrelation__lag_6, dtype: float64
6785    102.921234
Name: ppg__spkt_welch_density__coeff_2, dtype: float64
6785    2.586082
Name: ppg__spkt_welch_density__coeff_5, dtype: float64
6785    0.002141
Name: ppg__spkt_welch_density__coeff_8, dtype: float64
6785    0.471558
Name: ppg__ar_0, dtype: float64
6785    0.273973
Name: ppg__ar_1, dtype: float64
6785    0.080493
Name: ppg__ar_2, dtype: float64
6785    0.203125
Name: ppg__ratio_value_number_to_time_series_length, dtype: float64
6785    771418.0
Name: ppg__sum

In [None]:
df = pd.DataFrame({'ppg': ppg})
df['id'] = 1
# df['ppg'] = df['ppg']

from tsfresh.examples.har_dataset import download_har_dataset, load_har_dataset, load_har_classes
import seaborn as sns
from tsfresh import extract_features, extract_relevant_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import ComprehensiveFCParameters
from extract_features import ar_feature

extraction_settings = ComprehensiveFCParameters()

def iqrs_feature(df_objects, cols2extract):
    print(df_objects.groupby('id')['ppg'].quantile(q=0.75), print(df_objects.groupby('id')['ppg'].quantile(q=0.25)))
    print(np.quantile(ppg, q=0.25), np.quantile(ppg, q=0.75))
    iqrs = df_objects.groupby('id')['ppg'].quantile(
        q=0.75) - df_objects.groupby('id')['ppg'].quantile(q=0.25)
    return pd.DataFrame({'ppg__iqrs': iqrs})

df_iqrs = iqrs_feature(df, ['ppg'])
df_tsfresh_featurs = extract_features(df, column_id='id', impute_function=impute, default_fc_parameters=extraction_settings)
df_ar = ar_feature(df_objects)

df_features = pd.concat([df_tsfresh_featurs, df_iqrs, df_ar], axis=1)

In [27]:
d = xgboost.DMatrix(df_features[df_features['id'] == 6785][X_columns].values)
bst.predict(d)

array([0.99801636], dtype=float32)

In [None]:
np.var(a)

In [None]:
from glob import glob

record_paths = glob('/data/workspace/data/records/*.csv')

sum = 0
num = 0
for rp in record_paths:
    record = pd.read_csv(rp, header=None, index_col=None)
    ir = record[record[0] == 4].iloc[:, 1].values
    if ir.shape[0] < 10: continue
    print(ir.max(), ir.min())
    sum += np.sum(ir)
    num += ir.shape[0]

In [None]:
np.log2(5000000) - np.log2(5010000)

In [82]:
x = np.asarray([7, 7, 9, 1, 2, 3, 12, 3, 5, 6, 6, 8, 9])
bin_cat = pd.qcut(x, [0.1, 0.6], labels=False)
bin_cat_0 = bin_cat == 0
bin_cat_0

array([ True,  True, False, False, False,  True, False,  True,  True,
        True,  True, False, False])

In [30]:
x_re[:, np.newaxis]

array([[[1, 2]],

       [[2, 3]],

       [[3, 4]],

       [[4, 5]],

       [[5, 6]],

       [[6, 7]],

       [[7, 8]],

       [[8, 9]]])

In [31]:
x_re[np.newaxis, :]

array([[[1, 2],
        [2, 3],
        [3, 4],
        [4, 5],
        [5, 6],
        [6, 7],
        [7, 8],
        [8, 9]]])

In [32]:
x_re[:, np.newaxis] - x_re[np.newaxis, :]

array([[[ 0,  0],
        [-1, -1],
        [-2, -2],
        [-3, -3],
        [-4, -4],
        [-5, -5],
        [-6, -6],
        [-7, -7]],

       [[ 1,  1],
        [ 0,  0],
        [-1, -1],
        [-2, -2],
        [-3, -3],
        [-4, -4],
        [-5, -5],
        [-6, -6]],

       [[ 2,  2],
        [ 1,  1],
        [ 0,  0],
        [-1, -1],
        [-2, -2],
        [-3, -3],
        [-4, -4],
        [-5, -5]],

       [[ 3,  3],
        [ 2,  2],
        [ 1,  1],
        [ 0,  0],
        [-1, -1],
        [-2, -2],
        [-3, -3],
        [-4, -4]],

       [[ 4,  4],
        [ 3,  3],
        [ 2,  2],
        [ 1,  1],
        [ 0,  0],
        [-1, -1],
        [-2, -2],
        [-3, -3]],

       [[ 5,  5],
        [ 4,  4],
        [ 3,  3],
        [ 2,  2],
        [ 1,  1],
        [ 0,  0],
        [-1, -1],
        [-2, -2]],

       [[ 6,  6],
        [ 5,  5],
        [ 4,  4],
        [ 3,  3],
        [ 2,  2],
        [ 1,  1],
        [ 0,  0]

In [33]:
np.abs(x_re[:, np.newaxis] - x_re[np.newaxis, :])

array([[[0, 0],
        [1, 1],
        [2, 2],
        [3, 3],
        [4, 4],
        [5, 5],
        [6, 6],
        [7, 7]],

       [[1, 1],
        [0, 0],
        [1, 1],
        [2, 2],
        [3, 3],
        [4, 4],
        [5, 5],
        [6, 6]],

       [[2, 2],
        [1, 1],
        [0, 0],
        [1, 1],
        [2, 2],
        [3, 3],
        [4, 4],
        [5, 5]],

       [[3, 3],
        [2, 2],
        [1, 1],
        [0, 0],
        [1, 1],
        [2, 2],
        [3, 3],
        [4, 4]],

       [[4, 4],
        [3, 3],
        [2, 2],
        [1, 1],
        [0, 0],
        [1, 1],
        [2, 2],
        [3, 3]],

       [[5, 5],
        [4, 4],
        [3, 3],
        [2, 2],
        [1, 1],
        [0, 0],
        [1, 1],
        [2, 2]],

       [[6, 6],
        [5, 5],
        [4, 4],
        [3, 3],
        [2, 2],
        [1, 1],
        [0, 0],
        [1, 1]],

       [[7, 7],
        [6, 6],
        [5, 5],
        [4, 4],
        [3, 3],
        [2

In [43]:
np.max(np.abs(x_re[:, np.newaxis] - x_re[np.newaxis, :]), axis=2) <= 2

array([[ True,  True,  True, False, False, False, False, False],
       [ True,  True,  True,  True, False, False, False, False],
       [ True,  True,  True,  True,  True, False, False, False],
       [False,  True,  True,  True,  True,  True, False, False],
       [False, False,  True,  True,  True,  True,  True, False],
       [False, False, False,  True,  True,  True,  True,  True],
       [False, False, False, False,  True,  True,  True,  True],
       [False, False, False, False, False,  True,  True,  True]])

In [45]:
C = np.sum(np.max(np.abs(x_re[:, np.newaxis] - x_re[np.newaxis, :]),
                  axis=2) <= 2, axis=0)
C
# np.sum(np.log(C))

array([3, 4, 5, 5, 5, 5, 4, 3])

In [None]:
C = np.sum(np.max(np.abs(x_re[:, np.newaxis] - x_re[np.newaxis, :]),
                  axis=2) <= r, axis=0) / (N - m + 1)
return np.sum(np.log(C)) / (N - m + 1.0)