In [1]:
from sklearn.ensemble import RandomForestClassifier
import sklearn.model_selection
import pandas as pd
import sqlite3

from datetime import datetime as dt

from dataset_util import preprocess, uci_mhealth, uci_pamap2
from dataset_util.extract_input_features import all_feature, extract_features
import matplotlib.pyplot as plt

import numpy as np
from config import SQLITE_DATABASE_FILE, TRAINING_SET_PROPORTION
from scikitplot.metrics import plot_confusion_matrix, plot_roc
from evaluate_classification import evaluation_metrics
from cross_validation import cross_validate_multiclass_group_kfold


In [8]:
# Seed for the random forest so that re-running this cell reproduces the same
# scores; without it every run trains a different forest.
RANDOM_SEED = 42

# Load the rows selected by uci_mhealth.raw_table_query_shared_data.
# sqlite3's connection context manager only commits / rolls back the
# transaction -- it does NOT close the connection -- so close it explicitly
# once the query result is in memory.
# (Alternative query kept for reference: uci_mhealth.raw_table_valid_data_query)
conn = sqlite3.connect(SQLITE_DATABASE_FILE)
try:
    data = pd.read_sql_query(uci_mhealth.raw_table_query_shared_data, conn)
finally:
    conn.close()

# Raw frame -> sliding windows -> per-window feature table.
sliding_windows = preprocess.full_df_to_sliding_windows(data)
features = extract_features(sliding_windows, all_feature)

clsf = RandomForestClassifier(
    n_estimators=500,
    class_weight="balanced",
    n_jobs=-1,
    random_state=RANDOM_SEED,
)

# `result` maps each metric name to a collection of values (presumably one per
# fold -- confirm against cross_validate_multiclass_group_kfold); report the mean.
result = cross_validate_multiclass_group_kfold(
    clsf, *preprocess.to_classification(features), n_splits=5
)
mean_result = {metric: np.mean(values) for metric, values in result.items()}

print("cv results:")
print("-" * 50)
# Materialise the dict view as a list of (metric, value) pairs so the
# DataFrame constructor does not depend on dict-view support.
result_df = pd.DataFrame(list(mean_result.items()), columns=["metric", "result"])
# Temporarily lift pandas' display limits so every metric row is printed.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(result_df)

cv results:
--------------------------------------------------
                    metric     result
0                 fit_time   5.127285
1               score_time  10.087956
2      test_total_accuracy   0.942646
3     train_total_accuracy   1.000000
4     test_sensitivity_1.0   0.990067
5    train_sensitivity_1.0   1.000000
6     test_specificity_1.0   0.996774
7    train_specificity_1.0   1.000000
8        test_f1_score_1.0   0.957824
9       train_f1_score_1.0   1.000000
10            test_auc_1.0   0.998863
11           train_auc_1.0   1.000000
12    test_sensitivity_2.0   1.000000
13   train_sensitivity_2.0   1.000000
14    test_specificity_2.0   0.895135
15   train_specificity_2.0   1.000000
16       test_f1_score_2.0   0.930242
17      train_f1_score_2.0   1.000000
18            test_auc_2.0   0.996189
19           train_auc_2.0   1.000000
20    test_sensitivity_3.0   0.999360
21   train_sensitivity_3.0   1.000000
22    test_specificity_3.0   1.000000
23   train_specificity_3.