In [None]:
%load_ext autoreload
%autoreload 2

# add src to sys-path
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
sys.path.append('./src')
from matplotlib.pyplot import figure
from geopy.distance import geodesic
from sklearn.decomposition import PCA

from data_processing.mtb_data_provider import MtbDataProvider
from data_processing.mtb_data_tagger import MtbDataTagger
from data_processing.mtb_classifier import MtbClassifier
from visualization.mtb_visualizer import MtbVisualizer
from data_processing.helpers import Helpers

from numpy.random import seed
import tensorflow
# Fix the seed of every random number generator this notebook touches so that
# repeated runs start from the same state.
SEED = 42

tensorflow.random.set_seed(SEED)
# `seed` is `numpy.random.seed` (imported above), so the next two calls both
# seed NumPy's global generator; the duplication is harmless.
seed(SEED)
np.random.seed(SEED)

In [None]:
# Helper objects shared by the cells below.
mtb_data_provider = MtbDataProvider()
mtb_classifier = MtbClassifier()
# `mtb_visualizer` is passed to `run_classification` in the classification
# cells but was never created, which raised a NameError on a fresh kernel.
# NOTE(review): assumes MtbVisualizer, like the two classes above, can be
# constructed without arguments - confirm against its definition.
mtb_visualizer = MtbVisualizer()

# Keys of the latitude / longitude columns; they are appended to the input
# column list used when the windowed samples are created.
LATITUDE_KEY = 'position_lat'
LONGITUDE_KEY = 'position_long'

## Create windowed samples

In [None]:
# Dataset identifier handed to the sample preparation and classification calls.
dataset_filename = 'mtb_dataset_20201007'

# Columns passed as inputs to the sample preparation (sensor channels plus
# timestamp and GPS position).
input_columns = [
    'timestamp',
    'SensorAccelerationX_HD',
    'SensorAccelerationY_HD',
    'SensorAccelerationZ_HD',
    'SensorSpeed',
    'heart_rate',
    'altitude',
    'SensorHeading',
    LATITUDE_KEY,
    LONGITUDE_KEY,
]

# Columns passed as labels to the sample preparation.
label_columns = [
    'osm_mtb:scale',
    'osm_surface',
    'trailforks_difficulty',
    'trailforks_physical_rating',
    'trailforks_difficulty_user_avg',
    'trailforks_amtb-rating',
]

# Window sizes and sub-sample sizes for the windowed samples.
# NOTE(review): both lists have four entries, presumably paired by position;
# the unit is not visible here - confirm in MtbDataProvider.
window_lengths = [150, 300, 600, 1500]
sub_sample_lengths = [50, 150, 300, 600]

In [None]:
# Options for the sample generation, collected in one place so they are easy
# to tweak. NOTE(review): `force_overwrite=True` presumably regenerates samples
# that already exist - confirm in MtbDataProvider.
sample_options = dict(
    window_lengths=window_lengths,
    sub_sample_lengths=sub_sample_lengths,
    force_overwrite=True,
    auto_padd_left_right=True,
    min_cluster_size=1,
)

# Create the windowed samples for the configured dataset and columns.
mtb_data_provider.prepare_and_save_samples(
    dataset_filename,
    input_columns,
    label_columns,
    **sample_options)

## Classification

In [None]:
# Sensor channels used as classifier inputs (no timestamp / GPS position).
input_columns = [
    'SensorAccelerationX_HD',
    'SensorAccelerationY_HD',
    'SensorAccelerationZ_HD',
    'SensorSpeed',
    'heart_rate',
    'altitude',
    'SensorHeading',
]

# Names of the classifiers to evaluate, passed to `run_classification`.
# The closing bracket of this list was missing, which made the whole cell a
# SyntaxError.
classifier_names = [
    "KNeighborsClassifier",
    "DecisionTreeClassifier",
    "RandomForestClassifier",
    "MLPClassifier",
]

# (label column, label value to ignore) pairs; each pair is unpacked into the
# `label_column` / `ignore_label` arguments of `run_classification`.
# The ignore value for 'osm_surface' is the string '0', the others use int 0.
# NOTE(review): 'trailforks_trailtype' is not among the label columns given to
# the sample preparation cell - confirm the label is available in the samples.
label_columns = [
    ('osm_surface', '0'),
    ('osm_mtb:scale', 0),
    ('trailforks_difficulty', 0),
    ('trailforks_trailtype', 0),
]

### Classification on all data

In [None]:
# Settings that stay the same for every label column are collected once;
# only the label column and its ignore value change per run.
full_run_settings = dict(
    dataset_input=dataset_filename,
    input_columns=input_columns,
    window_lengths=window_lengths,
    classifier_names=classifier_names,
    mtb_data_provider=mtb_data_provider,
    mtb_visualizer=mtb_visualizer,
    n_splits=5,
    step_size=1,
    label_appearance_threshold=0.3,
    clear_outliers=False,
    save_plots=True,
)

# Run the classification with all sensor inputs, once per label column.
for label_column, ignore_label in label_columns:
    mtb_classifier.run_classification(
        label_column=label_column,
        ignore_label=ignore_label,
        **full_run_settings)

In [None]:
# Print accuracy summaries from the result files in `evaluation/`, one file
# per label column.
for label_column, _ignore_label in label_columns:
    # Build the path from `dataset_filename` so the report follows the
    # configured dataset instead of a hard-coded name.
    result_path = 'evaluation/' + dataset_filename + '_' + label_column + '.npy'

    # The file holds a pickled mapping wrapped in a 0-d object array.
    # `.item()` unwraps it; the previous `.all()` trick only worked because
    # object-array reductions used to return one of the elements, which is no
    # longer the case since NumPy 2.0 (it returns a bool there).
    # SECURITY NOTE: allow_pickle=True can execute code embedded in the file -
    # only load result files produced by this project.
    evaluation_results = np.load(result_path, allow_pickle=True).item()

    for name, result in dict(evaluation_results).items():
        folds = np.concatenate(result)
        # The last '_'-separated token of the key is used as the class count.
        class_count = name.split('_')[-1]
        # Accuracy of uniform random guessing for that class count.
        guessing = 1/float(class_count)

        if 'adjusted' not in name:
            print(name)
            print("\tClasses:", class_count)
            print("\tAccuracy: %0.2f (+/- %0.2f)" % (folds.mean(), folds.std() * 2), "\t\tGuessing would be: %0.2f" % guessing)
        else:
            print("\tAdjusted Accuracy: %0.2f (+/- %0.2f)" % (folds.mean(), folds.std() * 2))
    print("\n")

### Classification for isolated sensor measurements

In [None]:
# Each entry is one group of sensor channels that is evaluated on its own.
input_sets = [['SensorAccelerationX_HD', 'SensorAccelerationY_HD', 'SensorAccelerationZ_HD'], ['SensorSpeed'], ['heart_rate'], ['altitude'], ['SensorHeading']]

# The loop variable is deliberately NOT called `input_columns`: that name holds
# the full sensor list used by the "all data" cell, and overwriting it here
# would silently change that cell's inputs when it is re-run afterwards.
for sensor_columns in input_sets:
    print("=========================================================================================================")
    print("=========\n", sensor_columns, "=========\n")
    print("=========================================================================================================")

    for label_column, ignore_label in label_columns:
        mtb_classifier.run_classification(
            dataset_input = dataset_filename,
            input_columns = sensor_columns,
            label_column = label_column,
            ignore_label=ignore_label,
            window_lengths=window_lengths,
            classifier_names = classifier_names,
            mtb_data_provider = mtb_data_provider,
            mtb_visualizer = mtb_visualizer,
            n_splits = 5,
            step_size=1,
            label_appearance_threshold=0.3,
            clear_outliers = False,
            save_plots = False)

        # Load the result file for this label column right after its run.
        # The path is built from `dataset_filename` rather than hard-coded.
        result_path = 'evaluation/' + dataset_filename + '_' + label_column + '.npy'
        # `.item()` unwraps the pickled mapping from the 0-d object array; the
        # previous `.all()` call returns a bool on NumPy >= 2.0.
        # SECURITY NOTE: allow_pickle=True can execute code embedded in the
        # file - only load result files produced by this project.
        evaluation_results = np.load(result_path, allow_pickle=True).item()

        for name, result in dict(evaluation_results).items():
            folds = np.concatenate(result)
            # The last '_'-separated token of the key is used as the class count.
            class_count = name.split('_')[-1]
            # Accuracy of uniform random guessing for that class count.
            guessing = 1/float(class_count)

            if 'adjusted' not in name:
                print(name)
                print("\tAccuracy: %0.2f (+/- %0.2f)" % (folds.mean(), folds.std() * 2), "\t\tGuessing would be: %0.2f" % guessing)
            else:
                print("\tAdjusted Accuracy: %0.2f (+/- %0.2f)" % (folds.mean(), folds.std() * 2))
        print("\n")