In [40]:
import glob
from datetime import datetime
import os

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy import stats, signal
from scipy.stats import mode
from scipy.fft import fft
from sklearn.ensemble import RandomForestClassifier as RFC, ExtraTreesClassifier as ETC
from sklearn.model_selection import RepeatedStratifiedKFold, cross_val_score, StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler

import lightgbm as lgb
import xgboost as xgb

from tqdm import tqdm, trange

# Preprocessed feature plugin

In [41]:
def remove_acc():
    """Feature plugin using positions, speeds and joint distances (no acceleration).

    Returns a 3-tuple ``(get_streams, get_features, colnames)``:

    * ``get_streams(x_data)`` appends the speed columns and the joint-distance
      columns to a copy of ``x_data``.
    * ``get_features(x_data)`` returns a flat list with four statistics per
      column, in this order: population std, max, min, median.
    * ``colnames`` lists the stream column names: positions, speeds, distances.
    """
    def get_streams(x_data):
        # The acceleration frame is returned by the helper but not used here.
        velocity, _acceleration = get_speed_acc(x_data)
        with_speed = pd.concat([x_data, velocity], axis=1)
        return get_all_joint_distances(with_speed)

    def get_features(x_data):
        summary = []
        for name in x_data.columns.tolist():
            series = x_data[name]
            # Statistics emitted per column: std (ddof=0), max, min, median.
            summary.extend([
                series.std(ddof=0),
                np.max(series),
                np.min(series),
                np.median(series),
            ])
        return summary

    markers = ['FH', 'TH', 'RH', 'RS', 'RO', 'RE', 'RW', 'LS', 'LE', 'LW', 'RA', 'LA', 'VS']
    pos_cols = []
    for marker in markers:
        for axis in ("_X", "_Y", "_Z"):
            pos_cols.append(marker + axis)
    speed_cols = [name + "_speed" for name in pos_cols]
    distance_pairs = [
        ("FH", "LS"), ("FH", "RS"), ("LS", "LW"), ("RS", "RW"),
        ("VS", "LE"), ("VS", "RE"), ("VS", "LW"), ("VS", "RW"),
        ("VS", "RH"), ("VS", "TH"), ("LW", "RW"), ("LA", "LW"),
        ("RA", "RW"), ("LW", "TH"), ("RW", "TH"), ("TH", "LA"),
    ]
    distance_cols = [f"dist_{first}_{second}" for first, second in distance_pairs]
    colnames = pos_cols + speed_cols + distance_cols
    return get_streams, get_features, colnames

In [42]:
def angle_red_remove_acc():
    """Feature plugin using positions, speeds, joint distances and angle differences.

    Returns a 3-tuple ``(get_streams, get_features, colnames)``:

    * ``get_streams(x_data)`` appends speed columns, joint-distance columns and
      the reduced set of joint-angle-difference columns to a copy of ``x_data``.
    * ``get_features(x_data)`` returns a flat list with four statistics per
      column, in this order: population std, max, min, median.
    * ``colnames`` lists the stream column names in the order they are produced
      (positions, speeds, distances, angle differences).  The angle entries use
      the same ``ang_dif_*`` names that ``get_all_joint_angles_diff_red``
      writes, so derived feature names line up with the actual stream columns.
    """
    def get_streams(x_data):
        # The acceleration frame is returned by the helper but not used by this plugin.
        speed, _acc = get_speed_acc(x_data)
        x_data = pd.concat([x_data, speed], axis=1)
        x_data = get_all_joint_distances(x_data)
        x_data = get_all_joint_angles_diff_red(x_data)
        return x_data

    def get_features(x_data):
        features = []
        # Per column: std (population, ddof=0), max, min, median.
        for k in x_data.columns.tolist():
            column = x_data[k]
            features.append(column.std(ddof=0))
            features.append(np.max(column))
            features.append(np.min(column))
            features.append(np.median(column))
        return features

    joint_names = ['FH', 'TH', 'RH', 'RS', 'RO', 'RE', 'RW', 'LS', 'LE', 'LW', 'RA', 'LA', 'VS']
    pos_cols = [i + j for i in joint_names for j in ["_X", "_Y", "_Z"]]
    speed_cols = [f"{col}_speed" for col in pos_cols]
    distance_cols = ['dist_FH_LS', 'dist_FH_RS', 'dist_LS_LW', 'dist_RS_RW', 'dist_VS_LE', 'dist_VS_RE',
        'dist_VS_LW', 'dist_VS_RW', 'dist_VS_RH', 'dist_VS_TH', 'dist_LW_RW', 'dist_LA_LW', 'dist_RA_RW',
        'dist_LW_TH', 'dist_RW_TH', 'dist_TH_LA']
    # Must match the column names created by get_all_joint_angles_diff_red.
    angle_dif_cols = ["ang_dif_LS_LE_LW", "ang_dif_RS_RE_RW", "ang_dif_RS_LS_LE",
                      "ang_dif_LS_RS_RE", "ang_dif_VS_RO_RH"]
    colnames = pos_cols + speed_cols + distance_cols + angle_dif_cols
    return get_streams, get_features, colnames

# Stream Extractors

In [43]:
def get_speed_acc(x_data):
    """Return first and second row-wise differences of the coordinate columns.

    ``subject_id`` (and ``activity`` when present) are dropped first.  The
    first row of each difference is filled with 0.  Returns ``(speed, acc)``
    whose columns carry the ``_speed`` / ``_acc`` suffix respectively.
    """
    has_label = "activity" in x_data.columns
    meta_columns = ["activity", "subject_id"] if has_label else ["subject_id"]
    coordinates = x_data.drop(columns=meta_columns)

    speed = coordinates.diff().fillna(0)
    # Differentiate before renaming so the acc names derive from the raw column names.
    acc = speed.diff().fillna(0)

    base_names = list(coordinates.columns)
    speed.columns = ["{}_speed".format(name) for name in base_names]
    acc.columns = ["{}_acc".format(name) for name in base_names]
    return speed, acc

def get_speed_acc_jerk(x_data):
    """Return first, second and third row-wise differences of the coordinate columns.

    The metadata columns ``activity`` and ``subject_id`` are dropped when
    present, so the function works on both labelled (train) and unlabelled
    (test) frames.  The leading row of every difference is filled with 0.

    Returns
    -------
    (speed, acc, jerk) : tuple of DataFrame
        Columns are the original coordinate names suffixed with ``_speed``,
        ``_acc`` and ``_jerk`` respectively.
    """
    meta_cols = [col for col in ("activity", "subject_id") if col in x_data.columns]
    coords = x_data.drop(columns=meta_cols)
    speed = coords.diff().fillna(0)
    acc = speed.diff().fillna(0)
    jerk = acc.diff().fillna(0)
    # Name every derivative from the raw coordinate names; using the already
    # renamed acc columns would yield "<col>_acc_jerk".
    base_names = list(coords.columns)
    speed.columns = [f"{col}_speed" for col in base_names]
    acc.columns = [f"{col}_acc" for col in base_names]
    jerk.columns = [f"{col}_jerk" for col in base_names]
    return speed, acc, jerk

In [44]:
def joint_distance(x_data, joint1, joint2):
    """
    Euclidean distance between ``joint1`` and ``joint2`` for every row,
    read from the ``<joint>_X``, ``<joint>_Y`` and ``<joint>_Z`` columns.
    """
    start = [x_data[f"{joint1}_{axis}"] for axis in "XYZ"]
    end = [x_data[f"{joint2}_{axis}"] for axis in "XYZ"]
    squared = [(b - a) ** 2 for a, b in zip(start, end)]
    return np.sqrt(squared[0] + squared[1] + squared[2])

def get_all_joint_distances(x_data):
    """
    Add one ``dist_<A>_<B>`` column per selected joint pair to ``x_data``
    (in place) and return the same frame.

    Only non-consecutive joints are paired: the distance between two
    consecutive joints (for example left wrist and left elbow) is constant
    and therefore carries no information.
    """
    # (joint A, joint B) with the marker numbers used by the dataset in the trailing comment
    joint_pairs = (
        ("FH", "LS"),   # front head     -> left shoulder   (1->8)
        ("FH", "RS"),   # front head     -> right shoulder  (1->4)
        ("LS", "LW"),   # left shoulder  -> left wrist      (8->10)
        ("RS", "RW"),   # right shoulder -> right wrist     (4->7)
        ("VS", "LE"),   # v sacral       -> left elbow      (13->9)
        ("VS", "RE"),   # v sacral       -> right elbow     (13->6)
        ("VS", "LW"),   # v sacral       -> left wrist      (13->10)
        ("VS", "RW"),   # v sacral       -> right wrist     (13->7)
        ("VS", "RH"),   # v sacral       -> rear head       (13->3)
        ("VS", "TH"),   # v sacral       -> top head        (13->2)
        ("LW", "RW"),   # left wrist     -> right wrist     (10->7)
        ("LA", "LW"),   # left asis      -> left wrist      (12->10)
        ("RA", "RW"),   # right asis     -> right wrist     (11->7)
        ("LW", "TH"),   # left wrist     -> top head        (10->2)
        ("RW", "TH"),   # right wrist    -> top head        (7->2)
        ("TH", "LA"),   # top head       -> left asis       (2->12)
    )
    for first, second in joint_pairs:
        x_data[f"dist_{first}_{second}"] = joint_distance(x_data, first, second)
    return x_data



In [45]:
def joint_angle(x_data, joint1, joint2, joint3):
    """Angle (radians) between segment joint1->joint2 and segment joint2->joint3, per row.

    Both direction vectors point "forward" along the chain, so a fully
    straight chain gives 0 and a fully folded one gives pi.

    Parameters
    ----------
    x_data : DataFrame with ``<joint>_X``, ``<joint>_Y``, ``<joint>_Z`` columns.
    joint1, joint2, joint3 : str
        Joint prefixes; ``joint2`` is the vertex shared by the two segments.

    Returns
    -------
    numpy.ndarray of shape ``(len(x_data),)``.  Rows where either segment has
    zero length are NaN (the direction is undefined).
    """
    x1, y1, z1 = x_data[f"{joint1}_X"], x_data[f"{joint1}_Y"], x_data[f"{joint1}_Z"]
    x2, y2, z2 = x_data[f"{joint2}_X"], x_data[f"{joint2}_Y"], x_data[f"{joint2}_Z"]
    x3, y3, z3 = x_data[f"{joint3}_X"], x_data[f"{joint3}_Y"], x_data[f"{joint3}_Z"]
    v1 = np.array([x2 - x1, y2 - y1, z2 - z1]).T
    v2 = np.array([x3 - x2, y3 - y2, z3 - z2]).T
    v1_unit = v1 / np.linalg.norm(v1, axis=1, keepdims=True)
    v2_unit = v2 / np.linalg.norm(v2, axis=1, keepdims=True)
    cosine = np.sum(v1_unit * v2_unit, axis=1)  # row-wise dot product
    # Rounding can push the cosine of (anti)parallel segments slightly outside
    # [-1, 1], which would make arccos return NaN; clip keeps genuine NaNs intact.
    angle = np.arccos(np.clip(cosine, -1.0, 1.0))
    return angle

def get_all_joint_angles(x_data):
    """Add one ``angle_<A>_<B>_<C>`` column per joint triple to ``x_data`` (in place) and return it.

    Each column holds ``joint_angle(x_data, A, B, C)``, with ``B`` as the
    middle joint of the triple.
    """
    # (A, B, C) with the marker numbers used by the dataset in the trailing comment
    joint_triples = (
        ("LS", "LE", "LW"),   # left shoulder  -> left elbow     -> left wrist   (8->9->10)
        ("RS", "RE", "RW"),   # right shoulder -> right elbow    -> right wrist  (4->6->7)
        ("RS", "LS", "FH"),   # right shoulder -> left shoulder  -> front head   (4->8->1)
        ("RS", "LS", "LE"),   # right shoulder -> left shoulder  -> left elbow   (4->8->9)
        ("LS", "RS", "RE"),   # left shoulder  -> right shoulder -> right elbow  (8->4->6)
        ("VS", "RO", "RH"),   # v sacral       -> right offset   -> rear head    (13->5->3)
        ("VS", "TH", "FH"),   # v sacral       -> top head       -> front head   (13->2->1)
        ("VS", "LS", "LE"),   # v sacral       -> left shoulder  -> left elbow   (13->8->9)
        ("VS", "RS", "RE"),   # v sacral       -> right shoulder -> right elbow  (13->4->6)
        ("LA", "LS", "LE"),   # left asis      -> left shoulder  -> left elbow   (12->8->9)
        ("RA", "RS", "RE"),   # right asis     -> right shoulder -> right elbow  (11->4->6)
    )
    for first, middle, last in joint_triples:
        x_data[f"angle_{first}_{middle}_{last}"] = joint_angle(x_data, first, middle, last)
    return x_data

In [64]:
def get_all_joint_angles_diff_red(x_data):
    """Add frame-to-frame joint-angle differences for a reduced set of five triples.

    For each triple the angle series is differenced (``np.diff``), padded with
    a trailing 0 so it keeps the frame's length, and passed through
    ``np.nan_to_num``.  Columns are written to ``x_data`` in place as
    ``ang_dif_<A>_<B>_<C>`` and the same frame is returned.
    """
    reduced_triples = (
        ("LS", "LE", "LW"),
        ("RS", "RE", "RW"),
        ("RS", "LS", "LE"),
        ("LS", "RS", "RE"),
        ("VS", "RO", "RH"),
    )
    for first, middle, last in reduced_triples:
        angles = joint_angle(x_data, first, middle, last)
        padded_delta = np.append(np.diff(angles), 0)
        x_data[f"ang_dif_{first}_{middle}_{last}"] = np.nan_to_num(padded_delta).tolist()
    return x_data

# train functions

In [47]:
def train_segmentation(df, overlap_rate, time_window):
    """Cut ``df`` into fixed-length, overlapping windows.

    Missing values are filled first: linear interpolation, then forward fill,
    then 0 for anything still missing (e.g. leading gaps).

    Parameters
    ----------
    df : DataFrame
        One recording; rows are consecutive samples.
    overlap_rate : float
        Fraction of a window shared with the next one; must be < 1.
    time_window : int
        Number of rows per window; must be >= 1.

    Returns
    -------
    list of DataFrame
        Each of exactly ``time_window`` rows with a fresh 0-based index.
        Empty when ``df`` is shorter than ``time_window``.

    Raises
    ------
    ValueError
        If ``overlap_rate >= 1`` or ``time_window < 1``.

    Notes
    -----
    The step is *rounded*, not truncated: ``(1 - 0.8) * 4000`` evaluates to
    799.999... in floating point, and truncation would silently give 799
    (or 0 for small windows, which crashes ``range``).
    """
    if time_window < 1:
        raise ValueError(f"time_window must be >= 1, got {time_window}")
    if overlap_rate >= 1:
        raise ValueError(f"overlap_rate must be < 1, got {overlap_rate}")
    # Convert the overlap rate into the stride of the sliding window.
    step = max(1, int(round((1 - overlap_rate) * time_window)))
    filled = df.interpolate().ffill().fillna(0)
    seg_data = []
    for start in range(0, len(filled) - time_window + 1, step):
        # Positional slicing, so the result does not depend on the frame's index labels.
        window = filled.iloc[start:start + time_window]
        seg_data.append(window.reset_index(drop=True))
    return seg_data


def train_rename_columns(df):
    """Rename the columns of a training frame in place and return it.

    The frame must have 41 columns: X/Y/Z for each of the 13 markers (in the
    order listed below) followed by ``subject_id`` and ``activity``.
    """
    # Marker order 1..13 as laid out in the CSV files.
    marker_order = (
        "FH", "TH", "RH", "RS", "RO", "RE", "RW",
        "LS", "LE", "LW", "RA", "LA", "VS",
    )
    coordinate_names = [f"{marker}_{axis}" for marker in marker_order for axis in "XYZ"]
    df.columns = coordinate_names + ["subject_id", "activity"]
    return df


def train_dataloader(overlap, window_size, verbose=True, data_glob="../TrainData/*/*/*.csv"):
    """Read every training CSV, rename its columns and cut it into windows.

    Parameters
    ----------
    overlap : float
        Overlap rate passed to ``train_segmentation``.
    window_size : int
        Window length (rows) passed to ``train_segmentation``.
    verbose : bool
        Print a status line and show a tqdm progress bar.
    data_glob : str
        Glob pattern locating the training CSV files; change it instead of
        editing the function when the data lives elsewhere.

    Returns
    -------
    (data_list, file_lengths)
        ``data_list`` is the concatenation of all windows from all files.
        ``file_lengths`` holds the window count of each file that produced at
        least one window, in the same order, so consecutive slices of
        ``data_list`` can be mapped back to files.
    """
    if verbose:
        print("loading the data...", end="\t")
    # Sorted so the sample order is reproducible across machines/filesystems.
    paths = sorted(glob.glob(data_glob))
    files = tqdm(paths) if verbose else paths
    data_list = []
    file_lengths = []
    for file in files:
        tempdf = pd.read_csv(file)
        tempdf = train_rename_columns(tempdf)
        segmented_data = train_segmentation(tempdf, overlap, window_size)
        # Files shorter than one window contribute nothing and are not counted.
        if len(segmented_data) > 0:
            file_lengths.append(len(segmented_data))
        data_list.extend(segmented_data)
    return data_list, file_lengths


def train_feature_extractor(data_list, verbose=True):
    """Turn each stream frame into a feature vector and collect its label.

    Relies on the module-level ``get_features`` selected by the active feature
    plugin.  Returns ``(X, y)`` where ``X[i]`` is the feature list of
    ``data_list[i]`` and ``y[i]`` is the ``activity`` value of its first row.
    """
    if verbose:
        print("extracting the features...", end="  ")
    total = len(data_list)
    positions = trange(0, total) if verbose else range(0, total)
    X = []
    y = []
    for position in positions:
        segment = data_list[position]
        # Only the signal columns are summarised; metadata is removed first.
        signals = segment.drop(columns=["subject_id", "activity"])
        X.append(get_features(signals))
        y.append(segment.reset_index(drop=True).at[0, "activity"])
    return X, y

def majority_voting(predictions, file_lengths):
    """Replace every window prediction by the most frequent prediction of its file.

    NOTE: this notebook defines ``majority_voting`` twice; the later
    definition is the one in effect after a full run.

    Parameters
    ----------
    predictions : sequence or 1-D array
        Per-window predictions, files concatenated in order.
    file_lengths : sequence of int
        Number of windows per file, in the same order.

    Returns
    -------
    list
        Same length as the covered predictions; each file's slice is filled
        with that file's majority label (the smallest label wins ties).
    """
    filtered_predictions = []
    index = 0
    for length in file_lengths:
        file_pred = predictions[index:index + length]
        index += length
        if length == 0:
            continue  # nothing to vote on
        # np.unique is used instead of scipy.stats.mode(...).mode[0]: newer
        # SciPy returns a scalar mode for 1-D input, which cannot be indexed.
        values, counts = np.unique(file_pred, return_counts=True)
        majority_choice = values[np.argmax(counts)]
        filtered_predictions.extend([majority_choice] * length)
    return filtered_predictions

In [48]:
def get_trainset(overlap_rate, window_size, verbose=True):
    """Load, window and featurise the training data.

    Uses the module-level ``get_streams`` chosen by the active feature plugin.
    Returns ``(X, y, file_lengths)``.
    """
    segments, file_lengths = train_dataloader(overlap_rate, window_size, verbose=verbose)
    streams = [get_streams(segment) for segment in segments]
    X, y = train_feature_extractor(streams, verbose=verbose)
    return X, y, file_lengths

In [49]:
def trainer(model, X_train, y_train, verbose=True):
    """Fit a MinMaxScaler and ``model`` on the training data.

    The printed score is the accuracy on the *training* data itself, so it is
    a sanity check rather than an estimate of generalisation.
    Returns ``(model, scaler)`` with both objects fitted.
    """
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(np.array(X_train))
    model.fit(X_train_scaled, y_train)
    train_predictions = model.predict(X_train_scaled)
    score = accuracy_score(y_train, train_predictions)
    if verbose:
        print(f"Score: {score}")
    return model, scaler

# test functions

In [50]:
def test_segmentation(df, overlap_rate, time_window):
    seg_data = []
    #convert overlap rate to step for sliding window
    overlap = int((1 - overlap_rate)*time_window)
    # interpolate
    df = df.interpolate().ffill().fillna(0)
    for i in range(0, len(df)-time_window+1, overlap):
        seg_data.append(df.loc[i:i+time_window-1, :].copy().reset_index(drop=True))
    return seg_data


def test_rename_columns(df):
    df.columns = [
        "FH_X", "FH_Y", "FH_Z",     #1
        "TH_X", "TH_Y", "TH_Z",     #2
        "RH_X", "RH_Y", "RH_Z",     #3
        "RS_X", "RS_Y", "RS_Z",     #4
        "RO_X", "RO_Y", "RO_Z",     #5
        "RE_X", "RE_Y", "RE_Z",     #6
        "RW_X", "RW_Y", "RW_Z",     #7
        "LS_X", "LS_Y", "LS_Z",     #8
        "LE_X", "LE_Y", "LE_Z",     #9
        "LW_X", "LW_Y", "LW_Z",     #10
        "RA_X", "RA_Y", "RA_Z",     #11
        "LA_X", "LA_Y", "LA_Z",     #12
        "VS_X", "VS_Y", "VS_Z",     #13
        "subject_id", # "activity",   # Other columns
    ]
    return df


def test_dataloader(path, overlap, window_size, single, verbose=True):
    if verbose:
        print("loading the data...", end="\t")
    data_list = []
    file_lengths = []
    if not single:
        files = tqdm(glob.glob(path + "/*.csv")) if verbose else glob.glob(path + "/*.csv")
    else:
        files = [path]
    for file in files:
        tempdf = pd.read_csv(file)
        tempdf = test_rename_columns(tempdf)
        segmented_data = test_segmentation(tempdf, overlap, window_size)
        if len(segmented_data)>0:
            file_lengths.append(len(segmented_data))   
        data_list.extend(segmented_data)
    return data_list, file_lengths


def test_feature_extractor(data_list, verbose=True):
    if verbose:
        print(f"extracting the features...", end="  ")
    X = []
    num_range = trange(0,len(data_list)) if verbose else range(0,len(data_list))
    for j in num_range:
        #extract only xyz columns
        x_data = data_list[j].drop(columns=["subject_id"])
        X.append(get_features(x_data))
    return X


def majority_voting(predictions, file_lengths):
    """Replace every window prediction by the most frequent prediction of its file.

    NOTE: this notebook defines ``majority_voting`` twice; this later
    definition is the one in effect after a full run.

    Parameters
    ----------
    predictions : sequence or 1-D array
        Per-window predictions, files concatenated in order.
    file_lengths : sequence of int
        Number of windows per file, in the same order.

    Returns
    -------
    list
        Same length as the covered predictions; each file's slice is filled
        with that file's majority label (the smallest label wins ties).
    """
    filtered_predictions = []
    index = 0
    for length in file_lengths:
        file_pred = predictions[index:index + length]
        index += length
        if length == 0:
            continue  # nothing to vote on
        # np.unique is used instead of scipy.stats.mode(...).mode[0]: newer
        # SciPy returns a scalar mode for 1-D input, which cannot be indexed.
        values, counts = np.unique(file_pred, return_counts=True)
        majority_choice = values[np.argmax(counts)]
        filtered_predictions.extend([majority_choice] * length)
    return filtered_predictions

In [51]:
def get_testset(path, overlap_rate, window_size, single=False, verbose=True):
    """Load, window and featurise test data from a file or a directory.

    Uses the module-level ``get_streams`` chosen by the active feature plugin.
    Returns ``(X, file_lengths)``.
    """
    segments, file_lengths = test_dataloader(
        path, overlap_rate, window_size, verbose=verbose, single=single
    )
    streams = [get_streams(segment) for segment in segments]
    X = test_feature_extractor(streams, verbose=verbose)
    return X, file_lengths

In [52]:
def predictor(model, X_test, scalar, file_lengths, voting=False, verbose=True):
    """Scale the features and predict one label per window.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    X_test : sequence of feature vectors.
    scalar : fitted scaler exposing ``transform``.
    file_lengths : sequence of int
        Windows per file; only used when ``voting`` is true.
    voting : bool
        When true, return the per-file majority-voted predictions instead of
        the raw per-window predictions.
    verbose : bool
        Accepted for interface compatibility; currently unused.

    Returns
    -------
    The raw output of ``model.predict`` or, with ``voting=True``, the list
    produced by ``majority_voting``.
    """
    X_test_scaled = scalar.transform(np.array(X_test))
    y_pred = model.predict(X_test_scaled)
    # Vote only on request: the raw path should neither pay for nor fail on
    # the voting step.
    if voting:
        return majority_voting(y_pred, file_lengths)
    return y_pred

In [53]:
def get_mode(file_pred):
    """Return the most frequent value in ``file_pred`` (smallest value on ties).

    Implemented with ``np.unique`` rather than ``scipy.stats.mode(...).mode[0]``
    because newer SciPy returns a scalar mode for 1-D input, which cannot be
    indexed.

    Raises
    ------
    ValueError
        If ``file_pred`` is empty.
    """
    values, counts = np.unique(file_pred, return_counts=True)
    if values.size == 0:
        raise ValueError("get_mode() requires at least one prediction")
    # np.unique returns sorted values and argmax picks the first maximum,
    # so ties resolve to the smallest label.
    return values[np.argmax(counts)]

In [61]:
def result(model, scalar, overlap_rate, window_size, root="../TestData",
           file_ids=range(1, 51), skip=(40, 27)):
    """Predict every numbered test file and collect the results in a DataFrame.

    Parameters
    ----------
    model, scalar : fitted estimator and fitted scaler (as returned by ``trainer``).
    overlap_rate, window_size : windowing parameters; should match training.
    root : str
        Directory holding ``<id>.csv`` test files.
    file_ids : iterable of int
        File numbers to process.
    skip : collection of int
        File numbers to leave out.  The notebook does not record why 40 and
        27 are excluded by default.

    Returns
    -------
    DataFrame with columns ``filename``, ``prediction`` (int, the most frequent
    window prediction of the file) and ``results`` (the raw per-window
    predictions).
    """
    records = []
    for i in tqdm(file_ids):
        if i in skip:
            continue
        filename = str(i) + ".csv"
        path = os.path.join(root, filename)
        X_test, test_file_lengths = get_testset(path, overlap_rate, window_size, single=True, verbose=False)
        y_pred = predictor(model, X_test, scalar, test_file_lengths, voting=False, verbose=False)
        records.append({
            "filename": filename,
            "prediction": int(get_mode(y_pred)),
            "results": y_pred,
        })
    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # row-by-row appending is quadratic.
    predictions = pd.DataFrame(records, columns=["filename", "prediction", "results"])
    predictions["prediction"] = predictions["prediction"].astype(int)
    return predictions

# Model 1: RFC

In [57]:
# Model 1 configuration: 80% overlap between consecutive 4000-row windows.
overlap_rate, window_size = 0.8, 4000
# Activate the remove_acc plugin. get_streams / get_features are bound as
# globals because get_trainset, get_testset and the feature extractors look
# them up by name.
get_streams, get_features, colnames = remove_acc()
# One feature name per (stream column, statistic), in the order get_features emits them.
ft_names = [i+j for i in colnames for j in ["_std", "_max", "_min", "_med"]]
X_train, y_train, _ = get_trainset(overlap_rate, window_size, verbose=True)

loading the data...	

100%|██████████| 151/151 [00:18<00:00,  8.00it/s]


extracting the features...  

100%|██████████| 512/512 [00:55<00:00,  9.25it/s]


In [58]:
# Random forest for model 1. No random_state is passed, so repeated runs can differ.
model = RFC(2500, criterion="gini", n_jobs=-1)
# trainer() fits the scaler and the model and prints accuracy on the training data.
model, scalar = trainer(model, X_train, y_train)

Score: 1.0


In [62]:
# Per-file test predictions of model 1 (uses the globals set by the model 1 cells above).
predictions1 = result(model, scalar, overlap_rate, window_size)

100%|██████████| 50/50 [00:44<00:00,  1.13it/s]


# Model 2: RFC with angle

In [65]:
# Model 2 configuration: same windowing as model 1, plus angle-difference streams.
overlap_rate, window_size = 0.8, 4000
# Rebinds the global get_streams / get_features to the angle_red_remove_acc plugin.
get_streams, get_features, colnames = angle_red_remove_acc()
ft_names = [i+j for i in colnames for j in ["_std", "_max", "_min", "_med"]]
# Overwrites X_train / y_train from model 1.
X_train, y_train, _ = get_trainset(overlap_rate, window_size, verbose=True)

loading the data...	

100%|██████████| 151/151 [00:12<00:00, 11.77it/s]


extracting the features...  

100%|██████████| 512/512 [00:44<00:00, 11.57it/s]


In [66]:
# Random forest for model 2. No random_state is passed, so repeated runs can differ.
# Rebinds `model` and `scalar`, replacing the model 1 objects.
model = RFC(3000, criterion="gini", n_jobs=-1)
model, scalar = trainer(model, X_train, y_train)

Score: 1.0


In [67]:
# Per-file test predictions of model 2 (must run while the model 2 plugin globals are active).
predictions2 = result(model, scalar, overlap_rate, window_size)

100%|██████████| 50/50 [00:56<00:00,  1.13s/it]


# Model 3: ETC

In [68]:
# Model 3 configuration: 75% overlap between consecutive 4000-row windows.
overlap_rate, window_size = 0.75, 4000
# Rebinds the global get_streams / get_features back to the remove_acc plugin.
get_streams, get_features, colnames = remove_acc()
ft_names = [i+j for i in colnames for j in ["_std", "_max", "_min", "_med"]]
# Overwrites X_train / y_train from model 2.
X_train, y_train, _ = get_trainset(overlap_rate, window_size, verbose=True)

loading the data...	

100%|██████████| 151/151 [00:16<00:00,  8.97it/s]


extracting the features...  

100%|██████████| 414/414 [00:37<00:00, 11.17it/s]


In [69]:
# Extra-trees classifier for model 3. No random_state is passed, so repeated runs can differ.
# Rebinds `model` and `scalar`, replacing the model 2 objects.
model = ETC(2400, criterion="entropy", max_depth=12, min_samples_split=4, n_jobs=-1)
model, scalar = trainer(model, X_train, y_train)

Score: 1.0


In [70]:
# Per-file test predictions of model 3 (must run while the model 3 plugin globals are active).
predictions3 = result(model, scalar, overlap_rate, window_size)

100%|██████████| 50/50 [00:51<00:00,  1.02s/it]


# Combine results

In [71]:
# Persist each model's per-file predictions one directory above the notebook.
predictions1.to_csv("../predictions1.csv", index=False)
predictions2.to_csv("../predictions2.csv", index=False)
predictions3.to_csv("../predictions3.csv", index=False)

In [72]:
# Place the three prediction tables side by side (column-wise concat); the
# filename / prediction / results column names therefore repeat three times.
all_preds = pd.concat([predictions1, predictions2, predictions3], axis=1)
# Bare expression: displays the combined table as the cell output.
all_preds

Unnamed: 0,filename,prediction,results,filename.1,prediction.1,results.1,filename.2,prediction.2,results.2
0,1.csv,9,"[9, 9]",1.csv,9,"[9, 9]",1.csv,9,"[9, 9]"
1,2.csv,5,"[5, 5]",2.csv,5,"[5, 5]",2.csv,5,"[5, 5]"
2,3.csv,7,"[7, 8]",3.csv,7,"[7, 8]",3.csv,7,"[7, 8]"
3,4.csv,10,"[10, 10]",4.csv,10,"[10, 10]",4.csv,10,"[10, 10]"
4,5.csv,8,"[10, 8, 8]",5.csv,8,"[10, 8, 8]",5.csv,8,"[8, 8]"
5,6.csv,8,"[10, 8]",6.csv,8,"[10, 8]",6.csv,8,"[8, 8]"
6,7.csv,3,"[3, 3]",7.csv,3,"[3, 3]",7.csv,3,"[3, 3]"
7,8.csv,1,"[1, 1, 1]",8.csv,1,"[1, 1, 1]",8.csv,1,"[1, 1]"
8,9.csv,4,"[4, 4]",9.csv,4,"[4, 4]",9.csv,4,"[4, 4]"
9,10.csv,7,"[7, 7, 7]",10.csv,7,"[7, 7, 7]",10.csv,7,"[7, 7]"


In [75]:
# Persist the side-by-side comparison table.
all_preds.to_csv("../all_3_pred.csv", index=False)

In [76]:
# Filename -> label mapping.
# NOTE(review): nothing in the visible notebook reads this dict (combine() does
# not apply it) — presumably intended as manual overrides; confirm whether it
# should be applied to final_prediction.
manual_change = {"43.csv": 10, "44.csv": 4}

In [83]:
def combine():
    """Ensemble the three models by per-file majority vote.

    Reads the module-level ``predictions1``, ``predictions2`` and
    ``predictions3`` frames, which must be row-aligned (same files in the same
    order).  For each row the most frequent of the three ``prediction`` values
    is chosen via ``get_mode``.

    NOTE(review): the ``manual_change`` dict defined in the notebook is not
    applied here — confirm whether that is intended.

    Returns
    -------
    DataFrame with columns ``segment_id`` (the filename) and ``Label`` (int).
    """
    records = []
    for i in range(len(predictions1)):
        filename = predictions1.loc[i, "filename"]
        model_preds = [
            predictions1.loc[i, "prediction"],
            predictions2.loc[i, "prediction"],
            predictions3.loc[i, "prediction"],
        ]
        records.append({"segment_id": filename, "Label": get_mode(model_preds)})
    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # row-by-row appending is quadratic.
    final_prediction = pd.DataFrame(records, columns=["segment_id", "Label"])
    final_prediction["Label"] = final_prediction["Label"].astype(int)
    return final_prediction

In [84]:
# Majority vote across predictions1/2/3 (read as globals inside combine()).
final_prediction = combine()

In [87]:
# Persist the ensemble labels.
final_prediction.to_csv("../final_prediction.csv", index=False)

In [88]:
# Sanity check: how many test files were assigned to each label.
final_prediction["Label"].value_counts()

3     10
9      6
10     6
1      6
6      6
8      5
7      3
4      3
5      2
2      1
Name: Label, dtype: int64

# Driver Code

In [19]:
# Driver configuration: 75% overlap between consecutive 3000-row windows.
overlap_rate, window_size = 0.75, 3000
# NOTE(review): current_best is not defined anywhere in the visible notebook, and
# this cell's execution count (In [19]) predates the cells above — presumably a
# leftover from an earlier session. It will raise NameError on a fresh
# Restart & Run All unless a plugin factory with that name is provided; confirm.
get_streams, get_features, colnames = current_best()
ft_names = [i+j for i in colnames for j in ["_std", "_max", "_min", "_med"]]

In [21]:
# Build the training set with whichever plugin is currently bound to get_streams / get_features.
X_train, y_train, _ = get_trainset(overlap_rate, window_size, verbose=True)

loading the data...	

100%|██████████| 151/151 [00:18<00:00,  8.37it/s]


extracting the features...  

100%|██████████| 736/736 [02:31<00:00,  4.86it/s]


In [28]:
# Extra-trees classifier for the driver run. No random_state is passed, so repeated runs can differ.
model = ETC(n_estimators=1200, n_jobs=-1)
# trainer() prints accuracy on the training data itself.
model, scalar = trainer(model, X_train, y_train)

Score: 1.0


In [None]:
# Per-file test predictions for the driver model.
# NOTE(review): this cell shows no execution count (In [None]) while the next cell
# saves `predictions` — confirm the saved CSV came from this exact configuration.
predictions = result(model, scalar, overlap_rate, window_size)

In [32]:
# Persist the driver model's per-file predictions.
predictions.to_csv("../demo_pred_etc.csv", index=False)