<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Imports" data-toc-modified-id="Imports-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Imports</a></span></li><li><span><a href="#Dataset-parsers-and-cleaning-functions" data-toc-modified-id="Dataset-parsers-and-cleaning-functions-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Dataset parsers and cleaning functions</a></span></li><li><span><a href="#User-data-test" data-toc-modified-id="User-data-test-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>User data test</a></span></li><li><span><a href="#Training" data-toc-modified-id="Training-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Training</a></span><ul class="toc-item"><li><span><a href="#Importing-data-(no-cross-validation)" data-toc-modified-id="Importing-data-(no-cross-validation)-4.1"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>Importing data (no cross-validation)</a></span></li></ul></li><li><span><a href="#Single-Class-Classifier:-Training" data-toc-modified-id="Single-Class-Classifier:-Training-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Single Class Classifier: Training</a></span><ul class="toc-item"><li><span><a href="#Model-Choices" data-toc-modified-id="Model-Choices-5.1"><span class="toc-item-num">5.1&nbsp;&nbsp;</span>Model Choices</a></span></li><li><span><a href="#Logistic-Regression" data-toc-modified-id="Logistic-Regression-5.2"><span class="toc-item-num">5.2&nbsp;&nbsp;</span>Logistic Regression</a></span></li><li><span><a href="#Support-Vector" data-toc-modified-id="Support-Vector-5.3"><span class="toc-item-num">5.3&nbsp;&nbsp;</span>Support-Vector</a></span></li><li><span><a href="#Random-Forest" data-toc-modified-id="Random-Forest-5.4"><span class="toc-item-num">5.4&nbsp;&nbsp;</span>Random Forest</a></span></li><li><span><a href="#ANN" data-toc-modified-id="ANN-5.5"><span class="toc-item-num">5.5&nbsp;&nbsp;</span>ANN</a></span></li></ul></li><li><span><a href="#Custom-Cross-Validation" data-toc-modified-id="Custom-Cross-Validation-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Custom Cross-Validation</a></span></li><li><span><a href="#Given-Cross-Validation-splits" data-toc-modified-id="Given-Cross-Validation-splits-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>Given Cross-Validation splits</a></span></li></ul></div>

# Imports

In [17]:
# Required imports
import numpy as np
import pandas as pd
import gzip
import glob
import pickle
from io import StringIO
import importlib.machinery

from sklearn.model_selection import train_test_split as TT_split
# Models
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import accuracy_score,confusion_matrix,balanced_accuracy_score
#from sklearn.model_selection import StratifiedShuffleSplit, ShuffleSplit, KFold, StratifiedKFold
#from sklearn.preprocessing import StandardScaler

random_state=10

In [2]:
# Data location and sample user
prefix='dataset/Extrasensory_uuid_fl_uTAR/'
cross_validation_user_loc='dataset/cv_5_folds/'
user_sample='3600D531-0C55-44A7-AE95-A7A38519464E.features_labels'

# Dataset parsers and cleaning functions

In [3]:
# Dataset parsers for header/ body for CSVs

def parse_header_of_csv(csv_str):
    # Isolate the headline columns:
    headline = csv_str[:csv_str.index('\n')];
    columns = headline.split(',');

    # The first column should be timestamp:
    assert columns[0] == 'timestamp';
    # The last column should be label_source:
    assert columns[-1] == 'label_source';
    
    # Search for the column of the first label:
    for (ci,col) in enumerate(columns):
        if col.startswith('label:'):
            first_label_ind = ci;
            break;
        pass;

    # Feature columns come after timestamp and before the labels:
    feature_names = columns[1:first_label_ind];
    # Then come the labels, till the one-before-last column:
    label_names = columns[first_label_ind:-1];
    for (li,label) in enumerate(label_names):
        # In the CSV the label names appear with prefix 'label:', but we don't need it after reading the data:
        assert label.startswith('label:');
        label_names[li] = label.replace('label:','');
        pass;
    
    return (feature_names,label_names);

def parse_body_of_csv(csv_str,n_features):
    # Read the entire CSV body into a single numeric matrix:
    full_table = np.loadtxt(StringIO(csv_str),delimiter=',',skiprows=1);
    
    # Timestamp is the primary key for the records (examples):
    timestamps = full_table[:,0].astype(int);
    
    # Read the sensor features:
    X = full_table[:,1:(n_features+1)];
    
    # Read the binary label values, and the 'missing label' indicators:
    trinary_labels_mat = full_table[:,(n_features+1):-1]; # This should have values of either 0., 1. or NaN
    M = np.isnan(trinary_labels_mat); # M is the missing label matrix
    Y = np.where(M,0,trinary_labels_mat) > 0.; # Y is the label matrix
    
    return (X,Y,M,timestamps);

def read_user_data(directory):
    print('Reading {}'.format(directory.split("/")[-1]))

    # Read the entire csv file of the user:
    with gzip.open(directory,'rb') as fid:
        csv_str = fid.read();
        csv_str = csv_str.decode("utf-8")
        pass;

    (feature_names,label_names) = parse_header_of_csv(csv_str);
    n_features = len(feature_names);
    (X,Y,M,timestamps) = parse_body_of_csv(csv_str,n_features);

    return (X,Y,M,timestamps,feature_names,label_names);

In [4]:
# Clean labels
def clean_labels(input_label):
    if label.endswith('_'):
        label=label[:-1]+')'
    label=label.replace('__',' (').replace('_',' ')
    label=label[0]+label[1:].lower()
    label=label.replace('i m','I\'m')
    return label

In [5]:
# Get a summary of the sensor feature
'''
# Summarize features as we are only using phone_acc,phone_gyro,phone_mag,phone_loc,phone_audio,
# phone_app,phone_battery,phone_use,phone_callstat,phone_wifi,phone_lf,phone_time
# We are ignoring the use of the smartwatch features. There are definitely features that will be used
# much more (e.g. than the phone_callstat) but we'll leave that up to the ML algorithm.
'''
def summarize_features(feature_list):
    summary_feature_list=np.empty_like(feature_list)
    for (ind,feature) in enumerate(feature_list):
        if feature.startswith('raw_acc'):
            summary_feature_list[ind]='phone_acc' 
        if feature.startswith('proc_gyro'):
            summary_feature_list[ind]='phone_gyro'
        if feature.startswith('raw_magnet'):
            summary_feature_list[ind]='phone_mag'
        if feature.startswith('watch_acc'):
            summary_feature_list[ind]='watch_acc'
        if feature.startswith('watch_heading'):
            summary_feature_list[ind]='watch_dir'
        if feature.startswith('location'):
            summary_feature_list[ind]='phone_loc'
        if feature.startswith('audio_naive'):
            summary_feature_list[ind]='phone_audio'
        if feature.startswith('discrete:app_state'):
            summary_feature_list[ind]='phone_app'
        if feature.startswith('discrete:battery'):
            summary_feature_list[ind]='phone_battery'
        if feature.startswith('discrete:on'):
            summary_feature_list[ind]='phone_use'
        if feature.startswith('discrete:ringer'):
            summary_feature_list[ind]='phone_callstat'
        if feature.startswith('discrete:wifi'):
            summary_feature_list[ind]='phone_wifi'
        if feature.startswith('lf'):
            summary_feature_list[ind]='phone_lf'
        if feature.startswith('discrete:time'):
            summary_feature_list[ind]='phone_time'

    return summary_feature_list

# User data test

In [6]:
# Reading sample data
sample_loc='{}/{}.csv.gz'.format(prefix,user_sample)
x_user,y_user,missedlabel_user,tstamp_user,featurename_user,labelname_user=read_user_data(sample_loc)

Reading 3600D531-0C55-44A7-AE95-A7A38519464E.features_labels.csv.gz


In [7]:
# Dataset summaries for this user
print('Data shape input for user (Len minutes/num examples, num sensors): ',x_user.shape) # Timestep examples, number of sensors
print('Label shape for user (Len minutes, num labels): ',y_user.shape,'\n') # Timestep examples, labels

countlabels_user=np.sum(y_user,axis=0) # Column summary
labelname_countlabel_user=zip(labelname_user,countlabels_user) # Zip together names, counts
labelname_countlabel_user=sorted(labelname_countlabel_user,key=lambda row:row[-1],reverse=True)

print('Sensor feature names:\n')
feature_names=summarize_features(featurename_user)

for i,sensor_feature in enumerate(featurename_user):
    print('{} :: {} ::--> {}\n'.format(i,feature_names[i],sensor_feature))

print('Activities and counts:')
print(labelname_countlabel_user)

Data shape input for user (Len minutes/num examples, num sensors):  (5203, 225)
Label shape for user (Len minutes, num labels):  (5203, 51) 

Sensor feature names:

0 :: phone_acc ::--> raw_acc:magnitude_stats:mean

1 :: phone_acc ::--> raw_acc:magnitude_stats:std

2 :: phone_acc ::--> raw_acc:magnitude_stats:moment3

3 :: phone_acc ::--> raw_acc:magnitude_stats:moment4

4 :: phone_acc ::--> raw_acc:magnitude_stats:percentile25

5 :: phone_acc ::--> raw_acc:magnitude_stats:percentile50

6 :: phone_acc ::--> raw_acc:magnitude_stats:percentile75

7 :: phone_acc ::--> raw_acc:magnitude_stats:value_entropy

8 :: phone_acc ::--> raw_acc:magnitude_stats:time_entropy

9 :: phone_acc ::--> raw_acc:magnitude_spectrum:log_energy_band0

10 :: phone_acc ::--> raw_acc:magnitude_spectrum:log_energy_band1

11 :: phone_acc ::--> raw_acc:magnitude_spectrum:log_energy_band2

12 :: phone_acc ::--> raw_acc:magnitude_spectrum:log_energy_band3

13 :: phone_acc ::--> raw_acc:magnitude_spectrum:log_energy_ban

# Training

In [8]:
# Choosing sensor labels
'''
Summary sensor choices are: phone_acc,phone_gyro,phone_mag,watch_acc,watch_dir,phone_loc,phone_audio,
phone_app,phone_battery,phone_use,phone_callstat,phone_wifi,phone_lf,phone_time
In this project, we aren't using watch_acc,watch_dir (no smartwatch)
'''

def choose_sensors(X_train,used_sensors,summarized_feature_names):
    used_sensor_feature_names=np.zeros(len(summarized_feature_names),dtype=bool)
    # Creates a zero boolean vector of all possible feature names
    for s in used_sensors:
        used_sensor_feature_names=np.logical_or(used_sensor_feature_names,(s==summarized_feature_names))
    X_train=X_train[:,used_sensor_feature_names]
    return X_train

In [9]:
# Returns a standardized (0 mean, 1 variance) dataset
def standardize(X_train):
    mean=np.nanmean(X_train,axis=0).reshape((1,-1))# Ignores NaNs while finding the mean across rows
    standard_dev=np.nanstd(X_train,axis=0) # Ignores NaNs while finding the standard deviation across rows
    standard_dev_nonzero=np.where(standard_dev>0,standard_dev,1.).reshape((1,-1)) # Div zero
    
    X=(X_train-mean)/standard_dev_nonzero
    return X,mean,standard_dev_nonzero   

## Importing data (no cross-validation)

In [10]:
# Reading data in the directory (Stacked)
X_train=np.empty((0,225))
Y_train=np.empty((0,51))
X_test=np.empty((0,225))
Y_test=np.empty((0,51))
M_train=np.empty((0,51))
M_test=np.empty((0,51))

for u_file in glob.glob('{}/*.csv.gz'.format(prefix)):
        x_user,y_user,missed_label_user,tstamp_user,featurename_user,labelname_user=read_user_data(u_file)
        # Split each user data into train-test splits .80-.20
        x_train_u,x_test_u,y_train_u,y_test_u=TT_split(x_user,y_user,test_size=0.20,random_state=random_state)
        m_train,m_test=TT_split(missed_label_user,test_size=0.20,random_state=random_state)
        
        # Stacking data. Will be changed for K-Fold cross-validation
        X_train=np.vstack((X_train,x_train_u))
        Y_train=np.vstack((Y_train,y_train_u))
        X_test=np.vstack((X_test,x_test_u))
        Y_test=np.vstack((Y_test,y_test_u))
    
        # Missing data matrix
        M_train=np.vstack((M_train,m_train))
        M_test=np.vstack((M_test,m_test))
        
assert len(X_train)==len(Y_train)
assert len(X_test)==len(Y_test)

print('\nTraining: X::{} ,Y::{}'.format(X_train.shape,Y_train.shape))
print('Testing: X::{} ,Y::{}'.format(X_test.shape,Y_test.shape))

Reading 3600D531-0C55-44A7-AE95-A7A38519464E.features_labels.csv.gz
Reading 8023FE1A-D3B0-4E2C-A57A-9321B7FC755F.features_labels.csv.gz
Reading 86A4F379-B305-473D-9D83-FC7D800180EF.features_labels.csv.gz
Reading 1538C99F-BA1E-4EFB-A949-6C7C47701B20.features_labels.csv.gz
Reading 11B5EC4D-4133-4289-B475-4E737182A406.features_labels.csv.gz
Reading 74B86067-5D4B-43CF-82CF-341B76BEA0F4.features_labels.csv.gz
Reading 4FC32141-E888-4BFF-8804-12559A491D8C.features_labels.csv.gz
Reading B9724848-C7E2-45F4-9B3F-A1F38D864495.features_labels.csv.gz
Reading A76A5AF5-5A93-4CF2-A16E-62353BB70E8A.features_labels.csv.gz
Reading 96A358A0-FFF2-4239-B93E-C7425B901B47.features_labels.csv.gz
Reading 665514DE-49DC-421F-8DCB-145D0B2609AD.features_labels.csv.gz
Reading BE3CA5A6-A561-4BBD-B7C9-5DF6805400FC.features_labels.csv.gz
Reading A5A30F76-581E-4757-97A2-957553A2C6AA.features_labels.csv.gz
Reading 27E04243-B138-4F40-A164-F40B60165CF3.features_labels.csv.gz
Reading 0E6184E1-90C0-48EE-B25A-F1ECB7B9714E.fea

# Single Class Classifier: Training

In [11]:
# Train model function repeat for every training label
def scc_train(X_train,Y_train,M,all_sensornames,all_labelnames,used_sensors,Y_target,clf,clf_type):
    out_model={}
    X_train=choose_sensors(X_train,used_sensors,all_sensornames)
    print('Current X_train shape is {}'.format(X_train.shape))
    print('Using sensors {}'.format(used_sensors))
    
    X_train,mean,standard_dev_nonzero=standardize(X_train) # Standardizing X_train to have zero mean and unit variance
    index_label=all_labelnames.index(Y_target)
    y_train=Y_train[:,index_label] # Choosing a column of data for training consisting of only one target label
    
    any_missingdata_label=M[:,index_label] # Check if there's data missing for that label
    any_presentdata_features=np.logical_not(any_missingdata_label) # Present data indices boolean
    
    x_train=X_train[any_presentdata_features,:] # Training dataset consists of data that's available ignoring all missing rows
    x_train[np.isnan(x_train)]=0 # Zero imputation of NaN values
    y_train=y_train[any_presentdata_features] # Training labels consists of data that's available
    print('Current X_train shape after removing missing data & zero-impute is {}'.format(x_train.shape))
    
    # Fitting classifier
    clf.fit(x_train,y_train)
    
    out_model['train_mean']=mean
    out_model['train_std_dev_nonzero']=standard_dev_nonzero
    out_model['classifier']=clf
    
    filename='scc_model_{}_label_{}.sav'.format(clf_type,Y_target)
    pickle.dump(clf,open(filename, 'wb'))
    
    return out_model

In [13]:
# Test model function repeat for every training label
def scc_test(X_test,Y_test,M,all_sensornames,all_labelnames,used_sensors,Y_target,trained_model):
   
    X_test=choose_sensors(X_test,used_sensors,all_sensornames)
    print('Current X_test shape is {}'.format(X_test.shape))
    # Normalizing test set the same way training set was normalized
    X=(X_test-trained_model['train_mean'])/trained_model['train_std_dev_nonzero']
       
    index_label=all_labelnames.index(Y_target)
    y_test=Y_test[:,index_label] # Choosing a column of data for training consisting of only one target label
    
    any_missingdata_label=M[:,index_label] # Check if there's data missing for that label
    any_presentdata_features=np.logical_not(any_missingdata_label) # Present data indices boolean
    
    x_test=X[any_presentdata_features,:] # Training dataset consists of data that's available ignoring all missing rows
    x_test[np.isnan(x_test)]=0 # Zero imputation of NaN values
    y_test=y_test[any_presentdata_features] # Training labels consists of data that's available
    
    y_test_predicted=trained_model['classifier'].predict(x_test) # Prediction
    
    tn,fp,fn,tp=confusion_matrix(y_test,y_test_predicted).ravel()
    bal_accuracy=balanced_accuracy_score(y_test,y_test_predicted)
    return tn,fp,fn,tp,bal_accuracy

In [18]:
# Sensor Types, Label Possibilities variables
sensor_types=['phone_acc','phone_gyro','phone_mag','phone_loc','phone_audio',
'phone_app','phone_battery','phone_use','phone_callstat','phone_wifi','phone_lf','phone_time']
label_possibilities=['LOC_home','OR_indoors','PHONE_ON_TABLE','SITTING',
                     'WITH_FRIENDS','LYING_DOWN','SLEEPING','WATCHING_TV',
                     'EATING','PHONE_IN_POCKET','TALKING','DRIVE_-_I_M_A_PASSENGER',
                     'OR_standing','IN_A_CAR','OR_exercise','AT_THE_GYM','FIX_walking',
                     'OR_outside','BATHING_-_SHOWER','DRESSING','DRINKING__ALCOHOL_',
                     'PHONE_IN_HAND','FIX_restaurant','IN_CLASS','PHONE_IN_BAG','IN_A_MEETING',
                     'TOILET','COOKING','FIX_running','BICYCLING','LOC_main_workplace',
                     'ON_A_BUS','DRIVE_-_I_M_THE_DRIVER','STROLLING','CLEANING','DOING_LAUNDRY',
                     'WASHING_DISHES','SURFING_THE_INTERNET','AT_A_PARTY',
                     'COMPUTER_WORK','GROOMING','STAIRS_-_GOING_UP','STAIRS_-_GOING_DOWN',
                     'WITH_CO-WORKERS']

## Model Choices

In [20]:
# Models
clf1=LogisticRegression(n_jobs=-1,warm_start=True,class_weight='balanced') # Account for class imbalance
clf2=SVC()
clf3=RandomForestClassifier(n_jobs=-1,warm_start=True)
clf4=MLPClassifier(hidden_layer_sizes=(100, ),activation='relu',
                   solver='sgd',batch_size=300,learning_rate='invscaling',
                   learning_rate_init=0.01,power_t=0.5,max_iter=50,random_state=random_state,
                   verbose=True,warm_start=True,early_stopping=True, validation_fraction=0.0,
                   epsilon=1e-08, n_iter_no_change=10)

## Logistic Regression

In [16]:
# Loop through label possibilities and train/test logistic regression model
for label in label_possibilities:
    trained_model=scc_train(X_train=X_train,
                            Y_train=Y_train,
                            M=M_train,
                            all_sensornames=feature_names,
                            all_labelnames=labelname_user,
                            used_sensors=sensor_types,
                            Y_target=label,
                            clf=clf1,
                            clf_type='logisticregression')

    tn,fp,fn,tp,bal_accuracy=scc_test(X_test=X_test,
                                           Y_test=Y_test,
                                           M=M_test,
                                           all_sensornames=feature_names,
                                           all_labelnames=labelname_user,
                                           used_sensors=sensor_types,
                                           Y_target=label,
                                           trained_model=trained_model)
    print('*'*50)
    print('Predicting {} label'.format(label))
    print('TP:{}, FP:{}, FN:{}, TP:{}, Balanced Accuracy:{}\n'.format(tn,fp,fn,tp,bal_accuracy))

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (284244, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting LOC_home label
TP:31888, FP:8866, FN:5825, TP:24511, Balanced Accuracy:0.7952173579386683

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (157494, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting OR_indoors label
TP:2184, FP:228, FN:3643, TP:33257, Balanced Accuracy:0.9033731747765239

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (129977, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting PHONE_ON_TABLE label
TP:7849, FP:1646, FN:3546, TP:19405, Balanced Accuracy:0.8360712655936707

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (245191, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting SITTING label
TP:24937, FP:9129, FN:4440, TP:22897, Balanced Accuracy:0.7848014796888974

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (132325, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting WITH_FRIENDS label
TP:21502, FP:6577, FN:1009, TP:4001, Balanced Accuracy:0.7821854030462527

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (242882, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting LYING_DOWN label
TP:36013, FP:3994, FN:2658, TP:18176, Balanced Accuracy:0.8862937766249934

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (228150, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting SLEEPING label
TP:36432, FP:4019, FN:1664, TP:15003, Balanced Accuracy:0.9004036109236037

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (156949, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting WATCHING_TV label
TP:28428, FP:8336, FN:527, TP:2112, Balanced Accuracy:0.7867797958272522

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (237091, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting EATING label
TP:35747, FP:20305, FN:713, TP:2625, Balanced Accuracy:0.712073066664067

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (107578, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting PHONE_IN_POCKET label
TP:18260, FP:3833, FN:653, TP:4040, Balanced Accuracy:0.843681364046484

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (223541, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting TALKING label
TP:33848, FP:14839, FN:1669, TP:5649, Balanced Accuracy:0.7335743020575183

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (104810, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting DRIVE_-_I_M_A_PASSENGER label
TP:23338, FP:2460, FN:36, TP:443, Balanced Accuracy:0.9147435973172655

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (245191, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting OR_standing label
TP:34100, FP:19824, FN:1930, TP:5549, Balanced Accuracy:0.6871577979849898

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (139665, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting IN_A_CAR label
TP:30304, FP:3476, FN:170, TP:1081, Balanced Accuracy:0.8806037940517923

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (192605, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting OR_exercise label
TP:39502, FP:7085, FN:274, TP:1371, Balanced Accuracy:0.8406767989008765

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (35507, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting AT_THE_GYM label
TP:7897, FP:753, FN:21, TP:215, Balanced Accuracy:0.9119824630155775

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (20133, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting SINGING label
TP:4325, FP:534, FN:18, TP:107, Balanced Accuracy:0.8730504218975097

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (245191, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting FIX_walking label
TP:47673, FP:9217, FN:903, TP:3610, Balanced Accuracy:0.8189484766902763

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (121205, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting OR_outside label
TP:25085, FP:2796, FN:241, TP:2171, Balanced Accuracy:0.8998997858286963

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (111000, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting SHOPPING label
TP:23559, FP:3843, FN:45, TP:346, Balanced Accuracy:0.8723326241798021

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (160830, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting BATHING_-_SHOWER label
TP:30098, FP:9800, FN:80, TP:312, Balanced Accuracy:0.7751460100808081

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (166291, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting DRESSING label
TP:32515, FP:8622, FN:72, TP:408, Balanced Accuracy:0.8202038311009554

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (56607, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting DRINKING__ALCOHOL_ label
TP:12711, FP:1155, FN:41, TP:269, Balanced Accuracy:0.8922223307882358

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (113415, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting PHONE_IN_HAND label
TP:19756, FP:5603, FN:688, TP:2241, Balanced Accuracy:0.7720801735019567

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (126026, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting FIX_restaurant label
TP:27142, FP:3992, FN:50, TP:372, Balanced Accuracy:0.8766483176070903

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (87206, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting IN_CLASS label
TP:17837, FP:2798, FN:100, TP:1122, Balanced Accuracy:0.8912860381734273

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (73926, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting PHONE_IN_BAG label
TP:14024, FP:2479, FN:321, TP:1712, Balanced Accuracy:0.8459450753770447

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (184904, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting IN_A_MEETING label
TP:37542, FP:7749, FN:123, TP:895, Balanced Accuracy:0.8540406289491673

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (176173, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting TOILET label
TP:32677, FP:10869, FN:145, TP:409, Balanced Accuracy:0.7443345109474673

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (167448, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting COOKING label
TP:31477, FP:9651, FN:127, TP:643, Balanced Accuracy:0.8002036404560233

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (112675, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting FIX_running label
TP:25539, FP:2432, FN:31, TP:193, Balanced Accuracy:0.8873299737738576

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (108132, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting BICYCLING label
TP:23362, FP:2673, FN:137, TP:879, Balanced Accuracy:0.8812439984636067

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (161367, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting LOC_main_workplace label
TP:28441, FP:5145, FN:823, TP:5951, Balanced Accuracy:0.8626586119376332

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (135229, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting ON_A_BUS label
TP:29359, FP:4067, FN:42, TP:335, Balanced Accuracy:0.8834612059641306

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (129769, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting DRIVE_-_I_M_THE_DRIVER label
TP:27574, FP:3287, FN:200, TP:1434, Balanced Accuracy:0.8855455723866603

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (43024, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting STROLLING label
TP:9358, FP:1215, FN:23, TP:141, Balanced Accuracy:0.8724203735700461

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (127446, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting CLEANING label
TP:23813, FP:7437, FN:158, TP:566, Balanced Accuracy:0.7718919779005524

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (59157, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting DOING_LAUNDRY label
TP:12657, FP:2050, FN:19, TP:89, Balanced Accuracy:0.8423423338344804

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (109159, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting WASHING_DISHES label
TP:21580, FP:5503, FN:38, TP:194, Balanced Accuracy:0.8165083517208276

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (154067, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting SURFING_THE_INTERNET label
TP:25566, FP:9212, FN:923, TP:2915, Balanced Accuracy:0.7473150324648344

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (43633, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting AT_A_PARTY label
TP:10127, FP:510, FN:6, TP:287, Balanced Accuracy:0.9657881674533577

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (189977, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting COMPUTER_WORK label
TP:29505, FP:10357, FN:1416, TP:6304, Balanced Accuracy:0.7783794635534043

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (159027, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting GROOMING label
TP:29733, FP:9407, FN:131, TP:502, Balanced Accuracy:0.7763533061937502

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (75700, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting STAIRS_-_GOING_UP label
TP:16512, FP:2253, FN:20, TP:144, Balanced Accuracy:0.8789924158234388

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (75536, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting STAIRS_-_GOING_DOWN label
TP:16281, FP:2438, FN:20, TP:140, Balanced Accuracy:0.8723789999465783

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (89679, 168)


  " = {}.".format(effective_n_jobs(self.n_jobs)))


Current X_test shape is (75493, 168)
**************************************************
Predicting WITH_CO-WORKERS label
TP:16876, FP:4273, FN:106, TP:1130, Balanced Accuracy:0.8560984162149863



## Support-Vector

In [None]:
# Loop through label possibilities and train/test logistic regression model
for label in label_possibilities:
    trained_model=scc_train(X_train=X_train,
                            Y_train=Y_train,
                            M=M_train,
                            all_sensornames=feature_names,
                            all_labelnames=labelname_user,
                            used_sensors=sensor_types,
                            Y_target=label,
                            clf=clf2,
                            clf_type='svc')

    tn,fp,fn,tp,bal_accuracy=scc_test(X_test=X_test,
                                           Y_test=Y_test,
                                           M=M_test,
                                           all_sensornames=feature_names,
                                           all_labelnames=labelname_user,
                                           used_sensors=sensor_types,
                                           Y_target=label,
                                           trained_model=trained_model)
    print('*'*50)
    print('Predicting {} label'.format(label))
    print('TP:{}, FP:{}, FN:{}, TP:{}, Balanced Accuracy:{}\n'.format(tn,fp,fn,tp,bal_accuracy))

Current X_train shape is (301853, 168)
Using sensors ['phone_acc', 'phone_gyro', 'phone_mag', 'phone_loc', 'phone_audio', 'phone_app', 'phone_battery', 'phone_use', 'phone_callstat', 'phone_wifi', 'phone_lf', 'phone_time']
Current X_train shape after removing missing data & zero-impute is (284244, 168)




## Random Forest

In [None]:
# Loop through label possibilities and train/test logistic regression model
for label in label_possibilities:
    trained_model=scc_train(X_train=X_train,
                            Y_train=Y_train,
                            M=M_train,
                            all_sensornames=feature_names,
                            all_labelnames=labelname_user,
                            used_sensors=sensor_types,
                            Y_target=label,
                            clf=clf3,
                            clf_type='rf')

    tn,fp,fn,tp,bal_accuracy=scc_test(X_test=X_test,
                                           Y_test=Y_test,
                                           M=M_test,
                                           all_sensornames=feature_names,
                                           all_labelnames=labelname_user,
                                           used_sensors=sensor_types,
                                           Y_target=label,
                                           trained_model=trained_model)
    print('*'*50)
    print('Predicting {} label'.format(label))
    print('TP:{}, FP:{}, FN:{}, TP:{}, Balanced Accuracy:{}\n'.format(tn,fp,fn,tp,bal_accuracy))

## ANN

In [None]:
# Loop through label possibilities and train/test logistic regression model
for label in label_possibilities:
    trained_model=scc_train(X_train=X_train,
                            Y_train=Y_train,
                            M=M_train,
                            all_sensornames=feature_names,
                            all_labelnames=labelname_user,
                            used_sensors=sensor_types,
                            Y_target=label,
                            clf=clf4,
                            clf_type='ann')

    tn,fp,fn,tp,bal_accuracy=scc_test(X_test=X_test,
                                           Y_test=Y_test,
                                           M=M_test,
                                           all_sensornames=feature_names,
                                           all_labelnames=labelname_user,
                                           used_sensors=sensor_types,
                                           Y_target=label,
                                           trained_model=trained_model)
    print('*'*50)
    print('Predicting {} label'.format(label))
    print('TP:{}, FP:{}, FN:{}, TP:{}, Balanced Accuracy:{}\n'.format(tn,fp,fn,tp,bal_accuracy))

# Custom Cross-Validation

In [None]:
# Load cross-validation user IDs
def cross_validation_id(location):

# Given Cross-Validation splits