# 3. Feature extraction and feature-level fusion for multimodal classification

<a id='task3'></a>
<div class=" alert alert-warning">
    <b>Assignment.</b> <b>Task 3.</b>

Prepare new feature sets for each modality and combine them into a single feature representation. Compare two classifiers from scikit-learn. Train the classifiers using the joint feature representation. Evaluate and compare the results using the testing dataset. Do the subtasks given below.
<br>
<br>
<p> <b>3.1</b> Similar to task 2.1, calculate PCA for the accelerometer, but now choose the 10 largest principal components as a 10-dim feature vector for each window. In addition, for each window calculate the mean and standard deviation of each of the three acc channels as statistical features, resulting in a 6-dimensional vector. Combine these into a 36-dimensional final feature vector (3 &times; 10 PCA features + 6 statistical features).</p>
<br>
<p> <b>3.2</b> Similar to task 2.2, calculate the PCA for depth images using same setup, but now choose the 10 largest principal components as feature vector. Concatenate the image sequence forming 50-dimensional feature vector from each windowed example.</p>
<br>
<p> <b>3.3</b> Form a joint feature representation of the features extracted in 3.1 and 3.2, resulting in an 86-dimensional feature vector for each example. Normalize the data between 0-1 using the training dataset. Use a support vector machine (SVM) with an RBF kernel and a Gaussian naive Bayes classifier (use default parameter values for both classifiers). Train the classifiers, then evaluate and compare them on the test set using confusion matrices and F1 scores.</p>
<br>
Document your work, evaluate the results, and analyse the outcomes in each subtasks 3.1-3.3.

</div>

In [None]:
# Task 3 imports
import pandas as pd
import numpy as np
import time
from copy import deepcopy
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
import importlib, utilities.fun_three, utilities.fun_two, utilities.fun_one
importlib.reload(utilities.fun_three)
importlib.reload(utilities.fun_two)
importlib.reload(utilities.fun_one)
from utilities.fun_three import reshape_dataframe, Normalizer, merge_dataframes
from utilities.fun_two import *
from utilities.fun_one import visualize

In [None]:
# Get the training and testing data stored by the notebook slovo_one:
%store -r training_records
df_records_windowed = training_records
%store -r testing_records
# NOTE(review): this rebinds df_records_windowed, so after this cell the name
# refers to testing_records, not training_records — confirm this is intended.
df_records_windowed = testing_records

## Task 3.1

In [None]:
start_time = time.time()
# 3.1
### Your code begins here ###


def _stack_act_features(per_channel_pca):
    """Build the per-window accelerometer feature matrix.

    Concatenates column-wise the three arrays ``per_channel_pca[0]``,
    ``per_channel_pca[1]`` and ``per_channel_pca[2]``, followed by the
    row-wise mean and standard deviation of each of them, in the order
    mean0, std0, mean1, std1, mean2, std2.

    NOTE(review): the statistics are computed over the PCA outputs, whereas
    the task text asks for the mean/std of the acc channels of each window —
    confirm that this is the intended feature definition.
    """
    blocks = [per_channel_pca[channel] for channel in range(3)]
    stats = []
    for block in blocks:
        stats.append(np.mean(block, axis=1).reshape(-1, 1))
        stats.append(np.std(block, axis=1).reshape(-1, 1))
    return np.concatenate(blocks + stats, axis=1)


# Resample the accelerometer data and collect the integer exercise labels of
# the 'act' rows, first for the training set and then for the testing set.
resample_samples = 125
act_train = pd.DataFrame()
act_train['df'] = acccelerometer_resample(training_records, resample_samples)
train_labels = training_records[training_records.sensor_code == 'act'].exercise_id.apply(int)
act_test = pd.DataFrame()
act_test['df'] = acccelerometer_resample(testing_records, resample_samples)
test_labels = testing_records[testing_records.sensor_code == 'act'].exercise_id.apply(int)

# Standardize both sets with a Standardizer fitted on the training data only.
act_s = Standardizer()
act_s.fit(act_train)
act_train['df'] = act_s.transform(act_train)
act_test['df'] = act_s.transform(act_test)

# Fit the PCA on the training data only and project both sets with it.
n_components = 10
pca = PcaActApplier(n_components)
pca.fit(act_train['df'])
act_pca_train = pca.transform(act_train['df'])
act_pca_test = pca.transform(act_test['df'])

# PCA outputs plus their mean/std columns, one row per window.
act_features = _stack_act_features(act_pca_train)
act_test_features = _stack_act_features(act_pca_test)

# Copy the 'act' rows and replace their "df" cell with the window's feature
# row, given a leading axis by np.expand_dims(..., axis=0).
pca_act_training_records_reshaped = deepcopy(training_records[training_records.sensor_code == 'act'])
pca_act_training_records_reshaped["df"] = list(act_features)
pca_act_training_records_reshaped["df"] = pca_act_training_records_reshaped["df"].apply(np.expand_dims, axis=0)

pca_act_testing_records_reshaped = deepcopy(testing_records[testing_records.sensor_code == 'act'])
pca_act_testing_records_reshaped["df"] = list(act_test_features)
pca_act_testing_records_reshaped["df"] = pca_act_testing_records_reshaped["df"].apply(np.expand_dims, axis=0)

end_time = time.time()
print("Execution Time: ", end_time - start_time)
### Your code ends here ###

## Task 3.2

In [None]:
# 3.2
### Your code begins here ###
# Work only with the depth-camera rows (sensor == 'dc').
# NOTE(review): Task 3.1 filters on the `sensor_code` column while this cell
# filters on `sensor` — confirm that both columns exist in the records.
dc_train_records = training_records[training_records['sensor'] == 'dc']
dc_test_records = testing_records[testing_records['sensor'] == 'dc']

# Initialize PCA for the depth sensor; the argument is the number of
# components to keep (task 3.2 asks for the 10 largest).
reduced_dimensions = 10
pca_applier = PcaDcApplier(reduced_dimensions)

In [None]:
start_time = time.time()

# Standardize the dc data. The Standardizer is fitted on the training records
# only and then applied to both the training and the testing records.
standardizer = Standardizer()
standardizer.fit(dc_train_records)
standardized_dc_train_records = standardizer.transform(dc_train_records)
standardized_dc_test_records = standardizer.transform(dc_test_records)

end_time = time.time()
print("Execution Time: ", end_time - start_time)

In [None]:
start_time = time.time()

# Fit the PCA on the standardized training records only, then transform both
# the training and the testing records with the fitted applier.
pca_applier.fit(standardized_dc_train_records)

pca_dc_train_records = pca_applier.transform(standardized_dc_train_records)
pca_dc_test_records = pca_applier.transform(standardized_dc_test_records)

end_time = time.time()
print("Execution Time: ", end_time - start_time)


In [None]:
start_time = time.time()

# Reshape the dataframes for both train and test datasets. Each "df" cell is
# passed through reshape_dataframe on a deep copy of the PCA output, and the
# reshaped cells are then stacked along axis 0 into one feature matrix.
# NOTE(review): presumably one row per windowed example — confirm against
# reshape_dataframe.
pca_dc_train_records_reshaped = deepcopy(pca_dc_train_records)
pca_dc_train_records_reshaped["df"] = pca_dc_train_records_reshaped["df"].apply(reshape_dataframe)
dc_features = np.concatenate(pca_dc_train_records_reshaped['df'].values, axis=0)

pca_dc_test_records_reshaped = deepcopy(pca_dc_test_records)
pca_dc_test_records_reshaped["df"] = pca_dc_test_records_reshaped["df"].apply(reshape_dataframe)
dc_test_features = np.concatenate(pca_dc_test_records_reshaped['df'].values, axis=0)

end_time = time.time()
print("Execution Time: ", end_time - start_time)
### Your code ends here ###

## 3.3

In [None]:
# Reload utilities.fun_one and re-import `visualize` so that the name is bound
# to the function from the freshly reloaded module.
import importlib, utilities
importlib.reload(utilities.fun_one)
from utilities.fun_one import visualize

In [None]:
start_time = time.time()
# 3.3
### Your code begins here ###

# Depth and accelerometer rows are paired when all of these columns match.
window_keys = ['subject_id', 'exercise_id', 'trial', 'window_idx']

# Join the two modalities, then let merge_dataframes combine each joined row.
train_pairs = pca_dc_train_records_reshaped.merge(pca_act_training_records_reshaped, on=window_keys)
train_records_merged = train_pairs.apply(merge_dataframes, axis=1)

test_pairs = pca_dc_test_records_reshaped.merge(pca_act_testing_records_reshaped, on=window_keys)
test_records_merged = test_pairs.apply(merge_dataframes, axis=1)

end_time = time.time()
print("Execution Time: ", end_time - start_time)

In [None]:
# Save the merged train/test records with the IPython %store magic for use in
# other notebooks:
%store train_records_merged
%store test_records_merged

In [None]:
start_time = time.time()

# Normalize the features. The Normalizer is fitted on the merged training
# records only and then applied to both sets, so that no statistics of the
# test set are used for scaling.
normalizer = Normalizer()
normalizer.fit(train_records_merged)

normalized_train_records = normalizer.transform(train_records_merged)
normalized_test_records = normalizer.transform(test_records_merged)

end_time = time.time()
print("Execution Time: ", end_time - start_time)

In [None]:
start_time = time.time()

# Initialize and fit classifiers on training data, both with default
# parameters (scikit-learn's SVC uses the RBF kernel by default).
gnb_classifier = GaussianNB()
svm_classifier = SVC()

# Build the training feature matrix and label vector once and reuse them for
# both classifiers instead of re-concatenating the same arrays per fit.
train_features = np.concatenate(normalized_train_records['df_normalized'].values, axis=0)
train_targets = normalized_train_records['exercise_id'].values

gnb_classifier.fit(train_features, train_targets)
svm_classifier.fit(train_features, train_targets)

end_time = time.time()
print("Execution Time: ", end_time - start_time)

In [None]:
start_time = time.time()

# Get classifier predictions for test and train datasets. Each feature matrix
# is concatenated once and shared by both classifiers.
train_matrix = np.concatenate(normalized_train_records['df_normalized'].values, axis=0)
test_matrix = np.concatenate(normalized_test_records['df_normalized'].values, axis=0)

gnb_est_train_labels = gnb_classifier.predict(train_matrix)
gnb_est_test_labels = gnb_classifier.predict(test_matrix)

svm_est_train_labels = svm_classifier.predict(train_matrix)
svm_est_test_labels = svm_classifier.predict(test_matrix)

end_time = time.time()
print("Execution Time: ", end_time - start_time)

In [None]:
# Visualize classification results. First for the Gaussian naive Bayes
# classifier, then for the SVM classifier.
# The ground-truth labels are the same for both classifiers, so extract them once.
true_train_labels = normalized_train_records['exercise_id'].values
true_test_labels = normalized_test_records['exercise_id'].values

visualize(gnb_est_train_labels,
          true_train_labels,
          gnb_est_test_labels,
          true_test_labels,
          main_title="GNB merged")

visualize(svm_est_train_labels,
          true_train_labels,
          svm_est_test_labels,
          true_test_labels,
          main_title="SVM merged")

### Your code ends here ###