# Using the arus package to process the SPADES lab dataset

## Reference: processed dataset from the original MUSS project

**Reference dataset version: MUSS 1.1.6**

In [1]:
from arus import dataset
from arus import env
import os
# Identifier reused below for both the archive file name and the cache folder check.
name = 'spades_lab_reference'
# Hand arus the MUSS v1.1.6 release archive URL together with the local archive
# file name; whatever it returns is kept in `path` for the decompression step.
path = dataset.download_dataset(
    'https://github.com/qutang/MUSS/releases/download/v1.1.6/sample_reproduction_results.tar.gz',
    f'{name}.tar.gz',
)

In [2]:
# Reuse the folder named after `name` inside the arus data home when it is
# already on disk; otherwise ask arus to decompress the downloaded archive.
# NOTE(review): the existence check looks for `name`, while decompression is
# given 'sample_results_1_1_6' -- confirm against arus.dataset.decompress_dataset
# that both refer to the same location.
extracted_dir = os.path.join(env.get_data_home(), name)
decompressed_path = (
    extracted_dir
    if os.path.exists(extracted_dir)
    else dataset.decompress_dataset(path, 'sample_results_1_1_6')
)

In [3]:
import pandas as pd
# Locate the reference feature and class tables inside the extracted folder.
data_filepath = os.path.join(decompressed_path, 'muss.feature.csv')
class_filepath = os.path.join(decompressed_path, 'muss.class.csv')
# Read both CSVs the same way, parsing their first three columns as dates.
d0, d0_class = (
    pd.read_csv(csv_path, parse_dates=[0, 1, 2])
    for csv_path in (data_filepath, class_filepath)
)

In [4]:
# Attach the class labels to the reference features. No key is given, so pandas
# joins on every column the two tables share; row order is left unsorted.
d0 = pd.merge(d0, d0_class, sort=False)

## Processed dataset using the arus package

In [5]:
# Ask arus for the 'spades_lab' dataset and pick its processed MUSS entry.
spades_lab = dataset.load_dataset('spades_lab')
arus_muss_csv = spades_lab['processed']['muss']
# Same parsing as the reference tables: the first three columns are read as dates.
d1 = pd.read_csv(arus_muss_csv, parse_dates=[0, 1, 2])

## Comparison

In [6]:
# Drop rows labelled exactly 'Unknown' or 'Transition' from both tables.
# The label column has a different name in each table.
excluded_labels = ['Unknown', 'Transition']
keep_reference = ~d0['FINEST_ACTIVITIES'].isin(excluded_labels)
keep_arus = ~d1['CLASS_LABEL_SPADESInLab'].isin(excluded_labels)
d0_f = d0.loc[keep_reference, :]
d1_f = d1.loc[keep_arus, :]

### Compare sample size

In [7]:
# Number of retained reference rows per participant, as a two-column frame
# (PID, c0_count). The former rename of 'SENSOR_PLACEMENT' was dropped: this
# frame only ever has the PID and c0_count columns, so the rename did nothing.
c0 = d0_f.groupby(by=['PID']).size().reset_index(name='c0_count')

In [8]:
# Number of retained arus rows per participant, as a two-column frame
# (PID, c1_count).
c1 = d1_f.groupby(by=['PID']).size().reset_index(name='c1_count')

In [9]:
# Line up the per-participant counts from both pipelines. The join key is named
# explicitly, and an outer join keeps participants that appear in only one
# dataset (their missing count shows up as NaN) instead of silently dropping
# them, which would hide exactly the discrepancies this comparison is for.
comparison = c0.merge(c1, how='outer', on='PID', sort=False)
comparison

Unnamed: 0,PID,c0_count,c1_count
0,SPADES_1,609,609
1,SPADES_11,1092,1092
2,SPADES_12,1029,1029
3,SPADES_13,735,735
4,SPADES_15,1253,1253
5,SPADES_16,1323,1323
6,SPADES_17,1274,1274
7,SPADES_18,1057,1057
8,SPADES_19,1449,1449
9,SPADES_2,791,791


### Compare feature values

### Investigation by PID

In [10]:
# Restrict both filtered tables to participant SPADES_1. The reference subset
# gets its SENSOR_PLACEMENT column renamed so it shares the PLACEMENT key.
is_ref_pid = d0_f['PID'] == 'SPADES_1'
is_arus_pid = d1_f['PID'] == 'SPADES_1'
d0_f_1 = d0_f.loc[is_ref_pid, :].rename(columns={'SENSOR_PLACEMENT': 'PLACEMENT'})
d1_f_1 = d1_f.loc[is_arus_pid, :]
# Left join: every reference row is kept and paired with the arus row that has
# the same placement and time window. Columns present in both tables get the
# _ref / _arus suffixes.
merged_f_1 = pd.merge(
    d0_f_1,
    d1_f_1,
    how='left',
    on=['PLACEMENT', 'START_TIME', 'STOP_TIME'],
    suffixes=('_ref', '_arus'),
)