# Practical Session 2: Transfer Learning for Activity Recognition 

`Librerires`

In [1]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import mode
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.utils.multiclass import unique_labels
from sklearn.gaussian_process.kernels import RBF
from sklearn.preprocessing import StandardScaler

`Data and files`

In [2]:
path = 'C:\\Users\\s7277028\\Documents\\ARSSD\\Practicas\\Practica1\\OpportunityUCIDataset\\dataset\\' 
header_path = 'C:\\Users\\s7277028\\Documents\\ARSSD\\Practicas\\Practica1\\header.csv' 
header=pd.read_csv(header_path, names=['column',''])['column'].values
users = range(1,5)
trials = range(1,7)
all_data = pd.DataFrame()

`Read Data`

In [3]:
for user in users:
    for trial in trials:
        if trial==6:
            filePath= path+'S'+str(user)+'-Drill.dat'
        else:
            filePath = path+'S'+str(user)+'-ADL'+str(trial)+'.dat'
        sadl1 = pd.read_csv(filePath, sep=' ', header=None)
        data = sadl1.iloc[:, :243]
        data.columns = header
        data=data[data.columns[np.r_[0:1,16:19,76:79,50:53]]]
        labels = sadl1.iloc[:,243]
        #find and remove rows with all nulls
        idx=data.index[data.isnull().all(1)]
        data = data[~data.index.isin(idx)]
        labels = labels[~labels.index.isin(idx)]
        
        #fill missing values
        data = data.fillna(method='ffill',axis=1)
        data['user'] = user
        data['trial'] = trial
        
        #Pre-processing: filtering
        #only data columns
        columns = data.columns[~data.columns.isin(['user', 'trial', 'MILLISEC'])]
        filtered_data = data[columns].rolling(11).median()
        filtered_data['MILLISEC'] = data.MILLISEC
        
        #separate train and test'

        #segmentation

        filtered_data['time']=pd.to_datetime(data.MILLISEC, unit='ms')
        filtered_data.index=filtered_data.time
        keep = filtered_data.time.dt.microsecond/1000 %500
        keep = keep - keep.shift() < 0
        
        #Feature extraction - only for first 132 columns
        means = filtered_data[columns].rolling('1S').mean()[keep]
        means.columns = [str(col) + '_mean' for col in means.columns]
        variances = filtered_data[columns].rolling('1S').var()[keep]
        variances.columns = [str(col) + '_var' for col in variances.columns]

        #talk about apply function
        labels.index = filtered_data.time
        mode_labels = labels.rolling('1S').apply(lambda x:mode(x)[0])[keep]

        #all features
        all_features = pd.concat([means, variances], axis=1)
        all_features['label'] = mode_labels 
        all_features['user'] = user
        all_features['trial'] = trial
        all_data = pd.concat([all_data, all_features])



In [34]:
all_data.columns

Index(['Accelerometer_BACK_accX_mean', 'Accelerometer_BACK_accY_mean',
       'Accelerometer_BACK_accZ_mean', 'InertialMeasurementUnit_LUA_accX_mean',
       'InertialMeasurementUnit_LUA_accY_mean',
       'InertialMeasurementUnit_LUA_accZ_mean',
       'InertialMeasurementUnit_RUA_accX_mean',
       'InertialMeasurementUnit_RUA_accY_mean',
       'InertialMeasurementUnit_RUA_accZ_mean', 'Accelerometer_BACK_accX_var',
       'Accelerometer_BACK_accY_var', 'Accelerometer_BACK_accZ_var',
       'InertialMeasurementUnit_LUA_accX_var',
       'InertialMeasurementUnit_LUA_accY_var',
       'InertialMeasurementUnit_LUA_accZ_var',
       'InertialMeasurementUnit_RUA_accX_var',
       'InertialMeasurementUnit_RUA_accY_var',
       'InertialMeasurementUnit_RUA_accZ_var', 'label', 'user', 'trial'],
      dtype='object')

## Using the data from trials 1, 2, 3 and Drill Session as training data and the data from trials 4 and 5 as test perform two initial evaluations:

In [14]:
#Data from trials 1,2,3 and drill with all users
all_data_trials_1236 = all_data[all_data.trial.isin(['1','2','3','6'])]
# #Data from user 4 with all trials
all_data_trials_45 = all_data[all_data.trial.isin(['4','6'])]
print("users: ", dict.fromkeys(all_data_trials_45.user),"| trial: ",dict.fromkeys(all_data_trials_45.trial))
print("users: ",dict.fromkeys(all_data_trials_1236.user),"| trial: ",dict.fromkeys(all_data_trials_1236.trial))

users:  {1: None, 2: None, 3: None, 4: None} | trial:  {4: None, 6: None}
users:  {1: None, 2: None, 3: None, 4: None} | trial:  {1: None, 2: None, 3: None, 6: None}


`sensors`

In [15]:
rua_sensors = ['InertialMeasurementUnit_RUA_accX_mean',
       'InertialMeasurementUnit_RUA_accX_var',
       'InertialMeasurementUnit_RUA_accY_mean',
       'InertialMeasurementUnit_RUA_accY_var',
       'InertialMeasurementUnit_LUA_accZ_mean',
       'InertialMeasurementUnit_LUA_accZ_var','label']
lua_sensors = ['InertialMeasurementUnit_LUA_accX_mean',
       'InertialMeasurementUnit_LUA_accX_var',
       'InertialMeasurementUnit_LUA_accY_mean',
       'InertialMeasurementUnit_LUA_accY_var',
       'InertialMeasurementUnit_LUA_accZ_mean',
       'InertialMeasurementUnit_LUA_accZ_var', 'label']

 ## Original and transfer tasks

In [16]:
original_task = all_data[lua_sensors]
transfer_task = all_data[rua_sensors]
transfer_task_trials_1236 = all_data_trials_1236[rua_sensors]

## 1. Use data from the RUA sensor in both training and testing data

In [17]:
#Data from RUA sensor for training 
rua_training = all_data_trials_1236[rua_sensors]
#Data from RUA sensor for testing 
rua_testing = all_data_trials_45[rua_sensors]

## 2. Use data from the LUA sensor as training and data from the RUA sensor as testing. 

In [18]:
#Data from back sensor for training 
lua_training = all_data_trials_1236[lua_sensors]
#Data from back sensor for testing 
rua_testing = all_data_trials_45[rua_sensors]

# Transfer using probability distribution adaptation

### Step 1: 
### Scale the data of the original task sensor using a standard scaler to set mean 0 and std 1 https://scikitlearn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html

In [19]:
scaler = StandardScaler()
print(scaler.fit(original_task))
print(scaler.mean_)

X_train = train_data.loc[:,~train_data.columns.isin(['user', 'trial','label'])]

StandardScaler(copy=True, with_mean=True, with_std=True)
[1.94177855e+04 2.18834496e+08 2.00863024e+04 2.18569782e+08
 2.04571760e+04 2.18483902e+08 1.63153172e+00]


NameError: name 'train_data' is not defined

In [None]:
scaled_original_task = scaler.transform(original_task)
scaled_original_task = pd.DataFrame(scaled_original_task, index=original_task.index, columns=original_task.columns)

## 2. Find the mean and std of the transfer sensor (trials 1, 2, 3and Drill ) 

In [10]:
for j in range(0, len(transfer_task_trials_1236.columns)):
    print(transfer_task_trials_1236.columns[j] + " // mean: " , transfer_task_trials_1236[transfer_task_trials_1236.columns[j]].mean(), "// std: ", transfer_task_trials_1236[transfer_task_trials_1236.columns[j]].std())

InertialMeasurementUnit_RUA_accX_mean // mean:  23159.58826429697 // std:  180408.51799130425
InertialMeasurementUnit_RUA_accX_var // mean:  236628738.69885087 // std:  11642425621.824337
InertialMeasurementUnit_RUA_accY_mean // mean:  24122.07514511405 // std:  180281.48467186422
InertialMeasurementUnit_RUA_accY_var // mean:  236332852.33198577 // std:  11630247143.49956
InertialMeasurementUnit_LUA_accZ_mean // mean:  24193.05689198367 // std:  180271.98216335406
InertialMeasurementUnit_LUA_accZ_var // mean:  236282499.35900232 // std:  11628590516.36796
label // mean:  1.6009500626285067 // std:  1.32616813349937


## 3. Multiply the scaled sensor data of the original task by the std of the transfer sensor and add the mean. 

In [11]:
for i in range(0, len(transfer_task_trials_1236.columns)):
    mean = transfer_task_trials_1236[transfer_task_trials_1236.columns[i]].mean()
    std = transfer_task_trials_1236[transfer_task_trials_1236.columns[i]].std()
    for j in range(0, len(scaled_original_task[scaled_original_task.columns[0]])):
        scaled_original_task[scaled_original_task.columns[i]][j] = (scaled_original_task[scaled_original_task.columns[i]][j]  * std) + mean

NameError: name 'scaled_original_task' is not defined

In [12]:
scaled_original_task['user'] = all_data.user
scaled_original_task['trial'] = all_data.trial
scaled_original_task['label'] = all_data.label
transfer_task['user'] = all_data.user
transfer_task['trial'] = all_data.trial
transfer_task['label'] = all_data.label

NameError: name 'scaled_original_task' is not defined

In [13]:
scaled_original_task.label.unique()

NameError: name 'scaled_original_task' is not defined

## Train a classifier with the transformed data and test with the RUA sensor. 

`Using RUA Sensor as training and testing data`

In [166]:
train_data = scaled_original_task
test_data = transfer_task

# Dedife train data
X_train = train_data.loc[:,~train_data.columns.isin(['user', 'trial','label'])]
y_train = train_data.loc[:,train_data.columns.isin(['label'])]

#Define test data
X_test = test_data.loc[:,~test_data.columns.isin(['user', 'trial','label'])]
y_test = test_data.loc[:,test_data.columns.isin(['label'])]

In [167]:
clf = KNeighborsClassifier(3)
clf.fit(X_train, y_train.values.ravel())
score = clf.score(X_test, y_test)
print("Score: ", score)
y_pred = clf.predict(X_test)


Score:  0.16123075861116865


In [168]:
y_pred

array([4., 4., 4., ..., 0., 0., 0.])

`Using LUA as training and RUA as testing data`

In [169]:
train_data = all_data_trials_1236[lua_sensors]
test_data = all_data_trials_45[rua_sensors]

# Dedife train data
X_train = train_data.loc[:,~train_data.columns.isin(['user', 'trial','label'])]
y_train = train_data.loc[:,train_data.columns.isin(['label'])]

#Define test data
X_test = test_data.loc[:,~test_data.columns.isin(['user', 'trial','label'])]
y_test = test_data.loc[:,test_data.columns.isin(['label'])]

In [170]:
clf = KNeighborsClassifier(3)
clf.fit(X_train, y_train.values.ravel())
score = clf.score(X_test, y_test)
print("Score: ", score)
y_pred = clf.predict(X_test)

Score:  0.4174380675306356


In [175]:
y_pred

array([1., 1., 1., ..., 0., 0., 0.])

# Transfer using probability distribution (2)

### Find the mean and std of each class on the transfer sensor and apply the same transformation as in the previous section considering the class. 

In [205]:
transfer_task = all_data[rua_sensors]