# Practical Session 2: Transfer Learning for Activity Recognition 

`Librerires`

In [194]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import mode
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.utils.multiclass import unique_labels
from sklearn.gaussian_process.kernels import RBF
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

`Data and files`

In [195]:
path = 'C:\\Users\\s7277028\\Documents\\ARSSD\\Practicas\\Practica1\\OpportunityUCIDataset\\dataset\\' 
header_path = 'C:\\Users\\s7277028\\Documents\\ARSSD\\Practicas\\Practica1\\header.csv' 
header=pd.read_csv(header_path, names=['column',''])['column'].values
users = range(1,5)
trials = range(1,7)
all_data = pd.DataFrame()

`Read Data`

In [196]:
for user in users:
    for trial in trials:
        if trial==6:
            filePath= path+'S'+str(user)+'-Drill.dat'
        else:
            filePath = path+'S'+str(user)+'-ADL'+str(trial)+'.dat'
        sadl1 = pd.read_csv(filePath, sep=' ', header=None)
        data = sadl1.iloc[:, :243]
        data.columns = header
        data=data[data.columns[np.r_[0,50:53,76:79]]]
        labels = sadl1.iloc[:,243]
        #find and remove rows with all nulls
        idx=data.index[data.isnull().all(1)]
        data = data[~data.index.isin(idx)]
        labels = labels[~labels.index.isin(idx)]
        
        #fill missing values
        data = data.fillna(method='ffill',axis=1)
        data['user'] = user
        data['trial'] = trial
        
        #Pre-processing: filtering
        #only data columns
        columns = data.columns[~data.columns.isin(['user', 'trial', 'MILLISEC'])]
        filtered_data = data[columns].rolling(11).median()
        filtered_data['MILLISEC'] = data.MILLISEC
        
        #separate train and test'

        #segmentation

        filtered_data['time']=pd.to_datetime(data.MILLISEC, unit='ms')
        filtered_data.index=filtered_data.time
        keep = filtered_data.time.dt.microsecond/1000 %500
        keep = keep - keep.shift() < 0
        
        #Feature extraction - only for first 132 columns
        means = filtered_data[columns].rolling('1S').mean()[keep]
        means.columns = [str(col) + '_mean' for col in means.columns]
        variances = filtered_data[columns].rolling('1S').var()[keep]
        variances.columns = [str(col) + '_var' for col in variances.columns]

        #talk about apply function
        labels.index = filtered_data.time
        mode_labels = labels.rolling('1S').apply(lambda x:mode(x)[0])[keep]

        #all features
        all_features = pd.concat([means, variances], axis=1)
        all_features['label'] = mode_labels 
        all_features['user'] = user
        all_features['trial'] = trial
        all_data = pd.concat([all_data, all_features])



In [197]:
all_data.columns

Index(['InertialMeasurementUnit_RUA_accX_mean',
       'InertialMeasurementUnit_RUA_accY_mean',
       'InertialMeasurementUnit_RUA_accZ_mean',
       'InertialMeasurementUnit_LUA_accX_mean',
       'InertialMeasurementUnit_LUA_accY_mean',
       'InertialMeasurementUnit_LUA_accZ_mean',
       'InertialMeasurementUnit_RUA_accX_var',
       'InertialMeasurementUnit_RUA_accY_var',
       'InertialMeasurementUnit_RUA_accZ_var',
       'InertialMeasurementUnit_LUA_accX_var',
       'InertialMeasurementUnit_LUA_accY_var',
       'InertialMeasurementUnit_LUA_accZ_var', 'label', 'user', 'trial'],
      dtype='object')

## Using the data from trials 1, 2, 3 and Drill Session as training data and the data from trials 4 and 5 as test perform two initial evaluations:

In [198]:
#Data from trials 1,2,3 and drill with all users
all_data_trials_1236 = all_data[all_data.trial.isin(['1','2','3','6'])]
# #Data from user 4 with all trials
all_data_trials_45 = all_data[all_data.trial.isin(['4','5'])]
print("users: ", dict.fromkeys(all_data_trials_45.user),"| trial: ",dict.fromkeys(all_data_trials_45.trial))
print("users: ",dict.fromkeys(all_data_trials_1236.user),"| trial: ",dict.fromkeys(all_data_trials_1236.trial))

users:  {1: None, 2: None, 3: None, 4: None} | trial:  {4: None, 5: None}
users:  {1: None, 2: None, 3: None, 4: None} | trial:  {1: None, 2: None, 3: None, 6: None}


`sensors`

In [199]:
rua_sensors = ['InertialMeasurementUnit_RUA_accX_mean',
       'InertialMeasurementUnit_RUA_accX_var',
       'InertialMeasurementUnit_RUA_accY_mean',
       'InertialMeasurementUnit_RUA_accY_var',
       'InertialMeasurementUnit_RUA_accZ_mean',
       'InertialMeasurementUnit_RUA_accZ_var','label']
lua_sensors = ['InertialMeasurementUnit_LUA_accX_mean',
       'InertialMeasurementUnit_LUA_accX_var',
       'InertialMeasurementUnit_LUA_accY_mean',
       'InertialMeasurementUnit_LUA_accY_var',
       'InertialMeasurementUnit_LUA_accZ_mean',
       'InertialMeasurementUnit_LUA_accZ_var','label']

 ## Original and transfer tasks

In [200]:
original_task = all_data[lua_sensors]
original_task_trials_1236 = all_data_trials_1236[lua_sensors]
transfer_task = all_data[rua_sensors]
transfer_task_trials_1236 = all_data_trials_1236[rua_sensors]

In [201]:
scaler = StandardScaler()
print(scaler.fit(original_task_trials_1236))
colunmns = original_task_trials_1236.columns
original_task_trials_1236 = scaler.transform(original_task_trials_1236)

StandardScaler(copy=True, with_mean=True, with_std=True)


In [202]:
type(original_task_trials_1236)
original_task_trials_1236 = pd.DataFrame(original_task_trials_1236, columns=colunmns)

In [203]:
scaler = StandardScaler()
print(scaler.fit(transfer_task_trials_1236))
transfer_mean = scaler.mean_
print(transfer_mean)
transfer_std = scaler.scale_
print(transfer_std)

StandardScaler(copy=True, with_mean=True, with_std=True)
[2.39607113e+04 2.46833914e+08 2.49231982e+04 2.46506781e+08
 2.50088188e+04 2.46440479e+08 1.60095006e+00]
[1.82551143e+05 1.17054866e+10 1.82421461e+05 1.16927436e+10
 1.82409816e+05 1.16899279e+10 1.32615246e+00]


In [204]:
for name_column, std, mean in zip(original_task_trials_1236.columns.values,transfer_std,transfer_mean):
    serie = original_task_trials_1236[name_column]
    serie = serie*std + mean
    original_task_trials_1236[name_column] = serie

original_task_trials_1236.head()

Unnamed: 0,InertialMeasurementUnit_LUA_accX_mean,InertialMeasurementUnit_LUA_accX_var,InertialMeasurementUnit_LUA_accY_mean,InertialMeasurementUnit_LUA_accY_var,InertialMeasurementUnit_LUA_accZ_mean,InertialMeasurementUnit_LUA_accZ_var,label
0,-995.043499,4426.61436,383.625642,-2730.407356,-28.091734,14567.33228,0.0
1,-978.234123,4591.476925,381.60226,-2821.232997,-60.567574,14846.070014,0.0
2,-974.87701,4562.78617,339.00172,505.61211,-74.624566,14751.422064,0.0
3,-981.810282,4686.27632,230.291315,4192.531813,-42.991573,15861.230425,0.0
4,-968.74373,7138.387294,72.457936,12591.761643,12.0745,15785.839262,0.0


In [205]:
classifier =RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1, random_state= 1)
classifier.fit(original_task_trials_1236, all_data_trials_1236['label'])

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=5, max_features=1, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
            oob_score=False, random_state=1, verbose=0, warm_start=False)

In [206]:
X_test = all_data_trials_45[rua_sensors]
y_test = all_data_trials_45['label']
score = classifier.score(X_test, y_test)
print("Score transfer", score)
y_pred = classifier.predict(X_test)
print(classification_report(y_test, y_pred))

Score transfer 0.7966101694915254
              precision    recall  f1-score   support

         0.0       1.00      0.77      0.87      3597
         1.0       0.68      1.00      0.81      4990
         2.0       0.96      0.54      0.69      3506
         4.0       0.78      0.93      0.85      2909
         5.0       0.99      0.13      0.23       633

   micro avg       0.80      0.80      0.80     15635
   macro avg       0.88      0.67      0.69     15635
weighted avg       0.85      0.80      0.78     15635



In [207]:
train_data = all_data_trials_1236[lua_sensors]
test_data = all_data_trials_45[rua_sensors]

# Dedife train data
X_train = train_data.loc[:,~train_data.columns.isin(['user', 'trial','label'])]
y_train = train_data.loc[:,train_data.columns.isin(['label'])]

#Define test data
X_test = test_data.loc[:,~test_data.columns.isin(['user', 'trial','label'])]
y_test = test_data.loc[:,test_data.columns.isin(['label'])]

In [208]:
classifier =RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)
classifier.fit(X_train, y_train)
score = classifier.score(X_test, y_test)
print("Score No transfer with LUA and RUA:", score)
y_pred = classifier.predict(X_test)
print(classification_report(y_test, y_pred))

  


Score No transfer with LUA and RUA: 0.3706427886152862
              precision    recall  f1-score   support

         0.0       0.85      0.05      0.10      3597
         1.0       0.34      0.66      0.45      4990
         2.0       0.44      0.63      0.52      3506
         4.0       0.04      0.01      0.01      2909
         5.0       1.00      0.08      0.14       633

   micro avg       0.37      0.37      0.37     15635
   macro avg       0.53      0.29      0.25     15635
weighted avg       0.45      0.37      0.29     15635



In [209]:
train_data = all_data_trials_1236[rua_sensors]
test_data = all_data_trials_45[rua_sensors]

# Dedife train data
X_train = train_data.loc[:,~train_data.columns.isin(['user', 'trial','label'])]
y_train = train_data.loc[:,train_data.columns.isin(['label'])]

#Define test data
X_test = test_data.loc[:,~test_data.columns.isin(['user', 'trial','label'])]
y_test = test_data.loc[:,test_data.columns.isin(['label'])]
classifier =RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)
classifier.fit(X_train, y_train)
score = classifier.score(X_test, y_test)
print("Score No transfer with RUA in both:", score)
y_pred = classifier.predict(X_test)
print(classification_report(y_test, y_pred))

  if sys.path[0] == '':


Score No transfer with RUA in both: 0.5294531499840103
              precision    recall  f1-score   support

         0.0       0.66      0.18      0.28      3597
         1.0       0.44      0.83      0.58      4990
         2.0       0.52      0.32      0.39      3506
         4.0       0.74      0.66      0.70      2909
         5.0       0.91      0.71      0.80       633

   micro avg       0.53      0.53      0.53     15635
   macro avg       0.65      0.54      0.55     15635
weighted avg       0.58      0.53      0.50     15635



In [210]:
for value in all_data.label.unique():
    print(value)

0.0
1.0
2.0
5.0
4.0


In [211]:
all_data_0 = all_data[all_data.label==0]
all_data_1 = all_data[all_data.label==1]
all_data_2 = all_data[all_data.label==2]
all_data_4 = all_data[all_data.label==4]
all_data_5 = all_data[all_data.label==5]

In [212]:
all_classes = pd.DataFrame()
for value in all_data.label.unique():
    all_data_trials_1236 = all_data[all_data.trial.isin(['1','2','3','6'])]
    all_data_trials_1236 = all_data_trials_1236[all_data_trials_1236.label == value]
    all_data_trials_45 = all_data[all_data.trial.isin(['4','5'])]
    all_data_trials_45 = all_data_trials_45[all_data_trials_45.label == value]
    original_task_trials_1236 = all_data_trials_1236[lua_sensors]
    transfer_task_trials_1236 = all_data_trials_1236[rua_sensors]
    scaler.fit(original_task_trials_1236)
    colunmns = original_task_trials_1236.columns
    original_task_trials_1236 = scaler.transform(original_task_trials_1236)
    original_task_trials_1236 = pd.DataFrame(original_task_trials_1236, columns=colunmns)
    scaler.fit(transfer_task_trials_1236)
    transfer_mean = scaler.mean_
    transfer_std = scaler.scale_
    for name_column, std, mean in zip(original_task_trials_1236.columns.values,transfer_std,transfer_mean):
        serie = original_task_trials_1236[name_column]
        serie = serie*std + mean
        original_task_trials_1236[name_column] = serie

    original_task_trials_1236.head()
    all_classes = pd.concat([all_classes, original_task_trials_1236])

all_data_trials_1236 = all_data[all_data.trial.isin(['1','2','3','6'])]
all_data_trials_45 = all_data[all_data.trial.isin(['4','5'])]
classifier.fit(all_classes, all_data_trials_1236['label'])
X_test = all_data_trials_45[rua_sensors]
y_test = all_data_trials_45['label']
score = classifier.score(X_test, y_test)
print("Score transfer", score)
y_pred = classifier.predict(X_test)
print(classification_report(y_test, y_pred))

Score transfer 0.319027822193796
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00      3597
         1.0       0.32      1.00      0.48      4990
         2.0       0.00      0.00      0.00      3506
         4.0       0.00      0.00      0.00      2909
         5.0       0.00      0.00      0.00       633

   micro avg       0.32      0.32      0.32     15635
   macro avg       0.06      0.20      0.10     15635
weighted avg       0.10      0.32      0.15     15635



  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [213]:
all_classes.label.unique()

array([0., 1., 2., 5., 4.])

In [214]:
len(all_data_trials_1236['label'])

42313

In [215]:
len(all_classes)

42313