# Experimenting with new normalization

- this playbook contains the code for the experimentation with new device

In [1]:
import os
import csv
import pandas as pd

version = "v6"
dataset_path = f"../data/processed/"

# files
filename_s = f"{version}-2_stages-full_statistical.csv"
filename_s_n = f"{version}-2_stages-full_statistical_normalized.csv"
filename_s_s = f"{version}-2_stages-full_statistical_standardized.csv"
filename_f = f"{version}-2_stages-fourier_int_XYZ.csv"
filename_f_n = f"{version}-2_stages-fourier_normalized_int_XYZ.csv"
filename_f_s = f"{version}-2_stages-fourier_standardized_XYZ.csv"


# datasets
df_statistical = pd.read_csv(f"{dataset_path}/{filename_s}")
df_statistical_normalized = pd.read_csv(f"{dataset_path}/{filename_s_n}")
df_statistical_standardized = pd.read_csv(f"{dataset_path}/{filename_s_s}")
df_spectral = pd.read_csv(f"{dataset_path}/{filename_f}")
df_spectral_normalized = pd.read_csv(f"{dataset_path}/{filename_f_n}")
df_spectral_standardized = pd.read_csv(f"{dataset_path}/{filename_f_s}")

In [2]:
from lib import device_mapping, reverse_device_mapping

# ------------------------------------------------
# choice of dataset

# df = df_statistical # 56, 67
# df = df_statistical_normalized  # 54 63
# df = df_statistical_standardized    # 66, 72
# df = df_spectral    # 74, 77
# df = df_spectral_normalized #77 78
# df = df_spectral_standardized   #89, 80
datasets = [df_spectral, df_spectral_normalized, df_spectral_standardized]
dataset_names = ["raw", "normalized", "standardized"]

# ------------------------------------------------
# choice of phone

# GooglePixel6:        03575768cc23b2df
# RedmiNote8PRO:       4aaf95a621ccf092
# SamsungGalaxyA51:    029a77f196804217
# SamsungGalaxyS6:     e08d976ac75c011e


# df = df[df['device_id'].isin(reverse_device_mapping(['RedmiNote8PRO']))]
# df = df[df['device_id'].isin(reverse_device_mapping(['SamsungGalaxyS6']))]
# df = df[df['device_id'].isin(reverse_device_mapping(['GooglePixel6', 'RedmiNote8PRO']))]
# df = df[df['device_id'].isin(reverse_device_mapping(['SamsungGalaxyA51', 'SamsungGalaxyS6']))]

# ------------------------------------------------
# choice of locations to include TOOD

# number of cases per device
# df['device_id'].value_counts()


In [3]:
def map_classes(x):
    if x == 5:
        return 1
    return 0
def map_back_classes(x):
    if x == 1:
        return 5
    return 6

In [4]:
df = datasets[0]
# 03575768cc23b2df
device = "GooglePixel6"
device_id = reverse_device_mapping(device)[0]
print(device_id)
df['device_id'] == device_id

03575768cc23b2df


0        False
1        False
2        False
3        False
4        False
         ...  
37414    False
37415    False
37416    False
37417    False
37418    False
Name: device_id, Length: 37419, dtype: bool

In [5]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
from lib import device_mapping

results_svm_all = []
results_rf_all = []
for device in ["GooglePixel6", "RedmiNote8PRO", "SamsungGalaxyA51","SamsungGalaxyS6"]:
    results_svm  = [device]
    results_RF  = [device]
    for i, dataset in enumerate(datasets):
        df = dataset
        
        dataset_name = dataset_names[i]
        # test is one device, train the rest
        test = df[df['device_id'] == reverse_device_mapping(device)[0]]
        train = df[df['device_id'] != reverse_device_mapping(device)[0]]

        # correlation and feature selection
        correlation = df.iloc[:, 6:].apply(lambda x: df['label'].corr(x, method="pearson")).abs().sort_values(ascending=False)
        selection = correlation[:10].index

        y_test = test['label'].to_numpy()
        X_test = test[selection].to_numpy()
        y_train = train['label'].to_numpy()
        X_train = train[selection].to_numpy()

        # SVM
        clf = svm.SVC(decision_function_shape='ovo')
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        acc_svm = sum([1 for i in range(len(y_pred)) if y_pred[i] == y_test[i]])/len(y_pred)
        f1_svm = f1_score(list(map(map_classes, y_test)), list(map(map_classes, y_pred)))

        # RF
        clf = RandomForestClassifier(max_depth=3)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        acc_rf = sum([1 for i in range(len(y_pred)) if y_pred[i] == y_test[i]])/len(y_pred)
        f1_rf = f1_score(list(map(map_classes, y_test)), list(map(map_classes, y_pred)))

        device_name = device_mapping[test['device_id'].iloc[0]]
        location = test['location'].iloc[0]
        #results_temp = [device, acc_svm, f1_svm, acc_rf, f1_rf]
        results_svm.append(acc_svm)
        results_RF.append(acc_rf)
    print(results_svm)
    print(results_RF)
    results_svm_all.append(results_svm)
    results_rf_all.append(results_RF)

results_all_svm = pd.DataFrame(results_svm_all, columns=['Device', "raw", "normalized", "standardized"])
results_all_rf = pd.DataFrame(results_rf_all, columns=['Device', "raw", "normalized", "standardized"])
results_all_svm

['GooglePixel6', 0.5291985501409585, 0.5014671192681663, 0.5915079684713193]
['GooglePixel6', 0.7895978367182556, 0.5872504458891894, 0.8100799723836373]
['RedmiNote8PRO', 0.8122270742358079, 0.24828446662507797, 0.5009357454772302]
['RedmiNote8PRO', 0.66126013724267, 0.5009357454772302, 0.5009357454772302]
['SamsungGalaxyA51', 0.5558933582787652, 0.5515279077019021, 0.6269098846273776]
['SamsungGalaxyA51', 0.5732772061116308, 0.5490333645151232, 0.7143748051138136]
['SamsungGalaxyS6', 0.5341537364009274, 0.5097199928660603, 0.478330658105939]
['SamsungGalaxyS6', 0.4893882646691635, 0.5359372213304797, 0.7417513822008204]


NameError: name 'results_svm_RF' is not defined

In [7]:
results_all_svm

Unnamed: 0,Device,raw,normalized,standardized
0,GooglePixel6,0.529199,0.501467,0.591508
1,RedmiNote8PRO,0.812227,0.248284,0.500936
2,SamsungGalaxyA51,0.555893,0.551528,0.62691
3,SamsungGalaxyS6,0.534154,0.50972,0.478331


In [8]:
results_all_rf

Unnamed: 0,Device,raw,normalized,standardized
0,GooglePixel6,0.789598,0.58725,0.81008
1,RedmiNote8PRO,0.66126,0.500936,0.500936
2,SamsungGalaxyA51,0.573277,0.549033,0.714375
3,SamsungGalaxyS6,0.489388,0.535937,0.741751


In [9]:
results_all_svm.mean()

  results_all_svm.mean()


raw             0.607868
normalized      0.452750
standardized    0.549421
dtype: float64

In [10]:
results_all_rf.mean()

  results_all_rf.mean()


raw             0.628381
normalized      0.543289
standardized    0.691785
dtype: float64

In [12]:
print(results_all_svm.sort_values(['Device']).round(3).to_latex(index=False))

\begin{tabular}{lrrr}
\toprule
          Device &   raw &  normalized &  standardized \\
\midrule
    GooglePixel6 & 0.529 &       0.501 &         0.592 \\
   RedmiNote8PRO & 0.812 &       0.248 &         0.501 \\
SamsungGalaxyA51 & 0.556 &       0.552 &         0.627 \\
 SamsungGalaxyS6 & 0.534 &       0.510 &         0.478 \\
\bottomrule
\end{tabular}



  print(results_all_svm.sort_values(['Device']).round(3).to_latex(index=False))


In [13]:
print(results_all_svm.sort_values(['Device']).round(3).to_latex(index=False))

\begin{tabular}{lrrr}
\toprule
          Device &   raw &  normalized &  standardized \\
\midrule
    GooglePixel6 & 0.529 &       0.501 &         0.592 \\
   RedmiNote8PRO & 0.812 &       0.248 &         0.501 \\
SamsungGalaxyA51 & 0.556 &       0.552 &         0.627 \\
 SamsungGalaxyS6 & 0.534 &       0.510 &         0.478 \\
\bottomrule
\end{tabular}



  print(results_all_svm.sort_values(['Device']).round(3).to_latex(index=False))
