## Neural Network models - Leave One Participant Out CV to predict properties using 3 subwindows

In [1]:
import numpy as np
import pandas as pd
pd.set_option("display.max_columns", None)
#pd.set_option("display.max_rows", None)
from pandas.core.common import SettingWithCopyWarning

import warnings
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
warnings.simplefilter(action="ignore", category=UserWarning)

import random
import datetime
import time
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, ConcatDataset
import torch.optim as optim



In [2]:
# Initialise the random state of every RNG the notebook imports so that a
# Restart & Run All reproduces the same numbers.
# To draw a fresh seed instead, replace the constant with random.randint(1, 500).
num = 58
random.seed(num)        # Python's built-in RNG (imported above but previously left unseeded)
np.random.seed(num)     # NumPy global RNG
torch.manual_seed(num)  # PyTorch default generator
print(f"The generated random seed is {num}")

The generated random seed is 58


### Load data

In [14]:
# File name encodes the 15-subwindow / 10-slice configuration of the dataset.
path = "complete_dataset_{}subwindows_{}slices.csv".format(15, 10)
df = pd.read_csv(path)



In [15]:
# Only physical properties are modelled, so rows rated for 'enjoyment' are dropped
# and the index is renumbered from 0.
print(df.shape)
physical_df = df[df.property_name != 'enjoyment'].reset_index(drop=True)
physical_df.shape

(75588, 190)


(62989, 190)

In [16]:
# Positional split: the first 36,000 rows versus every row after them.
existing_data = physical_df.iloc[:36000]
new_data = physical_df.iloc[36000:]

### Normalise the data

In [17]:
# Columns 0-9 hold identifiers and labels; the features start at column 10.
starting_index = 10
features_df = physical_df.iloc[:, starting_index:]
features_df.head()


Unnamed: 0,max_ch1_hand0,max_ch2_hand0,max_ch3_hand0,max_ch4_hand0,max_ch5_hand0,max_ch6_hand0,max_ch7_hand0,max_ch8_hand0,mean_ch1_hand0,mean_ch2_hand0,mean_ch3_hand0,mean_ch4_hand0,mean_ch5_hand0,mean_ch6_hand0,mean_ch7_hand0,mean_ch8_hand0,std_ch1_hand0,std_ch2_hand0,std_ch3_hand0,std_ch4_hand0,std_ch5_hand0,std_ch6_hand0,std_ch7_hand0,std_ch8_hand0,max_ch1_hand1,max_ch2_hand1,max_ch3_hand1,max_ch4_hand1,max_ch5_hand1,max_ch6_hand1,max_ch7_hand1,max_ch8_hand1,mean_ch1_hand1,mean_ch2_hand1,mean_ch3_hand1,mean_ch4_hand1,mean_ch5_hand1,mean_ch6_hand1,mean_ch7_hand1,mean_ch8_hand1,std_ch1_hand1,std_ch2_hand1,std_ch3_hand1,std_ch4_hand1,std_ch5_hand1,std_ch6_hand1,std_ch7_hand1,std_ch8_hand1,max_Ax_hand0,max_Ay_hand0,max_Az_hand0,max_Vx_hand0,max_Vy_hand0,max_Vz_hand0,max_Jx_hand0,max_Jy_hand0,max_Jz_hand0,mean_Ax_hand0,mean_Ay_hand0,mean_Az_hand0,mean_Vx_hand0,mean_Vy_hand0,mean_Vz_hand0,mean_Jx_hand0,mean_Jy_hand0,mean_Jz_hand0,std_Ax_hand0,std_Ay_hand0,std_Az_hand0,std_Vx_hand0,std_Vy_hand0,std_Vz_hand0,std_Jx_hand0,std_Jy_hand0,std_Jz_hand0,max_Ax_hand1,max_Ay_hand1,max_Az_hand1,max_Vx_hand1,max_Vy_hand1,max_Vz_hand1,max_Jx_hand1,max_Jy_hand1,max_Jz_hand1,mean_Ax_hand1,mean_Ay_hand1,mean_Az_hand1,mean_Vx_hand1,mean_Vy_hand1,mean_Vz_hand1,mean_Jx_hand1,mean_Jy_hand1,mean_Jz_hand1,std_Ax_hand1,std_Ay_hand1,std_Az_hand1,std_Vx_hand1,std_Vy_hand1,std_Vz_hand1,std_Jx_hand1,std_Jy_hand1,std_Jz_hand1,max_w_hand0,max_x_hand0,max_y_hand0,max_z_hand0,max_AVx_hand0,max_AVy_hand0,max_AVz_hand0,max_AAx_hand0,max_AAy_hand0,max_AAz_hand0,max_AJx_hand0,max_AJy_hand0,max_AJz_hand0,mean_w_hand0,mean_x_hand0,mean_y_hand0,mean_z_hand0,mean_AVx_hand0,mean_AVy_hand0,mean_AVz_hand0,mean_AAx_hand0,mean_AAy_hand0,mean_AAz_hand0,mean_AJx_hand0,mean_AJy_hand0,mean_AJz_hand0,std_w_hand0,std_x_hand0,std_y_hand0,std_z_hand0,std_AVx_hand0,std_AVy_hand0,std_AVz_hand0,std_AAx_hand0,std_AAy_hand0,std_AAz_hand0,std_AJx_hand0,std_AJy_hand0,std_AJz_hand0,max_w_hand1,max_x_hand1,max_y_hand1,max_z_hand
1,max_AVx_hand1,max_AVy_hand1,max_AVz_hand1,max_AAx_hand1,max_AAy_hand1,max_AAz_hand1,max_AJx_hand1,max_AJy_hand1,max_AJz_hand1,mean_w_hand1,mean_x_hand1,mean_y_hand1,mean_z_hand1,mean_AVx_hand1,mean_AVy_hand1,mean_AVz_hand1,mean_AAx_hand1,mean_AAy_hand1,mean_AAz_hand1,mean_AJx_hand1,mean_AJy_hand1,mean_AJz_hand1,std_w_hand1,std_x_hand1,std_y_hand1,std_z_hand1,std_AVx_hand1,std_AVy_hand1,std_AVz_hand1,std_AAx_hand1,std_AAy_hand1,std_AAz_hand1,std_AJx_hand1,std_AJy_hand1,std_AJz_hand1
0,0.5,0.615385,0.670391,0.495868,0.759036,0.652406,0.75625,0.666667,0.497805,0.613462,0.665764,0.492188,0.718656,0.636197,0.748926,0.660851,0.002324,0.002503,0.003077,0.002455,0.006501,0.004039,0.003061,0.002083,0.508197,0.908451,0.751445,0.730159,0.493878,0.995918,0.648515,0.576037,0.49667,0.846831,0.691595,0.636244,0.491412,0.608674,0.601382,0.558852,0.004198,0.014491,0.015432,0.026432,0.002186,0.218816,0.022186,0.006429,-0.857056,-0.084885,0.479568,0.000208,0.000215,0.000349,0.718061,0.488281,1.206342,-0.86311,-0.091721,0.468826,9.082031e-06,-2.8e-05,-6e-06,0.036854,0.039343,0.092439,0.006753,0.007619,0.010341,0.000193,0.000245,0.000281,0.488563,0.441082,0.750762,0.98616,0.013489,-0.211105,0.001218,0.00051,0.000937,1.448006,0.708008,2.34375,0.964774,-0.007117,-0.249875,0.0001117188,7e-05,2.7e-05,-0.10478,0.096063,0.335256,0.022364,0.013963,0.028878,0.000738,0.000331,0.000943,1.260192,0.571902,1.565671,0.179352,0.484946,0.160615,-0.840857,0.009831,0.012959,0.117029,1.270143,2.238883,19.812834,154.113175,104.818227,1338.775861,0.178765,0.484513,0.160469,-0.841154,-0.016003,-0.027618,-0.081836,0.129868,0.55289,-1.016133,16.779031,-45.915024,210.907738,0.000451,0.000472,0.00021,0.000335,0.016423,0.031903,0.20044,1.160057,1.546528,13.337295,97.220032,155.178181,1114.885458,0.600343,-0.175433,-0.769104,-0.129347,0.140072,0.440644,0.126734,5.745156,20.612016,5.331893,212.817696,779.034203,579.105295,0.59826,-0.176827,-0.770695,-0.129768,0.053843,0.125821,-0.043329,0.487977,0.515185,-1.737315,-44.05619,-91.541428,81.034379,0.002302,0.000906,0.001676,0.00051,0.073437,0.28063,0.115024,3.958058,13.903979,6.202001,170.597999,792.76994,449.786951
1,0.5,0.615385,0.670391,0.495868,0.73494,0.641711,0.75625,0.666667,0.497331,0.612607,0.665735,0.492597,0.717746,0.635807,0.749609,0.661458,0.002484,0.002791,0.003327,0.002405,0.008698,0.005318,0.003954,0.002901,0.504098,0.929578,0.757225,0.714286,0.497959,0.734694,0.633663,0.571429,0.497375,0.849582,0.692467,0.635417,0.491965,0.503827,0.59607,0.557604,0.002954,0.013122,0.013631,0.022183,0.002619,0.111327,0.012712,0.004858,-0.857056,-0.084885,0.474686,9.5e-05,0.000256,0.000317,0.563401,0.94401,1.878005,-0.864478,-0.095041,0.466776,9.765625e-08,-1.6e-05,-6.7e-05,0.069561,-0.096004,0.077474,0.0049,0.009589,0.009925,9.2e-05,0.000279,0.000253,0.402547,1.013362,1.039248,0.99202,0.002258,-0.233078,0.000466,0.000281,0.00083,1.437717,1.25558,1.489258,0.974197,-0.014669,-0.24968,-1.163737e-05,-0.000108,0.000176,0.015346,-0.161628,0.19922,0.013709,0.010763,0.014762,0.000277,0.000409,0.00045,0.806361,1.094488,1.2517,0.176189,0.485587,0.159029,-0.841541,0.133141,0.061469,0.043901,4.33232,2.887801,12.346117,165.242446,140.565259,615.46916,0.175885,0.485399,0.158387,-0.841644,0.050176,0.020933,-0.022867,0.428492,0.224097,3.416566,-216.183273,-80.686083,232.653044,0.000331,0.000123,0.000423,7.6e-05,0.063817,0.036748,0.077514,3.233995,1.55389,5.736241,586.244076,234.136553,556.915683,0.603038,-0.175326,-0.766516,-0.128901,0.115731,0.321439,0.121293,5.012583,12.815405,7.55585,224.865107,706.861321,549.917517,0.600378,-0.176324,-0.769133,-0.129948,0.030569,0.132933,-0.023579,1.005235,2.467654,-1.775671,2.8928,-87.699311,-108.119283,0.001406,0.00082,0.001312,0.000867,0.05574,0.140247,0.158323,2.578049,8.649329,8.466336,187.474756,696.353488,622.765618
2,0.5,0.620513,0.670391,0.495868,0.740964,0.647059,0.75625,0.666667,0.497504,0.612607,0.666201,0.492511,0.718373,0.636809,0.748307,0.660995,0.002042,0.002791,0.002938,0.002199,0.007508,0.004658,0.003347,0.002685,0.5,0.866197,0.699422,0.645503,0.493878,0.685714,0.618812,0.571429,0.497268,0.847711,0.691595,0.634039,0.491327,0.476105,0.594884,0.557988,0.003087,0.005556,0.003264,0.004274,0.001997,0.092711,0.008191,0.005364,-0.852173,-0.082443,0.475174,0.00033,0.000281,0.00022,0.906808,0.449219,0.592913,-0.862817,-0.094943,0.466385,-1.132812e-05,9.8e-05,-5e-05,0.069983,0.019696,-0.033961,0.010145,0.009021,0.00605,0.000275,0.000182,0.000209,0.628876,0.446873,0.478738,0.991043,-0.003113,-0.19841,0.000416,0.000427,0.001187,0.785496,0.806726,1.159668,0.972586,-0.017859,-0.217062,-4.882813e-07,-1e-05,0.000148,0.211373,-0.105115,-0.080995,0.014115,0.009521,0.014976,0.000383,0.000359,0.000618,0.589524,1.04007,0.992292,0.17542,0.486364,0.159025,-0.841592,0.0043,0.006697,0.05908,2.017495,1.433494,8.71026,554.574503,268.044509,424.425134,0.174108,0.486077,0.15771,-0.84175,-0.032439,-0.014993,-0.048357,-1.806178,-0.672885,-1.05721,49.81464,3.745668,-139.450471,0.000775,0.000376,0.000817,0.000121,0.022678,0.016039,0.110523,4.044649,2.649872,7.347502,311.488508,208.452524,736.223136,0.613327,-0.176585,-0.758505,-0.131125,0.13947,0.398959,0.045814,4.164473,12.76164,10.404938,649.805789,1527.420437,519.653553,0.610256,-0.176983,-0.760867,-0.131678,0.051381,0.248855,0.023004,-0.510481,-0.3824,1.409711,64.171485,18.288726,-53.628672,0.002365,0.000324,0.001801,0.000421,0.085341,0.122248,0.020458,4.730311,9.167239,5.244792,380.644859,894.676534,492.696123
3,0.5,0.615385,0.675978,0.495868,0.722892,0.647059,0.7625,0.666667,0.497676,0.612607,0.666201,0.492166,0.718373,0.637478,0.749349,0.660417,0.002544,0.002583,0.002703,0.002129,0.003623,0.003481,0.003469,0.003366,0.504098,0.866197,0.705202,0.650794,0.493878,0.787755,0.628713,0.576037,0.497183,0.848591,0.691835,0.633764,0.492092,0.540434,0.600093,0.559476,0.002839,0.005657,0.004265,0.006547,0.002057,0.109162,0.0141,0.006607,-0.857056,-0.068771,0.482498,0.000286,0.00026,0.000278,1.269531,0.719572,0.77097,-0.866528,-0.084103,0.476443,-6.630859e-05,-5.4e-05,5.4e-05,-0.025835,-0.170118,0.186197,0.006789,0.011025,0.006789,0.000228,0.000194,0.000224,0.777207,0.56881,0.501869,0.97493,0.005676,-0.178879,0.000523,0.000256,0.00122,1.185826,1.139323,1.302083,0.96435,-0.008809,-0.191086,-0.0002908529,-5.9e-05,0.000385,-0.162318,-0.022144,0.332164,0.012866,0.013375,0.010642,0.000824,0.000434,0.000794,1.224161,1.199073,1.068872,0.174454,0.486522,0.156278,-0.841936,0.006764,0.005302,0.04293,2.314447,0.774435,11.379475,115.560261,65.426803,662.709244,0.172737,0.486404,0.154664,-0.842407,-0.027498,-0.00142,-0.110571,0.531482,0.106488,-0.267177,-34.985147,14.486963,-186.83649,0.001145,0.000209,0.001076,0.000314,0.036174,0.004493,0.144953,1.566252,0.466043,9.341831,160.556495,37.410321,921.217386,0.61868,-0.177124,-0.753378,-0.131839,0.173872,0.320532,-0.069988,2.60877,1.143752,-1.314365,-33.100067,3.814586,208.877911,0.617947,-0.177514,-0.754276,-0.133027,0.088206,0.149616,-0.131999,-3.232508,-7.028403,-2.62541,-252.208344,-303.849649,-8.786893,0.001179,0.000635,0.001239,0.001194,0.115058,0.195463,0.05499,6.219711,7.714367,2.122456,193.986947,290.84036,216.525244
4,0.5,0.615385,0.670391,0.495868,0.73494,0.647059,0.7625,0.666667,0.497418,0.612286,0.665502,0.491822,0.719126,0.63681,0.748958,0.660532,0.002503,0.002747,0.002966,0.001806,0.004051,0.003087,0.00434,0.002863,0.5,0.859155,0.693642,0.640212,0.493878,0.673469,0.608911,0.56682,0.496542,0.848371,0.691113,0.63426,0.491008,0.493941,0.59491,0.557604,0.002553,0.004341,0.002891,0.003333,0.002143,0.098174,0.009725,0.005128,-0.857056,-0.075119,0.490311,0.000216,0.000406,0.000343,0.830078,0.600962,0.78125,-0.863013,-0.090549,0.4741,3.515625e-06,0.000115,-0.000121,0.139281,0.16778,-0.097263,0.005067,0.009197,0.012057,0.000156,0.000176,0.000394,0.446644,0.262082,0.830701,1.02327,0.007141,-0.189621,0.00069,0.000289,0.000414,4.089355,4.516602,1.617432,0.979603,-0.017552,-0.231474,-6.821987e-05,1.3e-05,-9.9e-05,0.569523,0.594643,-1.278555,0.024091,0.018184,0.046005,0.000512,0.000253,0.000316,2.398816,1.957802,3.178717,0.171828,0.486377,0.155308,-0.842687,0.051942,0.046418,0.028305,2.167587,1.173944,3.115225,116.744319,42.480494,240.661919,0.171385,0.486033,0.154898,-0.842855,0.023378,0.019177,-0.00175,-0.178144,-0.205674,0.458694,-12.632382,-27.099915,64.146291,0.000234,0.000225,0.00044,9.6e-05,0.022904,0.020641,0.02209,1.303469,1.026663,1.524111,108.642753,66.800589,151.705323,0.617756,-0.179027,-0.753498,-0.133178,0.081987,0.076177,0.275227,21.603607,56.450019,29.424453,2797.763022,9050.317251,2366.953044,0.608768,-0.179851,-0.760911,-0.134045,-0.157557,-0.563161,0.016789,0.834538,2.442042,2.794393,357.828889,1043.185698,84.447301,0.008409,0.0007,0.006875,0.000894,0.1676,0.57271,0.179101,12.984182,39.396991,14.842166,1229.13765,3678.905761,1628.94316


In [18]:
# Scale a copy so the raw values in features_df stay available.
normalised_features_df = features_df.copy(deep=True)
print(normalised_features_df.shape)

(62989, 180)


In [19]:
# Min-max scale every feature to [-1, 1], the output range of the tanh activation.
scaler = MinMaxScaler(feature_range=(-1, 1))

# The scaler is fitted on the full feature table; the scaled values overwrite the copy.
normalised_features_df[normalised_features_df.columns] = scaler.fit_transform(features_df)
normalised_features_df.shape

(62989, 180)

In [20]:
# Identifier / label columns: everything before the first feature column.
df_info = physical_df.iloc[:, :starting_index]
df_info.head()

Unnamed: 0,participant_id,clothes_id,property_id,property_name,interaction_id,rating,rating_level,rating_level_num,sub_window_num,slice_num
0,7,14,0,smoothness,1,3,medium,2,1,1
1,7,14,0,smoothness,1,3,medium,2,1,2
2,7,14,0,smoothness,1,3,medium,2,1,3
3,7,14,0,smoothness,1,3,medium,2,1,4
4,7,14,0,smoothness,1,3,medium,2,1,5


In [21]:
# Put the identifier columns back in front of the scaled features (joined on the row index).
normalised_df = pd.concat((df_info, normalised_features_df), axis="columns")
normalised_df.shape

(62989, 190)

In [22]:
# Positional split of the globally-normalised table at row 10,800.
# NOTE(review): the raw-data split into existing_data / new_data uses row 36,000
# instead - confirm that the two boundaries are meant to differ.
normalised_lili = normalised_df.iloc[:10800]
normalised_dat_2022 = normalised_df.iloc[10800:]


In [23]:
# Same positional split as in the earlier cell: first 36,000 rows vs. the remainder.
existing_data = physical_df.iloc[:36000]
new_data = physical_df.iloc[36000:]

existing_data.head(2)


Unnamed: 0,participant_id,clothes_id,property_id,property_name,interaction_id,rating,rating_level,rating_level_num,sub_window_num,slice_num,max_ch1_hand0,max_ch2_hand0,max_ch3_hand0,max_ch4_hand0,max_ch5_hand0,max_ch6_hand0,max_ch7_hand0,max_ch8_hand0,mean_ch1_hand0,mean_ch2_hand0,mean_ch3_hand0,mean_ch4_hand0,mean_ch5_hand0,mean_ch6_hand0,mean_ch7_hand0,mean_ch8_hand0,std_ch1_hand0,std_ch2_hand0,std_ch3_hand0,std_ch4_hand0,std_ch5_hand0,std_ch6_hand0,std_ch7_hand0,std_ch8_hand0,max_ch1_hand1,max_ch2_hand1,max_ch3_hand1,max_ch4_hand1,max_ch5_hand1,max_ch6_hand1,max_ch7_hand1,max_ch8_hand1,mean_ch1_hand1,mean_ch2_hand1,mean_ch3_hand1,mean_ch4_hand1,mean_ch5_hand1,mean_ch6_hand1,mean_ch7_hand1,mean_ch8_hand1,std_ch1_hand1,std_ch2_hand1,std_ch3_hand1,std_ch4_hand1,std_ch5_hand1,std_ch6_hand1,std_ch7_hand1,std_ch8_hand1,max_Ax_hand0,max_Ay_hand0,max_Az_hand0,max_Vx_hand0,max_Vy_hand0,max_Vz_hand0,max_Jx_hand0,max_Jy_hand0,max_Jz_hand0,mean_Ax_hand0,mean_Ay_hand0,mean_Az_hand0,mean_Vx_hand0,mean_Vy_hand0,mean_Vz_hand0,mean_Jx_hand0,mean_Jy_hand0,mean_Jz_hand0,std_Ax_hand0,std_Ay_hand0,std_Az_hand0,std_Vx_hand0,std_Vy_hand0,std_Vz_hand0,std_Jx_hand0,std_Jy_hand0,std_Jz_hand0,max_Ax_hand1,max_Ay_hand1,max_Az_hand1,max_Vx_hand1,max_Vy_hand1,max_Vz_hand1,max_Jx_hand1,max_Jy_hand1,max_Jz_hand1,mean_Ax_hand1,mean_Ay_hand1,mean_Az_hand1,mean_Vx_hand1,mean_Vy_hand1,mean_Vz_hand1,mean_Jx_hand1,mean_Jy_hand1,mean_Jz_hand1,std_Ax_hand1,std_Ay_hand1,std_Az_hand1,std_Vx_hand1,std_Vy_hand1,std_Vz_hand1,std_Jx_hand1,std_Jy_hand1,std_Jz_hand1,max_w_hand0,max_x_hand0,max_y_hand0,max_z_hand0,max_AVx_hand0,max_AVy_hand0,max_AVz_hand0,max_AAx_hand0,max_AAy_hand0,max_AAz_hand0,max_AJx_hand0,max_AJy_hand0,max_AJz_hand0,mean_w_hand0,mean_x_hand0,mean_y_hand0,mean_z_hand0,mean_AVx_hand0,mean_AVy_hand0,mean_AVz_hand0,mean_AAx_hand0,mean_AAy_hand0,mean_AAz_hand0,mean_AJx_hand0,mean_AJy_hand0,mean_AJz_hand0,std_w_hand0,std_x_hand0,std_y_hand0,std_z_hand0,std_AVx_hand0,std_AVy_hand0,std_AVz_hand0,s
td_AAx_hand0,std_AAy_hand0,std_AAz_hand0,std_AJx_hand0,std_AJy_hand0,std_AJz_hand0,max_w_hand1,max_x_hand1,max_y_hand1,max_z_hand1,max_AVx_hand1,max_AVy_hand1,max_AVz_hand1,max_AAx_hand1,max_AAy_hand1,max_AAz_hand1,max_AJx_hand1,max_AJy_hand1,max_AJz_hand1,mean_w_hand1,mean_x_hand1,mean_y_hand1,mean_z_hand1,mean_AVx_hand1,mean_AVy_hand1,mean_AVz_hand1,mean_AAx_hand1,mean_AAy_hand1,mean_AAz_hand1,mean_AJx_hand1,mean_AJy_hand1,mean_AJz_hand1,std_w_hand1,std_x_hand1,std_y_hand1,std_z_hand1,std_AVx_hand1,std_AVy_hand1,std_AVz_hand1,std_AAx_hand1,std_AAy_hand1,std_AAz_hand1,std_AJx_hand1,std_AJy_hand1,std_AJz_hand1
0,7,14,0,smoothness,1,3,medium,2,1,1,0.5,0.615385,0.670391,0.495868,0.759036,0.652406,0.75625,0.666667,0.497805,0.613462,0.665764,0.492188,0.718656,0.636197,0.748926,0.660851,0.002324,0.002503,0.003077,0.002455,0.006501,0.004039,0.003061,0.002083,0.508197,0.908451,0.751445,0.730159,0.493878,0.995918,0.648515,0.576037,0.49667,0.846831,0.691595,0.636244,0.491412,0.608674,0.601382,0.558852,0.004198,0.014491,0.015432,0.026432,0.002186,0.218816,0.022186,0.006429,-0.857056,-0.084885,0.479568,0.000208,0.000215,0.000349,0.718061,0.488281,1.206342,-0.86311,-0.091721,0.468826,9.082031e-06,-2.8e-05,-6e-06,0.036854,0.039343,0.092439,0.006753,0.007619,0.010341,0.000193,0.000245,0.000281,0.488563,0.441082,0.750762,0.98616,0.013489,-0.211105,0.001218,0.00051,0.000937,1.448006,0.708008,2.34375,0.964774,-0.007117,-0.249875,0.000112,7e-05,2.7e-05,-0.10478,0.096063,0.335256,0.022364,0.013963,0.028878,0.000738,0.000331,0.000943,1.260192,0.571902,1.565671,0.179352,0.484946,0.160615,-0.840857,0.009831,0.012959,0.117029,1.270143,2.238883,19.812834,154.113175,104.818227,1338.775861,0.178765,0.484513,0.160469,-0.841154,-0.016003,-0.027618,-0.081836,0.129868,0.55289,-1.016133,16.779031,-45.915024,210.907738,0.000451,0.000472,0.00021,0.000335,0.016423,0.031903,0.20044,1.160057,1.546528,13.337295,97.220032,155.178181,1114.885458,0.600343,-0.175433,-0.769104,-0.129347,0.140072,0.440644,0.126734,5.745156,20.612016,5.331893,212.817696,779.034203,579.105295,0.59826,-0.176827,-0.770695,-0.129768,0.053843,0.125821,-0.043329,0.487977,0.515185,-1.737315,-44.05619,-91.541428,81.034379,0.002302,0.000906,0.001676,0.00051,0.073437,0.28063,0.115024,3.958058,13.903979,6.202001,170.597999,792.76994,449.786951
1,7,14,0,smoothness,1,3,medium,2,1,2,0.5,0.615385,0.670391,0.495868,0.73494,0.641711,0.75625,0.666667,0.497331,0.612607,0.665735,0.492597,0.717746,0.635807,0.749609,0.661458,0.002484,0.002791,0.003327,0.002405,0.008698,0.005318,0.003954,0.002901,0.504098,0.929578,0.757225,0.714286,0.497959,0.734694,0.633663,0.571429,0.497375,0.849582,0.692467,0.635417,0.491965,0.503827,0.59607,0.557604,0.002954,0.013122,0.013631,0.022183,0.002619,0.111327,0.012712,0.004858,-0.857056,-0.084885,0.474686,9.5e-05,0.000256,0.000317,0.563401,0.94401,1.878005,-0.864478,-0.095041,0.466776,9.765625e-08,-1.6e-05,-6.7e-05,0.069561,-0.096004,0.077474,0.0049,0.009589,0.009925,9.2e-05,0.000279,0.000253,0.402547,1.013362,1.039248,0.99202,0.002258,-0.233078,0.000466,0.000281,0.00083,1.437717,1.25558,1.489258,0.974197,-0.014669,-0.24968,-1.2e-05,-0.000108,0.000176,0.015346,-0.161628,0.19922,0.013709,0.010763,0.014762,0.000277,0.000409,0.00045,0.806361,1.094488,1.2517,0.176189,0.485587,0.159029,-0.841541,0.133141,0.061469,0.043901,4.33232,2.887801,12.346117,165.242446,140.565259,615.46916,0.175885,0.485399,0.158387,-0.841644,0.050176,0.020933,-0.022867,0.428492,0.224097,3.416566,-216.183273,-80.686083,232.653044,0.000331,0.000123,0.000423,7.6e-05,0.063817,0.036748,0.077514,3.233995,1.55389,5.736241,586.244076,234.136553,556.915683,0.603038,-0.175326,-0.766516,-0.128901,0.115731,0.321439,0.121293,5.012583,12.815405,7.55585,224.865107,706.861321,549.917517,0.600378,-0.176324,-0.769133,-0.129948,0.030569,0.132933,-0.023579,1.005235,2.467654,-1.775671,2.8928,-87.699311,-108.119283,0.001406,0.00082,0.001312,0.000867,0.05574,0.140247,0.158323,2.578049,8.649329,8.466336,187.474756,696.353488,622.765618


In [24]:
starting_index = 10

# Feature columns of each split
existing_features_df = existing_data.iloc[:, starting_index:]
new_features_df = new_data.iloc[:, starting_index:]

# Identifier / label columns (the first 10 columns) of each split
existing_info = existing_data.iloc[:, :starting_index]
new_info = new_data.iloc[:, :starting_index]

normalised_existing_features = existing_features_df.copy()
normalised_new_features = new_features_df.copy()
print(normalised_existing_features.shape)
print(normalised_new_features.shape)

# Min-max scale to [-1, 1], the output range of the tanh activation
scaler = MinMaxScaler(feature_range=(-1, 1))

# Fit the scaler on the existing data ONLY, then apply that same mapping to the
# new data. Re-fitting on the new split (as was done before) gives each split its
# own min/max, so identical raw values end up at different scaled values.
# Values of the new split may therefore fall slightly outside [-1, 1].
# NOTE(review): assumes the new split is evaluated with a model trained on the
# existing split - confirm.
normalised_existing_features[normalised_existing_features.columns] = scaler.fit_transform(existing_features_df)
normalised_new_features[normalised_new_features.columns] = scaler.transform(new_features_df)

print(normalised_existing_features.shape)
print(normalised_new_features.shape)

# Re-attach the identifier columns (joined on the row index)
normalised_existing_df = pd.concat([existing_info, normalised_existing_features], axis=1)
normalised_new_df = pd.concat([new_info, normalised_new_features], axis=1)




(36000, 180)
(26989, 180)
(36000, 180)
(26989, 180)


### Create X and y data

In [25]:
def create_X_2d(df, features_starting_idx):
    """Return the feature columns of ``df`` as a 2-D tensor.

    Args:
        df: DataFrame whose columns from position ``features_starting_idx``
            onwards are the features.
        features_starting_idx: Positional index of the first feature column.

    Returns:
        A ``torch.Tensor`` with one row per row of ``df`` and one column per
        feature column.
    """
    feature_values = df.iloc[:, features_starting_idx:].to_numpy()
    return torch.Tensor(feature_values)



In [26]:
def create_y_train_for_2d_X(df, predicting_feature = 'property_id', output_as_tensor='Yes'):
    """One-hot encode one label column, one output row per row of ``df``.

    The encoder is fitted on ``df`` itself, so the number of output columns
    equals the number of distinct values of ``predicting_feature`` in ``df``.

    Args:
        df: DataFrame containing the column ``predicting_feature``.
        predicting_feature: Name of the column to encode.
        output_as_tensor: The string ``'Yes'`` returns a ``torch.Tensor``;
            any other value returns the NumPy array.

    Returns:
        The one-hot matrix as a tensor or a NumPy array.
    """
    one_hot_encoder = OneHotEncoder(handle_unknown='ignore')

    # fit_transform returns a sparse matrix; densify it straight away
    one_hot = one_hot_encoder.fit_transform(df[[predicting_feature]]).toarray()

    if output_as_tensor == 'Yes':
        return torch.Tensor(one_hot)
    return one_hot



In [27]:
def create_y_test_for_2d_X(df, predicting_feature = 'property_id'):
    """Return one label column of ``df`` as a tensor of integer class indices.

    Args:
        df: DataFrame containing the column ``predicting_feature``.
        predicting_feature: Name of the label column. Values of
            ``'rating_level_num'`` are shifted down by one; any other column is
            used unchanged.

    Returns:
        A 1-D ``torch.LongTensor`` with one entry per row of ``df``.
    """
    labels = df[predicting_feature].to_numpy()

    # rating_level_num is moved to a 0-based range before being used as a class index
    if predicting_feature == 'rating_level_num':
        labels = labels - 1

    return torch.Tensor(labels).type(torch.LongTensor)


In [28]:
def create_X_3d(df, features_starting_idx):
    """Stack the feature rows of every interaction into a 3-D tensor.

    Interactions are placed along the first axis in order of first appearance
    in ``df``. That is the same order ``drop_duplicates(keep='first')`` yields
    in ``create_y_train_for_3d_X`` / ``create_y_test_for_3d_X``, so rows of X
    and y line up. The ids themselves are never used as array positions, so
    they need not be contiguous or start at 1 (indexing with ``id - 1`` went
    out of bounds for any subset of interactions).

    Args:
        df: DataFrame with the columns ``new_interaction_id`` and ``slice_num``
            and with feature columns from position ``features_starting_idx``
            onwards.
        features_starting_idx: Positional index of the first feature column.

    Returns:
        A ``torch.Tensor`` of shape
        ``(number of interactions, number of distinct slice_num values, number of features)``.
        An interaction with fewer rows than distinct slices is zero-padded at
        the end of its second axis.

    Raises:
        IndexError: If an interaction has more rows than there are distinct
            ``slice_num`` values.
    """
    itr_id_lst = df.new_interaction_id.unique().tolist()
    dim1 = len(itr_id_lst)
    print(dim1)
    dim2 = df.slice_num.nunique()
    dim3 = df.iloc[:, features_starting_idx:].shape[1]

    X = np.zeros((dim1, dim2, dim3))

    if itr_id_lst:
        print(itr_id_lst[0], itr_id_lst[-1])

    for position, itr_id in enumerate(itr_id_lst):
        # All rows of this interaction, features only, in their original row order
        rows = df.loc[df.new_interaction_id == itr_id].iloc[:, features_starting_idx:].to_numpy()

        if rows.shape[0] > dim2:
            raise IndexError(
                f"interaction {itr_id} has {rows.shape[0]} rows but only {dim2} slices are available"
            )

        X[position, :rows.shape[0]] = rows

    X_tensor = torch.Tensor(X)
    return X_tensor


In [29]:
def create_y_train_for_3d_X(df, predicting_feature = 'property_id'):
    """One-hot encode one label column, one output row per distinct label triple.

    ``df`` is first reduced to the distinct combinations of
    ``new_interaction_id``, ``property_id`` and ``rating_level_num`` (first
    occurrence kept, original order preserved); the requested column of that
    reduced table is then one-hot encoded. The encoder is fitted on this data
    itself, so the number of output columns equals the number of distinct
    values present.

    Args:
        df: DataFrame containing the three columns named above.
        predicting_feature: Which of those columns to encode.

    Returns:
        The one-hot matrix as a ``torch.Tensor``.
    """
    label_columns = ['new_interaction_id', 'property_id', 'rating_level_num']
    unique_labels = (
        df[label_columns]
        .drop_duplicates(keep='first')
        .reset_index(drop=True)
    )

    one_hot_encoder = OneHotEncoder(handle_unknown='ignore')
    # fit_transform returns a sparse matrix; densify it before building the tensor
    one_hot = one_hot_encoder.fit_transform(unique_labels[[predicting_feature]]).toarray()

    return torch.Tensor(one_hot)



In [30]:
def create_y_test_for_3d_X(df, predicting_feature = 'property_id'):
    """Return integer class indices, one per distinct label triple.

    ``df`` is first reduced to the distinct combinations of
    ``new_interaction_id``, ``property_id`` and ``rating_level_num`` (first
    occurrence kept, original order preserved).

    Args:
        df: DataFrame containing the three columns named above.
        predicting_feature: Which of those columns to return. Values of
            ``'rating_level_num'`` are shifted down by one; any other column is
            used unchanged.

    Returns:
        A 1-D ``torch.LongTensor``.
    """
    label_columns = ['new_interaction_id', 'property_id', 'rating_level_num']
    unique_labels = (
        df[label_columns]
        .drop_duplicates(keep='first')
        .reset_index(drop=True)
    )

    labels = unique_labels[predicting_feature].to_numpy()

    # rating_level_num is moved to a 0-based range before being used as a class index
    if predicting_feature == 'rating_level_num':
        labels = labels - 1

    return torch.Tensor(labels).type(torch.LongTensor)



## Model 1 - Linear model using all 180 features

### Create the model

In [31]:
class Linear_all_features_properties(nn.Module):
    """Three-layer fully connected classifier with tanh hidden activations.

    The defaults reproduce the original fixed architecture (180 -> 20 -> 10 -> 5).

    Args:
        in_features: Number of input features per sample.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of output classes.
    """

    def __init__(self, in_features=180, hidden1=20, hidden2=10, num_classes=5):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, num_classes)

    def forward(self, x):
        """Map a ``(batch, in_features)`` tensor to ``(batch, num_classes)`` class probabilities.

        NOTE(review): the output is already softmax-normalised, while the training
        helpers in this notebook feed it to ``CrossEntropyLoss``, which applies
        log-softmax itself. Returning raw logits would avoid the double
        normalisation, but it would change the values callers receive, so it is
        left as is here.
        """
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        return F.softmax(self.fc3(x), dim=1)

class Linear_all_features_ratings(nn.Module):
    """Three-layer fully connected classifier with tanh hidden activations.

    The defaults reproduce the original fixed architecture (185 -> 20 -> 10 -> 3).
    NOTE(review): 185 is presumably the 180 features plus a 5-column one-hot
    encoding of ``property_id`` appended by the caller - confirm.

    Args:
        in_features: Number of input features per sample.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of output classes.
    """

    def __init__(self, in_features=185, hidden1=20, hidden2=10, num_classes=3):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, num_classes)

    def forward(self, x):
        """Map a ``(batch, in_features)`` tensor to ``(batch, num_classes)`` class probabilities.

        NOTE(review): the output is already softmax-normalised, while the training
        helpers in this notebook feed it to ``CrossEntropyLoss``, which applies
        log-softmax itself. Returning raw logits would avoid the double
        normalisation, but it would change the values callers receive, so it is
        left as is here.
        """
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        return F.softmax(self.fc3(x), dim=1)
 

In [32]:
class LSTM_all_features_properties(nn.Module):
    """Classifier that runs two sequences through one shared LSTM.

    Both inputs pass through the same single-layer LSTM (shared weights). The
    tanh of every time step's output is concatenated along the feature axis,
    flattened, and classified by three fully connected layers.
    The defaults reproduce the original fixed architecture.
    NOTE(review): the two inputs are presumably the per-hand halves of the
    180 features (90 each) - confirm against the caller.

    Args:
        input_size: Number of features per time step of each input sequence.
        hidden_size: Hidden size of the LSTM.
        seq_len: Number of time steps in each input; it fixes the width of the
            first fully connected layer, so inputs must have exactly this length.
        num_classes: Number of output classes.
    """

    def __init__(self, input_size=90, hidden_size=40, seq_len=10, num_classes=5):
        super().__init__()
        self.rnn = nn.LSTM(input_size, hidden_size, 1, batch_first=True)
        # Every time step of both sequences feeds the classifier head
        self.fc1 = nn.Linear(seq_len * hidden_size * 2, 20)
        self.fc2 = nn.Linear(20, 10)
        self.fc3 = nn.Linear(10, num_classes)

    def forward(self, x1, x2):
        """Map two ``(batch, seq_len, input_size)`` tensors to ``(batch, num_classes)`` probabilities.

        NOTE(review): the output is already softmax-normalised, while the training
        helpers in this notebook feed it to ``CrossEntropyLoss``, which applies
        log-softmax itself; left unchanged to keep the returned values stable.
        """
        # The final hidden / cell states are not needed, only the per-step outputs
        seq1, _ = self.rnn(x1)
        seq2, _ = self.rnn(x2)

        x = torch.cat((torch.tanh(seq1), torch.tanh(seq2)), 2)
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        return F.softmax(self.fc3(x), dim=1)
    

In [33]:
def find_best_model_for_2d_X(train_dataloader, learning_rate, num_epochs, model):
    """Train ``model`` with SGD and keep the weights of its lowest-loss epoch.

    Args:
        train_dataloader: Iterable of ``(inputs, labels)`` batches.
        learning_rate: Learning rate passed to SGD (momentum is fixed at 0.7).
        num_epochs: Number of passes over ``train_dataloader``.
        model: Module called as ``model(inputs)``. It is trained in place.

    Returns:
        A tuple ``(best_train_loss, best_model, train_loss_lst)``:
        the lowest mean batch loss of any epoch, a copy of the model's
        ``state_dict`` taken at the end of that epoch (``None`` when no epoch
        ran), and the mean batch loss of every epoch.
    """
    import copy  # local import so this cell does not depend on editing the import cell

    train_model = model

    # Loss and optimiser
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(train_model.parameters(), lr=learning_rate, momentum=0.7)

    best_train_loss = np.inf
    best_model = None
    train_loss_lst = []

    for epoch in range(num_epochs):
        train_model.train()
        epoch_train_loss = 0

        for train_input1, train_labels in train_dataloader:
            optimizer.zero_grad()

            # forward + backward + optimise
            train_preds = train_model(train_input1)
            train_loss = criterion(train_preds, train_labels)
            train_loss.backward()
            optimizer.step()

            epoch_train_loss += train_loss.item()

        avg_training_loss = epoch_train_loss / len(train_dataloader)
        train_loss_lst.append(avg_training_loss)

        if epoch % 10 == 0:
            print(f'epoch {epoch+1}: train loss = {round(avg_training_loss,2)}')

        if avg_training_loss < best_train_loss:
            best_train_loss = avg_training_loss
            # state_dict() returns references to the live parameters; without a
            # deep copy the "best" weights are overwritten by every later step.
            best_model = copy.deepcopy(train_model.state_dict())

    return best_train_loss, best_model, train_loss_lst


In [34]:
def find_best_model_for_3d_X(train_dataloader, learning_rate, num_epochs, model):
    """Train a two-input ``model`` with SGD and keep the weights of its lowest-loss epoch.

    Args:
        train_dataloader: Iterable of ``(input1, input2, labels)`` batches.
        learning_rate: Learning rate passed to SGD (momentum is fixed at 0.7).
        num_epochs: Number of passes over ``train_dataloader``.
        model: Module called as ``model(input1, input2)``. It is trained in place.

    Returns:
        A tuple ``(best_train_loss, best_model, train_loss_lst)``:
        the lowest mean batch loss of any epoch, a copy of the model's
        ``state_dict`` taken at the end of that epoch (``None`` when no epoch
        ran), and the mean batch loss of every epoch.
    """
    import copy  # local import so this cell does not depend on editing the import cell

    train_model = model

    # Loss and optimiser
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(train_model.parameters(), lr=learning_rate, momentum=0.7)

    # Must exist before the first comparison below; it was previously created
    # under a different name, which raised UnboundLocalError in the first epoch.
    best_train_loss = np.inf
    best_model = None
    train_loss_lst = []

    for epoch in range(num_epochs):
        train_model.train()
        epoch_train_loss = 0

        for train_input1, train_input2, train_labels in train_dataloader:
            optimizer.zero_grad()

            # forward + backward + optimise
            train_preds = train_model(train_input1, train_input2)
            train_loss = criterion(train_preds, train_labels)
            train_loss.backward()
            optimizer.step()

            epoch_train_loss += train_loss.item()

        avg_training_loss = epoch_train_loss / len(train_dataloader)
        train_loss_lst.append(avg_training_loss)

        if epoch % 10 == 0:
            print(f'epoch {epoch+1}: train loss = {round(avg_training_loss,2)}')

        if avg_training_loss < best_train_loss:
            best_train_loss = avg_training_loss
            # state_dict() returns references to the live parameters; without a
            # deep copy the "best" weights are overwritten by every later step.
            best_model = copy.deepcopy(train_model.state_dict())

    return best_train_loss, best_model, train_loss_lst


In [35]:
# Original
def Linear_LOP0CV(lili_data, dat_2022, model, num_folds=5, predicting_feature='property_id', learning_rate=0.01, num_epochs=10, random_state=num):
    """Train `model` on `lili_data`, evaluate it on `dat_2022` and print the metrics.

    Parameters
    ----------
    lili_data : DataFrame used to build the training inputs and labels.
    dat_2022 : DataFrame used to build the held-out test inputs and labels.
    model : network instance forwarded to `find_best_model_for_2d_X`; its weights
        are overwritten with the best state found during training.
    num_folds : unused; kept so existing calls keep working.
    predicting_feature : 'property_id' (classes 0-4, macro F1 reported) or
        'rating_level_num' (classes 0-2, weighted F1 reported).
    learning_rate, num_epochs : forwarded to `find_best_model_for_2d_X`.
    random_state : seed passed to `torch.manual_seed`.

    Side effects
    ------------
    Writes the best state dict to 'best_model.pt' in the working directory and
    prints the confusion matrix, F1 scores and accuracy. Returns None.

    Raises
    ------
    ValueError : if `predicting_feature` is not one of the two supported values.
    """
    # Fail fast: previously an unsupported target only surfaced much later as an
    # UnboundLocalError on `conf_mat`, after the whole training run.
    if predicting_feature == 'property_id':
        class_labels = [0, 1, 2, 3, 4]
        extra_average, extra_name = 'macro', 'Macro'
    elif predicting_feature == 'rating_level_num':
        class_labels = [0, 1, 2]
        extra_average, extra_name = 'weighted', 'Weighted'
    else:
        raise ValueError(
            f"predicting_feature must be 'property_id' or 'rating_level_num', got {predicting_feature!r}"
        )

    # Seed from the argument (it used to be ignored in favour of the global `num`)
    torch.manual_seed(random_state)

    # Data preparation
    X_train = create_X_2d(lili_data, 10)
    y_train = create_y_train_for_2d_X(lili_data, predicting_feature=predicting_feature, output_as_tensor='Yes')
    X_test = create_X_2d(dat_2022, 10)
    y_test = create_y_test_for_2d_X(dat_2022, predicting_feature=predicting_feature)

    if predicting_feature == 'rating_level_num':
        # When predicting the rating, the property encoding is appended to the
        # inputs of both the training and the test set.
        X_train_add = create_y_train_for_2d_X(lili_data, predicting_feature='property_id', output_as_tensor='Yes')
        X_train = torch.cat((X_train, X_train_add), 1)
        X_test_add = create_y_train_for_2d_X(dat_2022, predicting_feature='property_id', output_as_tensor='Yes')
        X_test = torch.cat((X_test, X_test_add), 1)

    # Full-batch training: one batch holds the entire training set
    train_dataset = TensorDataset(X_train, y_train)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, shuffle=True, batch_size=y_train.shape[0])

    # The test set is consumed as a single batch and every metric below is
    # order-invariant, so shuffling it is unnecessary.
    test_dataset = TensorDataset(X_test, y_test)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, shuffle=False, batch_size=y_test.shape[0])

    avg_training_loss, best_model, training_loss_lst = find_best_model_for_2d_X(train_dataloader, learning_rate, num_epochs, model)

    # Save the trained model, then reload it into the network used for testing
    name = 'best_model.pt'
    torch.save(best_model, name)
    print('The model has been saved')

    test_model = model
    test_model.load_state_dict(torch.load(name))
    # Evaluate in inference mode so layers such as dropout/batch-norm (if any)
    # behave deterministically.
    test_model.eval()

    # `next(iter(...))` is the iterator protocol; the `.next()` method is not
    # available on current PyTorch DataLoader iterators.
    test_input1, test_labels = next(iter(test_dataloader))

    with torch.no_grad():
        test_preds = test_model(test_input1)

    test_predicted_np = np.argmax(test_preds.detach().numpy(), axis=1)
    test_labels_np = test_labels.numpy()

    conf_mat = confusion_matrix(test_labels_np, test_predicted_np, labels=class_labels)
    extra_f1_score = f1_score(test_labels_np, test_predicted_np, average=extra_average)
    micro_f1_score = f1_score(test_labels_np, test_predicted_np, average='micro')
    acc = accuracy_score(test_labels_np, test_predicted_np)

    print("(1) Confusion matrix:\n", conf_mat)
    print(f"(2) micro F1 score = {round(micro_f1_score,2)}")
    print(f"(3) {extra_name} F1 score = {round(extra_f1_score,2)}")
    print(f"(4) Percentage Classification accuracy = {round(acc*100,2)}%")

    print('--------------------------------')
        

In [36]:
# Original
def LSTM_LOP0CV(lili_0, lili_1, new_0, new_1, model, num_folds=5, predicting_feature='property_id', learning_rate=0.01, num_epochs=10, random_state=num):
    """Train a two-input `model` on the `lili_*` frames, evaluate on the `new_*` frames.

    Parameters
    ----------
    lili_0, lili_1 : DataFrames used to build the two training input tensors;
        the training labels are derived from `lili_0`.
    new_0, new_1 : DataFrames used to build the two test input tensors; the
        test labels are derived from `new_0`.
    model : network instance called as `model(input_0, input_1)`; its weights are
        overwritten with the best state found during training.
    num_folds : unused; kept so existing calls keep working.
    predicting_feature : 'property_id' (classes 0-4, macro F1 reported) or
        'rating_level_num' (classes 0-2, weighted F1 reported).
    learning_rate, num_epochs : forwarded to `find_best_model_for_3d_X`.
    random_state : seed passed to `torch.manual_seed`.

    Side effects
    ------------
    Writes the best state dict to 'best_model.pt' in the working directory and
    prints the confusion matrix, F1 scores and accuracy. Returns None.

    Raises
    ------
    ValueError : if `predicting_feature` is not one of the two supported values.
    """
    # Fail fast: previously an unsupported target only surfaced after training
    # as an UnboundLocalError on `conf_mat`.
    if predicting_feature == 'property_id':
        class_labels = [0, 1, 2, 3, 4]
        extra_average, extra_name = 'macro', 'Macro'
    elif predicting_feature == 'rating_level_num':
        class_labels = [0, 1, 2]
        extra_average, extra_name = 'weighted', 'Weighted'
    else:
        raise ValueError(
            f"predicting_feature must be 'property_id' or 'rating_level_num', got {predicting_feature!r}"
        )

    # Seed from the argument (it used to be ignored in favour of the global `num`)
    torch.manual_seed(random_state)

    if predicting_feature == 'rating_level_num':
        def _with_property_columns(frame):
            # Append one column per property (presumably a one-hot encoding --
            # confirm against create_y_train_for_2d_X) to a re-indexed copy.
            encoded = create_y_train_for_2d_X(frame, predicting_feature='property_id', output_as_tensor='No')
            encoded_df = pd.DataFrame(encoded, columns=['smoothness','thickness','warmth', 'flexibility', 'softness'])
            return pd.concat([frame.reset_index(drop=True), encoded_df.reset_index(drop=True)], axis=1)

        # This branch used to read the undefined names `data_0`/`data_1` and
        # always crashed. The property columns are added to every input frame
        # so that training and test inputs keep the same set of columns.
        lili_0 = _with_property_columns(lili_0)
        lili_1 = _with_property_columns(lili_1)
        new_0 = _with_property_columns(new_0)
        new_1 = _with_property_columns(new_1)

    # Data preparation
    X_train_0 = create_X_3d(lili_0, 10)
    X_train_1 = create_X_3d(lili_1, 10)
    X_test_0 = create_X_3d(new_0, 10)
    X_test_1 = create_X_3d(new_1, 10)
    y_train = create_y_train_for_3d_X(lili_0, predicting_feature=predicting_feature)
    y_test = create_y_test_for_3d_X(new_0, predicting_feature=predicting_feature)

    # Full-batch training: one batch holds the entire training set
    train_dataset = TensorDataset(X_train_0, X_train_1, y_train)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, shuffle=True, batch_size=y_train.shape[0])

    # The test set is consumed as a single batch and every metric below is
    # order-invariant, so shuffling it is unnecessary.
    test_dataset = TensorDataset(X_test_0, X_test_1, y_test)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, shuffle=False, batch_size=y_test.shape[0])

    train_loss, best_model, train_loss_lst = find_best_model_for_3d_X(train_dataloader, learning_rate, num_epochs, model)

    # Save the trained model, then reload it into the network used for testing
    name = 'best_model.pt'
    torch.save(best_model, name)
    print('The model has been saved')

    test_model = model
    test_model.load_state_dict(torch.load(name))
    # Evaluate in inference mode so layers such as dropout (if any) are disabled
    test_model.eval()

    # `next(iter(...))` is the iterator protocol; the `.next()` method is not
    # available on current PyTorch DataLoader iterators.
    test_input1, test_input2, test_labels = next(iter(test_dataloader))

    with torch.no_grad():
        test_preds = test_model(test_input1, test_input2)

    test_predicted_np = np.argmax(test_preds.detach().numpy(), axis=1)
    test_labels_np = test_labels.numpy()

    conf_mat = confusion_matrix(test_labels_np, test_predicted_np, labels=class_labels)
    extra_f1_score = f1_score(test_labels_np, test_predicted_np, average=extra_average)
    micro_f1_score = f1_score(test_labels_np, test_predicted_np, average='micro')
    acc = accuracy_score(test_labels_np, test_predicted_np)

    print("(1) Confusion matrix:\n", conf_mat)
    print(f"(2) micro F1 score = {round(micro_f1_score,2)}")
    print(f"(3) {extra_name} F1 score = {round(extra_f1_score,2)}")
    print(f"(4) Percentage Classification accuracy = {round(acc*100,2)}%")

    print('--------------------------------')
        
   

### Run the fully connected (FC) network for property prediction

In [49]:
# Time one complete train-and-evaluate run of the Linear_all_features_properties model
t1 = time.time()
Linear_LOP0CV(
    normalised_lili,
    normalised_dat_2022,
    model=Linear_all_features_properties(),
    num_folds=5,
    predicting_feature='property_id',
    learning_rate=0.05,
    num_epochs=5000,
    random_state=num,
)
t2 = time.time()

epoch 1: train loss = 1.61
epoch 11: train loss = 1.61
epoch 21: train loss = 1.61
epoch 31: train loss = 1.61
epoch 41: train loss = 1.61
epoch 51: train loss = 1.61
epoch 61: train loss = 1.61
epoch 71: train loss = 1.61
epoch 81: train loss = 1.61
epoch 91: train loss = 1.61
epoch 101: train loss = 1.61
epoch 111: train loss = 1.61
epoch 121: train loss = 1.61
epoch 131: train loss = 1.61
epoch 141: train loss = 1.61
epoch 151: train loss = 1.61
epoch 161: train loss = 1.61
epoch 171: train loss = 1.61
epoch 181: train loss = 1.61
epoch 191: train loss = 1.61
epoch 201: train loss = 1.61
epoch 211: train loss = 1.61
epoch 221: train loss = 1.61
epoch 231: train loss = 1.61
epoch 241: train loss = 1.61
epoch 251: train loss = 1.61
epoch 261: train loss = 1.61
epoch 271: train loss = 1.61
epoch 281: train loss = 1.61
epoch 291: train loss = 1.61
epoch 301: train loss = 1.61
epoch 311: train loss = 1.61
epoch 321: train loss = 1.61
epoch 331: train loss = 1.61
epoch 341: train loss = 1

In [51]:
# Elapsed wall-clock time between the two time.time() readings, in seconds
t2 - t1

384.22882103919983