In [1]:
import os
from pathlib import Path
import pickle
import numpy as np
import pandas as pd

In [2]:
def load_data(filename):
    """Deserialize and return the object stored in a pickle file.

    Parameters
    ----------
    filename : str or path-like
        Location of the pickle file; it is opened in binary read mode.

    Returns
    -------
    object
        Whatever object was pickled into the file.

    Notes
    -----
    Unpickling can execute arbitrary code, so only use this on files
    that come from a trusted source.
    """
    with open(filename, 'rb') as handle:
        payload = pickle.load(handle)
    return payload

In [3]:
# Root of the project's data tree. Set the WEBRTC_DATA_ROOT environment
# variable to run this notebook against a different checkout; when it is not
# set, the original hard-coded location is used.
data_root = os.environ.get(
    "WEBRTC_DATA_ROOT",
    r"C:/Users/hayashi/doutorado/mac6958--dados-redes/webrtc",
)
# Input pickle, resolved relative to the data root.
data_file = Path(data_root, 'data/processed/facial-1001-normalized.pkl')

In [4]:
# Read the pickled dataset at data_file into memory.
data = load_data(data_file)

In [18]:
# Number of records in the loaded dataset.
len(data)

300

In [5]:
# Inspect the first record to see which fields it carries.
data[0]

{'user_id': -1.647508942095828,
 'test_condition': 0.4629100498862757,
 'delay': 1.0690449676496976,
 'jitter': 1.224744871391589,
 'packet_loss_rate': 0.0,
 'features': array([[ 0.200067, -0.033075, -0.979224, ...,  1.      ,  0.      ,
          0.      ],
        [ 0.203814, -0.040347, -0.978178, ...,  1.      ,  0.      ,
          0.      ],
        [ 0.203665, -0.042064, -0.978137, ...,  0.      ,  0.      ,
          0.      ],
        ...,
        [-0.020206,  0.302458, -0.952948, ...,  0.      ,  0.      ,
          0.      ],
        [-0.015582,  0.300056, -0.953794, ...,  0.      ,  0.      ,
          0.      ],
        [-0.021239,  0.304443, -0.952294, ...,  1.      ,  0.      ,
          0.      ]])}

In [6]:
# Shape of the first record's feature matrix.
data[0]['features'].shape

(3676, 323)

In [7]:
d = data[0]
# Build one label vector: every value of the record except the feature
# matrix, in the dict's own key order.
y = np.array([d[key] for key in d if key != 'features'])
y

array([-1.64750894,  0.46291005,  1.06904497,  1.22474487,  0.        ])

In [10]:
def split_and_reshape(dd, seq_len=30, n_features=323):
    """Cut one record's feature matrix into fixed-length sequences.

    Parameters
    ----------
    dd : dict
        One record. ``dd['features']`` is an array whose second dimension
        has ``n_features`` columns; every other entry of the dict is
        collected, in the dict's iteration order, into the label vector.
    seq_len : int, default 30
        Number of consecutive rows that make up one sequence.
    n_features : int, default 323
        Expected number of columns in the feature matrix.

    Returns
    -------
    x : numpy.ndarray
        Shape ``(n_seq, seq_len, n_features)`` where
        ``n_seq = n_rows // seq_len``. Trailing rows that do not fill a
        complete sequence are dropped.
    y : numpy.ndarray
        The label vector repeated once per sequence, shape
        ``(n_seq, n_labels)``.

    Raises
    ------
    ValueError
        If ``seq_len`` is not positive.
    AssertionError
        If the feature matrix does not have ``n_features`` columns.
    """
    if seq_len <= 0:
        raise ValueError("seq_len must be a positive integer")

    x = dd['features']    # (-1, n_features)
    assert x.shape[1] == n_features, (
        f"Expected shape of (-1,{n_features}) for feature"
    )

    # Integer floor division: number of complete sequences in this record.
    seq_size = x.shape[0] // seq_len

    # Drop the incomplete tail, then fold the rows into sequences.
    x = x[:seq_size * seq_len].reshape((-1, seq_len, n_features))

    # One copy of the record's labels for each sequence.
    y = np.array([value for key, value in dd.items() if key != 'features'])
    y = np.tile(y, (seq_size, 1))

    return x, y
    
def convert_to_seq(data):
    """Turn a collection of records into stacked sequence and label arrays.

    Each record is passed to ``split_and_reshape`` and the per-record
    results are joined along the first axis.

    Parameters
    ----------
    data : iterable of dict
        Records accepted by ``split_and_reshape``.

    Returns
    -------
    X : numpy.ndarray
        All sequences of all records, concatenated on axis 0.
    Y : numpy.ndarray
        The matching label rows, concatenated on axis 0.
    """
    pairs = [split_and_reshape(record) for record in data]

    # Stack sequences and labels separately along the first dimension.
    X = np.concatenate([sequences for sequences, _ in pairs], axis=0)
    Y = np.concatenate([labels for _, labels in pairs], axis=0)
    return X, Y

In [11]:
# Convert every record into fixed-length sequences with one label row each.
X, Y = convert_to_seq(data)    # returns arrays shaped (-1,30,323) and (-1,5)

In [12]:
# Show the shapes of the sequence array and the label array; their first
# dimensions should match.
X.shape, Y.shape

((34880, 30, 323), (34880, 5))

In [20]:
# List the distinct values that occur in each label column of Y.
for i, column in enumerate(Y.T):
    print(i, np.unique(column))

0 [-1.64750894 -1.47408695 -1.30066495 -1.12724296 -0.95382097 -0.78039897
 -0.60697698 -0.43355498 -0.26013299 -0.086711    0.086711    0.26013299
  0.43355498  0.60697698  0.78039897  0.95382097  1.12724296  1.30066495
  1.47408695  1.64750894]
1 [-1.62018517 -1.38873015 -1.15727512 -0.9258201  -0.69436507 -0.46291005
 -0.23145502  0.          0.23145502  0.46291005  0.69436507  0.9258201
  1.15727512  1.38873015  1.62018517]
2 [-1.60356745 -0.26726124  1.06904497]
3 [-0.81649658  1.22474487]
4 [-1.22474487  0.          1.22474487]


In [14]:
# Distinct values in label column 1 (presumably 'test_condition', going by
# the key order shown for data[0] -- confirm).
np.unique(Y[:,1])

array([-1.62018517, -1.38873015, -1.15727512, -0.9258201 , -0.69436507,
       -0.46291005, -0.23145502,  0.        ,  0.23145502,  0.46291005,
        0.69436507,  0.9258201 ,  1.15727512,  1.38873015,  1.62018517])

In [15]:
# Distinct values in label column 2 (presumably 'delay', going by the key
# order shown for data[0] -- confirm).
np.unique(Y[:,2])

array([-1.60356745, -0.26726124,  1.06904497])

In [16]:
# Destination for the sequence arrays, under the same data root as the input.
seq_data_file = Path(data_root, 'data/processed/facial-1012-sequential.pkl')

In [17]:
# Persist the sequences and their labels together as one pickled (X, Y) tuple.
with open(seq_data_file, 'wb') as out_file:
    pickle.dump((X, Y), out_file)