# Pain Data Restructuring
This notebook restructures the pain dataset so that it can be used with the federated-machine-learning repository.


In [1]:
import shutil
import sys
import os

# Put the parent of the current working directory on sys.path BEFORE importing
# project modules, so the import below also works on a freshly restarted kernel.
# (Presumably this parent directory is where the `Scripts` package lives - confirm.)
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from Scripts.Data_Loader_Functions import get_user_number, get_frame_number, get_session_id, \
    get_user_number_from_filename, read_pain_score_from_file, get_filename_without_extension

# INPUT_DATA is the directory tree that is walked and read from;
# RAW_DATA is the flat directory the renamed image copies are written to.
INPUT_DATA = os.path.join(module_path, "Data", "Input Data")
RAW_DATA = os.path.join(module_path, "Data", "Raw Data")



  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Build one lookup table of pain scores across the whole input tree.
# Only directories whose path contains "PSPI" contribute; each file in such a
# directory is keyed by get_filename_without_extension(<file name>) and mapped to
# the value returned by read_pain_score_from_file(<full file path>).
pain_scores = {}
for folder, _subfolders, score_files in os.walk(INPUT_DATA):
    if "PSPI" not in folder or not score_files:
        continue
    # Build the folder's table completely before merging it into the global one.
    folder_scores = {
        get_filename_without_extension(score_file):
            read_pain_score_from_file(os.path.join(folder, score_file))
        for score_file in score_files
    }
    pain_scores.update(folder_scores)

In [3]:
# Required format:
# "43_0_0_0_2.jpg", where:
# [person, session, culture, frame, pain_level]
# NOTE(review): the example above ends in .jpg, but the copies below are written
# with a .png extension - confirm which one the consumer expects.
#
# The walk over INPUT_DATA is top-down:
# iteration 2 gets the user folders
# iteration 3+ gets the user sessions and user frames per session

# Create the destination up front; without it every copy would fail and the
# cell would still finish "successfully" without having copied anything.
os.makedirs(RAW_DATA, exist_ok=True)

sessions = {}        # session directory name -> session index, for the user currently being walked
copied_count = 0     # number of images copied into RAW_DATA
skipped_files = []   # (source path, exception) for every image that could not be processed

for iteration, (dir_path, dir_names, filenames) in enumerate(os.walk(INPUT_DATA), start=1):
    if iteration == 2:  # get user directories
        users = {get_user_number(name): name for name in dir_names}
    if iteration >= 3 and "Images" in dir_path:
        if not filenames:  # get session directories
            # os.walk lists entries in arbitrary order; sorting gives every
            # session the same index on every machine and every run.
            sessions = {name: index for index, name in enumerate(sorted(dir_names))}
            users[get_user_number(dir_path)] = sessions
        else:  # get image filenames
            for filename in filenames:
                old_filepath = os.path.join(dir_path, filename)
                try:
                    # person _ session _ culture (always 0) _ frame _ pain level
                    new_filename = "{}_{}_0_{}_{}.png"\
                        .format(get_user_number_from_filename(filename),
                                sessions[get_session_id(filename)],
                                get_frame_number(filename),
                                pain_scores[get_filename_without_extension(filename)])
                    new_filepath = os.path.join(RAW_DATA, new_filename)
                    shutil.copy(old_filepath, new_filepath)
                except Exception as error:
                    # Best effort: a file that cannot be parsed, has no pain score
                    # or cannot be copied is skipped - but recorded, not hidden.
                    # KeyboardInterrupt / SystemExit are deliberately not caught.
                    skipped_files.append((old_filepath, error))
                    continue
                copied_count += 1

if skipped_files:
    first_path, first_error = skipped_files[0]
    print("Copied {} files, skipped {} (first: {} -> {!r})"
          .format(copied_count, len(skipped_files), first_path, first_error))
print("End of input data restructuring...")

End of input data restructuring...
