In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import flirt.reader.empatica
import flirt.with_
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from joblib import Parallel, delayed
from tqdm.autonotebook import trange
from datetime import timedelta
import multiprocessing

  from tqdm.autonotebook import trange
  @jit('float64(float64[:], int32)')


Import Datasets

In [2]:
# Directory holding the raw study CSV exports. Kept in one place so the notebook
# can be pointed at another machine/location by editing a single value.
DATA_DIR = "C:/Users/duong/Desktop/BA/Analysis/dataset-main"

# Raw accelerometer samples and raw self-report answers, loaded unmodified.
raw_acceleration_data = pd.read_csv(f"{DATA_DIR}/AccelerometerMeasurements.csv")
raw_selfreports_data = pd.read_csv(f"{DATA_DIR}/SelfReports.csv")

Copy Dataset

In [3]:
# Work on independent copies so that later edits never touch the raw frames.
df_accel, df_reports = raw_acceleration_data.copy(), raw_selfreports_data.copy()

Cleaning Reports (Removing not available data)

In [4]:
# Keep only reports whose timeOfEngagement is non-zero
# (presumably 0 marks a prompt that was never answered - TODO confirm).
has_engagement = df_reports["timeOfEngagement"] != 0

# Keep only reports where every label was actually given ("NONE" = not available).
has_all_labels = (
    (df_reports["valence"] != "NONE")
    & (df_reports["arousal"] != "NONE")
    & (df_reports["context"] != "NONE")
)

# A single filtered copy replaces the chain of intermediate filtered copies.
df_reports = df_reports.loc[has_engagement & has_all_labels].copy()



Renaming Column in df Accel

In [5]:
# Give the accelerometer time column the same name as the one in the reports.
df_accel = df_accel.rename(columns={'timestamp': 'timeOfNotification'})

Convert timeOfNotification from Unix time (milliseconds) to datetime

In [6]:
# Both frames store timeOfNotification as Unix time in milliseconds;
# turn it into pandas timestamps in each of them.
for frame in (df_accel, df_reports):
    frame["timeOfNotification"] = pd.to_datetime(frame["timeOfNotification"], unit="ms")


Function to extract the time window

In [7]:
def extract_accel_data(row, accel_data, window_ms=300000):
    """Return the accelerometer samples recorded around one self-report.

    Parameters
    ----------
    row : mapping-like (e.g. a ``pd.Series``)
        One self-report. Must provide ``'timeOfNotification'`` (a timestamp)
        and ``'participantId'``.
    accel_data : pd.DataFrame
        Accelerometer samples with at least the columns ``'participantId'``
        and ``'timeOfNotification'``.
    window_ms : int, optional
        Half-width of the time window in milliseconds. Samples from
        ``window_ms`` before to ``window_ms`` after the report are kept.
        Defaults to 300000 (five minutes on each side).

    Returns
    -------
    pd.DataFrame
        The rows of ``accel_data`` that belong to the same participant and
        whose timestamp lies inside the window (both bounds inclusive).
        The original index and columns are preserved.
    """
    half_window = pd.Timedelta(milliseconds=window_ms)
    report_time = row['timeOfNotification']

    # Same participant AND timestamp within [report - window, report + window].
    # Series.between is inclusive on both ends by default.
    mask = (
        (accel_data['participantId'] == row['participantId'])
        & accel_data['timeOfNotification'].between(
            report_time - half_window, report_time + half_window
        )
    )
    return accel_data[mask]

In [8]:
# For every report, look up the accelerometer samples inside its time window
# and store that sub-frame in the report's 'accel_data' cell.
df_reports['accel_data'] = df_reports.apply(
    lambda report: extract_accel_data(report, accel_data=df_accel),
    axis=1,
)

Create Combined Dataframe

In [9]:
# Flatten the per-report accelerometer windows into one long table: every
# accelerometer sample becomes a row that also carries the labels of the
# report it belongs to.
combined_columns = [
    'participantId', 'selfreport_time', 'valence', 'arousal', 'context',
    'accel_time', 'x', 'y', 'z',
]
combined_data = []

for _, row in df_reports.iterrows():
    accel_data = row['accel_data']
    if accel_data.empty:
        # A report without samples in its window contributes no rows.
        continue

    # Build all rows of this report in one step instead of one Python dict per
    # sample; the scalar report fields are broadcast to the window's length.
    combined_data.append(pd.DataFrame({
        'participantId': row['participantId'],
        'selfreport_time': row['timeOfNotification'],
        'valence': row['valence'],
        'arousal': row['arousal'],
        'context': row['context'],
        'accel_time': accel_data['timeOfNotification'].to_numpy(),
        'x': accel_data['x'].to_numpy(),
        'y': accel_data['y'].to_numpy(),
        'z': accel_data['z'].to_numpy(),
    }))

if combined_data:
    combined_df = pd.concat(combined_data, ignore_index=True)
else:
    # pd.concat refuses an empty list; fall back to an empty, correctly named table.
    combined_df = pd.DataFrame(columns=combined_columns)

In [10]:
# Persist the combined table. The ".csv" suffix matches the file name that is
# read back later in this notebook ("data_5min_ms.csv").
combined_df.to_csv("data_5min_ms.csv", encoding='utf-8', index=False)

Import combined Dataset

In [12]:
# Load the combined self-report / accelerometer table back from disk.
data = pd.read_csv(filepath_or_buffer="data_5min_ms.csv")

Label with ReportID

In [13]:
# One report = one (participant, self-report time) pair; number them from 1.
report_groups = data.groupby(['participantId', 'selfreport_time'])
data['reportId'] = report_groups.ngroup() + 1

In [14]:
# Move reportId to the front: take the column out, then re-insert it at position 0.
report_id_column = data.pop("reportId")
data.insert(loc=0, column="reportId", value=report_id_column)

Scale x y z data

In [15]:
# Standardise the three acceleration axes. The scaler is fitted on the whole
# table, i.e. across all reports at once.
columns_to_scale = ['x', 'y', 'z']
scaler = StandardScaler().fit(data[columns_to_scale])
scaled_columns = scaler.transform(data[columns_to_scale])

# Wrap the result with the original index so the assignment lines up row by row.
scaled_df = pd.DataFrame(scaled_columns, index=data.index, columns=columns_to_scale)
data[columns_to_scale] = scaled_df

In [16]:
# These two columns were only needed to derive reportId; remove both in one call.
data = data.drop(columns=['participantId', 'selfreport_time'])

In [17]:
# rename() returns a new frame and leaves the original untouched unless
# inplace=True is passed, so the result has to be assigned back to `data`.
data = data.rename(columns={"accel_time": "datetime"})

In [18]:
# Sanity check: (rows, columns) of the table after the preprocessing steps so far.
data.shape

(4869564, 8)

In [19]:
# Parse the time column and replace it with a plain number: the index's int64
# representation divided by 100000.
# NOTE(review): int64 values are presumably nanoseconds since the Unix epoch,
# which would make the result units of 100 microseconds - confirm the intended unit.
accel_timestamps = pd.DatetimeIndex(data["datetime"])
data['datetime'] = accel_timestamps.astype(np.int64) / 100000
# combined_df['accel_time'] = pd.DatetimeIndex(combined_df["accel_time"]).astype( np.int64 )/100000

In [20]:
# Quick look at the report ids attached to the rows.
data["reportId"]

0            1
1            1
2            1
3            1
4            1
          ... 
4869559    330
4869560    330
4869561    330
4869562    330
4869563    330
Name: reportId, Length: 4869564, dtype: int64

Test on the first three reports

In [21]:
# features_list = []

# # Iterate through each group
# for report_id in data['reportId'].unique()[:3]:

#     # Creating temporaire dataframe with rows with same reportId
#     temp = data[data['reportId'] == report_id]

#     # Extracting accel_time x y z
#     temp_ex = temp[['datetime',"x","y","z"]].copy()

#     # Getting features
#     features = flirt.get_acc_features(temp_ex, window_length=10, window_step_size=1, data_frequency=25)

#     # Adding reportId, arousal, valence, and context to every row
#     features['reportId'] = report_id
#     features['arousal'] = temp['arousal'].iloc[0]
#     features['valence'] = temp['valence'].iloc[0]
#     features['context'] = temp['context'].iloc[0]
    
#     # Append to the list
#     features_list.append(features)


# # Combine all feature DataFrames into a single DataFrame
# all_features = pd.concat(features_list)

In [22]:
# all_features.shape

In [23]:
# all_features.to_csv("allfeatures_3min_10_1", encoding='utf-8', index=False)

Analyze Data and put into list

In [24]:
# Compute flirt's accelerometer features separately for every report and
# collect the per-report feature tables.
features_list = []

# groupby(sort=False) yields the reports in order of first appearance, which is
# the same order as iterating over data['reportId'].unique().
for report_id, report_rows in data.groupby('reportId', sort=False):

    # Hand flirt only the time column and the three acceleration axes.
    # NOTE(review): 'datetime' is passed as a regular column next to x/y/z -
    # confirm this is the input layout flirt.get_acc_features expects.
    acc_signal = report_rows[['datetime', 'x', 'y', 'z']].copy()

    # window_length / window_step_size are presumably seconds and
    # data_frequency Hz - TODO confirm against the flirt documentation.
    features = flirt.get_acc_features(acc_signal, window_length=120, window_step_size=10, data_frequency=25)

    # Every feature row carries the id and the labels of its report; the labels
    # are read from the report's first row.
    features['reportId'] = report_id
    for label in ('arousal', 'valence', 'context'):
        features[label] = report_rows[label].iloc[0]

    features_list.append(features)


# Stack the per-report feature tables into one DataFrame.
all_features = pd.concat(features_list)

ACC features:   0%|          | 0/60 [00:00<?, ?it/s]

ACC features: 100%|██████████| 60/60 [00:26<00:00,  2.27it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 445.35it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 449.34it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 435.24it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 470.50it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 433.17it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 233.75it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 378.50it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 435.33it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 488.75it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 475.32it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 466.64it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 388.13it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 482.49it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 57/57 [00:00<00:00, 414.40it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 374.92it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 461.41it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 432.53it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 505.23it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 457.91it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 300.59it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 464.48it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 481.08it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 428.15it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 449.98it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 418.35it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 441.07it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 413.87it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 397.77it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 442.17it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 376.81it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 431.55it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 413.63it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 476.31it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 445.96it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 388.24it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 465.01it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 440.18it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 418.87it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 452.26it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 389.11it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 440.59it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 483.36it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 474.13it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 463.25it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 454.43it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 338.38it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 470.05it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 457.23it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 420.10it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 454.53it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 443.09it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 306.77it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 476.09it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 458.31it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 44/44 [00:00<00:00, 684.02it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 430.53it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 454.44it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 417.55it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 496.76it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 451.05it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 51/51 [00:00<00:00, 410.93it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 431.52it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 351.29it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 439.72it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 444.31it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 445.97it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 422.44it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 375.21it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 489.64it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 424.22it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 370.28it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 438.48it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 449.75it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 454.63it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 425.48it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 444.17it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 374.03it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 422.86it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 438.84it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 51/51 [00:00<00:00, 406.25it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 427.74it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 427.47it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 276.44it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 483.76it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 442.33it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 441.09it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 429.49it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 442.14it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 410.89it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 457.89it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 383.39it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 464.98it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 451.02it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 44/44 [00:00<00:00, 701.42it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 431.55it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 448.21it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 434.67it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 463.08it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 412.38it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 431.55it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 392.19it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 399.84it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 454.44it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 39/39 [00:00<00:00, 672.19it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 435.58it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 405.27it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 327.69it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 435.72it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 441.06it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 358.60it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 447.68it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 474.41it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 433.56it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 445.97it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 425.43it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 421.56it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 465.00it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 466.47it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 425.67it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 447.66it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 296.94it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 387.00it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 397.19it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 369.41it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 447.29it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 476.09it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 444.34it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 40/40 [00:00<00:00, 632.31it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 423.65it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 458.13it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 408.27it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 462.92it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 449.30it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 427.59it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 370.60it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 46/46 [00:00<00:00, 491.81it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 388.23it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 403.09it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 441.08it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 474.71it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 419.74it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 441.08it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 439.11it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 470.50it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 424.80it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 460.11it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 419.29it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 421.98it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 447.66it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 405.96it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 457.26it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 433.72it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 474.03it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 447.59it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 425.45it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 442.66it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 470.05it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 437.52it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 439.54it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 427.56it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 288.86it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 422.44it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 404.45it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 429.90it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 484.95it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 451.02it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 460.87it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 449.77it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 447.66it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 455.13it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 441.38it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 474.41it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 427.08it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 404.13it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 315.97it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 436.24it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 468.63it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 451.02it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 431.48it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 422.93it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 392.95it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 457.89it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 464.83it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 470.01it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 442.36it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 442.71it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 447.66it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 413.70it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 384.62it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 411.81it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 56/56 [00:00<00:00, 422.03it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 451.02it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 451.84it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 412.27it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 442.32it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 357.06it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 436.97it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 426.41it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 469.97it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 437.86it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 455.10it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 436.22it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 434.00it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 51/51 [00:00<00:00, 405.21it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 347.43it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 456.14it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 450.42it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 353.88it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 436.89it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 451.03it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 449.69it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 434.16it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 451.09it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 45/45 [00:00<00:00, 732.55it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 425.20it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 399.91it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 472.33it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 435.02it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 477.23it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 397.98it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 465.75it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 296.12it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 417.60it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 428.47it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 448.59it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 441.07it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 459.22it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 466.63it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 431.54it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 452.48it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 455.69it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 421.56it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 42/42 [00:00<00:00, 624.83it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 442.56it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 432.72it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 436.10it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 373.91it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 483.76it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 430.71it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 416.56it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 440.84it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 450.99it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 437.82it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 403.06it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 483.74it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 416.39it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 354.96it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 312.43it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 38/38 [00:00<00:00, 636.35it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 489.03it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 445.65it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 488.94it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 481.38it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 460.89it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 495.93it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 456.02it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 476.77it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 463.20it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 425.61it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 43/43 [00:00<00:00, 661.39it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 471.02it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 465.82it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 468.14it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 472.31it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 483.75it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 475.17it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 490.75it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 452.67it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 456.10it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 474.31it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 477.40it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 59/59 [00:00<00:00, 465.97it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 473.74it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 506.01it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 483.82it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 439.67it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 469.51it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 487.30it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 454.02it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 499.44it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 468.38it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 472.34it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 476.61it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 456.14it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 466.28it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 443.50it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 472.20it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 456.95it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 462.69it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 483.75it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 483.76it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 476.31it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 468.64it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 465.07it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 482.56it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 491.59it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 482.84it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 450.13it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 474.96it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 518.58it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 451.03it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 346.88it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 490.69it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 494.79it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 499.87it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 457.91it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 511.38it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 449.19it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 435.75it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 483.77it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 490.45it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 461.95it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 485.64it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 456.92it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 474.43it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 397.26it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 516.26it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 477.43it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 474.49it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 454.44it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 476.07it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 43/43 [00:00<00:00, 667.40it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 425.76it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 472.34it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 469.86it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 467.41it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 440.51it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 487.80it/s]


Unable to remove memmapped file


ACC features: 100%|██████████| 60/60 [00:00<00:00, 398.03it/s]


Unable to remove memmapped file


In [25]:
# Display the dimensions (rows, columns) of the all_features table.
all_features.shape

(19589, 114)

Exporting Features

In [26]:
# Write the feature table to disk as UTF-8 CSV text, omitting the index column.
# NOTE(review): the target file name has no ".csv" extension - confirm that
# downstream readers expect exactly this name before changing it.
all_features.to_csv("allfeatures_5min_120_10", encoding='utf-8', index=False)

In [None]:
from sklearn.pipeline import Pipeline
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from joblib import Parallel, delayed
from tqdm.autonotebook import trange
from datetime import timedelta
import multiprocessing

# Step 1: Import necessary modules and libraries
import matplotlib.pyplot as plt
import flirt.reader.empatica
import flirt.with_

# Step 2: Define functions for each step in the pipeline

def import_data(accel_path, reports_path):
    """Load the accelerometer and self-report datasets from CSV files.

    Parameters
    ----------
    accel_path : str or path-like
        Location of the accelerometer measurements CSV.
    reports_path : str or path-like
        Location of the self-reports CSV.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(df_accel, df_reports)``, in that order.
    """
    # pd.read_csv already returns a fresh frame that nothing else references,
    # so an additional .copy() of each dataset would only double peak memory.
    df_accel = pd.read_csv(accel_path)
    df_reports = pd.read_csv(reports_path)

    return df_accel, df_reports

def preprocessing(df_reports, df_accel):
    """Clean the self-reports and put both tables on a shared datetime column.

    Parameters
    ----------
    df_reports : pandas.DataFrame
        Self-reports with 'timeOfEngagement', 'valence', 'arousal', 'context'
        and 'timeOfNotification' columns.
    df_accel : pandas.DataFrame
        Accelerometer samples with a 'timestamp' column.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(df_reports, df_accel)``: the filtered reports and the accelerometer
        samples with 'timestamp' renamed to 'timeOfNotification'. In both
        frames 'timeOfNotification' is converted with ``unit="ms"``.

    Notes
    -----
    Neither input frame is modified; both results are new objects, so the
    function can be re-run safely on the same inputs.
    """
    # Keep only rows where timeOfEngagement is non-zero and none of the three
    # answers carries the "NONE" placeholder.
    # NOTE(review): a zero timeOfEngagement presumably marks an unanswered
    # prompt - confirm against the dataset description.
    answered = (
        (df_reports.timeOfEngagement != 0)
        & (df_reports.valence != "NONE")
        & (df_reports.arousal != "NONE")
        & (df_reports.context != "NONE")
    )
    df_reports = df_reports[answered].copy()

    # Rename into a new frame instead of in place, so the caller's accelerometer
    # frame keeps its original 'timestamp' column and values.
    df_accel = df_accel.rename(columns={'timestamp': 'timeOfNotification'})

    # Converting timeOfNotification from Unix milliseconds to DateTime
    df_accel["timeOfNotification"] = pd.to_datetime(df_accel["timeOfNotification"], unit="ms")
    df_reports["timeOfNotification"] = pd.to_datetime(df_reports["timeOfNotification"], unit="ms")

    return df_reports, df_accel

def extract_accel_data(row, accel_data, time_window=5):
    """Return one participant's accelerometer samples around a self-report.

    Parameters
    ----------
    row : mapping-like
        A self-report record providing 'timeOfNotification' and
        'participantId'. 'timeOfNotification' must support subtraction and
        addition of a ``pd.Timedelta``.
    accel_data : pandas.DataFrame
        Accelerometer samples with 'participantId' and 'timeOfNotification'
        columns.
    time_window : int or float, default 5
        Half-width of the window in minutes; samples from ``time_window``
        minutes before to ``time_window`` minutes after the notification are
        kept. The default equals the 300000 ms used by the two-argument
        version of this function defined earlier in the notebook, so calls
        written for that version keep working.

    Returns
    -------
    pandas.DataFrame
        The rows of ``accel_data`` for that participant whose timestamp lies
        inside the window, both endpoints included.
    """
    # Half-width of the window, expressed as a Timedelta in minutes
    time_delta = pd.Timedelta(minutes=time_window)

    # Calculate the start and end time for the time window
    notification_time = row['timeOfNotification']
    start_time = notification_time - time_delta
    end_time = notification_time + time_delta
    participant_id = row['participantId']

    # Filter accelerometer data for the given participant and time window
    mask = (
        (accel_data['participantId'] == participant_id) &
        (accel_data['timeOfNotification'] >= start_time) &
        (accel_data['timeOfNotification'] <= end_time)
    )

    return accel_data[mask]

# Function to create combined dataframe
def create_combined_dataframe(df_reports, df_accel):
    """Flatten the per-report accelerometer windows into one long table.

    Parameters
    ----------
    df_reports : pandas.DataFrame
        Self-reports with 'participantId', 'timeOfNotification', 'valence',
        'arousal', 'context' and an 'accel_data' column whose cells each hold
        a DataFrame of samples with 'timeOfNotification', 'x', 'y' and 'z'.
    df_accel : pandas.DataFrame
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    pandas.DataFrame
        One row per accelerometer sample with the columns 'participantId',
        'selfreport_time', 'valence', 'arousal', 'context', 'accel_time',
        'x', 'y', 'z' and a fresh 0..n-1 index. When no report has any
        samples, the result is empty but still carries these columns.
    """
    output_columns = [
        'participantId', 'selfreport_time', 'valence', 'arousal', 'context',
        'accel_time', 'x', 'y', 'z',
    ]
    per_report_frames = []

    for _, row in df_reports.iterrows():
        samples = row['accel_data']
        if samples.empty:
            # A report without samples contributes no rows.
            continue
        # Build all rows of this report in one step instead of creating a
        # Python dict per accelerometer sample.
        labelled = (
            samples[['timeOfNotification', 'x', 'y', 'z']]
            .rename(columns={'timeOfNotification': 'accel_time'})
            .assign(
                participantId=row['participantId'],
                selfreport_time=row['timeOfNotification'],
                valence=row['valence'],
                arousal=row['arousal'],
                context=row['context'],
            )
        )
        per_report_frames.append(labelled)

    if not per_report_frames:
        # pd.concat refuses an empty list; return the schema with zero rows.
        return pd.DataFrame(columns=output_columns)

    combined_df = pd.concat(per_report_frames, ignore_index=True)[output_columns]
    return combined_df

# Function to import combined dataset
def import_combined_dataset(path="data_5min_ms.csv"):
    """Read a previously exported combined dataset from CSV.

    Parameters
    ----------
    path : str or path-like, default "data_5min_ms.csv"
        CSV file to read. The default keeps the file name this function
        always used, so calls without arguments behave as before.

    Returns
    -------
    pandas.DataFrame
        The file's contents as parsed by ``pd.read_csv``.
    """
    data = pd.read_csv(path)
    return data

# Function to label with report ID
def create_reportid(data):
    """Number every (participantId, selfreport_time) pair and put it first.

    A 1-based 'reportId' column is written into ``data`` itself, moved to
    position 0, and the same (modified) frame is returned.
    """
    group_numbers = data.groupby(['participantId', 'selfreport_time']).ngroup()
    # ngroup() counts from 0; shift so that report ids start at 1.
    data['reportId'] = group_numbers + 1

    # Take the new column out and re-insert it as the leading column.
    report_ids = data.pop("reportId")
    data.insert(0, report_ids.name, report_ids)
    return data

# Function to scale x, y, z data
def scale_xyz_data(data):
    """Replace the 'x', 'y' and 'z' columns with StandardScaler output.

    One scaler is fitted on all rows of ``data`` at once. The three columns
    are overwritten in ``data`` itself, and that same frame is returned.
    """
    axes = ['x', 'y', 'z']
    standardized = StandardScaler().fit_transform(data[axes])
    # Wrap the scaler's array with the original index so the assignment below
    # lines up row by row.
    data[axes] = pd.DataFrame(standardized, index=data.index, columns=axes)
    return data

# Function to preprocess data
def preprocess_data(data):
    """Drop the identifier columns and turn the sample time into a number.

    Removes 'participantId' and 'selfreport_time', renames 'accel_time' to
    'datetime', and replaces it with its integer timestamp divided by 100000.
    The input frame is left untouched; a new frame is returned.
    """
    trimmed = (
        data
        .drop(columns=['participantId', 'selfreport_time'])
        .rename(columns={"accel_time": "datetime"})
    )
    # NOTE(review): for nanosecond-resolution timestamps the divisor 100000
    # yields units of 100 microseconds, not milliseconds - confirm that this
    # is the unit the downstream feature extraction expects.
    epoch_ticks = pd.DatetimeIndex(trimmed["datetime"]).astype(np.int64)
    trimmed['datetime'] = epoch_ticks / 100000
    return trimmed

# Function to extract features
def extract_features(data, window_length=120, window_step_size=10, data_frequency=25):
    """Compute FLIRT accelerometer features for every report and stack them.

    Parameters
    ----------
    data : pandas.DataFrame
        Samples with 'reportId', 'datetime', 'x', 'y', 'z', 'arousal',
        'valence' and 'context' columns.
    window_length : int, default 120
        Passed through to ``flirt.get_acc_features``.
    window_step_size : int, default 10
        Passed through to ``flirt.get_acc_features``.
    data_frequency : int, default 25
        Passed through to ``flirt.get_acc_features``.

    Returns
    -------
    pandas.DataFrame
        The per-report feature frames concatenated in order of each report's
        first appearance in ``data``. Every row also carries its 'reportId'
        and that report's first 'arousal', 'valence' and 'context' values.

    Raises
    ------
    ValueError
        From ``pd.concat`` when ``data`` contains no reports.
    """
    features_list = []

    # One grouping pass replaces a full boolean scan of `data` per report id.
    # sort=False keeps the reports in order of first appearance.
    for report_id, report_rows in data.groupby('reportId', sort=False):
        signal = report_rows[['datetime', "x", "y", "z"]].copy()
        features = flirt.get_acc_features(
            signal,
            window_length=window_length,
            window_step_size=window_step_size,
            data_frequency=data_frequency,
        )
        features['reportId'] = report_id
        # The labels are taken from the report's first sample.
        # NOTE(review): presumably they are constant within a report - confirm.
        features['arousal'] = report_rows['arousal'].iloc[0]
        features['valence'] = report_rows['valence'].iloc[0]
        features['context'] = report_rows['context'].iloc[0]
        features_list.append(features)

    all_features = pd.concat(features_list)
    return all_features

# Step 3: Chain the steps into one pipeline function.
# The steps above are plain functions with differing signatures, not
# scikit-learn transformers (they have no fit/transform methods), so
# sklearn's Pipeline cannot execute them. They are called explicitly instead.

# Input locations, identical to the ones in the dataset import cell at the
# top of this notebook.
ACCEL_PATH = "C:/Users/duong/Desktop/BA/Analysis/dataset-main/AccelerometerMeasurements.csv"
REPORTS_PATH = "C:/Users/duong/Desktop/BA/Analysis/dataset-main/SelfReports.csv"


def run_pipeline(accel_path, reports_path, time_window=5):
    """Run every processing step, from the raw CSV files to the feature table.

    Parameters
    ----------
    accel_path : str or path-like
        Accelerometer measurements CSV.
    reports_path : str or path-like
        Self-reports CSV.
    time_window : int or float, default 5
        Minutes of accelerometer data kept before and after each report.

    Returns
    -------
    pandas.DataFrame
        The result of ``extract_features`` on the combined, labelled and
        scaled data.
    """
    df_accel, df_reports = import_data(accel_path, reports_path)
    df_reports, df_accel = preprocessing(df_reports, df_accel)

    # Attach each report's accelerometer window as one object cell. The cells
    # are filled one by one so every DataFrame is stored as a single element.
    accel_windows = np.empty(len(df_reports), dtype=object)
    for position, (_, report) in enumerate(df_reports.iterrows()):
        accel_windows[position] = extract_accel_data(report, df_accel, time_window)
    df_reports['accel_data'] = accel_windows

    # The combined frame is used directly from memory. import_combined_dataset()
    # remains available for starting from a previously exported CSV instead.
    combined = create_combined_dataframe(df_reports, df_accel)
    combined = create_reportid(combined)
    combined = scale_xyz_data(combined)
    combined = preprocess_data(combined)
    return extract_features(combined)


# Step 4: Run the pipeline
all_features = run_pipeline(ACCEL_PATH, REPORTS_PATH)