In [2]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import mlflow
import mlflow.sklearn
import numpy as np

# 1. Load dataset
df = pd.read_pickle("../S17/S17.pkl")

In [3]:
print(type(df))   # Should print <class 'dict'>
print(df.keys())  # Shows the keys in the dictionary


<class 'dict'>
dict_keys(['signal', 'label', 'subject'])


In [4]:
signal_dict = df["signal"]
label_array  = df["label"]
subject_array = df["subject"]

In [5]:
print("signal_array shape:", getattr(signal_dict, "shape", None))
print("label_array shape:", getattr(label_array, "shape", None))
print(type(signal_dict))
print(len(signal_dict))  # If it's a list or similar
print(signal_dict.keys()) 

signal_array shape: None
label_array shape: (4144000,)
<class 'dict'>
2
dict_keys(['chest', 'wrist'])


In [6]:
for key in signal_dict:
    value = signal_dict[key]
    print(f"[{key}] -> type: {type(value)}")
    if isinstance(value, dict):
        print(f"   Subkeys: {value.keys()}")


[chest] -> type: <class 'dict'>
   Subkeys: dict_keys(['ACC', 'ECG', 'EMG', 'EDA', 'Temp', 'Resp'])
[wrist] -> type: <class 'dict'>
   Subkeys: dict_keys(['ACC', 'BVP', 'EDA', 'TEMP'])


In [7]:
# chest_dict = signal_dict["chest"]
# features_list_chest = []
# for sensor_name_chest in chest_dict:
#     sensor_data_chest = chest_dict[sensor_name_chest]
#     # If it's 1D: shape=(N,), reshape to (N,1).
#     # If it's 2D: shape=(N, c), leave it as is.
#     if sensor_data_chest.ndim == 1:
#         sensor_data_chest = sensor_data_chest.reshape(-1, 1)
#     # Append to the list
#     features_list_chest.append(sensor_data_chest)

# # Concatenate all channels horizontally
# X_chest = np.hstack(features_list_chest)  #

In [8]:
chest_dict = signal_dict["chest"]
features_list_chest = []
for sensor_name_chest in chest_dict:
    sensor_data_chest = chest_dict[sensor_name_chest]
    if sensor_data_chest.ndim == 1:
        sensor_data_chest = sensor_data_chest.reshape(-1, 1)
    features_list_chest.append(sensor_data_chest)

In [9]:
# wrist_dict = signal_dict["wrist"]
# features_list_wrist = []
# for sensor_name_wrist in wrist_dict:
#     sensor_data_wrist = wrist_dict[sensor_name_wrist]
#     # If 1D, reshape to (N,1) for stacking
#     if sensor_data_wrist.ndim == 1:
#         sensor_data_wrist = sensor_data_wrist.reshape(-1, 1)
#     features_list_wrist.append(sensor_data_wrist)

# # Find the minimum length across all wrist sensors
# min_len = min(arr.shape[0] for arr in features_list_wrist)

# # Clip each sensor's data to 'min_len'
# for i in range(len(features_list_wrist)):
#     features_list_wrist[i] = features_list_wrist[i][:min_len]

# # Now all arrays have shape (min_len, c_i)
# X_wrist = np.hstack(features_list_wrist)
# print(X_wrist.shape)  # (min_len, sum_of_channels)

In [10]:
wrist_dict = signal_dict["wrist"]
features_list_wrist = []
for sensor_name_wrist in wrist_dict:
    sensor_data_wrist = wrist_dict[sensor_name_wrist]
    if sensor_data_wrist.ndim == 1:
        sensor_data_wrist = sensor_data_wrist.reshape(-1, 1)
    features_list_wrist.append(sensor_data_wrist)

In [11]:
all_arrays = features_list_chest + features_list_wrist
min_len = min(arr.shape[0] for arr in all_arrays)

In [12]:
for i in range(len(features_list_chest)):
    features_list_chest[i] = features_list_chest[i][:min_len]

for i in range(len(features_list_wrist)):
    features_list_wrist[i] = features_list_wrist[i][:min_len]

In [13]:
X_chest = np.hstack(features_list_chest)
X_wrist = np.hstack(features_list_wrist)
X = np.hstack([X_chest, X_wrist])

label_array = label_array[:min_len]
y = pd.Series(label_array, name="label")

X_df = pd.DataFrame(X) #Opsional

In [14]:
X_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,0.7002,-0.1982,-1.0702,1.431793,0.007095,5.400467,32.415283,3.22113,-36.0,-38.0,-128.0,-0.49,2.015176,33.39
1,0.7294,-0.1902,-1.047,1.46196,-0.012405,5.408859,32.434631,3.607178,-9.0,-9.0,2.0,-0.38,2.012617,33.37
2,0.7646,-0.184,-1.0046,1.471619,-0.018082,5.401611,32.445099,3.608704,113.0,-25.0,86.0,-0.36,2.001103,33.37
3,0.8014,-0.1778,-0.9782,1.472763,-0.027557,5.407715,32.439087,3.643799,-115.0,-18.0,-35.0,-0.75,2.030529,33.37
4,0.8348,-0.1608,-0.895,1.469421,-0.026779,5.410385,32.439087,3.575134,-37.0,-24.0,56.0,-1.74,1.979353,33.37


In [15]:
X_df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
count,23680.0,23680.0,23680.0,23680.0,23680.0,23680.0,23680.0,23680.0,23680.0,23680.0,23680.0,23680.0,23680.0,23680.0
mean,0.905636,-0.056766,-0.136282,0.001077,-0.003396,5.426024,32.397633,-0.182503,-35.355954,-24.07065,-29.761402,0.001593,1.080372,32.706384
std,0.031873,0.059082,0.081497,0.300154,0.020144,0.016262,0.068494,3.512175,13.20301,22.112599,28.911897,56.337299,0.247978,0.448797
min,0.5704,-0.6656,-1.3214,-0.515762,-0.197708,5.374146,32.117615,-6.642151,-128.0,-73.0,-128.0,-383.11,0.642597,31.81
25%,0.901,-0.0618,-0.15485,-0.120804,-0.015152,5.416489,32.394379,-3.259277,-49.0,-33.0,-53.0,-23.6825,0.934301,32.33
50%,0.9078,-0.0466,-0.1262,-0.03447,-0.003227,5.42717,32.418213,-0.4776,-30.0,-28.0,-48.0,3.19,1.016183,32.65
75%,0.9118,-0.0404,-0.1102,0.022659,0.008331,5.436325,32.439087,2.580643,-24.0,-26.0,-9.0,21.85,1.091767,33.05
max,1.2472,0.5306,1.4744,1.499313,0.163925,5.516815,32.580933,8.041382,125.0,127.0,101.0,469.91,2.585791,33.53


In [16]:
X_df.info() #info

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23680 entries, 0 to 23679
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       23680 non-null  float64
 1   1       23680 non-null  float64
 2   2       23680 non-null  float64
 3   3       23680 non-null  float64
 4   4       23680 non-null  float64
 5   5       23680 non-null  float64
 6   6       23680 non-null  float64
 7   7       23680 non-null  float64
 8   8       23680 non-null  float64
 9   9       23680 non-null  float64
 10  10      23680 non-null  float64
 11  11      23680 non-null  float64
 12  12      23680 non-null  float64
 13  13      23680 non-null  float64
dtypes: float64(14)
memory usage: 2.5 MB


In [65]:
X_train, X_test, y_train, y_test = train_test_split(
    X_df, y, test_size=0.2, random_state=42
)

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [66]:
# 4. Evaluate model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

In [69]:
# 5. Set MLflow tracking URI (local or remote server)
mlflow.set_tracking_uri("http://localhost:5000")
# Ganti sesuai setup MLflow Anda

# 6. Log model and metrics to MLflow
with mlflow.start_run():
    mlflow.log_param("model_type", "RandomForestClassifier")
    mlflow.log_metric("accuracy", accuracy)
    
    # Log model and register to Model Registry
    mlflow.sklearn.log_model(
        sk_model=model, 
        artifact_path="wesad_model", 
        registered_model_name="Deployment_Model"  # Nama model yang terdaftar
    )

    print(f"Model logged with accuracy: {accuracy}")

# 7. Print success message
print("Model successfully saved to MLflow and registered in the Model Registry!")

Successfully registered model 'Deployment_Model'.
2024/12/22 22:01:07 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Deployment_Model, version 1


Model logged with accuracy: 1.0
🏃 View run traveling-owl-279 at: http://localhost:5000/#/experiments/0/runs/c79e6efa932a40a49011a4bafc2c5776
🧪 View experiment at: http://localhost:5000/#/experiments/0
Model successfully saved to MLflow and registered in the Model Registry!


Created version '1' of model 'Deployment_Model'.
