# Physical activity monitoring project - ML model training



In [1]:
# Log in to your W&B account
# import wandb

import random
import math
import os
import pandas as pd
import numpy as np

# wandb.login()

# Open data chunks

In [7]:

# Folder that holds the pre-cut activity chunk files
folder_path = r"C:\Users\malit\Documents\GitHub\AI_ML_data\PAMAP2_Dataset\activity_chunks_v1"

file_name_without_ext = "subject101"

activity_ids = [3, 5, 6, 7]
activity_id = 3  # activity to inspect by default
chunk_count = 5  # chunk index to inspect by default

# Chunk files follow the pattern <subject>_activity_<id>_chunk_<n>.csv
file_name = f"{file_name_without_ext}_activity_{activity_id}_chunk_{chunk_count}.csv"
file_path = os.path.join(folder_path, file_name)

# Fail early with a readable message instead of letting the loader raise
if not os.path.isfile(file_path):
    raise FileNotFoundError(f"The file {file_path} does not exist.")

# Show plain decimals (no scientific notation) when the array is printed
np.set_printoptions(precision=6, suppress=True)

# Comma-separated values; comments=None turns off '#' comment handling so
# every line is parsed as data. All other loadtxt options stay at defaults.
csv_data = np.loadtxt(
    file_path,
    delimiter=',',
    dtype=float,
    comments=None,
    encoding='utf-8',
)

print(f"Loaded data from {file_path}")
print(f"Data shape: {csv_data.shape}")
print(csv_data)

Loaded data from C:\Users\malit\Documents\GitHub\AI_ML_data\PAMAP2_Dataset\activity_chunks_v1\subject101_activity_3_chunk_5.csv
Data shape: (100, 45)
[[549.33      3.      104.      ...  37.8959   -5.75508   1.     ]
 [549.34      3.      104.      ...  38.8701   -5.50071   1.     ]
 [549.35      3.      104.      ...  37.5468   -5.8872    1.     ]
 ...
 [550.3       3.      105.      ...  38.5598   -5.8717    1.     ]
 [550.31      3.      105.      ...  37.6758   -5.509     1.     ]
 [550.32      3.      105.      ...  37.5576   -5.51211   1.     ]]


## MiniRocket + RidgeClassifier

In [2]:
import os, glob, re, random
import numpy as np
import joblib
import json
from sklearn.linear_model import RidgeClassifierCV
from sklearn.metrics import classification_report, confusion_matrix
from sktime.transformations.panel.minirocket import MiniRocketMultivariate

ModuleNotFoundError: No module named 'sktime'

In [None]:


# --- Experiment configuration ---
# Folder with the per-activity CSV chunks. A raw string already keeps
# backslashes literal, so they must not be doubled.
DATA_DIR = r"C:\Users\malit\Documents\GitHub\AI_ML_data\PAMAP2_Dataset\activity_chunks_v1"
TARGET_ACTIVITIES = [3, 5, 6, 7]  # activity ids kept for classification
SAMPLES_PER_CLASS = 50            # max number of chunk files drawn per activity
FEATURE_LIMIT = 40                # number of feature columns kept per chunk
SEQ_LEN = 100                     # number of rows every chunk must have
SEED = 42                         # seed for the sampler and for MiniRocket
out_dir = 'models/minirocket_baseline'
os.makedirs(out_dir, exist_ok=True)

# Dedicated generator so sampling does not depend on the global random state
rng = random.Random(SEED)

def parse_activity(fp):
    """Return the activity id encoded in a chunk file name.

    Only the base name of *fp* is inspected. It is searched for the
    pattern ``activity_<digits>_chunk``.

    Returns:
        The digits as an ``int``, or ``None`` when the base name does not
        contain the pattern.
    """
    base_name = os.path.basename(fp)
    match = re.search(r"activity_(\d+)_chunk", base_name)
    if match is None:
        return None
    return int(match.group(1))

# 1 Collect & group
# glob returns matches in arbitrary, OS-dependent order. Sort first so the
# seeded shuffle below picks the same files on every machine and every run.
paths = []
by_act = {a: [] for a in TARGET_ACTIVITIES}
for p in sorted(glob.glob(os.path.join(DATA_DIR, '*.csv'))):
    act = parse_activity(p)  # None when the name has no activity tag
    if act in by_act:
        paths.append(p)
        by_act[act].append(p)
for a in TARGET_ACTIVITIES:
    rng.shuffle(by_act[a])

# 2 Sample balanced
# Take at most SAMPLES_PER_CLASS files per activity, in activity order.
selected = []
for a in TARGET_ACTIVITIES:
    selected.extend(by_act[a][:SAMPLES_PER_CLASS])

# 3-5 Load & build tensor list
X_list, y_list = [], []
expected_shape = (FEATURE_LIMIT, SEQ_LEN)
for fp in selected:
    arr = np.loadtxt(fp, delimiter=',', dtype=float)
    # Columns 0 and 1 are never used as features; keep at most
    # FEATURE_LIMIT of the columns that follow them.
    feat_idx = list(range(2, arr.shape[1]))[:FEATURE_LIMIT]
    seq = arr[:, feat_idx].T   # (F, T)
    # Chunks that do not have exactly the expected shape are left out
    if seq.shape == expected_shape:
        X_list.append(seq)
        y_list.append(parse_activity(fp))

X = np.stack(X_list)  # (N, F, T)
y = np.array(y_list)

# 6 Split
# Shuffle the sample indices, then cut them 70 / 15 / 15 into train,
# validation and test. The shuffle has to be applied to the sequence that is
# sliced afterwards: shuffling a temporary copy leaves the order untouched,
# and because samples are appended one activity at a time, an unshuffled
# split would keep whole classes out of the training set.
order = list(range(len(X)))
rng.shuffle(order)
idx = np.array(order, dtype=int)  # explicit dtype keeps an empty index usable
tr_end = int(0.7 * len(idx))
va_end = int(0.85 * len(idx))
tr, va, te = idx[:tr_end], idx[tr_end:va_end], idx[va_end:]
Xtr, Xva, Xte = X[tr], X[va], X[te]
ytr, yva, yte = y[tr], y[va], y[te]

# 7-8 MiniRocket fit/transform
mr = MiniRocketMultivariate(random_state=SEED)
# Fit on the training split only, then apply the same fitted transform to
# the train, validation and test splits in that order.
mr.fit(Xtr)
Phi_tr, Phi_va, Phi_te = (mr.transform(part) for part in (Xtr, Xva, Xte))

# 9 RidgeClassifierCV
# Candidate regularisation strengths: 7 values log-spaced from 1e-3 to 1e3
alpha_grid = np.logspace(-3, 3, 7)
clf = RidgeClassifierCV(alphas=alpha_grid)
clf.fit(Phi_tr, ytr)

# Accuracy on the held-out validation and test features
for label, feats, target in (('Val acc:', Phi_va, yva), ('Test acc:', Phi_te, yte)):
    print(label, clf.score(feats, target))

# Predict the test split once and reuse the result for both reports
yte_pred = clf.predict(Phi_te)
print(classification_report(yte, yte_pred))
print(confusion_matrix(yte, yte_pred))

# 10 Save artifacts
# Persist the fitted transform and the classifier next to each other
for artifact, artifact_name in ((mr, 'transform.pkl'), (clf, 'ridge.pkl')):
    joblib.dump(artifact, os.path.join(out_dir, artifact_name))

# Record the settings that produced the artifacts
run_config = {
    'activities': TARGET_ACTIVITIES,
    'feature_limit': FEATURE_LIMIT,
    'seq_len': SEQ_LEN,
    'seed': SEED,
}
config_path = os.path.join(out_dir, 'config.json')
with open(config_path, 'w') as f:
    json.dump(run_config, f, indent=2)
