# In [None]:  (notebook cell marker left over from the Jupyter export)
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob
import time
from sklearn.preprocessing import StandardScaler

from collections import Counter
from thop import profile
from IPython.display import display
import copy

def get_rotation_matrix_3d(angle_degrees, axis='z'):
    """Return the 3x3 rotation matrix for a rotation about one coordinate axis.

    Args:
        angle_degrees: Rotation angle in degrees.
        axis: Axis to rotate about; one of 'x', 'y' or 'z'.

    Returns:
        A (3, 3) numpy array. Multiplying it with a column vector rotates
        that vector by ``angle_degrees`` about ``axis``.

    Raises:
        ValueError: If ``axis`` is not 'x', 'y' or 'z'. (Previously the
            function fell off the end and returned ``None``, which only
            surfaced later as a confusing matmul error.)
    """
    angle_rad = np.radians(angle_degrees)
    cos_a = np.cos(angle_rad)
    sin_a = np.sin(angle_rad)

    if axis == 'z':
        return np.array([
            [cos_a, -sin_a, 0],
            [sin_a, cos_a, 0],
            [0, 0, 1]
        ])
    if axis == 'y':
        return np.array([
            [cos_a, 0, sin_a],
            [0, 1, 0],
            [-sin_a, 0, cos_a]
        ])
    if axis == 'x':
        return np.array([
            [1, 0, 0],
            [0, cos_a, -sin_a],
            [0, sin_a, cos_a]
        ])

    raise ValueError(f"axis must be 'x', 'y' or 'z', got {axis!r}")

def rotate_signal(x, angle, dataset_type='uci'):
    """Rotate every 3-axis sensor triplet of one window about the z axis.

    The window is indexed as ``x[channel, time]``. For each dataset a fixed
    set of channel offsets marks the start of an (x, y, z) triplet; each
    triplet is multiplied by the same z-axis rotation matrix. Channels that
    are not part of a listed triplet are copied through unchanged.

    Args:
        x: 2-D numpy array indexed ``[channel, time]``. It is not modified.
        angle: Rotation angle in degrees.
        dataset_type: 'uci', 'wisdm', 'pamap2' or 'mhealth'. Any other value
            leaves the data unrotated (a plain copy is returned), matching
            the historical behaviour.

    Returns:
        A rotated copy of ``x`` with the same shape and dtype.
    """
    x_rotated = x.copy()
    rot_mat = get_rotation_matrix_3d(angle, axis='z')

    if dataset_type == 'wisdm':
        # The whole window is a single accelerometer triplet, so the full
        # array must have exactly three channels.
        x_rotated[:, :] = rot_mat @ x
        return x_rotated

    # First channel index of every (x, y, z) triplet, per dataset.
    # NOTE(review): the pamap2 offsets start at 1 with a stride of 16, which
    # matches a layout with one leading non-IMU channel. Confirm this against
    # the loader that produces the pamap2 arrays before relying on it.
    triplet_starts = {
        'uci': (0, 3, 6),
        'pamap2': (1, 4, 7, 10, 17, 20, 23, 26, 33, 36, 39, 42),
        'mhealth': (0, 5, 8, 11, 14, 17, 20),
    }

    # One (3, 3) @ (3, T) product per triplet replaces the former Python loop
    # over every time step.
    for start in triplet_starts.get(dataset_type, ()):
        stop = start + 3
        x_rotated[start:stop, :] = rot_mat @ x[start:stop, :]

    return x_rotated

class RotationDataset(Dataset):
    """Self-supervised dataset: each window is served once per rotation angle.

    Item ``idx`` maps to window ``idx // len(rotations)`` rotated by
    ``rotations[idx % len(rotations)]``; the target is the index of the
    rotation angle, not an activity label.
    """

    def __init__(self, X, rotations=None, dataset_type='uci'):
        """
        Args:
            X: Indexable collection of windows, each indexed [channel, time].
            rotations: Sequence of angles in degrees. Defaults to
                ``[0, 90, 180, 270]``. A fresh list is created per instance
                so that instances never share (and cannot corrupt) a
                module-level default list.
            dataset_type: Channel layout name forwarded to ``rotate_signal``.
        """
        self.X = X
        self.rotations = [0, 90, 180, 270] if rotations is None else rotations
        self.dataset_type = dataset_type

    def __len__(self):
        return len(self.X) * len(self.rotations)

    def __getitem__(self, idx):
        sample_idx = idx // len(self.rotations)
        rotation_idx = idx % len(self.rotations)

        x = self.X[sample_idx]
        rotation_angle = self.rotations[rotation_idx]

        x_rotated = rotate_signal(x, rotation_angle, self.dataset_type)

        # The pretext label is the position of the angle in `rotations`.
        return torch.FloatTensor(x_rotated), rotation_idx

class RotationTestDataset(Dataset):
    """Labelled dataset that serves every window once per rotation angle.

    Unlike the pretext dataset, the target returned with each rotated window
    is the window's own entry from ``y``.
    """

    def __init__(self, X, y, rotations, dataset_type='uci'):
        self.X, self.y = X, y
        self.rotations = rotations
        self.dataset_type = dataset_type

    def __len__(self):
        n_rotations = len(self.rotations)
        return len(self.X) * n_rotations

    def __getitem__(self, idx):
        # Consecutive indices walk through all rotations of one window
        # before moving on to the next window.
        sample_idx, rotation_idx = divmod(idx, len(self.rotations))

        window = self.X[sample_idx]
        target = self.y[sample_idx]

        rotated = rotate_signal(window, self.rotations[rotation_idx], self.dataset_type)
        return torch.FloatTensor(rotated), target

class DSConv(nn.Module):
    """Depthwise-separable 1-D convolution.

    A grouped convolution with one group per input channel is followed by a
    kernel-size-1 convolution that maps ``in_channels`` to ``out_channels``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super().__init__()
        # groups == in_channels: every input channel gets its own filter.
        self.depthwise = nn.Conv1d(
            in_channels,
            in_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            groups=in_channels,
        )
        self.pointwise = nn.Conv1d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        return self.pointwise(self.depthwise(x))

class InceptionBlock(nn.Module):
    """Four parallel DSConv branches whose outputs are concatenated by channel.

    Every branch emits ``out_channels // 4`` channels, so the block produces
    ``4 * (out_channels // 4)`` channels in total. All DSConv layers here use
    kernel size 1; branch 1 additionally applies a length-preserving max-pool
    first, and branches 2 and 3 each stack two DSConv layers.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        branch_width = out_channels // 4

        # Pool (kernel 3, stride 1, padding 1) then a single DSConv.
        self.branch1 = nn.Sequential(
            nn.MaxPool1d(3, stride=1, padding=1),
            DSConv(in_channels, branch_width, 1),
        )

        # Two stacked DSConv layers.
        self.branch2 = nn.Sequential(
            DSConv(in_channels, branch_width, 1),
            DSConv(branch_width, branch_width, 1),
        )

        # Same layout as branch2, with its own independent weights.
        self.branch3 = nn.Sequential(
            DSConv(in_channels, branch_width, 1),
            DSConv(branch_width, branch_width, 1),
        )

        # A single DSConv.
        self.branch4 = DSConv(in_channels, branch_width, 1)

        self.relu = nn.ReLU()

    def forward(self, x):
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        merged = torch.cat([branch(x) for branch in branches], dim=1)
        return self.relu(merged)

class MultiKernelBlock(nn.Module):
    """Parallel DSConv branches with kernel sizes 1, 3, 5 and 7.

    Padding is chosen per branch (0, 1, 2, 3) so that every branch keeps the
    sequence length; the four ``out_channels // 4``-wide outputs are
    concatenated along the channel axis and passed through ReLU.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        branch_width = out_channels // 4

        self.branch1 = DSConv(in_channels, branch_width, 1)
        self.branch2 = DSConv(in_channels, branch_width, 3, padding=1)
        self.branch3 = DSConv(in_channels, branch_width, 5, padding=2)
        self.branch4 = DSConv(in_channels, branch_width, 7, padding=3)

        self.relu = nn.ReLU()

    def forward(self, x):
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        stacked = torch.cat([branch(x) for branch in branches], dim=1)
        return self.relu(stacked)

class StemLayer(nn.Module):
    """Input stem: Conv1d (kernel 3, padding 1) -> BatchNorm1d -> ReLU."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # stride 1 with padding 1 keeps the sequence length unchanged.
        self.conv1d = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        return self.relu(self.bn(self.conv1d(x)))

class SSLModel(nn.Module):
    """Backbone (stem -> inception -> multi-kernel -> global pool) plus a linear head.

    ``extract_features`` returns the pooled, flattened backbone output;
    ``forward`` applies ``self.classifier`` on top of those features.
    """

    def __init__(self, in_channels, num_classes, stem_out=64, inception_out=128, mk_out=128):
        super().__init__()

        # Feature extractor.
        self.stem = StemLayer(in_channels, stem_out)
        self.inception = InceptionBlock(stem_out, inception_out)
        self.mk = MultiKernelBlock(inception_out, mk_out)

        # Collapse the time axis to length 1, then drop it.
        self.adaptive_pool = nn.AdaptiveAvgPool1d(1)
        self.flatten = nn.Flatten()

        # Linear head over the pooled features.
        self.classifier = nn.Linear(mk_out, num_classes)

    def forward(self, x):
        return self.classifier(self.extract_features(x))

    def extract_features(self, x):
        for stage in (self.stem, self.inception, self.mk, self.adaptive_pool, self.flatten):
            x = stage(x)
        return x

class FinetuneClassifier(nn.Module):
    """Two-layer MLP head: Linear -> BatchNorm1d -> ReLU -> Dropout -> Linear.

    The hidden width is fixed at 256 units.
    """

    def __init__(self, in_features, num_classes, dropout=0.3):
        super().__init__()

        hidden_units = 256

        self.fc1 = nn.Linear(in_features, hidden_units)
        self.bn1 = nn.BatchNorm1d(hidden_units)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)

        self.fc2 = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        hidden = self.dropout1(self.relu1(self.bn1(self.fc1(x))))
        return self.fc2(hidden)

def split_sequences(features, labels, window_size, step):
    """Cut aligned feature/label sequences into fixed-size sliding windows.

    Windows start at 0, ``step``, ``2 * step``, ... and only full-length
    windows are kept. Each window is labelled with the most frequent label
    inside it.

    Returns:
        ``(X, y)`` as numpy arrays: the stacked windows and one label per
        window.
    """
    starts = range(0, len(features) - window_size + 1, step)

    windows = [features[begin:begin + window_size] for begin in starts]
    majority_labels = [
        Counter(labels[begin:begin + window_size]).most_common(1)[0][0]
        for begin in starts
    ]

    return np.array(windows), np.array(majority_labels)

def load_mhealth_data(data_path, window_size=50, step=25):
    """Load every ``*.log`` file under ``data_path`` and window the recordings.

    Each file is read as a tab-separated table without header. The last
    column is taken as the integer activity label and all other columns as
    features. Rows labelled 0 are dropped and the remaining labels are
    shifted down by one so that they start at 0.

    Args:
        data_path: Directory containing the mHealth ``*.log`` files.
        window_size: Number of rows per window.
        step: Stride between consecutive window starts.

    Returns:
        ``(X, y, activity_labels, num_classes)`` where ``X`` is indexed
        ``[window, channel, time]``, ``y`` holds one label per window,
        ``activity_labels`` is the fixed list of 12 activity names and
        ``num_classes`` is the number of distinct labels actually present.

    Raises:
        FileNotFoundError: If ``data_path`` contains no ``*.log`` file.
        ValueError: If no file could be parsed, or the labelled data is too
            short to produce a single window.
    """
    print("Loading mHealth dataset...")
    # glob order depends on the filesystem; sorting keeps the concatenation
    # order, and therefore every window, reproducible across machines.
    log_files = sorted(glob.glob(os.path.join(data_path, "*.log")))
    if not log_files:
        raise FileNotFoundError(f"No log files found in {data_path}")

    all_data = []
    for log_file in log_files:
        try:
            all_data.append(pd.read_csv(log_file, header=None, sep='\t'))
        except (OSError, ValueError) as exc:
            # pandas parser/empty-data errors and decode errors are all
            # ValueError subclasses. Skip the file, but say so instead of
            # silently shrinking the dataset.
            print(f"Skipping unreadable mHealth log {log_file}: {exc}")

    if not all_data:
        raise ValueError("No valid data files found")

    combined_data = pd.concat(all_data, ignore_index=True)

    features = combined_data.iloc[:, :-1].values
    labels = combined_data.iloc[:, -1].values.astype(int)

    # Drop rows labelled 0 and re-base the remaining labels to start at 0.
    mask = labels != 0
    features = features[mask]
    labels = labels[mask] - 1

    X_sequences, y_sequences = split_sequences(features, labels, window_size, step)
    if X_sequences.ndim != 3:
        # Without this check the transpose below fails with an opaque
        # "axes don't match array" error.
        raise ValueError(
            f"Not enough labelled samples in {data_path} to build a window of size {window_size}"
        )

    num_classes = len(np.unique(y_sequences))

    activity_labels = [
        "Standing still", "Sitting and relaxing", "Lying down", "Walking", "Climbing stairs", "Waist bends forward",
        "Frontal elevation of arms", "Knees bending (crouching)", "Cycling", "Jogging", "Running", "Jump front & back"
    ]

    # [window, time, channel] -> [window, channel, time]
    X_windowed = np.transpose(X_sequences, (0, 2, 1))

    print(f"mHealth - Shape: {X_windowed.shape}, Classes: {num_classes}")

    return X_windowed, y_sequences, activity_labels, num_classes

def load_uci_har_raw(dataset_path):
    """Load the nine raw inertial signals and labels of the UCI HAR dataset.

    For every signal name the files ``<signal>_train.txt`` and
    ``<signal>_test.txt`` are read from ``dataset_path``; rows containing NaN
    are dropped. Labels come from ``y_train.txt`` / ``y_test.txt`` and are
    shifted down by one so that they start at 0.

    Args:
        dataset_path: Directory holding the signal and label text files.

    Returns:
        ``(X_train, y_train, X_test, y_test, activity_names)`` with the ``X``
        arrays indexed ``[window, channel, time]``. If any file cannot be
        loaded, a tuple of five ``None`` values is returned and the reason is
        printed.
    """
    print(f"Loading UCI HAR data from: {dataset_path}")
    SIGNALS = ["body_acc_x", "body_acc_y", "body_acc_z", "body_gyro_x", "body_gyro_y", "body_gyro_z", "total_acc_x", "total_acc_y", "total_acc_z"]
    failure = (None, None, None, None, None)

    X_train_list = []
    X_test_list = []

    for signal in SIGNALS:
        train_file = os.path.join(dataset_path, f"{signal}_train.txt")
        test_file = os.path.join(dataset_path, f"{signal}_test.txt")

        # `except Exception` keeps the best-effort contract (return Nones on
        # any load problem) without swallowing KeyboardInterrupt/SystemExit
        # the way the former bare `except:` did.
        try:
            train_signal = np.genfromtxt(train_file, dtype=np.float32, invalid_raise=False)
            test_signal = np.genfromtxt(test_file, dtype=np.float32, invalid_raise=False)

            train_signal = train_signal[~np.isnan(train_signal).any(axis=1)]
            test_signal = test_signal[~np.isnan(test_signal).any(axis=1)]
        except Exception as exc:
            print(f"Failed to load UCI HAR signal '{signal}': {exc}")
            return failure

        X_train_list.append(train_signal)
        X_test_list.append(test_signal)

    if not X_train_list:
        return failure

    # Stack signals as the last axis, then move them in front of time:
    # [window, time, channel] -> [window, channel, time].
    X_train = np.transpose(np.stack(X_train_list, axis=-1), (0, 2, 1))
    X_test = np.transpose(np.stack(X_test_list, axis=-1), (0, 2, 1))

    try:
        y_train = np.genfromtxt(os.path.join(dataset_path, "y_train.txt"), dtype=int, invalid_raise=False) - 1
        y_test = np.genfromtxt(os.path.join(dataset_path, "y_test.txt"), dtype=int, invalid_raise=False) - 1

        # isnan is all-False for integer arrays; the filter is kept so the
        # result shape matches the previous implementation exactly.
        y_train = y_train[~np.isnan(y_train)].astype(int)
        y_test = y_test[~np.isnan(y_test)].astype(int)
    except Exception as exc:
        print(f"Failed to load UCI HAR labels: {exc}")
        return failure

    ucihar_activity_names = ['Walking', 'Walking Upstairs', 'Walking Downstairs', 'Sitting', 'Standing', 'Laying']

    print(f"UCI HAR - Shape: {X_train.shape}, Classes: {len(ucihar_activity_names)}")

    return X_train, y_train, X_test, y_test, ucihar_activity_names

def load_wisdm_data(data_path, window_size=80, step=40):
    """Parse the raw WISDM accelerometer text file and window it.

    Each line is expected to hold six comma-separated fields
    (user, activity, timestamp, x, y, z), optionally terminated by ``;``.
    Lines with a different field count, and rows whose numeric fields cannot
    be parsed, are discarded. Activity names are integer-encoded with a
    ``LabelEncoder``.

    Args:
        data_path: Path to the raw text file, or a directory containing
            ``WISDM_ar_v1.1_raw.txt`` or ``WISDM_ar_v1.1.txt``.
        window_size: Number of rows per window.
        step: Stride between consecutive window starts.

    Returns:
        ``(X, y, activity_labels, num_classes)`` where ``X`` is indexed
        ``[window, channel, time]`` with the three accelerometer axes as
        channels.

    Raises:
        FileNotFoundError: If ``data_path`` is a directory that contains
            neither of the expected files (previously this fell through to
            ``open()`` on the directory itself).
        ValueError: If the file yields too few valid rows for one window.
    """
    print("Loading WISDM dataset...")

    if os.path.isdir(data_path):
        possible_files = ['WISDM_ar_v1.1_raw.txt', 'WISDM_ar_v1.1.txt']
        candidates = [os.path.join(data_path, name) for name in possible_files]
        existing = [path for path in candidates if os.path.exists(path)]
        if not existing:
            raise FileNotFoundError(
                f"None of {possible_files} found in directory {data_path}"
            )
        data_path = existing[0]

    column_names = ['user', 'activity', 'timestamp', 'x_accel', 'y_accel', 'z_accel']
    rows = []

    with open(data_path, 'r') as file:
        for line in file:
            line = line.strip()
            if line.endswith(';'):
                line = line[:-1]
            if not line:
                continue
            values = line.split(',')
            # Keep only well-formed records; anything else is skipped.
            if len(values) == 6:
                rows.append(values)

    df = pd.DataFrame(rows, columns=column_names)

    # Non-numeric entries become NaN and are removed by dropna() below.
    for col in ['user', 'timestamp', 'x_accel', 'y_accel', 'z_accel']:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    df = df.dropna()

    label_encoder = LabelEncoder()
    encoded_labels = label_encoder.fit_transform(df['activity'])

    features = df[['x_accel', 'y_accel', 'z_accel']].values

    X_sequences, y_sequences = split_sequences(features, encoded_labels, window_size, step)
    if X_sequences.ndim != 3:
        # Without this check the transpose below fails with an opaque
        # "axes don't match array" error.
        raise ValueError(
            f"Not enough valid rows in {data_path} to build a window of size {window_size}"
        )

    activity_labels = label_encoder.classes_.tolist()
    num_classes = len(activity_labels)

    # [window, time, channel] -> [window, channel, time]
    X_windowed = np.transpose(X_sequences, (0, 2, 1))

    print(f"WISDM - Shape: {X_windowed.shape}, Classes: {num_classes}")

    return X_windowed, y_sequences, activity_labels, num_classes

import glob
import os
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.preprocessing import StandardScaler

import glob
import os
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.preprocessing import StandardScaler

def load_and_window_pamap2(dataset_dir="PAMAP2"):
    """Load the PAMAP2 ``subject*.dat`` files and cut them into windows.

    Files are read from ``<dataset_dir>/Protocol`` and, when present,
    ``<dataset_dir>/Optional``. Missing values are forward- then
    backward-filled. Only rows whose activity ID is one of the 12 mapped
    activities are kept, and the kept rows of each file are windowed with a
    window of 100 rows and a step of 50.

    After windowing, a ``StandardScaler`` is fitted on the flattened windows,
    i.e. one mean/std per (time step, channel) position, estimated over all
    windows returned by this function.

    Args:
        dataset_dir: Root directory of the PAMAP2 dataset.

    Returns:
        ``(X, y, activity_labels, num_classes)`` where ``X`` is a float array
        indexed ``[window, channel, time]`` and ``num_classes`` is the number
        of distinct labels actually present. Returns four ``None`` values
        when no file is found or no window could be built.
    """
    file_paths = sorted(glob.glob(os.path.join(dataset_dir, 'Protocol', 'subject*.dat')))
    optional_path = os.path.join(dataset_dir, 'Optional')
    if os.path.exists(optional_path):
        file_paths += sorted(glob.glob(os.path.join(optional_path, 'subject*.dat')))
    if not file_paths:
        return None, None, None, None

    activity_labels = [
        "lying", "sitting", "standing", "walking", "running", "cycling",
        "Nordic walking", "ascending stairs", "descending stairs",
        "vacuum cleaning", "ironing", "rope jumping"
    ]

    # Raw activity ID in the file -> contiguous class index.
    label_to_activity_idx = {
        1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 12: 7, 13: 8, 16: 9, 17: 10, 24: 11
    }
    valid_label_ids = list(label_to_activity_idx.keys())

    # Use all sensor columns of each IMU (hand, chest, ankle) as features:
    # 16 columns per IMU, 48 channels in total. The column ranges are the
    # same for every file, so they are built once outside the loop.
    hand_cols = list(range(4, 20))
    chest_cols = list(range(21, 37))
    ankle_cols = list(range(38, 54))
    all_sensor_cols = hand_cols + chest_cols + ankle_cols
    min_columns = max(all_sensor_cols) + 1

    all_windows = []
    all_labels = []
    window_size = 100
    step = 50

    for file_path in file_paths:
        # Raw string: '\s' inside a normal string literal is an invalid
        # escape sequence and triggers a warning on recent Python versions.
        df = pd.read_csv(file_path, sep=r'\s+', header=None, na_values='NaN')
        df_cleaned = df.ffill().bfill()
        if df_cleaned.empty:
            continue

        # Check the width before touching any column so that a truncated
        # file is skipped instead of raising an IndexError.
        if df_cleaned.shape[1] < min_columns:
            continue

        labels = df_cleaned.iloc[:, 1].values.astype(int)
        features = df_cleaned.iloc[:, all_sensor_cols].values.astype(np.float32)

        valid_indices = np.where(np.isin(labels, valid_label_ids))[0]
        if len(valid_indices) == 0:
            continue

        features = features[valid_indices, :]
        labels = labels[valid_indices]

        if len(features) < window_size:
            continue

        for start in range(0, len(features) - window_size + 1, step):
            window_data = features[start:start + window_size, :]
            window_labels_raw = labels[start:start + window_size]
            most_common_label = Counter(window_labels_raw).most_common(1)[0][0]

            # Rows were already filtered to mapped IDs; the check is kept as
            # a cheap safeguard.
            if most_common_label in label_to_activity_idx:
                all_windows.append(window_data)
                all_labels.append(label_to_activity_idx[most_common_label])

    if not all_windows:
        return None, None, None, None

    X_windowed = np.array(all_windows, dtype=np.float32)
    y_encoded = np.array(all_labels, dtype=int)

    # Standardise each (time step, channel) position across windows.
    scaler = StandardScaler()
    X_windowed_flat = X_windowed.reshape(X_windowed.shape[0], -1)
    X_windowed_flat = scaler.fit_transform(X_windowed_flat)
    X_windowed = X_windowed_flat.reshape(X_windowed.shape)
    # [window, time, channel] -> [window, channel, time]
    X_windowed = np.transpose(X_windowed, (0, 2, 1))

    num_classes_actual = len(np.unique(y_encoded))

    print("\n" + "="*40)
    print("PAMAP2 Dataset Loading Summary")
    print(f"  - Total Instances (Windows): {X_windowed.shape[0]}")
    print(f"  - Total Features: {X_windowed.shape[1]}")
    print(f"  - Total Classes: {num_classes_actual}")
    print("="*40 + "\n")

    return X_windowed, y_encoded, activity_labels, num_classes_actual

def _run_pretask_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader``; update weights only when ``optimizer`` is given.

    Returns ``(mean_batch_loss, accuracy_percent)``. Both are NaN when the
    loader yields no batch, instead of raising ZeroDivisionError.
    """
    loss_sum = 0.0
    correct = 0
    total = 0
    n_batches = 0

    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)

        if optimizer is not None:
            optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        if optimizer is not None:
            loss.backward()
            optimizer.step()

        loss_sum += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        n_batches += 1

    mean_loss = loss_sum / n_batches if n_batches else float('nan')
    accuracy = 100. * correct / total if total else float('nan')
    return mean_loss, accuracy


def train_pretask(model, train_loader, val_loader, criterion, optimizer, scheduler, device, epochs=200, log_every=0):
    """Train ``model`` on the pretext task and return the elapsed wall time.

    Every epoch performs one optimisation pass over ``train_loader``, one
    gradient-free evaluation pass over ``val_loader`` and one
    ``scheduler.step()``. The model is updated in place.

    Args:
        model: Network to train.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches; may
            be empty.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimiser over the model parameters.
        scheduler: Learning-rate scheduler stepped once per epoch.
        device: Device the batches are moved to.
        epochs: Number of epochs.
        log_every: When > 0, print train/validation loss and accuracy every
            ``log_every`` epochs. The default 0 keeps the function silent.

    Returns:
        Elapsed training time in seconds (validation passes included).
    """
    start_time = time.time()

    for epoch in range(epochs):
        model.train()
        train_loss, train_acc = _run_pretask_epoch(model, train_loader, criterion, device, optimizer)

        model.eval()
        with torch.no_grad():
            val_loss, val_acc = _run_pretask_epoch(model, val_loader, criterion, device)

        scheduler.step()

        if log_every and (epoch + 1) % log_every == 0:
            print(
                f"Epoch {epoch + 1}/{epochs} - "
                f"train loss {train_loss:.4f}, train acc {train_acc:.2f}% | "
                f"val loss {val_loss:.4f}, val acc {val_acc:.2f}%"
            )

    training_time = time.time() - start_time

    return training_time

def _run_finetune_stage(backbone, classifier, train_loader, criterion, optimizer, scheduler, device, epochs, train_backbone):
    """Run ``epochs`` training epochs of one fine-tuning stage.

    With ``train_backbone=False`` the backbone stays in eval mode and its
    features are computed without gradients, so only the classifier learns.
    With ``train_backbone=True`` both modules are in train mode and
    gradients flow through the backbone.
    """
    for _ in range(epochs):
        backbone.train(train_backbone)  # train(False) is equivalent to eval()
        classifier.train()

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            with torch.set_grad_enabled(train_backbone):
                features = backbone.extract_features(inputs)

            optimizer.zero_grad()
            outputs = classifier(features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        scheduler.step()


def finetune_model(backbone, classifier, train_loader, val_loader, criterion, device, epochs=200):
    """Fine-tune a pretrained backbone with a new classifier in two stages.

    Stage 1 (``epochs // 2`` epochs): the backbone is frozen and kept in eval
    mode; only the classifier is trained with AdamW at lr 1e-3.
    Stage 2 (``epochs // 2`` epochs): the backbone is unfrozen and trained
    jointly with the classifier with AdamW at lr 1e-4.
    Both modules are modified in place; on return every backbone parameter
    has ``requires_grad=True``.

    Args:
        backbone: Module exposing ``extract_features(inputs)``.
        classifier: Module mapping backbone features to class logits.
        train_loader: Iterable of ``(inputs, labels)`` batches.
        val_loader: Not read by this function; kept in the signature for
            existing callers.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to.
        epochs: Total epoch budget, split evenly between the two stages.

    Returns:
        Elapsed fine-tuning time in seconds.
    """
    start_time = time.time()
    stage_epochs = epochs // 2

    # ---- Stage 1: classifier only, backbone frozen ----
    for param in backbone.parameters():
        param.requires_grad = False

    optimizer1 = torch.optim.AdamW(classifier.parameters(), lr=0.001)
    # NOTE(review): T_max is fixed at 50 while a stage lasts epochs // 2
    # epochs (100 with the default), so the schedule runs past T_max.
    # Confirm this is intended before tying T_max to the stage length.
    scheduler1 = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer1, T_max=50)

    _run_finetune_stage(
        backbone, classifier, train_loader, criterion,
        optimizer1, scheduler1, device, stage_epochs, train_backbone=False,
    )

    # ---- Stage 2: joint training of backbone and classifier ----
    for param in backbone.parameters():
        param.requires_grad = True

    all_params = list(backbone.parameters()) + list(classifier.parameters())
    optimizer2 = torch.optim.AdamW(all_params, lr=0.0001)
    scheduler2 = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer2, T_max=50)

    _run_finetune_stage(
        backbone, classifier, train_loader, criterion,
        optimizer2, scheduler2, device, stage_epochs, train_backbone=True,
    )

    finetune_time = time.time() - start_time

    return finetune_time

def evaluate_model(backbone, classifier, test_loader, device):
    """Evaluate backbone + classifier on ``test_loader`` and time inference.

    Both modules are switched to eval mode and run without gradients.

    Args:
        backbone: Module exposing ``extract_features(inputs)``.
        classifier: Module mapping backbone features to class logits.
        test_loader: Iterable of ``(inputs, labels)`` batches; labels are
            expected on the CPU (they are converted with ``.numpy()``).
        device: Device the inputs are moved to.

    Returns:
        ``(acc, f1, precision, recall, all_preds, all_labels, all_features,
        avg_inference_time)``. The four metrics are percentages (F1,
        precision and recall are support-weighted averages);
        ``all_features`` stacks the backbone features of every sample;
        ``avg_inference_time`` is the mean forward time per batch in
        milliseconds.
    """
    backbone.eval()
    classifier.eval()

    all_preds = []
    all_labels = []
    all_features = []

    inference_times = []

    # CUDA kernels are launched asynchronously, so the clock must only be
    # read after the device has finished; otherwise mostly the launch
    # overhead is measured.
    sync_cuda = torch.device(device).type == 'cuda'

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)

            if sync_cuda:
                torch.cuda.synchronize(device)
            # perf_counter is monotonic and has better resolution than
            # time.time() for short intervals.
            start = time.perf_counter()
            features = backbone.extract_features(inputs)
            outputs = classifier(features)
            if sync_cuda:
                torch.cuda.synchronize(device)
            inference_times.append((time.perf_counter() - start) * 1000)

            _, predicted = outputs.max(1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.numpy())
            all_features.append(features.cpu().numpy())

    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)
    all_features = np.vstack(all_features)

    acc = accuracy_score(all_labels, all_preds) * 100
    f1 = f1_score(all_labels, all_preds, average='weighted') * 100
    precision = precision_score(all_labels, all_preds, average='weighted', zero_division=0) * 100
    recall = recall_score(all_labels, all_preds, average='weighted', zero_division=0) * 100
    avg_inference_time = np.mean(inference_times)

    return acc, f1, precision, recall, all_preds, all_labels, all_features, avg_inference_time

def compute_model_stats(backbone, classifier, input_shape):
    """Profile backbone and classifier on a single random input.

    A batch of one random sample of shape ``input_shape`` is created on the
    backbone's device. The backbone is profiled on it, the classifier is
    profiled on the features the backbone extracts from it, and the two
    results are summed.

    Returns:
        ``(total_params, total_macs)``, both divided by 1e6 (i.e. expressed
        in millions).
    """
    target_device = next(backbone.parameters()).device
    probe = torch.randn(1, *input_shape).to(target_device)

    backbone_macs, backbone_params = profile(backbone, inputs=(probe,), verbose=False)

    # The classifier is profiled on real backbone features so that its input
    # width always matches.
    with torch.no_grad():
        probe_features = backbone.extract_features(probe)

    head_macs, head_params = profile(classifier, inputs=(probe_features,), verbose=False)

    million = 1e6
    total_macs = (backbone_macs + head_macs) / million
    total_params = (backbone_params + head_params) / million

    return total_params, total_macs

def plot_confusion_matrix(y_true, y_pred, activity_names, dataset_name, suffix=""):
    """Save a row-normalised confusion-matrix heatmap as a PNG file.

    Labels are assumed to be integer class indices aligned with
    ``activity_names`` (class ``i`` is ``activity_names[i]``).

    Args:
        y_true: True class indices.
        y_pred: Predicted class indices.
        activity_names: Class names used for both axes.
        dataset_name: Used in the output file name.
        suffix: Optional text appended to the file name.

    Side effects:
        Writes ``./{dataset_name}_confusion{suffix}.png``.
    """
    # Fix the label set explicitly: without it the matrix only covers the
    # classes that happen to occur in y_true/y_pred, and building the
    # DataFrame with all activity names then fails on a shape mismatch.
    class_ids = list(range(len(activity_names)))
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)

    # Normalise each row by its number of true samples; rows without any
    # true sample stay at 0 instead of becoming NaN.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_normalized = np.divide(
        cm, row_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=row_totals != 0,
    )

    df = pd.DataFrame(cm_normalized, index=activity_names, columns=activity_names)

    # Pre-formatted cell texts (two decimals), passed with fmt="" below.
    annot = np.array([[f"{v:.2f}" for v in row] for row in cm_normalized])

    plt.figure(figsize=(10, 8))
    sns.heatmap(df, annot=annot, fmt="", cmap="Blues", cbar=True, annot_kws={"size": 13}, vmin=0, vmax=1)
    plt.xticks(rotation=90, fontsize=13)
    plt.yticks(rotation=0, fontsize=13)

    # Draw a thin black frame around the heatmap.
    for spine in plt.gca().spines.values():
        spine.set_visible(True)
        spine.set_linewidth(0.5)
        spine.set_edgecolor('black')

    plt.xlabel('Predicted Label', fontsize=14)
    plt.ylabel('True Label', fontsize=14)
    plt.tight_layout()
    plt.savefig(f'./{dataset_name}_confusion{suffix}.png', dpi=300, bbox_inches='tight')
    plt.close()

def plot_tsne(features, labels, activity_names, dataset_name="Dataset", samples_per_class=200):
    """Save a 2-D t-SNE scatter plot of ``features`` coloured by class.

    At most ``samples_per_class`` points are drawn at random (without
    replacement) from each class before the embedding is computed.

    Args:
        features: 2-D numpy array with one feature vector per row.
        labels: numpy array of integer class indices aligned with
            ``activity_names``.
        activity_names: Class names used in the legend.
        dataset_name: Used as the output file name.
        samples_per_class: Upper bound on plotted points per class.

    Side effects:
        Writes ``./{dataset_name}.png``. Draws from numpy's global random
        state when a class is subsampled.
    """
    sampled_features, sampled_labels = [], []
    for i in range(len(activity_names)):
        class_mask = labels == i
        class_features = features[class_mask]
        class_labels = labels[class_mask]
        if len(class_features) > samples_per_class:
            idx = np.random.choice(len(class_features), samples_per_class, replace=False)
            class_features = class_features[idx]
            class_labels = class_labels[idx]
        sampled_features.append(class_features)
        sampled_labels.append(class_labels)

    features = np.vstack(sampled_features)
    labels = np.hstack(sampled_labels)

    # t-SNE requires perplexity < number of samples; keep the usual value of
    # 20 and only lower it for very small inputs.
    perplexity = min(20, max(1, len(features) - 1))
    features_2d = TSNE(n_components=2, perplexity=perplexity, learning_rate=3000).fit_transform(features)

    colors = [
        "#FF0000", "#FFA500", "#FFFF00", "#008000", "#00FFFF", "#0000FF",
        "#800080", "#FFC0CB", "#A52A2A", "#000000", "#808080", "#FFD700"
    ]

    def color_for(class_index):
        # Wrap around so that more than len(colors) classes do not raise
        # IndexError (colours repeat beyond the 12th class).
        return colors[class_index % len(colors)]

    plt.figure(figsize=(8, 6))
    for i, activity in enumerate(activity_names):
        mask = labels == i
        if np.any(mask):
            plt.scatter(
                features_2d[mask, 0], features_2d[mask, 1],
                color=color_for(i), marker='o', s=20, alpha=0.5
            )

    # The legend lists every activity, including classes with no points.
    handles = [
        plt.Line2D([0], [0], marker='o', color='w', label=activity,
                   markerfacecolor=color_for(i), markersize=7)
        for i, activity in enumerate(activity_names)
    ]
    plt.legend(handles=handles, title="Activities", fontsize=9, framealpha=1)
    plt.xlabel("t-SNE Component 1", fontsize=13)
    plt.ylabel("t-SNE Component 2", fontsize=13)
    plt.grid(False)
    plt.savefig(f'./{dataset_name}.png', dpi=500)
    plt.close()

def sample_balanced_data(X_data, y_data, samples_per_class=100):
    """Draw up to ``samples_per_class`` examples of every class, then shuffle.

    Classes with more than ``samples_per_class`` members are subsampled at
    random without replacement; smaller classes are kept in full. The
    combined selection is returned in random order. Uses numpy's global
    random state.

    Returns:
        ``(sampled_X, sampled_y)`` with matching first dimension.
    """
    chosen_per_class = []
    for cls in np.unique(y_data):
        members = np.where(y_data == cls)[0]
        if len(members) > samples_per_class:
            members = np.random.choice(members, samples_per_class, replace=False)
        chosen_per_class.append(members)

    sampled_X = np.vstack([X_data[picked] for picked in chosen_per_class])
    sampled_y = np.hstack([y_data[picked] for picked in chosen_per_class])

    # Shuffle so that the classes are no longer stored in contiguous runs.
    order = np.random.permutation(len(sampled_X))
    return sampled_X[order], sampled_y[order]

def run_pretask_training(X_data, y_data, dataset_name, dataset_type, rotations, device):
    """Pretrain an SSLModel on rotation prediction and save its weights.

    A class-balanced subset (up to 100 windows per class) is drawn from the
    data, split 80/20 into pretext train/validation sets, wrapped in
    ``RotationDataset`` and trained for 200 epochs with AdamW (lr 1e-3) and
    cosine annealing. ``y_data`` is used only for the balanced sampling.

    Returns:
        ``(model_save_name, pretask_time)``: path of the saved state dict
        and the training time in seconds.
    """
    banner = '=' * 80
    print(f"\n{banner}")
    print(f"Pretask Training: {dataset_name} - {len(rotations)} rotations")
    print(f"{banner}")

    X_sampled, y_sampled = sample_balanced_data(X_data, y_data, samples_per_class=100)

    # Random 80/20 split of the balanced subset.
    shuffled = np.random.permutation(len(X_sampled))
    split_at = int(len(X_sampled) * 0.8)
    X_pretask_train = X_sampled[shuffled[:split_at]]
    X_pretask_val = X_sampled[shuffled[split_at:]]

    pretask_train_loader = DataLoader(
        RotationDataset(X_pretask_train, rotations, dataset_type),
        batch_size=512, shuffle=True, num_workers=0,
    )
    pretask_val_loader = DataLoader(
        RotationDataset(X_pretask_val, rotations, dataset_type),
        batch_size=512, shuffle=False, num_workers=0,
    )

    # One output class per rotation angle.
    model = SSLModel(X_data.shape[1], len(rotations)).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

    pretask_time = train_pretask(
        model,
        pretask_train_loader,
        pretask_val_loader,
        criterion,
        optimizer,
        scheduler,
        device,
        epochs=200,
    )

    model_save_name = f'./pretask_{dataset_name}_{len(rotations)}rot.pth'
    torch.save(model.state_dict(), model_save_name)
    print(f"Pretask model saved: {model_save_name}")
    print(f"Pretask training time: {pretask_time:.2f}s\n")

    return model_save_name, pretask_time

def run_finetune_experiment(X_data, y_data, activity_names, dataset_name, dataset_type, train_ratio, rotations, pretask_model_path, device):
    """Fine-tune a pretrained backbone on labeled data and evaluate it three ways.

    The data is split randomly, the pretext checkpoint is loaded into an
    ``SSLModel`` whose ``classifier`` head is replaced by ``nn.Identity``, a
    fresh ``FinetuneClassifier`` is trained on top, and the result is evaluated
    on (1) the untouched test split, (2) the test split wrapped in
    ``RotationTestDataset`` with the pretext ``rotations``, and (3) the same
    with the small angles 0-30 degrees in steps of 5.

    Args:
        X_data: Input windows; ``shape[1]`` is used as the channel count and
            ``shape[2]`` is passed to ``compute_model_stats``.
        y_data: Integer activity labels aligned with ``X_data``.
        activity_names: Class names; its length sets the number of outputs.
        dataset_name: Name used in log messages and output file names.
        dataset_type: Dataset key forwarded to ``RotationTestDataset``.
        train_ratio: Fraction of the data treated as labeled. Exactly ``1.0``
            selects an 80/10/10 train/val/test split; any other value uses
            that fraction as labeled data (split 80/20 into train/val) and the
            remainder as the test set.
        rotations: Rotation angles the pretext model was trained with.
        pretask_model_path: Path of the pretext ``state_dict`` checkpoint.
        device: Torch device used for training and evaluation.

    Returns:
        dict with the metrics of the three tests plus model statistics and
        timings (keys ``'Dataset'``, ``'Train%'``, ``'Rotations'``,
        ``'Test{1,2,3}_{Acc,F1,Precision,Recall}'``, ``'Params(M)'``,
        ``'FLOPs(M)'``, ``'InferenceTime(ms)'``, ``'FinetuneTime(s)'``).
    """
    print(f"\n{dataset_name} - {int(train_ratio*100)}% labeled data - {len(rotations)} rotations")

    # Windows containing NaN/inf would turn the loss (and then every weight)
    # into NaN as soon as one of them lands in a training batch, so they are
    # removed up front and reported.
    finite_mask = np.isfinite(np.asarray(X_data)).reshape(len(X_data), -1).all(axis=1)
    if not finite_mask.all():
        print(f"Warning: dropping {int((~finite_mask).sum())} of {len(finite_mask)} windows containing NaN/inf values")
        X_data = X_data[finite_mask]
        y_data = y_data[finite_mask]

    n_samples = len(X_data)
    indices = np.random.permutation(n_samples)

    if train_ratio == 1.0:
        # 100% labeled: split into 80% train, 10% val, 10% test.
        train_size = int(n_samples * 0.8)
        val_size = int(n_samples * 0.1)

        train_idx = indices[:train_size]
        val_idx = indices[train_size:train_size + val_size]
        test_idx = indices[train_size + val_size:]
    else:
        # Otherwise: the labeled fraction is split into train/val and all
        # remaining (unlabeled) samples form the test set.
        labeled_size = int(n_samples * train_ratio)
        labeled_indices = indices[:labeled_size]
        test_idx = indices[labeled_size:]

        val_size = int(len(labeled_indices) * 0.2)
        train_val_indices = np.random.permutation(len(labeled_indices))
        val_idx = labeled_indices[train_val_indices[:val_size]]
        train_idx = labeled_indices[train_val_indices[val_size:]]

    X_train, y_train = X_data[train_idx], y_data[train_idx]
    X_val, y_val = X_data[val_idx], y_data[val_idx]
    X_test, y_test = X_data[test_idx], y_data[test_idx]

    in_channels = X_data.shape[1]
    num_rotation_classes = len(rotations)

    pretrained_model = SSLModel(in_channels, num_rotation_classes).to(device)
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only host.
    pretrained_model.load_state_dict(torch.load(pretask_model_path, map_location=device))

    # Drop the rotation head so the model outputs backbone features.
    model_backbone = pretrained_model
    model_backbone.classifier = nn.Identity()

    finetune_train_dataset = TensorDataset(torch.FloatTensor(X_train), torch.LongTensor(y_train))
    finetune_val_dataset = TensorDataset(torch.FloatTensor(X_val), torch.LongTensor(y_val))
    finetune_test_dataset = TensorDataset(torch.FloatTensor(X_test), torch.LongTensor(y_test))

    finetune_train_loader = DataLoader(finetune_train_dataset, batch_size=512, shuffle=True, num_workers=0)
    finetune_val_loader = DataLoader(finetune_val_dataset, batch_size=512, shuffle=False, num_workers=0)
    finetune_test_loader = DataLoader(finetune_test_dataset, batch_size=512, shuffle=False, num_workers=0)

    num_activity_classes = len(activity_names)
    # NOTE(review): assumes the backbone emits 128-dim features - confirm against SSLModel.
    feature_dim = 128
    classifier = FinetuneClassifier(feature_dim, num_activity_classes).to(device)

    criterion_finetune = nn.CrossEntropyLoss()

    finetune_time = finetune_model(
        model_backbone, classifier, finetune_train_loader, finetune_val_loader, criterion_finetune, device, epochs=50
    )

    # Test 1: unrotated test split.
    acc1, f1_1, precision1, recall1, all_preds1, all_labels1, all_features1, inference_time1 = evaluate_model(
        model_backbone, classifier, finetune_test_loader, device
    )

    params, flops = compute_model_stats(model_backbone, classifier, (in_channels, X_data.shape[2]))

    print(f"Test 1 (Original): Acc={acc1:.2f}%, F1={f1_1:.2f}%")

    # Test 2: test split under the rotations used for pretext training.
    test2_dataset = RotationTestDataset(X_test, y_test, rotations, dataset_type)
    test2_loader = DataLoader(test2_dataset, batch_size=512, shuffle=False, num_workers=0)
    acc2, f1_2, precision2, recall2, all_preds2, all_labels2, all_features2, inference_time2 = evaluate_model(
        model_backbone, classifier, test2_loader, device
    )

    print(f"Test 2 (Train Rotations): Acc={acc2:.2f}%, F1={f1_2:.2f}%")

    # Test 3: test split under small rotation angles.
    small_rotations = [0, 5, 10, 15, 20, 25, 30]
    test3_dataset = RotationTestDataset(X_test, y_test, small_rotations, dataset_type)
    test3_loader = DataLoader(test3_dataset, batch_size=512, shuffle=False, num_workers=0)
    acc3, f1_3, precision3, recall3, all_preds3, all_labels3, all_features3, inference_time3 = evaluate_model(
        model_backbone, classifier, test3_loader, device
    )

    print(f"Test 3 (Small Angles): Acc={acc3:.2f}%, F1={f1_3:.2f}%")

    if train_ratio == 1.0:
        run_tag = f"{dataset_name}_{int(train_ratio*100)}pct_{len(rotations)}rot"

        plot_confusion_matrix(all_labels1, all_preds1, activity_names, f"{run_tag}_test1", "")
        plot_confusion_matrix(all_labels2, all_preds2, activity_names, f"{run_tag}_test2", "")
        plot_confusion_matrix(all_labels3, all_preds3, activity_names, f"{run_tag}_test3", "")

        # t-SNE rejects NaN input; a diverged model must not abort the whole
        # experiment sweep, so the plot is skipped in that case.
        if np.isfinite(np.asarray(all_features1)).all():
            plot_tsne(all_features1, all_labels1, activity_names, f"{run_tag}_tsne")
        else:
            print(f"Warning: skipping t-SNE for {run_tag}: features contain NaN/inf values")

        torch.save({
            'backbone': model_backbone.state_dict(),
            'classifier': classifier.state_dict()
        }, f'./{run_tag}_model.pth')

    result = {
        'Dataset': dataset_name,
        'Train%': int(train_ratio*100),
        'Rotations': len(rotations),
        'Test1_Acc': acc1,
        'Test1_F1': f1_1,
        'Test1_Precision': precision1,
        'Test1_Recall': recall1,
        'Test2_Acc': acc2,
        'Test2_F1': f1_2,
        'Test2_Precision': precision2,
        'Test2_Recall': recall2,
        'Test3_Acc': acc3,
        'Test3_F1': f1_3,
        'Test3_Precision': precision3,
        'Test3_Recall': recall3,
        'Params(M)': params,
        'FLOPs(M)': flops,
        # Only the timing of the unrotated test (Test 1) is reported.
        'InferenceTime(ms)': inference_time1,
        'FinetuneTime(s)': finetune_time
    }

    return result

# ---------------------------------------------------------------------------
# Experiment driver: load all datasets, make sure a pretext checkpoint exists
# for every (dataset, rotation config) pair, then fine-tune and evaluate for
# every labeled-data ratio and collect the results into one CSV.
# ---------------------------------------------------------------------------
print("="*80)
print("LOADING DATASETS")
print("="*80)

# UCI HAR ships with a predefined train/test split; both parts are merged so
# that every dataset is split the same way inside run_finetune_experiment.
uci_X_train, uci_y_train, uci_X_test, uci_y_test, uci_activity_names = load_uci_har_raw('./')
uci_X = np.concatenate([uci_X_train, uci_X_test], axis=0)
uci_y = np.concatenate([uci_y_train, uci_y_test], axis=0)

wisdm_X, wisdm_y, wisdm_activity_names, wisdm_num_classes = load_wisdm_data('./WISDM')

pamap2_X, pamap2_y, pamap2_activity_names, pamap2_num_classes = load_and_window_pamap2('./PAMAP2')

mhealth_X, mhealth_y, mhealth_activity_names, mhealth_num_classes = load_mhealth_data('./MHEALTH')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}\n")

all_results = []
results_csv_path = './final_results.csv'

# (X, y, class names, display name, dataset-type key)
datasets = [
    (uci_X, uci_y, uci_activity_names, 'UCI', 'uci'),
    (wisdm_X, wisdm_y, wisdm_activity_names, 'WISDM', 'wisdm'),
    (pamap2_X, pamap2_y, pamap2_activity_names, 'PAMAP2', 'pamap2'),
    (mhealth_X, mhealth_y, mhealth_activity_names, 'mHealth', 'mhealth')
]

train_ratios = [0.01, 0.05, 0.1, 0.5, 1.0]

# (rotation angles in degrees, config name); the finer-grained configs are
# currently disabled.
rotation_configs = [
    ([0, 90, 180, 270], "90deg"),
    #([0, 45, 90, 135, 180, 225, 270, 315], "45deg"),
    #([0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330], "30deg")
]

print("="*80)
print("PHASE 1: PRETASK TRAINING")
print("="*80)

# (dataset_name, rot_name) -> {'path': checkpoint path, 'time': training time}
pretask_models = {}

for X_data, y_data, activity_names, dataset_name, dataset_type in datasets:
    for rotations, rot_name in rotation_configs:
        # Must match the file name written by run_pretask_training.
        model_path = f'./pretask_{dataset_name}_{len(rotations)}rot.pth'

        if os.path.exists(model_path):
            print(f"Model '{model_path}' already exists. Skipping pretask training.")
            # 0 here means "not trained in this run", not a measured duration.
            pretask_time = 0
        else:
            _, pretask_time = run_pretask_training(
                X_data, y_data, dataset_name, dataset_type, rotations, device
            )

        pretask_models[(dataset_name, rot_name)] = {
            'path': model_path,
            'time': pretask_time
        }

print("="*80)
print("PHASE 2: FINETUNING")
print("="*80)

for X_data, y_data, activity_names, dataset_name, dataset_type in datasets:
    for rotations, rot_name in rotation_configs:
        pretask_model_info = pretask_models[(dataset_name, rot_name)]

        for train_ratio in train_ratios:
            result = run_finetune_experiment(
                X_data, y_data, activity_names, dataset_name, dataset_type,
                train_ratio, rotations, pretask_model_info['path'], device
            )

            result['PretaskTime(s)'] = pretask_model_info['time']
            result['RotationType'] = rot_name

            all_results.append(result)

            result_df = pd.DataFrame([result])
            display(result_df)

            # Checkpoint after every experiment so that a failure in a later
            # run does not discard the results collected so far.
            pd.DataFrame(all_results).to_csv(results_csv_path, index=False)

print("\n" + "="*80)
print("FINAL RESULTS")
print("="*80)

rst_df = pd.DataFrame(all_results)
display(rst_df)

rst_df.to_csv(results_csv_path, index=False)
print(f"\nResults saved to '{results_csv_path}'")

  df = pd.read_csv(file_path, sep='\s+', header=None, na_values='NaN')


LOADING DATASETS
Loading UCI HAR data from: ./
UCI HAR - Shape: (7352, 9, 128), Classes: 6
Loading WISDM dataset...
WISDM - Shape: (27160, 3, 80), Classes: 6

PAMAP2 Dataset Loading Summary
  - Total Instances (Windows): 23822
  - Total Features: 48
  - Total Classes: 12

Loading mHealth dataset...


  labels = combined_data.iloc[:, -1].values.astype(int)


mHealth - Shape: (12348, 23, 50), Classes: 12
Using device: cuda

PHASE 1: PRETASK TRAINING
Model './pretask_UCI_4rot.pth' already exists. Skipping pretask training.
Model './pretask_WISDM_4rot.pth' already exists. Skipping pretask training.
Model './pretask_PAMAP2_4rot.pth' already exists. Skipping pretask training.
Model './pretask_mHealth_4rot.pth' already exists. Skipping pretask training.
PHASE 2: FINETUNING

UCI - 1% labeled data - 4 rotations
Test 1 (Original): Acc=65.02%, F1=61.71%
Test 2 (Train Rotations): Acc=33.52%, F1=30.27%
Test 3 (Small Angles): Acc=49.09%, F1=43.29%


Unnamed: 0,Dataset,Train%,Rotations,Test1_Acc,Test1_F1,Test1_Precision,Test1_Recall,Test2_Acc,Test2_F1,Test2_Precision,...,Test3_Acc,Test3_F1,Test3_Precision,Test3_Recall,Params(M),FLOPs(M),InferenceTime(ms),FinetuneTime(s),PretaskTime(s),RotationType
0,UCI,1,4,65.019123,61.706706,69.780952,65.019123,33.519663,30.269572,43.61977,...,49.087267,43.289275,54.635375,49.087267,0.067142,4.016768,1.33003,0.620591,0,90deg



UCI - 5% labeled data - 4 rotations
Test 1 (Original): Acc=77.61%, F1=77.20%
Test 2 (Train Rotations): Acc=34.30%, F1=32.83%
Test 3 (Small Angles): Acc=55.12%, F1=50.37%


Unnamed: 0,Dataset,Train%,Rotations,Test1_Acc,Test1_F1,Test1_Precision,Test1_Recall,Test2_Acc,Test2_F1,Test2_Precision,...,Test3_Acc,Test3_F1,Test3_Precision,Test3_Recall,Params(M),FLOPs(M),InferenceTime(ms),FinetuneTime(s),PretaskTime(s),RotationType
0,UCI,5,4,77.608585,77.196578,78.956925,77.608585,34.299949,32.834396,49.375112,...,55.117892,50.369694,63.638179,55.117892,0.067142,4.016768,1.428759,0.871827,0,90deg



UCI - 10% labeled data - 4 rotations
Test 1 (Original): Acc=84.67%, F1=84.63%
Test 2 (Train Rotations): Acc=34.46%, F1=33.35%
Test 3 (Small Angles): Acc=62.88%, F1=59.54%


Unnamed: 0,Dataset,Train%,Rotations,Test1_Acc,Test1_F1,Test1_Precision,Test1_Recall,Test2_Acc,Test2_F1,Test2_Precision,...,Test3_Acc,Test3_F1,Test3_Precision,Test3_Recall,Params(M),FLOPs(M),InferenceTime(ms),FinetuneTime(s),PretaskTime(s),RotationType
0,UCI,10,4,84.670982,84.634847,84.855958,84.670982,34.460626,33.350054,56.452778,...,62.880259,59.53978,69.351478,62.880259,0.067142,4.016768,1.41598,1.915944,0,90deg



UCI - 50% labeled data - 4 rotations
Test 1 (Original): Acc=94.10%, F1=94.09%
Test 2 (Train Rotations): Acc=34.27%, F1=35.53%
Test 3 (Small Angles): Acc=75.97%, F1=74.13%


Unnamed: 0,Dataset,Train%,Rotations,Test1_Acc,Test1_F1,Test1_Precision,Test1_Recall,Test2_Acc,Test2_F1,Test2_Precision,...,Test3_Acc,Test3_F1,Test3_Precision,Test3_Recall,Params(M),FLOPs(M),InferenceTime(ms),FinetuneTime(s),PretaskTime(s),RotationType
0,UCI,50,4,94.097087,94.092738,94.095539,94.097087,34.271845,35.527393,71.159293,...,75.966713,74.126168,81.471705,75.966713,0.067142,4.016768,1.588063,13.62324,0,90deg



UCI - 100% labeled data - 4 rotations
Test 1 (Original): Acc=94.57%, F1=94.57%
Test 2 (Train Rotations): Acc=35.21%, F1=35.85%
Test 3 (Small Angles): Acc=79.06%, F1=77.32%


Unnamed: 0,Dataset,Train%,Rotations,Test1_Acc,Test1_F1,Test1_Precision,Test1_Recall,Test2_Acc,Test2_F1,Test2_Precision,...,Test3_Acc,Test3_F1,Test3_Precision,Test3_Recall,Params(M),FLOPs(M),InferenceTime(ms),FinetuneTime(s),PretaskTime(s),RotationType
0,UCI,100,4,94.56838,94.565724,94.580021,94.56838,35.208535,35.852182,68.986674,...,79.063323,77.32271,84.121007,79.063323,0.067142,4.016768,3.406604,17.107456,0,90deg



WISDM - 1% labeled data - 4 rotations
Test 1 (Original): Acc=74.67%, F1=67.88%
Test 2 (Train Rotations): Acc=51.14%, F1=43.35%
Test 3 (Small Angles): Acc=72.52%, F1=67.08%


Unnamed: 0,Dataset,Train%,Rotations,Test1_Acc,Test1_F1,Test1_Precision,Test1_Recall,Test2_Acc,Test2_F1,Test2_Precision,...,Test3_Acc,Test3_F1,Test3_Precision,Test3_Recall,Params(M),FLOPs(M),InferenceTime(ms),FinetuneTime(s),PretaskTime(s),RotationType
0,WISDM,1,4,74.66622,67.884712,69.784809,74.66622,51.141731,43.349379,51.106107,...,72.522487,67.076234,66.013151,72.522487,0.06599,2.431616,1.306606,0.587598,0,90deg



WISDM - 5% labeled data - 4 rotations
Test 1 (Original): Acc=80.16%, F1=75.43%
Test 2 (Train Rotations): Acc=54.26%, F1=49.32%
Test 3 (Small Angles): Acc=79.11%, F1=74.86%


Unnamed: 0,Dataset,Train%,Rotations,Test1_Acc,Test1_F1,Test1_Precision,Test1_Recall,Test2_Acc,Test2_F1,Test2_Precision,...,Test3_Acc,Test3_F1,Test3_Precision,Test3_Recall,Params(M),FLOPs(M),InferenceTime(ms),FinetuneTime(s),PretaskTime(s),RotationType
0,WISDM,5,4,80.160453,75.426292,77.931484,80.160453,54.256453,49.324986,57.242308,...,79.109039,74.862184,74.719145,79.109039,0.06599,2.431616,1.26762,2.268038,0,90deg



WISDM - 10% labeled data - 4 rotations
Test 1 (Original): Acc=84.20%, F1=82.28%
Test 2 (Train Rotations): Acc=57.51%, F1=54.51%
Test 3 (Small Angles): Acc=82.19%, F1=80.14%


Unnamed: 0,Dataset,Train%,Rotations,Test1_Acc,Test1_F1,Test1_Precision,Test1_Recall,Test2_Acc,Test2_F1,Test2_Precision,...,Test3_Acc,Test3_F1,Test3_Precision,Test3_Recall,Params(M),FLOPs(M),InferenceTime(ms),FinetuneTime(s),PretaskTime(s),RotationType
0,WISDM,10,4,84.204713,82.282865,82.479599,84.204713,57.512068,54.511713,60.513582,...,82.194871,80.142772,79.917896,82.194871,0.06599,2.431616,1.281987,3.954287,0,90deg



WISDM - 50% labeled data - 4 rotations
Test 1 (Original): Acc=92.54%, F1=92.36%
Test 2 (Train Rotations): Acc=61.74%, F1=62.31%
Test 3 (Small Angles): Acc=88.53%, F1=88.49%


Unnamed: 0,Dataset,Train%,Rotations,Test1_Acc,Test1_F1,Test1_Precision,Test1_Recall,Test2_Acc,Test2_F1,Test2_Precision,...,Test3_Acc,Test3_F1,Test3_Precision,Test3_Recall,Params(M),FLOPs(M),InferenceTime(ms),FinetuneTime(s),PretaskTime(s),RotationType
0,WISDM,50,4,92.540501,92.359768,92.310934,92.540501,61.741532,62.305001,69.050807,...,88.528298,88.494872,88.518707,88.528298,0.06599,2.431616,1.304035,17.807125,0,90deg



WISDM - 100% labeled data - 4 rotations
Test 1 (Original): Acc=96.69%, F1=96.69%
Test 2 (Train Rotations): Acc=61.71%, F1=61.46%
Test 3 (Small Angles): Acc=91.44%, F1=91.58%


Unnamed: 0,Dataset,Train%,Rotations,Test1_Acc,Test1_F1,Test1_Precision,Test1_Recall,Test2_Acc,Test2_F1,Test2_Precision,...,Test3_Acc,Test3_F1,Test3_Precision,Test3_Recall,Params(M),FLOPs(M),InferenceTime(ms),FinetuneTime(s),PretaskTime(s),RotationType
0,WISDM,100,4,96.686303,96.693361,96.703501,96.686303,61.708395,61.462404,68.916317,...,91.436987,91.577588,91.81251,91.436987,0.06599,2.431616,1.91009,33.767331,0,90deg



PAMAP2 - 1% labeled data - 4 rotations
Test 1 (Original): Acc=58.72%, F1=55.61%
Test 2 (Train Rotations): Acc=24.48%, F1=25.04%
Test 3 (Small Angles): Acc=53.20%, F1=50.15%


Unnamed: 0,Dataset,Train%,Rotations,Test1_Acc,Test1_F1,Test1_Precision,Test1_Recall,Test2_Acc,Test2_F1,Test2_Precision,...,Test3_Acc,Test3_F1,Test3_Precision,Test3_Recall,Params(M),FLOPs(M),InferenceTime(ms),FinetuneTime(s),PretaskTime(s),RotationType
0,PAMAP2,1,4,58.722015,55.60835,57.590307,58.722015,24.47528,25.035653,39.286091,...,53.197083,50.150938,54.241491,53.197083,0.076172,3.896192,1.824922,0.695631,0,90deg



PAMAP2 - 5% labeled data - 4 rotations
Test 1 (Original): Acc=80.23%, F1=79.94%
Test 2 (Train Rotations): Acc=32.53%, F1=35.67%
Test 3 (Small Angles): Acc=74.90%, F1=74.53%


Unnamed: 0,Dataset,Train%,Rotations,Test1_Acc,Test1_F1,Test1_Precision,Test1_Recall,Test2_Acc,Test2_F1,Test2_Precision,...,Test3_Acc,Test3_F1,Test3_Precision,Test3_Recall,Params(M),FLOPs(M),InferenceTime(ms),FinetuneTime(s),PretaskTime(s),RotationType
0,PAMAP2,5,4,80.230657,79.935305,80.450643,80.230657,32.527286,35.671062,53.350186,...,74.896002,74.529758,75.797444,74.896002,0.076172,3.896192,1.711337,2.892691,0,90deg



PAMAP2 - 10% labeled data - 4 rotations
Test 1 (Original): Acc=87.20%, F1=87.11%
Test 2 (Train Rotations): Acc=34.96%, F1=37.75%
Test 3 (Small Angles): Acc=82.28%, F1=82.32%


Unnamed: 0,Dataset,Train%,Rotations,Test1_Acc,Test1_F1,Test1_Precision,Test1_Recall,Test2_Acc,Test2_F1,Test2_Precision,...,Test3_Acc,Test3_F1,Test3_Precision,Test3_Recall,Params(M),FLOPs(M),InferenceTime(ms),FinetuneTime(s),PretaskTime(s),RotationType
0,PAMAP2,10,4,87.196828,87.107203,87.424264,87.196828,34.960354,37.752603,56.845399,...,82.279451,82.32009,83.595571,82.279451,0.076172,3.896192,1.676207,4.407158,0,90deg



PAMAP2 - 50% labeled data - 4 rotations
Test 1 (Original): Acc=95.17%, F1=95.17%
Test 2 (Train Rotations): Acc=36.02%, F1=40.21%
Test 3 (Small Angles): Acc=91.14%, F1=91.18%


Unnamed: 0,Dataset,Train%,Rotations,Test1_Acc,Test1_F1,Test1_Precision,Test1_Recall,Test2_Acc,Test2_F1,Test2_Precision,...,Test3_Acc,Test3_F1,Test3_Precision,Test3_Recall,Params(M),FLOPs(M),InferenceTime(ms),FinetuneTime(s),PretaskTime(s),RotationType
0,PAMAP2,50,4,95.17253,95.173624,95.19071,95.17253,36.021325,40.209041,61.220368,...,91.143841,91.175528,91.387779,91.143841,0.076172,3.896192,1.747191,21.474077,0,90deg



PAMAP2 - 100% labeled data - 4 rotations
Test 1 (Original): Acc=97.36%, F1=97.36%
Test 2 (Train Rotations): Acc=35.12%, F1=39.38%
Test 3 (Small Angles): Acc=93.81%, F1=93.83%


Unnamed: 0,Dataset,Train%,Rotations,Test1_Acc,Test1_F1,Test1_Precision,Test1_Recall,Test2_Acc,Test2_F1,Test2_Precision,...,Test3_Acc,Test3_F1,Test3_Precision,Test3_Recall,Params(M),FLOPs(M),InferenceTime(ms),FinetuneTime(s),PretaskTime(s),RotationType
0,PAMAP2,100,4,97.356274,97.361732,97.394376,97.356274,35.123794,39.382063,59.333866,...,93.813321,93.826758,93.96335,93.813321,0.076172,3.896192,2.41847,43.06764,0,90deg



mHealth - 1% labeled data - 4 rotations
Test 1 (Original): Acc=53.61%, F1=46.84%
Test 2 (Train Rotations): Acc=20.83%, F1=21.26%
Test 3 (Small Angles): Acc=35.39%, F1=30.09%


Unnamed: 0,Dataset,Train%,Rotations,Test1_Acc,Test1_F1,Test1_Precision,Test1_Recall,Test2_Acc,Test2_F1,Test2_Precision,...,Test3_Acc,Test3_F1,Test3_Precision,Test3_Recall,Params(M),FLOPs(M),InferenceTime(ms),FinetuneTime(s),PretaskTime(s),RotationType
0,mHealth,1,4,53.611452,46.840795,53.724911,53.611452,20.826176,21.261595,44.032949,...,35.393514,30.08705,39.280197,35.393514,0.071372,1.726592,1.793345,0.812915,0,90deg



mHealth - 5% labeled data - 4 rotations
Test 1 (Original): Acc=53.65%, F1=50.27%
Test 2 (Train Rotations): Acc=17.71%, F1=18.42%
Test 3 (Small Angles): Acc=35.31%, F1=30.62%


Unnamed: 0,Dataset,Train%,Rotations,Test1_Acc,Test1_F1,Test1_Precision,Test1_Recall,Test2_Acc,Test2_F1,Test2_Precision,...,Test3_Acc,Test3_F1,Test3_Precision,Test3_Recall,Params(M),FLOPs(M),InferenceTime(ms),FinetuneTime(s),PretaskTime(s),RotationType
0,mHealth,5,4,53.652715,50.270275,58.29916,53.652715,17.709488,18.423717,43.961035,...,35.311811,30.624075,47.96048,35.311811,0.071372,1.726592,1.322269,0.707635,0,90deg



mHealth - 10% labeled data - 4 rotations
Test 1 (Original): Acc=73.43%, F1=71.25%
Test 2 (Train Rotations): Acc=23.25%, F1=29.28%
Test 3 (Small Angles): Acc=44.24%, F1=41.24%


Unnamed: 0,Dataset,Train%,Rotations,Test1_Acc,Test1_F1,Test1_Precision,Test1_Recall,Test2_Acc,Test2_F1,Test2_Precision,...,Test3_Acc,Test3_F1,Test3_Precision,Test3_Recall,Params(M),FLOPs(M),InferenceTime(ms),FinetuneTime(s),PretaskTime(s),RotationType
0,mHealth,10,4,73.429908,71.246249,76.154955,73.429908,23.252204,29.27578,65.751681,...,44.242783,41.238287,56.442884,44.242783,0.071372,1.726592,1.380433,1.673183,0,90deg



mHealth - 50% labeled data - 4 rotations
Test 1 (Original): Acc=9.30%, F1=1.58%
Test 2 (Train Rotations): Acc=9.30%, F1=1.58%
Test 3 (Small Angles): Acc=9.30%, F1=1.58%


Unnamed: 0,Dataset,Train%,Rotations,Test1_Acc,Test1_F1,Test1_Precision,Test1_Recall,Test2_Acc,Test2_F1,Test2_Precision,...,Test3_Acc,Test3_F1,Test3_Precision,Test3_Recall,Params(M),FLOPs(M),InferenceTime(ms),FinetuneTime(s),PretaskTime(s),RotationType
0,mHealth,50,4,9.297052,1.581656,0.864352,9.297052,9.297052,1.581656,0.864352,...,9.297052,1.581656,0.864352,9.297052,0.071372,1.726592,1.647931,8.422163,0,90deg



mHealth - 100% labeled data - 4 rotations
Test 1 (Original): Acc=9.06%, F1=1.51%
Test 2 (Train Rotations): Acc=9.06%, F1=1.51%
Test 3 (Small Angles): Acc=9.06%, F1=1.51%


  df = pd.read_csv(file_path, sep='\s+', header=None, na_values='NaN')


ValueError: Input X contains NaN.
TSNE does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values