In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm
import os
import pandas as pd
import numpy as np
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Subset
import wandb
import random

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(12)

Model Structure LSTM

In [5]:
class skeletonLSTM(nn.Module):
    def __init__(self, input_size, output_dim):
        super(skeletonLSTM, self).__init__()

        self.lstm1 = nn.LSTM(input_size=input_size, hidden_size=128, num_layers=1, batch_first=True)
        self.layer_norm1 = nn.LayerNorm(128)

        self.lstm2 = nn.LSTM(input_size=128, hidden_size=256, num_layers=1, batch_first=True)
        self.layer_norm2 = nn.LayerNorm(256)

        self.lstm3 = nn.LSTM(input_size=256, hidden_size=512, num_layers=1, batch_first=True)
        self.layer_norm3 = nn.LayerNorm(512)

        self.lstm4 = nn.LSTM(input_size=512, hidden_size=512, num_layers=1, batch_first=True)
        self.layer_norm4 = nn.LayerNorm(512)

        self.fc1 = nn.Linear(512, 256)
        self.dropout = nn.Dropout(p=0.3)
        self.fc2 = nn.Linear(256, output_dim)

    def forward(self, x):
        # LSTM layers with Layer Normalization
        x, _ = self.lstm1(x)
        x = self.layer_norm1(x)

        x, _ = self.lstm2(x)
        x = self.layer_norm2(x)

        x, _ = self.lstm3(x)
        x = self.layer_norm3(x)

        x, (hn, cn) = self.lstm4(x)
        x = self.layer_norm4(x)

        # Pooling to summarize sequence information
        x = torch.mean(x, dim=1)  # Mean pooling over the sequence

        # Fully connected layers with ReLU and Dropout
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        embedding = self.fc2(x)

        return embedding

Model Structure Classification Head

In [6]:
class head(nn.Module):
    def __init__(self):
        super(head, self).__init__()

        # Feedforward layers
        self.fc1 = nn.Linear(64, 32)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(32, 1)  # Output layer has 1 unit for binary classification
        self.sigmoid = nn.Sigmoid()  # Sigmoid for probability output

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.sigmoid(x)
        return x
class head_128(nn.Module):
    def __init__(self):
        super(head_128, self).__init__()

        # Feedforward layers
        self.fc1 = nn.Linear(128, 64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(64, 32)
        self.relu = nn.ReLU()
        self.fc3 = nn.Linear(32, 1)  # Output layer has 1 unit for binary classification
        self.sigmoid = nn.Sigmoid()  # Sigmoid for probability output

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.sigmoid(x)
        return x

DataSet & DataLoader

In [7]:
class ContrastiveDataset(Dataset):
    def __init__(self, segment_length):
        self.root_dir = '/media/baebro/NIPA_data/Train/landmarks/'
        self.segment_length = segment_length
        self.data = []
        self.labels = []
        self.timestamp_to_indices = {}

        for dance_name in os.listdir(self.root_dir):
            dance_path = os.path.join(self.root_dir, dance_name)
            if os.path.isdir(dance_path):
                for csv_file in os.listdir(dance_path):
                    if csv_file.endswith("_angles.csv"):
                        file_path = os.path.join(dance_path, csv_file)
                        df = pd.read_csv(file_path)
                        df = self.interpolate_missing_values(df)
                        self.process_file(df, dance_name, file_path)

    def interpolate_missing_values(self, df):
        # Check for NaN or Inf values and interpolate
        if df.isnull().values.any() or np.isinf(df.values).any():
            df = df.replace([np.inf, -np.inf], np.nan)
            df = df.interpolate(method='linear', limit_direction='both', axis=0)
            # Fill any remaining NaN values (e.g., at the start or end) with 0
            df = df.fillna(0)
        return df

    def process_file(self, df, dance_name, file_path):
        # Segment angle data and store with dance and timestamp
        for i in range(0, len(df) - self.segment_length + 1, self.segment_length):
            segment = df.iloc[i:i + self.segment_length].values
            timestamp = i
            if len(segment) == self.segment_length:
                self.data.append((segment, dance_name, timestamp, file_path))
                self.labels.append(dance_name)

                # Positive pair dictionary
                if (dance_name, timestamp) not in self.timestamp_to_indices:
                    self.timestamp_to_indices[(dance_name, timestamp)] = []
                self.timestamp_to_indices[(dance_name, timestamp)].append(len(self.data) - 1)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        segment, dance_name, timestamp, _ = self.data[idx]
        anchor = torch.tensor(segment, dtype=torch.float32)

        # Check for NaN or Inf values in anchor
        if torch.isnan(anchor).any() or torch.isinf(anchor).any():
            raise ValueError(f"NaN or Inf value detected in anchor segment at index {idx}")

        # Positive pair (same dance, same timestamp)
        pos_indices = self.timestamp_to_indices.get((dance_name, timestamp), [])
        if len(pos_indices) > 1:
            pos_idx = np.random.choice([i for i in pos_indices if i != idx])
        else:
            pos_idx = idx  # fallback to self if no other positive sample available
        pos_segment, _, _, _ = self.data[pos_idx]
        pos = torch.tensor(pos_segment, dtype=torch.float32)

        # Check for NaN or Inf values in positive segment
        if torch.isnan(pos).any() or torch.isinf(pos).any():
            raise ValueError(f"NaN or Inf value detected in positive segment at index {pos_idx}")

        # Negative pair (different dance)
        all_labels = list(set(self.labels))
        neg_dance = np.random.choice([l for l in all_labels if l != dance_name])
        neg_indices = [i for i in range(len(self.data)) if self.labels[i] == neg_dance]
        if neg_indices:
            neg_idx = np.random.choice(neg_indices)
        else:
            neg_idx = idx  # fallback to self if no negative sample available
        neg_segment, _, _, _ = self.data[neg_idx]
        neg = torch.tensor(neg_segment, dtype=torch.float32)

        # Check for NaN or Inf values in negative segment
        if torch.isnan(neg).any() or torch.isinf(neg).any():
            raise ValueError(f"NaN or Inf value detected in negative segment at index {neg_idx}")

        return anchor, pos, neg

In [8]:
##### Train Loss #####
class TripletContrastiveLoss(nn.Module):
    def __init__(self, temperature=0.1):
        super(TripletContrastiveLoss, self).__init__()
        self.temperature = temperature

    def forward(self, anchor, positive, negative):
        # Normalize features
        anchor, positive, negative = F.normalize(anchor, dim=1), F.normalize(positive, dim=1), F.normalize(negative,
                                                                                                           dim=1)

        # Calculate similarities
        pos_sim = torch.exp(torch.sum(anchor * positive, dim=1) / self.temperature)  # Anchor-Positive similarity
        neg_sim = torch.exp(torch.sum(anchor * negative, dim=1) / self.temperature)  # Anchor-Negative similarity

        # Loss calculation: maximize anchor-positive similarity, minimize anchor-negative similarity
        loss = -torch.log(pos_sim / (pos_sim + neg_sim)).mean()
        # print(f'loss check: {pos_sim / (pos_sim + neg_sim)}')
        return loss

In [10]:
##### Train code #####
wandb.init(project="skeleton_lstm_new", name="experiment1")
save_path = '/home/baebro/nipa_ws/nipaproj_ws/output/'
# 하이퍼파라미터 설정
feature_dim = 12
output_dim = 64
num_epochs = 10  # Early stopping 적용 시 더 큰 값을 설정해도 됩니다
learning_rate = 0.0001
temperature = 0.1
patience = 25  # Early stopping patience 설정
min_delta = 0.001  # Validation loss가 감소하는 최소 값
sequence_length = 30

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

# 데이터셋과 데이터로더
dataset = ContrastiveDataset(sequence_length)
# print(len(dataset))
train_indices, val_indices = train_test_split(range(len(dataset)), test_size=0.2, random_state=42)

# Subset을 사용하여 학습 및 검증 데이터셋 생성
train_dataset = Subset(dataset, train_indices)
val_dataset = Subset(dataset, val_indices)

# DataLoader 생성
train_dataloader = DataLoader(
    train_dataset,
    num_workers=0,  # CPU 데드락 방지를 위해 num_workers=0으로 설정
    batch_size=8,
    shuffle=True,  # 데이터 순서를 섞어서 학습 효과를 높임
    pin_memory=True  # GPU 사용 시 유용
)
val_dataloader = DataLoader(
    val_dataset,
    num_workers=0,  # CPU 데드락 방지를 위해 num_workers=0으로 설정
    batch_size=8,  # 검증도 충분한 배치 크기로 설정
    shuffle=False,
    pin_memory=True  # GPU 사용 시 유용
)
print()

# 모델, 손실 함수, 옵티마이저, 스케줄러 초기화
# model = skeletonLSTM(feature_dim, output_dim).to(device)
model = skeletonLSTM(feature_dim, output_dim).to(device)
# binary classification head
classification = head_128().to(device)

# criterion1 = ContrastiveLoss(margin=1.0)
criterion1 = TripletContrastiveLoss(temperature=temperature)

# Binary classification loss
criterion2 = nn.BCELoss()
criterion3 = nn.BCELoss()

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

# Early stopping 변수 초기화
best_val_loss = np.inf
epochs_no_improve = 0
early_stop = False


# 학습 루프
for epoch in range(num_epochs):
    if early_stop:
        break

    model.train()
    classification.train()

    epoch_loss = 0.0
    p_epoch_loss = 0.0
    n_epoch_loss = 0.0
    # epoch_classification_loss = 0.0
    train_loader_tqdm = tqdm(train_dataloader, desc=f"Epoch [{epoch + 1}/{num_epochs}] Training")
    for batch_idx, (anchor, pos, neg) in enumerate(train_loader_tqdm):

        optimizer.zero_grad()
        # 데이터를 장치로 이동
        anchor, pos, neg = anchor.to(device), pos.to(device), neg.to(device)

        # 모델에 통과하여 임베딩 생성 및 이진 분류 출력
        anchor_emb = model(anchor)
        pos_emb = model(pos)
        neg_emb = model(neg)

        # Contrastive Loss 계산
        # anchor_emb = F.normalize(anchor_emb, dim=1)
        # pos_emb = F.normalize(pos_emb, dim=1)
        # neg_emb = F.normalize(neg_emb, dim=1)

        # positive_dist = F.pairwise_distance(anchor_emb, pos_emb)
        # negative_dist = F.pairwise_distance(anchor_emb, neg_emb)
        # loss1 = criterion1(positive_dist, negative_dist)

        loss1 = criterion1(anchor_emb, pos_emb, neg_emb)

        # Classification Loss 계산
        pos_classification = classification(torch.cat((anchor_emb, pos_emb), dim=1))
        loss2 = criterion2(pos_classification, torch.full((pos_classification.shape[0], 1), 1.).to(device))

        neg_classification = classification(torch.cat((anchor_emb, neg_emb), dim=1))
        loss3 = criterion3(neg_classification, torch.full((neg_classification.shape[0], 1), 0.).to(device))

        # Total loss 계산
        loss = loss1 + 1.5 * (loss2 + loss3)

        epoch_loss += loss.item()
        p_epoch_loss += loss2.item()
        n_epoch_loss += loss3.item()
        # epoch_classification_loss += classification_loss.item()

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        train_loader_tqdm.set_postfix(loss=loss.item(), contrastive_loss=loss1.item(), P_classification_loss=loss2.item(), N_classification_loss=loss3.item())


    scheduler.step()  # 학습률 조정

    # 에폭마다 평균 손실을 기록
    avg_train_contrastive_loss = loss1.item() / len(train_dataloader)
    avg_train_Pos_bc_loss = p_epoch_loss / len(train_dataloader)
    avg_train_Neg_bc_loss = n_epoch_loss / len(train_dataloader)

    avg_train_loss = epoch_loss / len(train_dataloader)
    # avg_classification_loss = epoch_classification_loss / len(train_dataloader)
    # print(
    #     f'Epoch [{epoch + 1}/{num_epochs}], Training Loss: {avg_train_loss:.4f}, P_Classification Loss: {avg_train_Pos_bc_loss:.4f}, N_Classification Loss: {avg_train_Neg_bc_loss:.4f}')

    # Validation loop
    model.eval()
    val_loss = 0.0
    val_p_loss = 0.0
    val_n_loss = 0.0
    # val_classification_loss = 0.0
    val_loader_tqdm = tqdm(val_dataloader, desc=f"Epoch [{epoch + 1}/{num_epochs}] Validation")
    with torch.no_grad():
        for batch_idx, (anchor, pos, neg) in enumerate(val_loader_tqdm):
            anchor, pos, neg = anchor.to(device), pos.to(device), neg.to(device)

            # 임베딩 생성 및 이진 분류 출력
            anchor_emb = model(anchor)
            pos_emb = model(pos)
            neg_emb = model(neg)

            # Contrastive Loss 계산
            # anchor_emb = F.normalize(anchor_emb, dim=1)
            # pos_emb = F.normalize(pos_emb, dim=1)
            # neg_emb = F.normalize(neg_emb, dim=1)
            # positive_dist = F.pairwise_distance(anchor_emb, pos_emb)
            # negative_dist = F.pairwise_distance(anchor_emb, neg_emb)
            # loss1 = criterion1(positive_dist, negative_dist)

            loss1 = criterion1(anchor_emb, pos_emb, neg_emb)

            # Classification Loss 계산
            pos_classification = classification(torch.cat((anchor_emb, pos_emb), dim=1))
            loss2 = criterion2(pos_classification, torch.full((pos_classification.shape[0], 1), 1.).to(device))

            neg_classification = classification(torch.cat((anchor_emb, neg_emb), dim =1))
            loss3 = criterion3(neg_classification, torch.full((neg_classification.shape[0], 1), 0.).to(device))

            # Total validation loss 계산
            loss = loss1 + 1.5 * (loss2 + loss3)

            val_loss += loss.item()
            val_p_loss += loss2.item()
            val_n_loss += loss3.item()

            val_loader_tqdm.set_postfix(loss=loss.item(), contrastive_loss=loss1.item(), P_classification_loss=loss2.item(), N_classification_loss=loss3.item())

    # avg_train_contrastive_loss = loss1.item() / len(train_dataloader)
    # avg_train_Pos_bc_loss = loss2.item() / len(train_dataloader)
    # avg_train_Neg_bc_loss = loss3.item() / len(train_dataloader)

    # print(
    #     f'Epoch [{epoch + 1}/{num_epochs}], Training Loss: {avg_train_contrastive_loss:.4f}, P_Classification Loss: {avg_train_Pos_bc_loss:.4f}, N_Classification Loss: {avg_train_Neg_bc_loss:.4f}')

    avg_val_loss = val_loss / len(val_dataloader)
    # avg_val_classification_loss = val_classification_loss / len(val_dataloader)

    # avg_val_loss = val_loss.item() / len(train_dataloader)
    avg_val_Pos_bc_loss = val_p_loss / len(train_dataloader)
    avg_val_Neg_bc_loss = val_n_loss / len(train_dataloader)

    # print(
    #     f'Epoch [{epoch + 1}/{num_epochs}], Validation Loss: {avg_val_loss:.4f}, P_Classification Loss: {avg_val_Pos_bc_loss:.4f}, , N_Classification Loss: {avg_val_Neg_bc_loss:.4f}')

    # Early Stopping 체크
    if avg_val_loss < best_val_loss - min_delta:
        best_val_loss = avg_val_loss
        epochs_no_improve = 0  # Improvement이 있으면 카운트 리셋
    else:
        epochs_no_improve += 1

    if epochs_no_improve >= patience:
        print("Early stopping triggered!")
        early_stop = True

    # 에폭마다 평균 손실을 기록
    wandb.log({"epoch": epoch + 1, "train_loss": avg_train_loss,
               "val_loss": avg_val_loss})

print("Training complete!")
# torch.save(model.state_dict(), save_path + 'model_state_dict_Contrastive_loss_w_bc.pt')
# torch.save(classification.state_dict(), save_path + 'model_state_dict_Contrastive_loss_w_bc.pt')
torch.save(model, save_path + 'lstm_2000_cat_ov5.pth')
torch.save(classification, save_path + 'head_2000_cat_ov5.pth')

cuda:0



Epoch [1/10] Training: 100%|██████████| 89/89 [00:04<00:00, 18.17it/s, N_classification_loss=0.64, P_classification_loss=0.746, contrastive_loss=0.947, loss=3.03] 
Epoch [1/10] Validation: 100%|██████████| 23/23 [00:00<00:00, 65.09it/s, N_classification_loss=0.636, P_classification_loss=0.738, contrastive_loss=0.271, loss=2.33]
Epoch [2/10] Training:  38%|███▊      | 34/89 [00:02<00:03, 16.66it/s, N_classification_loss=0.632, P_classification_loss=0.734, contrastive_loss=0.453, loss=2.5] 


KeyboardInterrupt: 

Inference Code

In [None]:
def read_angle(file_path):
    df = pd.read_csv(file_path)
    segment = df.iloc[:30].values
    segment = torch.tensor(segment, dtype=torch.float32).unsqueeze(0).to(torch.float32)
    return segment

def read_angles(file_path, sequence_length):
    angle_list = []
    df = pd.read_csv(file_path)
    for i in range(0, 300, sequence_length):
        angle_list.append(torch.tensor(df.iloc[i:i + sequence_length].values, dtype=torch.float32))
    pos1_tensor = torch.stack(angle_list, dim=0).to(device)
    return pos1_tensor

In [None]:
saved_path = '/home/baebro/nipa_ws/nipaproj_ws/output/'
model_name = 'lstm_2000_cat_ov5.pth'
head_name = 'head_2000_cat_ov5.pth'
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

origin_path = '/home/baebro/nipa_ws/nipaproj_ws/sample_videos/labeled_data/pos1_infer/landmarks_3d_pos1_infer_angles.csv'
pos_path = '/home/baebro/nipa_ws/nipaproj_ws/sample_videos/labeled_data/pos2_infer/landmarks_3d_pos2_infer_angles.csv'
neg_path = '/home/baebro/nipa_ws/nipaproj_ws/sample_videos/labeled_data/neg_infer/landmarks_3d_neg_infer_angles.csv'

origin_input = read_angles(origin_path, 30).to(device)
pos_input = read_angles(pos_path, 30).to(device)
neg_input = read_angles(neg_path, 30).to(device)

model = torch.load(saved_path+model_name).to(device)
head = torch.load(saved_path+head_name).to(device)
model.eval()
head.eval()

origin_emb = model(origin_input)
pos_emb = model(pos_input)
neg_emb =model(neg_input)

origin_emb_norm = F.normalize(origin_emb, dim=1)
pos_emb_norm = F.normalize(pos_emb, dim=1)
neg_emb_norm = F.normalize(neg_emb, dim=1)

pos_dist = 1-torch.pow(F.pairwise_distance(origin_emb_norm, pos_emb_norm), 1)/2
neg_dist = 1-torch.pow(F.pairwise_distance(origin_emb_norm, neg_emb_norm), 1)/2

In [None]:
print(neg_dist)
print(pos_dist)
print(neg_dist/pos_dist)

In [None]:
pos_classification = head(torch.cat((origin_emb, pos_emb), dim=1))
neg_classification = head(torch.cat((origin_emb, neg_emb), dim=1))

In [None]:
print(neg_classification)
print(pos_classification)