In [1]:
import os
# Show the kernel's working directory: the relative data paths used later in
# this notebook (e.g. "../../data/") are resolved against it.
print("📁 Current working directory:", os.getcwd())

📁 Current working directory: /cs/student/projects1/rai/2024/luttini/cw2_comp0248/src/pipelineB


In [8]:
import torch

# Report the installed PyTorch build and whether a CUDA device can be used.
_cuda_ok = torch.cuda.is_available()

print("PyTorch version:", torch.__version__)
print("CUDA available:", _cuda_ok)

# Only query the device name when CUDA is usable (device 0 = first visible GPU).
if _cuda_ok:
    print("CUDA device name:", torch.cuda.get_device_name(0))

import os
import numpy as np
import torch
from torch.utils.data import Dataset, ConcatDataset
import cv2
import pickle
from PIL import Image

PyTorch version: 2.6.0+cu126
CUDA available: True
CUDA device name: Quadro K1200


In [14]:
def load_intrinsics(intrinsics_path=None):
    """Load a camera intrinsics matrix from a whitespace-separated text file.

    Args:
        intrinsics_path: Path to a text file with one matrix row per line and
            whitespace-separated numbers. If it is falsy or does not exist on
            disk, a hard-coded default matrix is returned instead (and a
            message is printed).

    Returns:
        np.ndarray: float64 matrix; the default is 3x3 with
        fx = fy = 570.3422, cx = 320, cy = 240.

    Raises:
        ValueError: If the file exists but does not contain a rectangular
            matrix with at least 3 rows and 3 columns (callers index
            [0, 0], [1, 1], [0, 2] and [1, 2]).
    """
    if intrinsics_path and os.path.exists(intrinsics_path):
        with open(intrinsics_path, 'r') as f:
            # Skip blank lines (e.g. a trailing empty line); they would
            # otherwise add an empty row and make the matrix ragged.
            rows = [[float(tok) for tok in line.split()] for line in f if line.strip()]
        try:
            K = np.array(rows, dtype=np.float64)
        except ValueError as exc:
            raise ValueError(
                f"Intrinsics file '{intrinsics_path}' has rows of unequal length"
            ) from exc
        # Fail fast with a readable message instead of an IndexError later on.
        if K.ndim != 2 or K.shape[0] < 3 or K.shape[1] < 3:
            raise ValueError(
                f"Intrinsics file '{intrinsics_path}' must hold at least a 3x3 matrix, got shape {K.shape}"
            )
    else:
        print("Using default intrinsics. Cannot load from file:", intrinsics_path)
        K = np.array([[570.3422, 0, 320],
                      [0, 570.3422, 240],
                      [0, 0, 1]])
    return K

def depth_to_pointcloud(depth_img, intrinsics):
    """Back-project a 2-D depth map into 3-D points with the pinhole model.

    Args:
        depth_img: 2-D array of depth values; entries <= 0 are treated as
            invalid and dropped.
        intrinsics: Matrix indexed as [0, 0]=fx, [1, 1]=fy, [0, 2]=cx,
            [1, 2]=cy.

    Returns:
        np.ndarray of shape (num_valid_pixels, 3) holding (X, Y, Z) rows in
        row-major pixel order.
    """
    focal_x, focal_y = intrinsics[0, 0], intrinsics[1, 1]
    center_x, center_y = intrinsics[0, 2], intrinsics[1, 2]

    n_rows, n_cols = depth_img.shape
    # Broadcast a row of column indices against a column of row indices
    # instead of materialising a full meshgrid.
    col_idx = np.arange(n_cols)[np.newaxis, :]
    row_idx = np.arange(n_rows)[:, np.newaxis]

    depth = depth_img.astype(np.float32)
    x_coords = (col_idx - center_x) * depth / focal_x
    y_coords = (row_idx - center_y) * depth / focal_y

    # Boolean indexing walks the image in row-major order, exactly like
    # flattening first and masking afterwards.
    keep = depth > 0
    return np.stack((x_coords[keep], y_coords[keep], depth[keep]), axis=-1)

def downsample_pointcloud(pointcloud, num_points=1024):
    """Randomly resample a point cloud to exactly `num_points` rows.

    Args:
        pointcloud: Array whose first axis enumerates points, e.g. (N, 3).
        num_points: Number of rows in the result.

    Returns:
        Array with `num_points` rows. Rows are drawn without replacement when
        the cloud has at least `num_points` points, and with replacement
        otherwise. An empty cloud (N == 0) yields an all-zero array of the
        requested size instead of raising, so a depth map without any valid
        pixel cannot crash a DataLoader worker.
    """
    N = pointcloud.shape[0]
    if N == 0:
        # np.random.choice cannot sample from an empty population.
        return np.zeros((num_points,) + tuple(pointcloud.shape[1:]), dtype=pointcloud.dtype)
    # Sampling with replacement is only needed when there are too few points.
    indices = np.random.choice(N, num_points, replace=N < num_points)
    return pointcloud[indices]

def random_augmentation(sample):
    """Randomly rotate, scale and jitter the point cloud of a sample.

    The sample dict is updated in place and returned. Three random draws are
    made, in this order: a rotation angle in [0, 2*pi) about the third
    coordinate axis, a uniform scale in [0.9, 1.1], and per-coordinate
    Gaussian noise with standard deviation 0.01.

    Args:
        sample: Dict with a "pointcloud" tensor whose last dimension is 3.

    Returns:
        The same dict, with "pointcloud" replaced by a float32 tensor.
    """
    points = sample["pointcloud"].numpy()

    theta = np.random.uniform(0, 2*np.pi)
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rot_z = np.array([[cos_t, -sin_t, 0],
                      [sin_t,  cos_t, 0],
                      [0,      0,     1]])
    rotated = points @ rot_z.T

    factor = np.random.uniform(0.9, 1.1)
    noise = np.random.normal(0, 0.01, rotated.shape)
    augmented = rotated * factor + noise

    sample["pointcloud"] = torch.from_numpy(augmented).float()
    return sample

def has_table(polygon_list):
    """Return True when the annotation holds at least one polygon."""
    return len(polygon_list) != 0


In [None]:
def get_real_scene_folder(predicted_scene_name, base_dataset_dir):
    """Locate the dataset folder of a scene by searching below a base directory.

    The scene name parsed from a predicted-depth filename (e.g.
    '76-1studyroom2') is only the last path component of the real folder
    (e.g. 'mit_76_studyroom/76-1studyroom2'), so the directory tree under
    `base_dataset_dir` is walked until a directory with that exact name is
    found.

    This single definition replaces an earlier prefix-based lookup that was
    shadowed by a second `def` of the same name and whose hard-coded
    prefixes did not match every scene name.

    Args:
        predicted_scene_name: Directory name to look for.
        base_dataset_dir: Root directory to search (recursively).

    Returns:
        str: Path of the first matching directory encountered by os.walk.

    Raises:
        FileNotFoundError: If no directory of that name exists below
            `base_dataset_dir` (including when the base itself is missing).
    """
    for root, dirs, _ in os.walk(base_dataset_dir):
        if predicted_scene_name in dirs:
            return os.path.join(root, predicted_scene_name)

    raise FileNotFoundError(
        f"❌ Could not find folder '{predicted_scene_name}' inside base path: {base_dataset_dir}"
    )




class PredictedDepthDataset(Dataset):
    """Point-cloud classification samples built from predicted depth PNGs.

    Every file '<scene>_<frame>_depth.png' in `depth_dir` is converted to a
    down-sampled point cloud and paired with a binary "has table" label read
    from the real scene folder ('labels/tabletop_labels.dat').

    Frame/label alignment: the label list is indexed by the position of the
    file *within its own scene* (files of one scene ordered by their numeric
    frame number). The dataset-wide index must not be used for this, because
    `depth_dir` may contain files from several scenes.
    NOTE(review): this assumes one predicted depth map exists per annotated
    frame of a scene, in the same order as the real depth files - confirm
    against the script that produced the predicted PNGs.
    """

    def __init__(self, depth_dir, base_dataset_dir, num_points=1024, transform=None, verbose=False):
        """
        Args:
            depth_dir: Folder containing predicted depth PNG files.
                       (e.g., "data/depth_maps_PNG/train")
            base_dataset_dir: Directory below which the real scene folders
                              are searched (see get_real_scene_folder).
            num_points: Number of points to downsample each point cloud.
            transform: Optional callable applied to the sample dict.
            verbose: If True, print extra debug info.
        """
        import re  # local import: `re` is not imported at notebook level

        self.depth_dir = depth_dir
        self.base_dataset_dir = base_dataset_dir
        self.num_points = num_points
        self.transform = transform
        self.verbose = verbose
        self.files = sorted([f for f in os.listdir(depth_dir) if f.lower().endswith('.png')])
        self.label_cache = {}   # scene name -> unpickled annotations (or None)
        self._scene_cache = {}  # scene name -> (real folder, #real depth frames, intrinsics)

        # Pre-compute, for every file, its scene and its rank inside that
        # scene. Files that do not follow the naming scheme keep None and
        # raise in __getitem__.
        pattern = re.compile(r"(.+?)_(\d+)_depth\.png")
        frames_by_scene = {}
        for file_idx, name in enumerate(self.files):
            match = pattern.match(name)
            if match is not None:
                frames_by_scene.setdefault(match.group(1), []).append((int(match.group(2)), file_idx))
        self._frame_positions = [None] * len(self.files)
        for scene, frames in frames_by_scene.items():
            # Sort numerically so non zero-padded frame numbers rank correctly.
            for position, (_, file_idx) in enumerate(sorted(frames)):
                self._frame_positions[file_idx] = (scene, position)

        if self.verbose:
            print(f"Loaded {len(self.files)} predicted depth PNG files from {depth_dir}")

    def __len__(self):
        return len(self.files)

    def _scene_info(self, scene):
        """Resolve (and cache) folder, real frame count and intrinsics of a scene."""
        if scene not in self._scene_cache:
            real_scene_folder = get_real_scene_folder(scene, self.base_dataset_dir)

            # Real depth frames live in 'depth' for harvard_tea_2, else 'depthTSDF'.
            depth_folder = "depth" if "harvard_tea_2" in real_scene_folder else "depthTSDF"
            real_depth_dir = os.path.join(real_scene_folder, depth_folder)
            num_real_frames = sum(1 for f in os.listdir(real_depth_dir) if f.endswith('.png'))

            intrinsics_file = os.path.join(real_scene_folder, "intrinsics.txt")
            if os.path.exists(intrinsics_file):
                K = load_intrinsics(intrinsics_file)
            else:
                K = np.array([[570.3422, 0, 320],
                              [0, 570.3422, 240],
                              [0, 0, 1]])
            self._scene_cache[scene] = (real_scene_folder, num_real_frames, K)
        return self._scene_cache[scene]

    def _annotations_for(self, scene, real_scene_folder):
        """Load (and cache) the pickled annotation list of a scene, or None."""
        if scene not in self.label_cache:
            annotation_file = os.path.join(real_scene_folder, "labels", "tabletop_labels.dat")
            if os.path.exists(annotation_file):
                # SECURITY: pickle.load can execute arbitrary code; only use
                # annotation files from a trusted source.
                with open(annotation_file, 'rb') as f:
                    self.label_cache[scene] = pickle.load(f)
                if self.verbose:
                    print(f"Loaded annotations for {scene} with {len(self.label_cache[scene])} entries.")
            else:
                self.label_cache[scene] = None
                if self.verbose:
                    print(f"Warning: No annotation file found at {annotation_file}.")
        return self.label_cache[scene]

    def __getitem__(self, idx):
        """Return {"pointcloud": (num_points, 3) float tensor, "label": long tensor}.

        Raises:
            ValueError: If the filename does not follow
                '<scene>_<frame>_depth.png'.
            FileNotFoundError: If the real scene folder cannot be located.
        """
        depth_filename = self.files[idx]  # e.g., "76-1studyroom2_0196_depth.png"

        # 1. Scene name and per-scene position (pre-computed in __init__)
        located = self._frame_positions[idx]
        if located is None:
            raise ValueError(f"Filename '{depth_filename}' does not match expected pattern '<scene>_<frame>_depth.png'")
        predicted_scene_name, scene_position = located

        # 2. Load the predicted depth image; the context manager closes the
        #    file handle, and the values are divided by 1000 (mm -> meters).
        depth_path = os.path.join(self.depth_dir, depth_filename)
        with Image.open(depth_path) as img:
            depth_img = np.array(img).astype(np.float32) / 1000.0

        # 3. Map to the real dataset folder (cached per scene)
        real_scene_folder, num_real_frames, K = self._scene_info(predicted_scene_name)
        if self.verbose:
            print(f"Mapping '{predicted_scene_name}' to real folder: {real_scene_folder}")

        # 4. Frame index = position of this file inside its scene
        if scene_position < num_real_frames:
            frame_index = scene_position
        else:
            print(f"⚠️ Warning: Predicted frame index {scene_position} exceeds real dataset size ({num_real_frames}).")
            frame_index = 0  # Fallback kept from the original behaviour

        # 5. Convert depth map to point cloud and downsample
        pointcloud = depth_to_pointcloud(depth_img, K)
        pointcloud = downsample_pointcloud(pointcloud, self.num_points)

        # 6. Look up the label for this frame
        annotations = self._annotations_for(predicted_scene_name, real_scene_folder)
        if annotations is not None and frame_index < len(annotations):
            polygons = annotations[frame_index]
            label = int(has_table(polygons))  # Convert polygon list to binary label
        else:
            label = 0
            if self.verbose:
                print(f"Warning: Could not match annotation for frame index {frame_index} in {real_scene_folder}. Setting label to 0.")

        # 7. Assemble the sample
        sample = {
            "pointcloud": torch.from_numpy(pointcloud).float(),
            "label": torch.tensor(label, dtype=torch.long)
        }
        if self.transform:
            sample = self.transform(sample)
        return sample


In [None]:
# NOTE: everything below is wrapped in a string literal, so the class is NOT
# defined at runtime (the name TableClassificationDataset does not exist after
# running this cell). The disabled implementation never loads annotations
# (every sample gets label 0) and applies no unit scaling to the depth values.
'''
class TableClassificationDataset(Dataset):
    def __init__(self, root_dir, depth_folder="", annotation_path=None,
                 intrinsics_path=None, num_points=1024, transform=None, verbose=False):
        super().__init__()
        if depth_folder:
            self.depth_dir = os.path.join(root_dir, depth_folder)
        else:
            self.depth_dir = root_dir    # e.g., "data/depth_maps_PNG/train"

        self.annotation_path = annotation_path
        self.num_points = num_points
        self.transform = transform
        self.verbose = verbose

        # Load PNG depth maps
        self.depth_files = sorted([f for f in os.listdir(self.depth_dir) if f.lower().endswith('.png')])
        if self.verbose:
            print(f"Found {len(self.depth_files)} PNG files in {self.depth_dir}")

        # If no annotation file, we'll just set label=0 for all
        if annotation_path is None or not os.path.exists(os.path.join(root_dir, annotation_path)):
            self.annotations = [None] * len(self.depth_files)
        else:
            # If you still want to load .dat annotations from somewhere else, handle it here
            pass

        # If intrinsics_path is None, use default intrinsics
        self.intrinsics = load_intrinsics(intrinsics_path) if intrinsics_path else np.array([[570.3422, 0, 320],
                                                                                            [0, 570.3422, 240],
                                                                                            [0, 0, 1]])

    def __len__(self):
        return len(self.depth_files)

    def __getitem__(self, idx):
        depth_path = os.path.join(self.depth_dir, self.depth_files[idx])
        depth_img = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED)
        if depth_img is None:
            raise FileNotFoundError(f"Depth image not found: {depth_path}")
        depth_img = depth_img.astype(np.float32)

        pointcloud = depth_to_pointcloud(depth_img, self.intrinsics)
        pointcloud = downsample_pointcloud(pointcloud, self.num_points)

        # If no annotation, default label=0 (or 1, or your logic)
        label = 0

        pointcloud = torch.from_numpy(pointcloud).float()
        label = torch.tensor(label, dtype=torch.long)
        sample = {"pointcloud": pointcloud, "label": label}
        if self.transform:
            sample = self.transform(sample)
        return sample
'''

In [27]:

def _count_positive_labels(dataset):
    """Count samples with label 1 (this loads every sample of `dataset` once)."""
    return sum(1 for i in range(len(dataset)) if dataset[i]["label"].item() == 1)


if __name__ == "__main__":
    # The predicted depth maps live next to the real scene folders:
    #   <base_path>/depth_maps_PNG/{train,test} and <base_path>/mit_*, harvard_*
    base_path = "../../data/"
    predicted_train_dir = os.path.join(base_path, "depth_maps_PNG", "train")
    predicted_test_dir  = os.path.join(base_path, "depth_maps_PNG", "test")

    # Train (with augmentation)
    print("Processing Training Depths:")
    train_dataset = PredictedDepthDataset(
        depth_dir=predicted_train_dir,
        base_dataset_dir=base_path,  # where 'mit_*' and 'harvard_*' folders live
        num_points=1024,
        transform=random_augmentation,
        verbose=False
    )
    pos_count = _count_positive_labels(train_dataset)
    neg_count = len(train_dataset) - pos_count
    print(f"Train Dataset: {len(train_dataset)} samples — {pos_count} positives, {neg_count} negatives.")

    print("--------------------------------------------------------------------------------")

    # Test: no random augmentation, so evaluation on this dataset is not
    # perturbed by random rotation/scale/jitter.
    print("Processing Test Depths:")
    test_dataset = PredictedDepthDataset(
        depth_dir=predicted_test_dir,
        base_dataset_dir=base_path,  # same logic
        num_points=1024,
        transform=None,
        verbose=False
    )
    pos_count = _count_positive_labels(test_dataset)
    neg_count = len(test_dataset) - pos_count
    print(f"Test Dataset: {len(test_dataset)} samples — {pos_count} positives, {neg_count} negatives.")


Processing Training Depths:


FileNotFoundError: [Errno 2] No such file or directory: '../../data/76-1studyroom2/depthTSDF'

In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Subset, ConcatDataset
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix, classification_report
from torch.optim.lr_scheduler import ReduceLROnPlateau


# ------------------------
# Helper functions for DGCNN
# ------------------------

def knn(x, k):
    """Return the indices of the k nearest neighbours of every point.

    Args:
        x: Tensor of shape (B, C, N) - B clouds of N points with C features.
        k: Number of neighbours to return per point.

    Returns:
        Long tensor of shape (B, N, k). Distances are squared Euclidean and
        each point is its own nearest neighbour (distance 0).

    Note:
        A dense (B, N, N) distance matrix is built, so memory grows
        quadratically with the number of points.
    """
    gram = torch.matmul(x.transpose(2, 1), x)           # (B, N, N) dot products
    sq_norms = x.pow(2).sum(dim=1, keepdim=True)        # (B, 1, N)
    # -||xi - xj||^2 = 2*xi.xj - ||xi||^2 - ||xj||^2 ; largest value = closest
    neg_sq_dist = 2 * gram - sq_norms - sq_norms.transpose(2, 1)
    return neg_sq_dist.topk(k=k, dim=-1).indices        # (B, N, k)

def get_graph_feature(x, k=20, idx=None):
    """Build edge features (neighbour - centre, centre) for every point.

    Args:
        x: Tensor of shape (B, C, N).
        k: Number of neighbours per point.
        idx: Optional pre-computed neighbour indices of shape (B, N, k);
            computed with knn(x, k) when omitted.

    Returns:
        Contiguous tensor of shape (B, 2*C, N, k): the first C channels hold
        the neighbour-minus-centre difference, the last C the centre point.
    """
    n_batch, n_dims, n_pts = x.size()
    if idx is None:
        idx = knn(x, k=k)

    # Offset the per-cloud indices so they address the flattened (B*N, C) view.
    offsets = torch.arange(0, n_batch, device=x.device).view(-1, 1, 1) * n_pts
    flat_idx = (idx + offsets).view(-1)

    points = x.transpose(2, 1).contiguous()  # (B, N, C)
    neighbours = points.view(n_batch * n_pts, -1)[flat_idx, :]
    neighbours = neighbours.view(n_batch, n_pts, k, n_dims)
    centres = points.view(n_batch, n_pts, 1, n_dims).repeat(1, 1, k, 1)

    edge = torch.cat((neighbours - centres, centres), dim=3)
    return edge.permute(0, 3, 1, 2).contiguous()

# ------------------------
# DGCNN Model Definition
# ------------------------

class DGCNNClassifier(nn.Module):
    """DGCNN-style point-cloud classifier: four edge-conv stages, a point-wise
    embedding, global max+avg pooling and a three-layer MLP head.

    Args:
        k: Number of neighbours used to build each edge-feature graph.
        emb_dims: Channels of the point-wise embedding before pooling.
        num_classes: Number of output logits.
        dropout: Dropout probability in the MLP head.
    """

    @staticmethod
    def _edge_conv(in_channels, norm):
        """1x1 Conv2d -> given BatchNorm2d -> LeakyReLU(0.2)."""
        return nn.Sequential(
            nn.Conv2d(in_channels, norm.num_features, kernel_size=1, bias=False),
            norm,
            nn.LeakyReLU(negative_slope=0.2),
        )

    def __init__(self, k=20, emb_dims=1024, num_classes=2, dropout=0.5):
        super().__init__()
        self.k = k

        # Edge-conv stages. Each BatchNorm is registered both under its own
        # name and inside the Sequential, so attribute names and registration
        # order must stay as they are for saved checkpoints to load.
        self.bn1 = nn.BatchNorm2d(64)
        self.conv1 = self._edge_conv(6, self.bn1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv2 = self._edge_conv(128, self.bn2)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv3 = self._edge_conv(128, self.bn3)
        self.bn4 = nn.BatchNorm2d(256)
        self.conv4 = self._edge_conv(256, self.bn4)

        # Point-wise embedding over the concatenated stage outputs (64+64+128+256).
        self.conv5 = nn.Sequential(
            nn.Conv1d(512, emb_dims, kernel_size=1, bias=False),
            nn.BatchNorm1d(emb_dims),
            nn.LeakyReLU(negative_slope=0.2),
        )

        # Classification head on the pooled [max, avg] descriptor.
        self.linear1 = nn.Linear(emb_dims * 2, 512, bias=False)
        self.bn6 = nn.BatchNorm1d(512)
        self.dp1 = nn.Dropout(p=dropout)
        self.linear2 = nn.Linear(512, 256)
        self.bn7 = nn.BatchNorm1d(256)
        self.dp2 = nn.Dropout(p=dropout)
        self.linear3 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Map point clouds of shape (B, N, 3) to logits of shape (B, num_classes)."""
        batch_size = x.size(0)

        stage_outputs = []
        feat = x.transpose(2, 1)  # (B, 3, N)
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4):
            edges = get_graph_feature(feat, k=self.k)    # (B, 2*C, N, k)
            feat = stage(edges).max(dim=-1, keepdim=False)[0]  # (B, C_out, N)
            stage_outputs.append(feat)

        embedded = self.conv5(torch.cat(stage_outputs, dim=1))  # (B, emb_dims, N)

        pooled_max = F.adaptive_max_pool1d(embedded, 1).view(batch_size, -1)
        pooled_avg = F.adaptive_avg_pool1d(embedded, 1).view(batch_size, -1)
        hidden = torch.cat((pooled_max, pooled_avg), 1)  # (B, emb_dims*2)

        hidden = self.dp1(F.leaky_relu(self.bn6(self.linear1(hidden)), negative_slope=0.2))
        hidden = self.dp2(F.leaky_relu(self.bn7(self.linear2(hidden)), negative_slope=0.2))
        return self.linear3(hidden)



SyntaxError: invalid syntax (4163668725.py, line 13)

In [2]:
# ------------------------
# Training and Evaluation Functions
# ------------------------

def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimisation pass over `dataloader`.

    Batches with at most one sample are skipped. Loss is accumulated
    weighted by batch size.

    Args:
        model: Network called as model(pointclouds).
        dataloader: Iterable of dicts with "pointcloud" and "label" tensors.
        optimizer: Optimiser stepping the model parameters.
        criterion: Loss called as criterion(outputs, labels).
        device: Device the batch tensors are moved to.

    Returns:
        (mean loss, accuracy) over the processed samples, or (0.0, 0.0) when
        no batch was processed.
    """
    model.train()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for batch in dataloader:
        pointclouds = batch["pointcloud"].to(device)  # (B, num_points, 3)
        labels = batch["label"].to(device)             # (B,)
        n_batch = pointclouds.size(0)

        if n_batch <= 1:
            continue

        optimizer.zero_grad()
        outputs = model(pointclouds)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item() * n_batch
        preds = outputs.max(dim=1).indices
        n_correct += (preds == labels).sum().item()
        n_seen += labels.size(0)

    if n_seen == 0:
        return 0.0, 0.0
    return loss_sum / n_seen, n_correct / n_seen

def evaluate(model, dataloader, criterion, device):
    """Evaluate `model` on every sample of `dataloader` without gradients.

    Unlike training, single-sample batches are NOT skipped: the model is put
    in eval mode, where batch normalisation uses its running statistics, so a
    batch of one is valid. Skipping it would silently drop the last sample
    from the metrics whenever len(dataset) % batch_size == 1. Only empty
    batches are ignored.

    Args:
        model: Network called as model(pointclouds).
        dataloader: Iterable of dicts with "pointcloud" and "label" tensors.
        criterion: Loss called as criterion(outputs, labels).
        device: Device the batch tensors are moved to.

    Returns:
        (mean loss, accuracy, all_labels, all_preds) where the last two are
        lists with one entry per evaluated sample, or (0.0, 0.0, None, None)
        when no sample was evaluated.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for batch in dataloader:
            pointclouds = batch["pointcloud"].to(device)
            labels = batch["label"].to(device)

            if pointclouds.size(0) == 0:
                continue

            outputs = model(pointclouds)
            loss = criterion(outputs, labels)
            # Weight by batch size so the final mean is per sample.
            running_loss += loss.item() * pointclouds.size(0)
            _, preds = torch.max(outputs, 1)
            correct += torch.sum(preds == labels).item()
            total += labels.size(0)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    if total == 0:
        return 0.0, 0.0, None, None
    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc, all_labels, all_preds



In [None]:

# ------------------------
# Process Sequences Function
# ------------------------

def process_sequences(seq_list, base_path, set_name="Set", base_dataset_dir=None,
                      num_points=1024, augment=True):
    """Build one combined dataset from several folders of predicted depth PNGs.

    Each entry of `seq_list` is a sub-folder of `base_path` (e.g. "train")
    that directly contains the '<scene>_<frame>_depth.png' files. Samples are
    produced by PredictedDepthDataset, which reads the real labels and
    intrinsics from the scene folders, instead of the disabled
    TableClassificationDataset (not defined at runtime, and it labelled every
    sample 0).

    Args:
        seq_list: Sub-folder names below `base_path`.
        base_path: Folder holding the predicted depth sub-folders,
            e.g. "data/depth_maps_PNG".
        set_name: Label used in the printed summary.
        base_dataset_dir: Directory below which the real scene folders are
            searched. Defaults to the parent of `base_path`, matching the
            layout used elsewhere in this notebook where 'depth_maps_PNG'
            sits next to the 'mit_*' / 'harvard_*' folders.
        num_points: Points per cloud after down-sampling.
        augment: If True (default, the previous behaviour) apply
            random_augmentation to every sample; pass False for evaluation
            data.

    Returns:
        ConcatDataset over all requested folders.
    """
    if base_dataset_dir is None:
        # "data/depth_maps_PNG" -> "data"; fall back to the current directory
        # when base_path has no parent component.
        base_dataset_dir = os.path.dirname(os.path.normpath(base_path)) or "."

    transform = random_augmentation if augment else None

    ds_list = []
    for seq in seq_list:
        seq_path = os.path.join(base_path, seq)
        # e.g. "data/depth_maps_PNG/train"

        dataset = PredictedDepthDataset(
            depth_dir=seq_path,
            base_dataset_dir=base_dataset_dir,
            num_points=num_points,
            transform=transform,
            verbose=False
        )

        print(f"{set_name} '{seq}' has {len(dataset)} PNG depth images.")
        ds_list.append(dataset)

    combined = ConcatDataset(ds_list)
    print(f"{set_name} Combined dataset has {len(combined)} images total.")
    return combined




'\n# ------------------------\n# Process Sequences Function\n# ------------------------\n\ndef process_sequences(seq_list, base_path, set_name="Set"):\n    ds_list = []\n    for seq in seq_list:\n        seq_path = os.path.join(base_path, seq)\n        if "harvard_tea_2" in seq:\n            depth_folder = "depth"\n        else:\n            depth_folder = "depthTSDF"\n        annotation_path = "labels/tabletop_labels.dat"\n        intrinsics_path = os.path.join(seq_path, "intrinsics.txt")\n        dataset = TableClassificationDataset(\n            root_dir=seq_path,\n            depth_folder=depth_folder,\n            annotation_path=annotation_path,\n            intrinsics_path=intrinsics_path,\n            num_points=1024,\n            transform=random_augmentation,\n            verbose=False\n        )\n        pos_count = sum(1 for i in range(len(dataset)) if dataset[i]["label"].item() == 1)\n        neg_count = len(dataset) - pos_count\n        print(f"{set_name} \'{seq}\' has {l

In [None]:
from torch.utils.data import Dataset
import glob

class PreprocessedPointCloudDataset(Dataset):
    """Dataset over '.npy' files that each store a dict with "points" and "label".

    Args:
        root_dir: Folder containing the '.npy' files.
        num_points: If given, every cloud is randomly resampled to exactly
            this many points (without replacement when enough points exist,
            with replacement otherwise). With the default None the stored
            cloud is returned unchanged. Note that a model building a dense
            N x N neighbour matrix needs memory quadratic in the number of
            points, and that the default batch collation requires all clouds
            in a batch to have the same size.
    """

    def __init__(self, root_dir, num_points=None):
        self.files = sorted([f for f in os.listdir(root_dir) if f.endswith(".npy")])
        self.root_dir = root_dir
        self.num_points = num_points
        print(f"✅ Loaded {len(self.files)} point clouds from {root_dir}")

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        """Return {"pointcloud": float32 tensor [N, 3], "label": long tensor}."""
        # SECURITY: allow_pickle=True unpickles the file content, which can
        # execute arbitrary code - only load files from a trusted source.
        sample = np.load(os.path.join(self.root_dir, self.files[idx]), allow_pickle=True).item()
        points = np.asarray(sample["points"])

        if self.num_points is not None and points.shape[0] > 0:
            n_available = points.shape[0]
            choice = np.random.choice(n_available, self.num_points,
                                      replace=n_available < self.num_points)
            points = points[choice]

        pointcloud = torch.tensor(points, dtype=torch.float32)  # [N, 3]
        label = torch.tensor(sample["label"], dtype=torch.long)  # 0 or 1
        return {
            "pointcloud": pointcloud,
            "label": label
        }


In [None]:
# ------------------------
# Main Training Script with 5-Fold Cross Validation (using StratifiedKFold) and ReduceLROnPlateau
# and final evaluation with confusion matrix on the Test Set
# ------------------------

def main():
    """Train DGCNN with stratified k-fold cross-validation and test the best fold.

    Steps: build the train/test datasets, run `n_splits` folds (fresh model,
    Adam, ReduceLROnPlateau on the validation loss), keep a snapshot of the
    weights with the best validation accuracy over all folds, save it to
    'best_dgcnn_model.pth', and report loss, accuracy, confusion matrix and
    classification report on the test set.

    Raises:
        RuntimeError: If no fold produced a model snapshot.
    """
    # Hyperparameters
    batch_size = 8
    num_epochs = 50
    learning_rate = 0.0001
    weight_decay = 1e-4  # L2 regularization
    k_val = 20
    emb_dims = 1024
    dropout = 0.5
    num_classes = 2
    n_splits = 5

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    train_full_dataset = process_sequences(["train"], base_path="data/depth_maps_PNG", set_name="Train")
    print(f"Total samples in train_full_dataset: {len(train_full_dataset)}")

    # Try printing a few samples if non-empty
    if len(train_full_dataset) > 0:
        print("Sample item:", train_full_dataset[0])
    else:
        print("🚨 ERROR: Your train dataset is empty!")

    print("\nProcessing Test Sequences:")
    test_dataset  = process_sequences(["test"],  base_path="data/depth_maps_PNG", set_name="Test")

    # Create label array for StratifiedKFold (loads every training sample once)
    labels = [train_full_dataset[i]["label"].item() for i in range(len(train_full_dataset))]

    from sklearn.model_selection import StratifiedKFold
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    # The loss is stateless, so one instance serves all folds and the final test.
    criterion = nn.CrossEntropyLoss()

    fold_val_accs = []
    fold_val_losses = []
    best_fold_model_state = None
    best_fold_val_acc = 0.0
    all_fold_train_losses = []
    all_fold_val_losses = []

    for fold, (train_idx, val_idx) in enumerate(skf.split(np.zeros(len(labels)), labels)):
        print(f"\nStarting Fold {fold+1}/{n_splits}")
        print(f"Training size: {len(train_idx)} | Validation size: {len(val_idx)}")
        train_subset = Subset(train_full_dataset, train_idx)
        val_subset = Subset(train_full_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=4)
        val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=4)

        model = DGCNNClassifier(k=k_val, emb_dims=emb_dims, num_classes=num_classes, dropout=dropout)
        model = model.to(device)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
        # Reduce the learning rate when the validation loss plateaus. The
        # deprecated `verbose` argument is not used; changes are printed below.
        scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)

        best_val_acc_fold = 0.0
        best_val_loss_fold = float('inf')
        best_model_state_fold = None
        train_losses_fold = []
        val_losses_fold = []

        for epoch in range(num_epochs):
            train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion, device)
            val_loss, val_acc, _, _ = evaluate(model, val_loader, criterion, device)
            train_losses_fold.append(train_loss)
            val_losses_fold.append(val_loss)
            print(f"Fold {fold+1} Epoch [{epoch+1}/{num_epochs}] "
                  f"Train Loss: {train_loss:.4f} Train Acc: {train_acc:.4f} | "
                  f"Val Loss: {val_loss:.4f} Val Acc: {val_acc:.4f}")

            lr_before = optimizer.param_groups[0]["lr"]
            scheduler.step(val_loss)
            lr_after = optimizer.param_groups[0]["lr"]
            if lr_after != lr_before:
                print(f"Fold {fold+1} learning rate reduced: {lr_before:.2e} -> {lr_after:.2e}")

            if val_acc > best_val_acc_fold or best_model_state_fold is None:
                best_val_acc_fold = val_acc
                best_val_loss_fold = val_loss
                # state_dict() returns references to the live parameters,
                # which later epochs keep updating; store detached CPU copies
                # so the snapshot really is the best epoch.
                best_model_state_fold = {
                    name: tensor.detach().cpu().clone()
                    for name, tensor in model.state_dict().items()
                }

        print(f"Fold {fold+1} Best Val Acc: {best_val_acc_fold:.4f} with Loss: {best_val_loss_fold:.4f}")
        fold_val_accs.append(best_val_acc_fold)
        fold_val_losses.append(best_val_loss_fold)
        all_fold_train_losses.append(train_losses_fold)
        all_fold_val_losses.append(val_losses_fold)

        if best_model_state_fold is not None and (
                best_val_acc_fold > best_fold_val_acc or best_fold_model_state is None):
            best_fold_val_acc = best_val_acc_fold
            best_fold_model_state = best_model_state_fold

        # Plot loss curves for this fold
        epochs_arr = np.arange(1, num_epochs + 1)
        plt.figure()
        plt.plot(epochs_arr, train_losses_fold, label="Train Loss")
        plt.plot(epochs_arr, val_losses_fold, label="Val Loss")
        plt.xlabel("Epoch")
        plt.ylabel("Loss")
        plt.title(f"Fold {fold+1} Loss Curves")
        plt.legend()
        plt.grid(True)
        plt.show()
        plt.close()  # release the figure; one is created per fold

    print("\nCross Validation Complete.")
    print(f"Average Validation Accuracy over {n_splits} folds: {np.mean(fold_val_accs):.4f}")
    print(f"Best Fold Validation Accuracy: {best_fold_val_acc:.4f}")

    plt.figure()
    plt.bar(np.arange(1, n_splits+1), fold_val_losses, tick_label=np.arange(1, n_splits+1))
    plt.xlabel("Fold")
    plt.ylabel("Best Validation Loss")
    plt.title("Best Validation Loss per Fold")
    plt.grid(True)
    plt.show()
    plt.close()

    if best_fold_model_state is None:
        raise RuntimeError("No model snapshot was produced during cross validation.")

    torch.save(best_fold_model_state, "best_dgcnn_model.pth")
    print("Best model saved to 'best_dgcnn_model.pth'")

    # Evaluate best model on the test set
    model = DGCNNClassifier(k=k_val, emb_dims=emb_dims, num_classes=num_classes, dropout=dropout)
    model.load_state_dict(torch.load("best_dgcnn_model.pth"))
    model = model.to(device)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
    test_loss, test_acc, y_true, y_pred = evaluate(model, test_loader, criterion, device)
    print(f"\nTest Loss: {test_loss:.4f} Test Accuracy: {test_acc:.4f}")

    # evaluate() returns None for the label lists when nothing was evaluated.
    if y_true is None or y_pred is None:
        print("No test samples were evaluated; skipping confusion matrix and report.")
        return

    cm = confusion_matrix(y_true, y_pred)
    cr = classification_report(y_true, y_pred)
    print("Confusion Matrix:")
    print(cm)
    print("\nClassification Report:")
    print(cr)

if __name__ == "__main__":
    main()

Using device: cuda
Processing Training Sequences:
Total samples in train_full_dataset: 281
Sample item: {'pointcloud': tensor([[-1.3456, -1.0092,  2.3983],
        [-1.3083, -0.9843,  2.3392],
        [-1.3612, -1.0273,  2.4413],
        ...,
        [ 0.4877,  0.3690,  1.4716],
        [ 0.4903,  0.3690,  1.4716],
        [ 0.4928,  0.3690,  1.4716]]), 'label': tensor(1)}

Processing Test Sequences:

Starting Fold 1/5
Training size: 224 | Validation size: 57




OutOfMemoryError: CUDA out of memory. Tried to allocate 1152.00 GiB. GPU 0 has a total capacity of 3.94 GiB of which 3.59 GiB is free. Including non-PyTorch memory, this process has 337.75 MiB memory in use. Of the allocated memory 297.56 MiB is allocated by PyTorch, and 16.44 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)