In [1]:
import os
from os import listdir
import pandas as pd
import numpy as np
import glob
import cv2
import json
from os.path import expanduser
import splitfolders
import shutil
from define_path import Def_Path

from tqdm import tqdm

import torch 
import torchvision
from torchvision import models
from torchvision.models.detection.rpn import AnchorGenerator
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn 
import torch.nn.functional as func
import torchvision.transforms as T
from torchvision.transforms import functional as F
from torchsummary import summary
from torch.cuda.amp import GradScaler, autocast
from sklearn.model_selection import train_test_split
from torch_geometric.nn import GCNConv, global_mean_pool
from torch_geometric.data import Data

import albumentations as A # Library for augmentations

import matplotlib.pyplot as plt 
from PIL import Image

import transforms, utils, engine, train
from utils import collate_fn
from engine import train_one_epoch, evaluate

import networkx as nx


t = torch.cuda.get_device_properties(0).total_memory
print(t)
torch.cuda.empty_cache()

r = torch.cuda.memory_reserved(0)
print(r)
a = torch.cuda.memory_allocated(0)
print(a)
# f = r-a  # free inside reserved

16908615680
0
0


In [2]:
# to generalize home directory. User can change their parent path without entering their home directory
path = Def_Path()

parent_path =  path.home + "/Pictures/" + "Data/"

root_dir = parent_path + path.year + "-" + path.month + "-" + path.day + "/"

print(root_dir)

/home/jc-merlab/Pictures/Data/2023-10-13/


In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# torch.cuda.set_per_process_memory_fraction(0.9, 0)
print(device)

cuda


In [4]:
# this fucntion tranforms an input image for diverseifying data for training
def train_transform():
    return A.Compose([
        A.Sequential([
            A.RandomRotate90(p=1), 
            A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.2, brightness_by_max=True, always_apply=False, p=1), 
        ], p=1),
        A.Resize(640, 480),  # Resize every image to 640x480 after all other transformations
    ],
    keypoint_params=A.KeypointParams(format='xy'),
    bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bboxes_labels'])
    )

In [5]:
# this function is to split the dataset into train, test and validation folder.
def train_test_split(src_dir):
    dst_dir_img = src_dir + "images"
    dst_dir_anno = src_dir + "annotations"
    
    if os.path.exists(dst_dir_img) and os.path.exists(dst_dir_anno):
        print("folders exist")
    else:
        os.mkdir(dst_dir_img)
        os.mkdir(dst_dir_anno)
        
    for jpgfile in glob.iglob(os.path.join(src_dir, "*.jpg")):
        shutil.copy(jpgfile, dst_dir_img)

    for jsonfile in glob.iglob(os.path.join(src_dir, "*.json")):
        shutil.copy(jsonfile, dst_dir_anno)
        
    output = parent_path + "split_folder_output" + "-" + path.year + "-" + path.month + "-" + path.day 
    
    print(output)
    
    splitfolders.ratio(src_dir, # The location of dataset
                   output=output, # The output location
                   seed=42, # The number of seed
                   ratio=(.7, .2, .1), # The ratio of split dataset
                   group_prefix=None, # If your dataset contains more than one file like ".jpg", ".pdf", etc
                   move=False # If you choose to move, turn this into True
                   )
    
    shutil.rmtree(dst_dir_img)
    shutil.rmtree(dst_dir_anno)
    
    return output

In [6]:
class ClassDataset(Dataset):
    def __init__(self, root, transform=None, demo=False):                
        self.root = root
        self.transform = transform
        self.demo = demo 
        self.imgs_files = sorted(os.listdir(os.path.join(root, "images")))
        self.annotations_files = sorted(os.listdir(os.path.join(root, "annotations")))
    
    def __getitem__(self, idx):
        img_path = os.path.join(self.root, "images", self.imgs_files[idx])
        annotations_path = os.path.join(self.root, "annotations", self.annotations_files[idx])
        img_original = cv2.imread(img_path)
        img_original = cv2.cvtColor(img_original, cv2.COLOR_BGR2RGB)        
        
        with open(annotations_path) as f:
            data = json.load(f)
            bboxes_original = data['bboxes']
            keypoints_original = data['keypoints']
            
            # All objects are keypoints on the arm
            bboxes_labels_original = [] 
            bboxes_labels_original.append('base_kp')
            bboxes_labels_original.append('joint1')
            bboxes_labels_original.append('joint2')
            bboxes_labels_original.append('joint3')
            bboxes_labels_original.append('joint4')
            bboxes_labels_original.append('joint5')

        if self.transform:
            keypoints_original_flattened = [el[0:2] for kp in keypoints_original for el in kp]
            transformed = self.transform(image=img_original, bboxes=bboxes_original, bboxes_labels=bboxes_labels_original, keypoints=keypoints_original_flattened)
            img = transformed['image']
            bboxes = transformed['bboxes']
            keypoints_transformed_unflattened = np.reshape(np.array(transformed['keypoints']), (-1,1,2)).tolist()
            
            keypoints = []
            for o_idx, obj in enumerate(keypoints_transformed_unflattened):
                obj_keypoints = []
                for k_idx, kp in enumerate(obj):
                    obj_keypoints.append(kp + [keypoints_original[o_idx][k_idx][2]])
                keypoints.append(obj_keypoints)
        else:
            img, bboxes, keypoints = img_original, bboxes_original, keypoints_original  

            # Convert everything into a torch tensor        
        bboxes = torch.as_tensor(bboxes, dtype=torch.float32)       
        target = {}
        labels = [1, 2, 3, 4, 5, 6]   
#         labels = [1, 2, 3, 4]
        target["boxes"] = bboxes
        target["labels"] = torch.as_tensor(labels, dtype=torch.int64) # all objects are joint positions
        target["image_id"] = torch.tensor([idx])
        target["area"] = (bboxes[:, 3] - bboxes[:, 1]) * (bboxes[:, 2] - bboxes[:, 0])
        target["iscrowd"] = torch.zeros(len(bboxes), dtype=torch.int64)
        target["keypoints"] = torch.as_tensor(keypoints, dtype=torch.float32)
        img = F.to_tensor(img)        
        bboxes_original = torch.as_tensor(bboxes_original, dtype=torch.float32)
        target_original = {}
        target_original["boxes"] = bboxes_original
        target_original["labels"] = torch.as_tensor(labels, dtype=torch.int64) # all objects are glue tubes
        target_original["image_id"] = torch.tensor([idx])
        target_original["area"] = (bboxes_original[:, 3] - bboxes_original[:, 1]) * (bboxes_original[:, 2] - bboxes_original[:, 0])
        target_original["iscrowd"] = torch.zeros(len(bboxes_original), dtype=torch.int64)
        target_original["keypoints"] = torch.as_tensor(keypoints_original, dtype=torch.float32)        
        img_original = F.to_tensor(img_original)


        if self.demo:
            return img, target, img_original, target_original
        else:
            return img, target
    
    def __len__(self):
        return len(self.imgs_files)                     
    

In [7]:
def to_geometric_data(G):
    x = []
    edge_index = []
    for _, node_data in G.nodes(data=True):
        x.append([node_data['x'], node_data['y']])
    for edge in G.edges():
        edge_index.append(edge)
    return Data(x=torch.tensor(x, dtype=torch.float), edge_index=torch.tensor(edge_index, dtype=torch.long).t().contiguous())

In [8]:
def create_gt_graph(keypoints, labels):
    G = nx.DiGraph()
    for i, kp in enumerate(keypoints):
        x, y = kp
        G.add_node(i, x=x, y=y, label=labels[i])
    edges = [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 0)]
    G.add_edges_from(edges)
    
        # Debugging print:
    for node, data in G.nodes(data=True):
        print(node, data)
        
    return to_geometric_data(G)


In [15]:
def create_pred_graph(predicted_keypoints, predicted_labels=None):
    G = nx.DiGraph()
    placeholder = [-9999, -9999]  # Sentinel value for missing keypoints
    mask = []

    total_keypoints = 6  # Assuming you have 6 keypoints as per your edges

    for i in range(total_keypoints):
        if i < len(predicted_keypoints) and predicted_keypoints[i] is not None:
            # If the keypoint is present and within the bounds of predicted_keypoints
            x, y = predicted_keypoints[i][0:2]
            mask.append(1.)
            label = predicted_labels[i] if predicted_labels else -1
        else:
            # If keypoint is out of bounds of predicted_keypoints or missing
            x, y = placeholder
            mask.append(0.)
            label = -1
        
        G.add_node(i, x=x, y=y, label=label)

    # Add edges to the graph
    edges = [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 0)]
    G.add_edges_from(edges)

    return to_geometric_data(G), torch.tensor(mask, dtype=torch.float)

In [16]:
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree

class SimpleGNNLayer(MessagePassing):
    def __init__(self, in_channels, out_channels):
        super(SimpleGNNLayer, self).__init__(aggr='add')  # 'add' aggregation
        self.lin = torch.nn.Linear(in_channels, out_channels)

    def forward(self, x, edge_index):
        # Add self loops to the adjacency matrix.
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))

        # Transform node feature matrix.
        return self.propagate(edge_index, size=(x.size(0), x.size(0)), x=self.lin(x))

    def message(self, x_j, edge_index, size):
        # Compute normalization.
        row, col = edge_index
        deg = degree(row, size[0], dtype=x_j.dtype)
        deg_inv_sqrt = deg.pow(-0.5)
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]

        return norm.view(-1, 1) * x_j

class SimpleGNN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(SimpleGNN, self).__init__()
        self.layer1 = SimpleGNNLayer(in_channels, hidden_channels)
        self.layer2 = SimpleGNNLayer(hidden_channels, hidden_channels)
#         self.layer3 = SimpleGNNLayer(hidden_channels, hidden_channels)
        self.fc = nn.Linear(hidden_channels, out_channels)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = torch.relu(self.layer1(x, edge_index))
        x = self.layer2(x, edge_index)
#         x = torch.relu(self.layer3(x, edge_index))
        x = self.fc(x)
        return x

In [17]:
import torch.nn as nn
import torch_geometric.nn as geom_nn

class KeypointGNN(nn.Module):
    def __init__(self):
        super(KeypointGNN, self).__init__()
        
        # Define GNN layers
        self.conv1 = geom_nn.GCNConv(2, 128)
        self.conv2 = geom_nn.GCNConv(128, 64)
        self.regressor = nn.Linear(64, 2)  # Predicts x,y for each keypoint
    
    def forward(self, data):
        x, edge_index = data.x.to(device), data.edge_index.to(device)
        
        x = self.conv1(x, edge_index)
        x = torch.nn.functional.relu(x)
        x = self.conv2(x, edge_index)
        x = self.regressor(x)

        return x

In [21]:
class CombinedModel(nn.Module):
    def __init__(self):
        super(CombinedModel, self).__init__()
        self.keypoint_rcnn = torchvision.models.detection.keypointrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=True, num_keypoints=6, num_classes=7)
        self.kp_gnn = KeypointGNN()    

    def forward(self, images, targets=None, train=False):
        if train:
            output = self.keypoint_rcnn(images, targets)
            return output
    
        else:
            with torch.no_grad():
                self.keypoint_rcnn.eval()
                output = self.keypoint_rcnn(images)
                self.keypoint_rcnn.train()
                
                keypoints = output[0]['keypoints'].detach().cpu().numpy()
                kp_score = output[0]['keypoints_scores'].detach().cpu().numpy()
                labels = output[0]['labels'].detach().cpu().numpy()
                unique_labels = list(set(labels))
                scores = output[0]['scores'].detach().cpu().numpy()
                print("labels", unique_labels)
                kps = []
                kp_scores = []
                ulabels = []

                for label in unique_labels:
                    indices = [j for j, x in enumerate(labels) if x == label]
                    scores_for_label = [scores[j] for j in indices]
                    max_score_index = indices[scores_for_label.index(max(scores_for_label))]
                    kp_score_label = kp_score[max_score_index].tolist()
                    kps.append(keypoints[max_score_index][kp_score_label.index(max(kp_score_label))])
                    ulabels.append(label)

                kps = [torch.tensor(kp, dtype=torch.float32) for kp in kps]
                keypoints = torch.stack(kps)
                
                pred_data, mask = create_pred_graph(keypoints, unique_labels)
                pred_kps = self.kp_gnn(pred_data)
                        
            print("All keypoints", pred_kps)

            return pred_kps, mask, pred_data

In [22]:
def graph_classification_loss(pred_graph, gt_graph):
    # Assuming pred_graph and gt_graph are in a format where .edge_index provides the pair of nodes connected by an edge
    pred_edges = set(tuple(e.cpu().numpy()) for e in pred_graph.edge_index.t())
    gt_edges = set(tuple(e.cpu().numpy()) for e in gt_graph.edge_index.t())

    # Compute missing and spurious edges
    missing_edges = gt_edges - pred_edges
    spurious_edges = pred_edges - gt_edges

    loss = len(missing_edges) + len(spurious_edges)
    return loss

In [28]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
gnn = KeypointGNN().to(device)
model = CombinedModel().to(device)
optimizer = torch.optim.Adam(list(gnn.parameters()) + list(model.parameters()), lr=0.0001)
criterion = torch.nn.MSELoss()

KEYPOINTS_FOLDER_TRAIN = train_test_split(root_dir) +"/train" #train_test_split(root_dir) +"/train"
KEYPOINTS_FOLDER_VAL = train_test_split(root_dir) +"/val"
KEYPOINTS_FOLDER_TEST = train_test_split(root_dir) +"/test"

num_epochs = 1
batch_size = 4

dataset_train = ClassDataset(KEYPOINTS_FOLDER_TRAIN, transform=train_transform(), demo=False)
dataset_val = ClassDataset(KEYPOINTS_FOLDER_VAL, transform=None, demo=False)
dataset_test = ClassDataset(KEYPOINTS_FOLDER_TEST, transform=None, demo=False)

data_loader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True, collate_fn=collate_fn, pin_memory=True)
data_loader_val = DataLoader(dataset_val, batch_size=1, shuffle=False, collate_fn=collate_fn)
data_loader_test = DataLoader(dataset_test, batch_size=1, shuffle=False, collate_fn=collate_fn)


# Training loop
for epoch in range(num_epochs):
    for batch_idx, batch in enumerate(data_loader_train):
        images, targets = batch
        images = torch.stack(images).to(device)  
        # Move targets to GPU
        for target in targets:
            for key, val in target.items():
                target[key] = val.cuda()
        optimizer.zero_grad()
        indiviual_losses = []
        output_train = model(images, targets=targets, train=True)
        for i in range(len(images)):
            gt_keypoints = targets[i]['keypoints'].to(device).squeeze()[:,:2]
            gt_labels = targets[i]['labels'].to(device).squeeze()
            print(gt_keypoints, gt_labels)
            # Craeting GT graph
            gt_data = create_gt_graph(gt_keypoints.to(device), gt_labels.to(device))
            refined_gt_keypoints = gnn(gt_data)
            # GNN for gt in the forward pass
            gt_loss = criterion(refined_gt_keypoints, gt_keypoints)            
            refined_pred_keypoints, mask, pred_data = model(images[i].unsqueeze(0), train=False)
            print(refined_pred_keypoints)
            print(gt_keypoints)
            pred_loss = criterion(refined_pred_keypoints[0], gt_keypoints)            
            graph_loss = graph_classification_loss(pred_data, gt_data)
            loss = gt_loss + pred_loss + graph_loss
            individual_losses.sppend(lodd.item())
        
        # Aggregate the individual losses to get a scalar loss
        scalar_loss = sum(individual_losses) / len(individual_losses)      
        loss_keypoint = output_train['loss_keypoint']
        total_loss = 0.01*loss_keypoint + scalar_loss
        optimizer.zero_grad()
#         total_loss.backward()
        optimizer.step()
    print(f'Epoch:{epoch} and Loss:{total_loss.item()}')

/home/jc-merlab/Pictures/Data/split_folder_output-2023-10-13


Copying files: 2662 files [00:00, 18343.90 files/s]


/home/jc-merlab/Pictures/Data/split_folder_output-2023-10-13


Copying files: 2662 files [00:00, 18677.40 files/s]


/home/jc-merlab/Pictures/Data/split_folder_output-2023-10-13


Copying files: 2662 files [00:00, 19291.92 files/s]


tensor([[112.0801, 257.9522],
        [195.9869, 257.9597],
        [271.8108, 282.1697],
        [265.5089, 301.9530],
        [348.2030, 356.1391],
        [367.1588, 368.3154]], device='cuda:0') tensor([1, 2, 3, 4, 5, 6], device='cuda:0')
0 {'x': tensor(112.0801, device='cuda:0'), 'y': tensor(257.9522, device='cuda:0'), 'label': tensor(1, device='cuda:0')}
1 {'x': tensor(195.9869, device='cuda:0'), 'y': tensor(257.9597, device='cuda:0'), 'label': tensor(2, device='cuda:0')}
2 {'x': tensor(271.8108, device='cuda:0'), 'y': tensor(282.1697, device='cuda:0'), 'label': tensor(3, device='cuda:0')}
3 {'x': tensor(265.5089, device='cuda:0'), 'y': tensor(301.9530, device='cuda:0'), 'label': tensor(4, device='cuda:0')}
4 {'x': tensor(348.2030, device='cuda:0'), 'y': tensor(356.1391, device='cuda:0'), 'label': tensor(5, device='cuda:0')}
5 {'x': tensor(367.1588, device='cuda:0'), 'y': tensor(368.3154, device='cuda:0'), 'label': tensor(6, device='cuda:0')}
labels [1, 2, 4, 5]
All keypoints tens

ValueError: not enough values to unpack (expected 3, got 2)

In [None]:
# # Initialization
# keypoint_rcnn = torchvision.models.detection.keypointrcnn_resnet50_fpn(pretrained=True)
# keypoint_rcnn = keypoint_rcnn.to(device)
# gnn = KeypointGNN().to(device)
# optimizer = torch.optim.Adam(list(keypoint_rcnn.parameters()) + list(gnn.parameters()), lr=0.001)
# criterion = nn.MSELoss()

# # Training loop
# for epoch in range(epochs):
#     for img, target, gt_graph, pred_graph in dataloader:
        
#         # 1. Keypoint R-CNN forward pass
#         predictions = keypoint_rcnn(img)
        
#         # Loss for Keypoint R-CNN
#         rcnn_loss = compute_rcnn_loss(predictions, target)  # Assuming you have this
        
#         # 2. GNN forward pass with GT keypoints
#         refined_gt_keypoints = gnn(gt_graph)
#         gt_loss = criterion(refined_gt_keypoints, torch.stack([k["keypoints"] for k in target]))
        
#         # 3. GNN forward pass with predicted keypoints
#         refined_pred_keypoints = gnn(pred_graph)
#         pred_loss = criterion(refined_pred_keypoints, torch.stack([k["keypoints"] for k in target]))
        
#         # Combine losses
#         total_loss = rcnn_loss + gt_loss + pred_loss
        
#         # Backpropagation
#         optimizer.zero_grad()
#         total_loss.backward()
#         optimizer.step()


In [None]:
gnn_model = HybridModel().to(device)
weights_path = '/home/jc-merlab/Pictures/Data/trained_models/keypointsrcnn_weights_sim_b1_e25_v0.pth'
cnn_model = torch.load(weights_path).to(device)
image = '/home/jc-merlab/Pictures/Data/2023-08-14-Occluded/002654.rgb.jpg'
image = Image.open(image).convert("RGB")

def predict_keypoints(cnn_model, gnn_model, image):
    gnn_model.eval()
    cnn_model.eval()
    image = F.to_tensor(image).to(device)
#     image = list(image)    
    with torch.no_grad():
        output = cnn_model([image])  
        scores = output[0]['scores'].detach().cpu().numpy()
        high_scores_idxs = np.where(scores > 0.7)[0].tolist() # Indexes of boxes with scores > 0.7
        post_nms_idxs = torchvision.ops.nms(output[0]['boxes'][high_scores_idxs], output[0]['scores'][high_scores_idxs], 0.3).cpu().numpy() 
        keypoints = []
        labels = []
        for kps in output[0]['keypoints'][high_scores_idxs][post_nms_idxs].detach().cpu().numpy():
            keypoints.append(list(map(int, kps[0,0:2])))        
        for label in output[0]['labels'][high_scores_idxs][post_nms_idxs].detach().cpu().numpy():
            labels.append(label)
        initial_keypoints = [x for _,x in sorted(zip(labels,keypoints))]
        print(initial_keypoints)
        data = construct_graph_for_prediction(initial_keypoints)
        data = data.to(device)
        predicted_keypoints = gnn_model(image.unsqueeze(0), data).cpu().numpy()
    print(predicted_keypoints)
    return predicted_keypoints, initial_keypoints

In [None]:
predicted_keypoints, gt_keypoints = predict_keypoints(cnn_model, gnn_model, image)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def visualize_keypoints(image_path, keypoints, gt_keypoints):
    """
    Visualize the keypoints on an image.
    
    Args:
    - image_path (str): Path to the image.
    - keypoints (np.array): Array of keypoints, assumed to be in (x, y) format.
    """
    
    # Load the image
#     img = Image.open(image_path).convert("RGB")
    
    # Create a figure and axis
    fig, ax = plt.subplots(1)
    
    # Display the image
    ax.imshow(image_path)
    print(type(keypoints))
    # Extract the x and y coordinates
    x_coords = keypoints[:, 0]
    y_coords = keypoints[:, 1]
    
    print(type(gt_keypoints))
    gt_keypoints = np.array(gt_keypoints)
    
    x_gt = gt_keypoints[:, 0]
    y_gt = gt_keypoints[:, 1]
    
    # Plot the keypoints
    ax.scatter(x_coords, y_coords, c='r', s=40, label="Keypoints")
    ax.scatter(x_gt, y_gt, c='b', s=40, label="gt_keypoints")
    
    # Show the image with keypoints
    plt.legend()
    plt.show()


In [None]:
visualize_keypoints(image, predicted_keypoints, gt_keypoints)