## Dataset

## Datasets (train/val)

## Gat Layer

# TODO: add batch normalization and weight normalization.

In [None]:
# This is the original GATv2Conv layer from PyG, modified to use kaiming_uniform initialization.

from typing import Optional, Tuple, Union

import torch
import torch.nn.functional as F
from torch import Tensor
from torch.nn import Parameter
from torch_sparse import SparseTensor, set_diag

from torch_geometric.nn.conv import MessagePassing
from torch_geometric.nn.dense.linear import Linear
from torch_geometric.nn.inits import glorot, zeros, kaiming_uniform
from torch_geometric.typing import Adj, OptTensor, PairTensor
from torch_geometric.utils import add_self_loops, remove_self_loops, softmax


class GATv2Conv(MessagePassing):
    r"""The GATv2 operator from the `"How Attentive are Graph Attention
    Networks?" <https://arxiv.org/abs/2105.14491>`_ paper, which fixes the
    static attention problem of the standard
    :class:`~torch_geometric.conv.GATConv` layer.
    Since the linear layers in the standard GAT are applied right after each
    other, the ranking of attended nodes is unconditioned on the query node.
    In contrast, in :class:`GATv2`, every node can attend to any other node.

    .. math::
        \mathbf{x}^{\prime}_i = \alpha_{i,i}\mathbf{\Theta}\mathbf{x}_{i} +
        \sum_{j \in \mathcal{N}(i)} \alpha_{i,j}\mathbf{\Theta}\mathbf{x}_{j},

    where the attention coefficients :math:`\alpha_{i,j}` are computed as

    .. math::
        \alpha_{i,j} =
        \frac{
        \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta}
        [\mathbf{x}_i \, \Vert \, \mathbf{x}_j]
        \right)\right)}
        {\sum_{k \in \mathcal{N}(i) \cup \{ i \}}
        \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta}
        [\mathbf{x}_i \, \Vert \, \mathbf{x}_k]
        \right)\right)}.

    If the graph has multi-dimensional edge features :math:`\mathbf{e}_{i,j}`,
    the attention coefficients :math:`\alpha_{i,j}` are computed as

    .. math::
        \alpha_{i,j} =
        \frac{
        \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta}
        [\mathbf{x}_i \, \Vert \, \mathbf{x}_j \, \Vert \, \mathbf{e}_{i,j}]
        \right)\right)}
        {\sum_{k \in \mathcal{N}(i) \cup \{ i \}}
        \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta}
        [\mathbf{x}_i \, \Vert \, \mathbf{x}_k \, \Vert \, \mathbf{e}_{i,k}]
        \right)\right)}.

    This copy differs from the upstream layer in its initialization: the
    linear layers are created with ``weight_initializer='kaiming_uniform'``
    and the attention vector is initialized with ``kaiming_uniform`` instead
    of ``glorot``.

    Args:
        in_channels (int or tuple): Size of each input sample, or :obj:`-1` to
            derive the size from the first input(s) to the forward method.
            A tuple corresponds to the sizes of source and target
            dimensionalities.
        out_channels (int): Size of each output sample.
        heads (int, optional): Number of multi-head-attentions.
            (default: :obj:`1`)
        concat (bool, optional): If set to :obj:`False`, the multi-head
            attentions are averaged instead of concatenated.
            (default: :obj:`True`)
        negative_slope (float, optional): LeakyReLU angle of the negative
            slope. (default: :obj:`0.2`)
        dropout (float, optional): Dropout probability of the normalized
            attention coefficients which exposes each node to a stochastically
            sampled neighborhood during training. (default: :obj:`0`)
        add_self_loops (bool, optional): If set to :obj:`False`, will not add
            self-loops to the input graph. (default: :obj:`True`)
        edge_dim (int, optional): Edge feature dimensionality (in case
            there are any). (default: :obj:`None`)
        fill_value (float or Tensor or str, optional): The way to generate
            edge features of self-loops (in case :obj:`edge_dim != None`).
            If given as :obj:`float` or :class:`torch.Tensor`, edge features of
            self-loops will be directly given by :obj:`fill_value`.
            If given as :obj:`str`, edge features of self-loops are computed by
            aggregating all features of edges that point to the specific node,
            according to a reduce operation. (:obj:`"add"`, :obj:`"mean"`,
            :obj:`"min"`, :obj:`"max"`, :obj:`"mul"`). (default: :obj:`"mean"`)
        bias (bool, optional): If set to :obj:`False`, the layer will not learn
            an additive bias. (default: :obj:`True`)
        share_weights (bool, optional): If set to :obj:`True`, the same matrix
            will be applied to the source and the target node of every edge.
            (default: :obj:`False`)
        **kwargs (optional): Additional arguments of
            :class:`torch_geometric.nn.conv.MessagePassing`.

    Shapes:
        - **input:**
          node features :math:`(|\mathcal{V}|, F_{in})` or
          :math:`((|\mathcal{V_s}|, F_{s}), (|\mathcal{V_t}|, F_{t}))`
          if bipartite,
          edge indices :math:`(2, |\mathcal{E}|)`,
          edge features :math:`(|\mathcal{E}|, D)` *(optional)*
        - **output:** node features :math:`(|\mathcal{V}|, H * F_{out})` or
          :math:`((|\mathcal{V}_t|, H * F_{out})` if bipartite.
          If :obj:`return_attention_weights=True`, then
          :math:`((|\mathcal{V}|, H * F_{out}),
          ((2, |\mathcal{E}|), (|\mathcal{E}|, H)))`
          or :math:`((|\mathcal{V_t}|, H * F_{out}), ((2, |\mathcal{E}|),
          (|\mathcal{E}|, H)))` if bipartite
    """
    # Attention coefficients of the most recent `message` call; `forward`
    # reads them and resets the attribute to None.
    _alpha: OptTensor

    def __init__(
        self,
        in_channels: Union[int, Tuple[int, int]],
        out_channels: int,
        heads: int = 1,
        concat: bool = True,
        negative_slope: float = 0.2,
        dropout: float = 0.0,
        add_self_loops: bool = True,
        edge_dim: Optional[int] = None,
        fill_value: Union[float, Tensor, str] = 'mean',
        bias: bool = True,
        share_weights: bool = False,
        **kwargs,
    ):
        super().__init__(node_dim=0, **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.heads = heads
        self.concat = concat
        self.negative_slope = negative_slope
        self.dropout = dropout
        self.add_self_loops = add_self_loops
        self.edge_dim = edge_dim
        self.fill_value = fill_value
        self.share_weights = share_weights

        # `lin_l` transforms source nodes and `lin_r` target nodes; with
        # `share_weights` both names refer to the same module.
        if isinstance(in_channels, int):
            self.lin_l = Linear(in_channels, heads * out_channels, bias=bias,
                                weight_initializer='kaiming_uniform')
            if share_weights:
                self.lin_r = self.lin_l
            else:
                self.lin_r = Linear(in_channels, heads * out_channels,
                                    bias=bias, weight_initializer='kaiming_uniform')
        else:
            self.lin_l = Linear(in_channels[0], heads * out_channels,
                                bias=bias, weight_initializer='kaiming_uniform')
            if share_weights:
                self.lin_r = self.lin_l
            else:
                self.lin_r = Linear(in_channels[1], heads * out_channels,
                                    bias=bias, weight_initializer='kaiming_uniform')

        # Values are uninitialized here; `reset_parameters` fills them in.
        self.att = Parameter(torch.empty(1, heads, out_channels))

        if edge_dim is not None:
            self.lin_edge = Linear(edge_dim, heads * out_channels, bias=False,
                                   weight_initializer='kaiming_uniform')
        else:
            self.lin_edge = None

        if bias and concat:
            self.bias = Parameter(torch.empty(heads * out_channels))
        elif bias and not concat:
            self.bias = Parameter(torch.empty(out_channels))
        else:
            self.register_parameter('bias', None)

        self._alpha = None

        self.reset_parameters()

    def _att_fan(self) -> int:
        """Return the fan value used to initialize the attention vector.

        A positive integer ``in_channels`` is used unchanged, so the
        initialization is the same as before for that case. For a tuple
        (bipartite input) the source dimensionality is used. If the result
        is not positive (lazy ``-1``), ``out_channels`` is used instead,
        because ``kaiming_uniform`` needs a positive integer fan.
        """
        fan = self.in_channels
        if not isinstance(fan, int):
            fan = fan[0]
        if fan <= 0:
            fan = self.out_channels
        return fan

    def reset_parameters(self):
        """Re-initialize the linear layers, the attention vector and the bias."""
        self.lin_l.reset_parameters()
        self.lin_r.reset_parameters()
        if self.lin_edge is not None:
            self.lin_edge.reset_parameters()
        # Kaiming-uniform is used here in place of the upstream glorot(self.att).
        kaiming_uniform(self.att, self._att_fan(), 0.1)
        zeros(self.bias)

    def forward(self, x: Union[Tensor, PairTensor], edge_index: Adj,
                edge_attr: OptTensor = None,
                return_attention_weights: Optional[bool] = None):
        # type: (Union[Tensor, PairTensor], Tensor, OptTensor, NoneType) -> Tensor  # noqa
        # type: (Union[Tensor, PairTensor], SparseTensor, OptTensor, NoneType) -> Tensor  # noqa
        # type: (Union[Tensor, PairTensor], Tensor, OptTensor, bool) -> Tuple[Tensor, Tuple[Tensor, Tensor]]  # noqa
        # type: (Union[Tensor, PairTensor], SparseTensor, OptTensor, bool) -> Tuple[Tensor, SparseTensor]  # noqa
        r"""
        Args:
            x (Tensor or pair of Tensors): Node features; a single 2-D tensor,
                or a ``(source, target)`` pair for bipartite graphs.
            edge_index (Tensor or SparseTensor): Graph connectivity.
            edge_attr (Tensor, optional): Edge features, projected by
                ``lin_edge`` inside :meth:`message`. (default: :obj:`None`)
            return_attention_weights (bool, optional): If set to :obj:`True`,
                will additionally return the tuple
                :obj:`(edge_index, attention_weights)`, holding the computed
                attention weights for each edge. (default: :obj:`None`)

        Note:
            Any :obj:`bool` value (including :obj:`False`) selects the
            tuple-returning path, because only the type of the argument is
            checked.
        """
        H, C = self.heads, self.out_channels

        x_l: OptTensor = None
        x_r: OptTensor = None
        if isinstance(x, Tensor):
            assert x.dim() == 2
            x_l = self.lin_l(x).view(-1, H, C)
            if self.share_weights:
                x_r = x_l
            else:
                x_r = self.lin_r(x).view(-1, H, C)
        else:
            x_l, x_r = x[0], x[1]
            assert x[0].dim() == 2
            x_l = self.lin_l(x_l).view(-1, H, C)
            if x_r is not None:
                x_r = self.lin_r(x_r).view(-1, H, C)

        assert x_l is not None
        assert x_r is not None

        if self.add_self_loops:
            if isinstance(edge_index, Tensor):
                num_nodes = x_l.size(0)
                if x_r is not None:
                    num_nodes = min(num_nodes, x_r.size(0))
                # Drop existing self-loops first so each node ends up with
                # exactly one.
                edge_index, edge_attr = remove_self_loops(
                    edge_index, edge_attr)
                edge_index, edge_attr = add_self_loops(
                    edge_index, edge_attr, fill_value=self.fill_value,
                    num_nodes=num_nodes)
            elif isinstance(edge_index, SparseTensor):
                if self.edge_dim is None:
                    edge_index = set_diag(edge_index)
                else:
                    raise NotImplementedError(
                        "The usage of 'edge_attr' and 'add_self_loops' "
                        "simultaneously is currently not yet supported for "
                        "'edge_index' in a 'SparseTensor' form")

        # propagate_type: (x: PairTensor, edge_attr: OptTensor)
        out = self.propagate(edge_index, x=(x_l, x_r), edge_attr=edge_attr,
                             size=None)

        # `message` stored the attention coefficients on the module; take
        # them and clear the attribute.
        alpha = self._alpha
        self._alpha = None

        if self.concat:
            out = out.view(-1, self.heads * self.out_channels)
        else:
            out = out.mean(dim=1)

        if self.bias is not None:
            out = out + self.bias

        if isinstance(return_attention_weights, bool):
            assert alpha is not None
            if isinstance(edge_index, Tensor):
                return out, (edge_index, alpha)
            elif isinstance(edge_index, SparseTensor):
                return out, edge_index.set_value(alpha, layout='coo')
        else:
            return out

    def message(self, x_j: Tensor, x_i: Tensor, edge_attr: OptTensor,
                index: Tensor, ptr: OptTensor,
                size_i: Optional[int]) -> Tensor:
        """Compute the attention-weighted message for every edge.

        The transformed endpoint features (plus the projected edge features,
        if any) are summed, passed through LeakyReLU and reduced against
        ``self.att`` to a per-head score. The scores are normalized with
        ``softmax`` over ``index``, stored in ``self._alpha``, passed through
        dropout and used to scale ``x_j``.
        """
        x = x_i + x_j
        if edge_attr is not None:
            if edge_attr.dim() == 1:
                edge_attr = edge_attr.view(-1, 1)
            assert self.lin_edge is not None
            edge_attr = self.lin_edge(edge_attr)
            edge_attr = edge_attr.view(-1, self.heads, self.out_channels)
            x = x + edge_attr

        x = F.leaky_relu(x, self.negative_slope)
        alpha = (x * self.att).sum(dim=-1)
        alpha = softmax(alpha, index, ptr, size_i)
        # Stored before dropout, so `forward` reports the undropped weights.
        self._alpha = alpha
        alpha = F.dropout(alpha, p=self.dropout, training=self.training)
        mes = x_j * alpha.unsqueeze(-1)
        return mes

    def __repr__(self) -> str:
        return (f'{self.__class__.__name__}({self.in_channels}, '
                f'{self.out_channels}, heads={self.heads})')

## Gat Model

In [None]:
import pdb
class GAT(torch.nn.Module):
    """Graph Attention Network with a node-classification and an edge head.

    Three ``GATv2Conv`` layers produce node embeddings. A four-layer MLP
    (``node_lin_*``) maps each embedding to ``dim_out`` class scores. A
    seven-layer MLP (``ats_*``) scores every edge from the embeddings of its
    two endpoints concatenated with the attention weight of that edge,
    averaged over heads and over the three conv layers.

    Args:
        dim_in: Size of the input node features.
        dim_h: Hidden size per attention head.
        dim_out: Number of node classes.
        heads: Number of attention heads used by every conv layer. It was
            previously hard-coded to 7 inside the layers while this argument
            only sized their inputs, so any other value failed with a shape
            mismatch.
        edge_dim: Edge feature dimensionality passed to the conv layers.
            Defaults to the previously hard-coded 6.
    """
    def __init__(self, dim_in, dim_h, dim_out, heads=2, edge_dim=6):
        super().__init__()
        # Width of the concatenated multi-head output of one conv layer.
        conv_out = dim_h * heads

        self.gat1 = GATv2Conv(dim_in, dim_h, edge_dim=edge_dim, heads=heads,
                              add_self_loops=False)
        self.gat2 = GATv2Conv(conv_out, dim_h, edge_dim=edge_dim, heads=heads,
                              add_self_loops=False)
        self.gat3 = GATv2Conv(conv_out, dim_h, edge_dim=edge_dim, heads=heads,
                              add_self_loops=False)

        # Node-classification head.
        self.node_lin_1 = Linear(conv_out, 252)
        self.node_lin_2 = Linear(252, 128)
        self.node_lin_3 = Linear(128, 64)
        self.node_lin_4 = Linear(64, dim_out)

        # Edge head. Its input is (embedding of node 1, embedding of node 2,
        # one aggregated attention score); for dim_h=52 and heads=7 that is
        # the previously hard-coded 729.
        self.ats_1 = Linear(2 * conv_out + 1, 256)
        self.ats_2 = Linear(256, 216)
        self.ats_3 = Linear(216, 128)
        self.ats_4 = Linear(128, 64)
        self.ats_5 = Linear(64, 32)
        self.ats_6 = Linear(32, 16)
        self.ats_7 = Linear(16, 2)

        # The optimizer lives on the model; train() reads `model.optimizer`.
        self.optimizer = torch.optim.Adam(self.parameters(),
                                          lr=0.0005, betas=(0.95, 0.999),
                                          weight_decay=1e-4)

    def forward(self, x, edge_index, edge_attr):
        """Run the network on one (batched) graph.

        Args:
            x: Node features.
            edge_index: Edge indices of shape ``[2, num_edges]`` (tensor
                form; the per-edge head indexes nodes with it).
            edge_attr: Edge features.

        Returns:
            A pair ``(node_log_probs, edge_scores)``: the log-softmax over
            the ``dim_out`` class scores of every node, and the sigmoid of
            the two edge-head outputs of every edge.
        """
        # Each conv layer returns (out, (edge_index, alpha)) when asked for
        # its attention weights; alpha has one column per head.
        h, (_, alpha1) = self.gat1(x, edge_index, edge_attr,
                                   return_attention_weights=True)
        ascore1 = alpha1.mean(dim=1, keepdim=True)
        h = F.dropout(F.elu(h), p=0.5, training=self.training)

        h, (_, alpha2) = self.gat2(h, edge_index, edge_attr,
                                   return_attention_weights=True)
        ascore2 = alpha2.mean(dim=1, keepdim=True)
        h = F.dropout(F.elu(h), p=0.5, training=self.training)

        h, (edge_ind, alpha3) = self.gat3(h, edge_index, edge_attr,
                                          return_attention_weights=True)
        ascore3 = alpha3.mean(dim=1, keepdim=True)

        # [2, num_edges] -> [num_edges, 2]
        edge_ind = edge_ind.t()

        # The last conv layer is activated as well before both heads.
        node_embeddings = F.elu(h)

        lin_out = torch.relu(self.node_lin_1(node_embeddings))
        lin_out = torch.relu(self.node_lin_2(lin_out))
        lin_out = torch.relu(self.node_lin_3(lin_out))
        lin_out = self.node_lin_4(lin_out)

        # Average the head-averaged attention of the three layers per edge.
        att_score_aggregaded = torch.stack(
            (ascore1, ascore2, ascore3), dim=0).mean(dim=0)

        # Embeddings of both endpoints: [num_edges, 2, F] -> [num_edges, 2*F],
        # then append the attention score as the last column.
        pairs = node_embeddings[edge_ind].flatten(start_dim=1)
        edge_emb = torch.cat((pairs, att_score_aggregaded), dim=1)

        att_out = torch.relu(self.ats_1(edge_emb))
        att_out = torch.relu(self.ats_2(att_out))
        att_out = torch.relu(self.ats_3(att_out))
        att_out = torch.relu(self.ats_4(att_out))
        att_out = torch.relu(self.ats_5(att_out))
        att_out = torch.relu(self.ats_6(att_out))
        att_out = torch.sigmoid(self.ats_7(att_out))

        return F.log_softmax(lin_out, dim=1), att_out

## Accuracy and training

In [None]:
def accuracy(pred_y, y):
    """Return the fraction of entries of ``pred_y`` that equal ``y``.

    The count of matches is divided by ``len(y)`` and returned as a Python
    float.
    """
    n_correct = (pred_y == y).sum()
    fraction = n_correct / len(y)
    return fraction.item()

def accuracy2(pred_yy, yy):
    """Return the fraction of rows whose target column is predicted confidently.

    The predictions are softmaxed along dim 1 so each row sums to 1. Entries
    where ``yy`` is not 1 are replaced by the value of ``yy`` at that
    position, so for one-hot ``yy`` only the probability given to the target
    column remains, e.g. ``[[0.0000, 0.4014], [0.6602, 0.0000], ...]``.
    A row counts as correct when the sum of its two columns exceeds 0.7.
    """
    probabilities = torch.softmax(pred_yy, dim=1)
    # Keep the probability only where the target is 1.
    kept = torch.where(yy == 1, probabilities, yy)
    target_prob = kept[:, 0] + kept[:, 1]
    confident = target_prob > .7
    return (torch.count_nonzero(confident) / len(yy)).item()

def _edge_bce_criterion(batch):
    """Build the weighted BCE loss for the edge targets of one batch.

    Every weight is 1, except column 0 of the edges whose two endpoints have
    the same node class, which is weighted 10.
    """
    endpoints = batch.edge_index.t()                    # [num_edges, 2]
    endpoint_class = batch.y[endpoints]
    same_class_or_not = endpoint_class[:, 0] == endpoint_class[:, 1]

    weight_yy = torch.ones(batch.yy.size(), device=batch.yy.device)
    weight_yy[same_class_or_not, 0] = 10
    return torch.nn.BCELoss(weight=weight_yy)


def _batch_losses_and_accuracies(model, batch, node_criterion):
    """Run the model on one batch; return (loss1, loss2, acc1, acc2)."""
    out1, out2 = model(batch.x, batch.edge_index, batch.edge_attr)
    loss1 = node_criterion(out1, batch.y)
    loss2 = _edge_bce_criterion(batch)(out2, batch.yy)
    acc1 = accuracy(out1.argmax(dim=1), batch.y)
    acc2 = accuracy2(out2, batch.yy)
    return loss1, loss2, acc1, acc2


def train(model,train_dl,valid_dl, epochs=150):
    """Train a GNN model and return the trained model.

    Args:
        model: Module returning ``(node_output, edge_output)`` from
            ``model(x, edge_index, edge_attr)`` and exposing its optimizer
            as ``model.optimizer``.
        train_dl: Iterable of training batches with ``x``, ``edge_index``,
            ``edge_attr``, ``y`` and ``yy`` attributes and a ``to(device)``
            method; must support ``len``.
        valid_dl: Validation batches, same requirements.
        epochs: Index of the last epoch. Epochs ``0..epochs`` inclusive are
            run (151 passes for the default), as before.

    Returns:
        The same ``model`` object, left in eval mode by the last validation
        pass.

    Notes:
        * Batches are moved to the device of the model's parameters instead
          of a global ``device``.
        * ``model.train()`` is called at the start of every epoch. It used
          to be called once, so after the first validation pass the model
          stayed in eval mode and dropout was off for all later epochs.
        * Running totals are Python floats (``.item()``), so the autograd
          graphs of earlier batches are not kept alive.
        * The reported validation loss is the sum of the two validation
          losses; it was never accumulated before and always printed 0.00.
    """
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = model.optimizer
    device = next(model.parameters()).device

    # Metrics are averaged per batch; guard against empty loaders.
    nt = max(len(train_dl), 1)
    nv = max(len(valid_dl), 1)

    for epoch in range(epochs + 1):
        # Training
        model.train()
        tot_loss1 = tot_loss2 = tot_acc = tot_acc2 = 0.
        for data in train_dl:
            data = data.to(device)
            optimizer.zero_grad()
            loss1, loss2, acc, acc2 = _batch_losses_and_accuracies(
                model, data, criterion)
            (loss1 + loss2).backward()
            optimizer.step()

            tot_loss1 += loss1.item()
            tot_loss2 += loss2.item()
            tot_acc += acc
            tot_acc2 += acc2

        tot_loss1 = tot_loss1 / nt
        tot_loss2 = tot_loss2 / nt
        tot_loss = tot_loss1 + tot_loss2
        tot_acc = tot_acc / nt
        tot_acc2 = tot_acc2 / nt

        # Validation
        model.eval()
        tot_v_loss1 = tot_v_loss2 = tot_v_acc = tot_v_acc2 = 0.
        with torch.no_grad():
            for v_data in valid_dl:
                v_data = v_data.to(device)
                v_loss1, v_loss2, v_acc, v_acc2 = _batch_losses_and_accuracies(
                    model, v_data, criterion)
                tot_v_loss1 += v_loss1.item()
                tot_v_loss2 += v_loss2.item()
                tot_v_acc += v_acc
                tot_v_acc2 += v_acc2

        tot_v_loss1 = tot_v_loss1 / nv
        tot_v_loss2 = tot_v_loss2 / nv
        tot_v_loss = tot_v_loss1 + tot_v_loss2
        tot_v_acc = tot_v_acc / nv
        tot_v_acc2 = tot_v_acc2 / nv

        # Print metrics every 10 epochs
        if(epoch % 10 == 0):
            print(f'Epoch {epoch:>3} |\n  '
                  f'Train Loss:  {tot_loss:.2f} | Val Loss:  {tot_v_loss:.2f}\n'
                  f'Train Loss1: {tot_loss1:.2f} | Train Loss2: {tot_loss2:.2f}\n '
                  f'Val Loss1: {tot_v_loss1:.2f} | Val Loss2: {tot_v_loss2:.2f}\n '
                  f'Train Acc 1: {tot_acc*100:.2f}% | Train Acc 2: {tot_acc2*100:.2f}%\n '
                  f'Val Acc1: {tot_v_acc*100:.2f}% | Val Acc2: {tot_v_acc2*100:.2f}% '

                  )

    return model

## Train

In [None]:
# Training dataset, built from the "dataset_train_full/" folder under `path`.
train_ds = floorPlanCad(path/"dataset_train_full/")
# Alternative: the small toy dataset.
#train_ds = floorPlanCad(path/"_mini_toy_dataset/")

In [None]:
# Validation dataset, built from the "dataset_validation_full" folder under `path`.
val_ds = floorPlanCad(path/"dataset_validation_full")
# Alternative: the small toy validation dataset.
#val_ds = floorPlanCad(path/"_mini_toy_dataset_validation/")

In [None]:
# Training batches of 15 samples, reshuffled every epoch.
train_dl = DataLoader(train_ds, batch_size=15, shuffle=True)

In [None]:
# Validation batches of 15 samples. Not shuffled: train() averages its
# metrics per batch, so a fixed batch composition keeps validation numbers
# comparable from epoch to epoch.
val_dl = DataLoader(val_ds, batch_size=15, shuffle=False)

In [None]:
# Model: input width from the dataset, 52 hidden units per head, 36 output classes, 7 heads.
gat = GAT(train_ds.num_features, 52, 36,heads=7)

In [None]:
# Move the model to the first CUDA device.
gat = gat.to('cuda:0')

Only the GAT layers are initialized with Kaiming; the linear layers should be initialized the same way too.

models/three_layers_52_hidden_150_ep_full_dataset_84_4__87_6_kaiming.pt

three_layers_52_hidden_150_ep_full_dataset_83_6__83_8_glorot
```Python
GAT(
  (gat1): GATv2Conv(4, 52, heads=7)
  (gat2): GATv2Conv(364, 52, heads=7)
  (gat3): GATv2Conv(364, 52, heads=7)
  (node_lin_1): Linear(364, 252, bias=True)
  (node_lin_2): Linear(252, 128, bias=True)
  (node_lin_3): Linear(128, 64, bias=True)
  (node_lin_4): Linear(64, 36, bias=True)
  (ats_1): Linear(729, 256, bias=True)
  (ats_2): Linear(256, 216, bias=True)
  (ats_3): Linear(216, 128, bias=True)
  (ats_4): Linear(128, 64, bias=True)
  (ats_5): Linear(64, 32, bias=True)
  (ats_6): Linear(32, 16, bias=True)
  (ats_7): Linear(16, 2, bias=True)
)
```

# kaiming_uniform batchsize:15

In [None]:
# Run training; train() prints metrics every 10 epochs and returns the model.
train(gat,train_dl,val_dl)

Epoch   0 |
  Train Loss:  3.46 | Val Loss:  0.00
Train Loss1: 2.50 | Train Loss2: 0.96
 Val Loss1: 2.38 | Val Loss2: 0.92
 Train Acc 1: 34.22% | Train Acc 2: 32.94%
 Val Acc1: 32.00% | Val Acc2: 11.66% 
Epoch  10 |
  Train Loss:  1.96 | Val Loss:  0.00
Train Loss1: 1.30 | Train Loss2: 0.66
 Val Loss1: 1.38 | Val Loss2: 0.71
 Train Acc 1: 62.81% | Train Acc 2: 66.29%
 Val Acc1: 61.09% | Val Acc2: 63.66% 
Epoch  20 |
  Train Loss:  1.43 | Val Loss:  0.00
Train Loss1: 0.89 | Train Loss2: 0.55
 Val Loss1: 0.91 | Val Loss2: 0.58
 Train Acc 1: 75.07% | Train Acc 2: 71.30%
 Val Acc1: 75.27% | Val Acc2: 73.15% 
Epoch  30 |
  Train Loss:  1.16 | Val Loss:  0.00
Train Loss1: 0.71 | Train Loss2: 0.45
 Val Loss1: 0.83 | Val Loss2: 0.53
 Train Acc 1: 80.04% | Train Acc 2: 76.60%
 Val Acc1: 77.65% | Val Acc2: 72.75% 
Epoch  40 |
  Train Loss:  1.03 | Val Loss:  0.00
Train Loss1: 0.64 | Train Loss2: 0.39
 Val Loss1: 0.74 | Val Loss2: 0.48
 Train Acc 1: 82.21% | Train Acc 2: 79.00%
 Val Acc1: 79.92% 

GAT(
  (gat1): GATv2Conv(4, 52, heads=7)
  (gat2): GATv2Conv(364, 52, heads=7)
  (gat3): GATv2Conv(364, 52, heads=7)
  (node_lin_1): Linear(364, 252, bias=True)
  (node_lin_2): Linear(252, 128, bias=True)
  (node_lin_3): Linear(128, 64, bias=True)
  (node_lin_4): Linear(64, 36, bias=True)
  (ats_1): Linear(729, 256, bias=True)
  (ats_2): Linear(256, 216, bias=True)
  (ats_3): Linear(216, 128, bias=True)
  (ats_4): Linear(128, 64, bias=True)
  (ats_5): Linear(64, 32, bias=True)
  (ats_6): Linear(32, 16, bias=True)
  (ats_7): Linear(16, 2, bias=True)
)

In [None]:
# Save only the weights (state dict) of the trained model.
torch.save(gat.state_dict(), path/"models/three_layers_52_hidden_150_ep_full_dataset_84_4__87_6_kaiming.pt")

In [None]:
#gat.load_state_dict(torch.load(path/"models/84_72_100_7_layers_att_sigmoid_beta_95_1e-4-full_dataset.pt"))

In [None]:
# Maps a class number (as a string) to a readable class name.
num_to_name = {}

In [None]:
# Default every class number 1..36 to its own number as the name.
num_to_name.update({str(number): str(number) for number in range(1, 37)})

In [None]:
# Readable names for the classes that have one; other numbers keep whatever
# value they already hold.
num_to_name.update({
    '1': 'wall',
    '3': 'single door',
    '4': 'double door',
    '5': 'sliding door',
    '9': 'window-9',
    '13': 'sofa',
    '14': 'bed',
    '15': 'chair',
    '16': 'table',
    '17': 'tv cabinet',
    '18': 'wardrobe',
    '19': 'cabinet',
    '20': 'refrigerator',
    '22': 'gas stove',
    '23': 'sink',
    '29': 'toilet',
    '31': 'elevator',
})



## Inference

In [None]:
def infer(model,v_data):
    """Run ``model`` on a single sample without tracking gradients.

    The sample is moved to the device of the model's parameters (previously
    a hard-coded ``'cuda:0'``). The data, node-output shape and edge-output
    shape are printed.

    Side effects:
        * ``v_data.edge_index`` and ``v_data.edge_attr`` are overwritten in
          place with their self-loop-free versions. This is kept on purpose:
          callers that index the returned edge output with
          ``v_data.edge_index`` afterwards rely on the two staying aligned.
        * ``model`` is switched to eval mode and left there.

    Returns:
        The pair ``(node_output, edge_output)`` returned by the model.
    """
    device = next(model.parameters()).device
    with torch.no_grad():
        v_data.edge_index,v_data.edge_attr = remove_self_loops(v_data.edge_index,v_data.edge_attr)
        model.eval()
        v_data=v_data.to(device)
        v_out1,v_out2 = model(v_data.x, v_data.edge_index,v_data.edge_attr)
        print(
            f'data: {v_data} |\n |'
            f'class_shape: {v_out1.shape} | '
            f'instance_shape: {v_out2.shape}')
    return v_out1,v_out2

In [None]:
# Load one processed validation sample from disk and display it.
data = torch.load(path/"_mini_toy_dataset_validation/processed/d_0969-0027.pt")
# Alternative samples from the full validation set:
#data = torch.load(path/"dataset_validation_full/processed/d_0001-0072.pt")
#data = torch.load(path/"dataset_validation_full/processed/d_0000-0003.pt")
data

Data(x=[720, 4], edge_index=[2, 10446], edge_attr=[10446, 6], y=[720], yy=[10446, 2])

::: {.callout-important}
## or dallas presentation
Lots of manipulation was needed to implement the paper; use pdb to explain where and how.
:::

In [None]:
# v1: node-level output, v2: edge-level output of the model for this sample.
v1,v2 = infer(gat,data)
#v1.shape,v2.shape

# Check Accuracy2

In [None]:
# Shape of the edge-level output.
v2.size()

In [None]:
# Edge-level targets stored on the sample.
data.yy

In [None]:
# Edge-level output of the model.
v2

In [None]:
# Softmax over the two columns of the *targets* (not the predictions).
# The former `input = v2` line was unused here and shadowed the builtin
# `input`, so it has been dropped.
m = torch.nn.Softmax(dim=1)
output = m(data.yy)
output

In [None]:
# Softmax over the two columns of the edge predictions, as accuracy2() does.
# The local is named `edge_scores` rather than `input` so the builtin
# `input` is not shadowed.
m = torch.nn.Softmax(dim=1)
edge_scores = v2
output = m(edge_scores)
output

In [None]:
#vv = v2[:10,:].where(yy10 == 1 ,yy10)

In [None]:
# Keep the softmaxed value where the target is 1; elsewhere take the target
# value itself.
op = torch.where(data.yy == 1, output, data.yy)
op

In [None]:
# Collapse the two columns into one value per edge.
op = op[:,0] + op[:,1]
op

I think it is working

In [None]:
# Edges whose collapsed value exceeds 0.5 (accuracy2() uses 0.7 instead).
opTrue = (op > .5)

print(opTrue)

# Number of edges above the threshold.
torch.count_nonzero(opTrue)

In [None]:
# NOTE(review): presumably a count of edges above the threshold divided by the
# total edge count from an earlier run; neither number is produced in the visible cells. TODO confirm.
9537/12542

In [None]:
# Raw SVG drawing for the loaded sample (same "0969-0027" id as the processed file).
#visualisation = path/"dataset_validation_full/raw/0001-0072.svg"
visualisation = path/"_mini_toy_dataset_validation/raw/0969-0027.svg"

In [None]:
# Read the drawing into parallel sequences of paths and attribute mappings
# (presumably svgpathtools.svg2paths; confirm against the imports).
paths, attributes = svg2paths(visualisation)

## **This part checks whether the model can predict the real connections between the members of an instance.**

#### **Filter the attributes and paths taken from the drawing (not from the model results)**

In [None]:
# Keep only the drawing elements that have both a semantic id and an
# instance id, with each attribute mapping paired to its own path.
#
# Fixes over the previous version:
#   * `('semantic-id' and 'instance-id') in att` only tested 'instance-id';
#     both keys are now checked, so `att['semantic-id']` cannot raise.
#   * `attributes.index(att)` returned the first *equal* mapping, so
#     duplicated attribute dicts were paired with the wrong path (and the
#     lookup was quadratic); paths and attributes are now zipped.
count = 0  # unused in the visible cells; kept in case later cells read it
atts = []
pths = []
#available class list in the drawing
drw_sem_id_list = []
for pth, att in zip(paths, attributes):
    if 'semantic-id' in att and 'instance-id' in att:
        atts.append(att)
        pths.append(pth)
        sem_id = int(att['semantic-id'])
        if sem_id not in drw_sem_id_list:
            drw_sem_id_list.append(sem_id)

len(atts),len(pths), sorted(drw_sem_id_list)

## **Vertex Prediction**

### **Number of classes in this particular prediction**

In [None]:
# Distinct class indices predicted for the nodes (argmax over the columns of v1).
prediction_sem_id_list = set(v1.argmax(dim=1).tolist())

In [None]:
# How many distinct classes were predicted, and which ones.
len(prediction_sem_id_list), prediction_sem_id_list

In [None]:
# Predicted class index of every node, as a plain list.
preds = v1.argmax(dim=1).tolist()
len(preds)

In [None]:
# Group node indices by predicted class: {class index as str: [node indices]}.
vertex_dict = {str(class_id): [] for class_id in prediction_sem_id_list}

for node_idx, predicted_class in enumerate(preds):
    vertex_dict[str(predicted_class)].append(node_idx)

#print(vertex_dict['15'])

In [None]:
# Node indices grouped by predicted class.
vertex_dict

In [None]:
# All edges as rows of (first endpoint, second endpoint): [2, num_edges] -> [num_edges, 2].
pairs = rearrange(data.edge_index, 'h w -> w h')
len(pairs), pairs.size()

In [None]:
#data.yy[:10]  same_class_or_not

This is for the loss weight matrix.

In [None]:
# Class labels of both endpoints of every edge ...
edge_vertex_class = data.y[pairs]
# ... and a boolean per edge: do the two endpoints share a class?
same_class_or_not = edge_vertex_class[:,0] == edge_vertex_class[:,1]


In [None]:
# Shape of the per-edge endpoint labels.
edge_vertex_class.size()

In [None]:
# Edge-level output together with its column-wise mean.
v2,v2.mean(dim=0)

In [None]:
# Boolean mask over edges: keep an edge when its first score is larger than
# its second and the second is below 0.5.
# Earlier variant with a fixed threshold on the first score:
#possibility_tensor_bool = (v2[:,0]>0.8) & (v2[:,1]<0.5)
possibility_tensor_bool = (v2[:,0]>v2[:,1]) & (v2[:,1]<0.5)
possibility_tensor_bool

In [None]:
# Keep only the edges selected by the mask.
pairs = pairs[possibility_tensor_bool]

In [None]:
# Convert the kept edges to a plain Python list of [first, second] pairs.
pairs = pairs.tolist()
pairs[0],len(pairs)

In [None]:
# Per predicted class: the edges inside that class (edge_dict) and their
# positions in `pairs` (edge_dict_idx). Both start out empty.
edge_dict = {str(class_id): [] for class_id in prediction_sem_id_list}
edge_dict_idx = {str(class_id): [] for class_id in prediction_sem_id_list}

In [None]:
# For each class, collect the edges whose two endpoints were both predicted
# as that class, together with their positions in `pairs`.
for k, v in vertex_dict.items():
    # Build the membership set once per class; testing against the list
    # inside the inner loop made every lookup linear in the class size.
    members = set(v)
    for idx, i in enumerate(pairs):
        if i[0] in members and i[1] in members:
            edge_dict[k].append(i)
            edge_dict_idx[k].append(idx)

In [None]:
# Positions in `pairs` of the kept edges, grouped by class.
edge_dict_idx

In [None]:
# Remove the vertices that have no connection inside their class.
# This can be seen as a verification of the previous step; it is optional
# if vertex_dict is no longer needed.
#
# The previous version called list.remove() on the list it was iterating,
# which skips the element after every removal and so left some unconnected
# vertices behind. The list is now rebuilt in place.
for k, v in edge_dict.items():
    nodes = {item[0] for item in v} | {item[1] for item in v}
    # Slice assignment keeps the same list object.
    vertex_dict[k][:] = [i for i in vertex_dict[k] if i in nodes]
    

In [None]:
# links = [
#     (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (1, 7), (7, 9), (7, 8), (8, 9), # Component 1
#     (10, 11), (11, 12), (10, 12) # Component 2
# ]

# # Create list of all nodes
# nodes = set()
# nodes = nodes.union(set(item[0] for item in links))
# nodes = nodes.union(set(item[1] for item in links))

# # Create a data structure in the form of  from -> List<to>
# # {"1":[2,7],"2":[3],"3":[4],"4":[5],"5":[6],"7":[9,8],"8":[9],"10":[11,12],"11":[12]}
# links_by_nodes = {}
# for x, y in links:
#     if x in links_by_nodes:
#         links_by_nodes[x].append(y)
#     else:
#         links_by_nodes[x] = [y]

def _as_undirected(links_by_nodes):
    """Return a copy of the adjacency mapping with every link in both directions."""
    adjacency = {}
    for source, targets in links_by_nodes.items():
        for target in targets or ():
            adjacency.setdefault(source, []).append(target)
            adjacency.setdefault(target, []).append(source)
    return adjacency


def find_connected_components(all_nodes, links_by_nodes):
    """Group ``all_nodes`` into connected components.

    Args:
        all_nodes: Iterable of node ids; every id ends up in one component.
        links_by_nodes: Mapping ``node -> list of linked nodes``. A link may
            be listed in one direction only.

    Returns:
        A list of sets, one per component, in the order their first node
        appears in ``all_nodes``.

    Links are treated as undirected. Previously a link stored only as
    ``b -> a`` was missed when ``a`` was visited before ``b``, so the
    result depended on the iteration order of ``all_nodes``.
    """
    adjacency = _as_undirected(links_by_nodes)
    connected_components = []
    all_component_nodes = set()
    for node in all_nodes:
        if node in all_component_nodes:
            continue
        component = set()
        dfs(adjacency, component, all_component_nodes, node)
        connected_components.append(component)
    return connected_components


def dfs(links_by_nodes, component, all_component_nodes, node):
    """Add ``node`` and everything reachable from it to ``component``.

    Follows ``links_by_nodes`` exactly as given (no direction is added) and
    skips nodes already in ``all_component_nodes``. Both ``component`` and
    ``all_component_nodes`` are updated in place.

    The traversal uses an explicit stack instead of recursion, so a long
    chain of nodes cannot hit Python's recursion limit.
    """
    component.add(node)
    all_component_nodes.add(node)

    pending = [node]
    while pending:
        current = pending.pop()
        for linked_node in links_by_nodes.get(current) or ():
            if linked_node in all_component_nodes:
                continue
            # Mark on push so a node is never queued twice.
            component.add(linked_node)
            all_component_nodes.add(linked_node)
            pending.append(linked_node)
        


In [None]:
# Notebook display: for each class, the edges collected from `pairs`.
edge_dict

In [None]:
# Split the vertices of every class into instances: build an adjacency map
# (first endpoint -> list of second endpoints) from the class edges and let
# find_connected_components group the class vertices with it.
instance_dict = {}
for k, v in edge_dict.items():
    links_by_nodes = {}
    for x, y in v:
        # Create the neighbour list on first use, then append to it.
        links_by_nodes.setdefault(x, []).append(y)
    instance_dict[k] = find_connected_components(vertex_dict[k], links_by_nodes)
    

In [None]:
# Flatten the per-class instances into a list of single-entry dicts of the
# form {semantic_key: [pths[idx] for every index of the instance]}.
the_list = []
for semantic_key, instance_index_lists in instance_dict.items():
    for instance_indexes in instance_index_lists:
        # Look up the path belonging to every index of this instance.
        temp_list = [pths[idx] for idx in instance_indexes]
        the_dict = {semantic_key: temp_list}
        the_list.append(the_dict)

In [None]:
def boundingbox(xmin,xmax,ymin,ymax):
    """Build a rectangular path from bounding-box extents.

    Each corner is formed by adding an x extent to a y extent, so the y
    extents are expected to already be imaginary numbers (this is the form in
    which ``min_max_bbox`` returns them).

    Args:
        xmin, xmax: Horizontal extents.
        ymin, ymax: Vertical extents, as imaginary numbers.

    Returns:
        The object produced by ``svg_path`` from four ``Line`` segments that
        run corner to corner and end where the first one starts.
    """
    # Corners in the order in which the outline is traversed.
    corner_1 = xmin + ymin
    corner_2 = xmax + ymin
    corner_3 = xmax + ymax
    corner_4 = xmin + ymax
    edges = (
        Line(start=corner_1, end=corner_2),
        Line(start=corner_2, end=corner_3),
        Line(start=corner_3, end=corner_4),
        Line(start=corner_4, end=corner_1),
    )
    return svg_path(*edges)
# def min_max_bbox(instance_paths):    
#     real_list = []
#     imag_list = []
#     for path in instance_paths:
#         # print(path)
#         if isinstance(path[0],Line):
#             real_list.extend((path[0].start.real,path[0].end.real))
#             imag_list.extend((path[0].start.imag,path[0].end.imag))
#         elif isinstance(path[0],Arc):
#             if path[0].start.real < path[0].end.real:
#                 center_real = (path[0].start.real + path[0].radius.real) 
#                 center_imag = path[0].start.imag                
#                 bottom_real = center_real
#                 bottom_imag = center_imag - path[0].radius.imag
#                 #print(f'bottom: {center_imag,bottom_imag}')
#                 real_list.extend((path[0].start.real, path[0].end.real, bottom_real))
#                 imag_list.extend((path[0].start.imag, path[0].end.imag, bottom_imag))
#             if path[0].start.real > path[0].end.real:
#                 center_real = (path[0].start.real - path[0].radius.real)  
#                 center_imag = path[0].start.imag
#                 top_real = center_real 
#                 top_imag = center_imag + path[0].radius.imag
#                 #print(f'top: {center_imag,bottom_imag}')
#                 real_list.extend((path[0].start.real,path[0].end.real, top_real))
#                 imag_list.extend((path[0].start.imag,path[0].end.imag, top_imag))
def min_max_bbox(instance_paths):
    """Estimate the bounding box of a group of paths.

    Only the first segment of every path is considered, and it is sampled at
    three points: its start, its end and its midpoint (``point(0.5)``). The
    box is therefore an approximation for segments that bulge beyond those
    three samples.

    Args:
        instance_paths: Iterable of indexable paths whose first element has
            ``start``, ``end`` and ``point(t)`` returning complex numbers
            (real part = x, imaginary part = y).

    Returns:
        ``(xmin, xmax, ymin, ymax)`` where the x extents are real numbers and
        the y extents are multiplied by ``1j`` (imaginary numbers).

    Raises:
        ValueError: If ``instance_paths`` is empty.
    """
    xs = []
    ys = []
    for path in instance_paths:
        segment = path[0]
        samples = (segment.start, segment.end, segment.point(0.5))
        xs.extend(sample.real for sample in samples)
        ys.extend(sample.imag for sample in samples)

    return min(xs), max(xs), min(ys) * 1j, max(ys) * 1j
           

In [None]:
# Compute a bounding box for every extracted instance, skipping class '1'.
instance_borders = []
# Label of each box (the semantic key at this point).
instance_text = []
# This is where the text goes: the first segment of each bounding box.
instance_path = []
for i in the_list:
    # Each entry is a single-item dict: {semantic key: paths of the instance}.
    ins_name, ins_lines = list(i.items())[0]
    if ins_name == '1':
        continue
    mmx = min_max_bbox(ins_lines)
    bbx_borders = boundingbox(*mmx)
    instance_borders.append(bbx_borders)
    instance_text.append(ins_name)
    instance_path.append(bbx_borders[0])

In [None]:
# Translate every collected label through the `num_to_name` lookup.
tmp_list = [num_to_name[label] for label in instance_text]

In [None]:
# Replace the labels with their `num_to_name` translations built in `tmp_list`.
# NOTE(review): after this rebinding, re-running the cell that builds
# `tmp_list` would look up the already-translated values in `num_to_name`.
instance_text=tmp_list

In [None]:
# One attribute dict per bounding box: thin red outline without fill.
# A new dict is created for every box, so they can be modified independently.
border_atts = [
    {'stroke': 'rgb(255,0,0)', 'fill': 'none', 'stroke-width': '0.2'}
    for _ in instance_borders
]

In [None]:
# Write the result to 'S____100.svg': the original paths followed by the
# bounding boxes, with their attribute lists concatenated in the same order.
# Each label in `instance_text` is paired with the entry of `instance_path`
# at the same position.
# NOTE(review): `wsvg` is presumably svgpathtools' SVG writer — confirm the
# meaning of `text`, `text_path` and `font_size` against its documentation.
wsvg(pths+instance_borders,attributes= atts+border_atts,font_size=2, text=instance_text, text_path=instance_path, filename='S____100.svg') 

If semantic segmentation underperforms, then:
- If it skips a connection that is necessary (false negatives), it is destructive, because it could lead us to find many more instances than there actually are.
- If it produces false positives for a class, I think it is less destructive, because we can filter them out during instance extraction.

If instance extraction underperforms, then:
- If it creates false positives, it leads us to find far fewer instances than there actually are.
- if it creates false negatives