# Version 2 (sparse matrices...)
### I created the last notebook naively and completely missed that the discusses method operates on sparse matrices, so I'm starting again from scratch

# Generating a sparse matrix representation of a graph...

In [1]:
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

num_nodes = 100
average_out_degree = 9
total_edge_classes = 36
embedding_size = 128
output_dim = 4



# First we generate some arbitrary graph
def generate_graph(num_nodes, average_out_degree, total_edge_classes):
    num_edges = average_out_degree*num_nodes
    indices = torch.randint(0, num_nodes, (2, num_edges))
    print(indices.size())
    edges = torch.ones(num_edges)
    print(edges.size())
    A = torch.sparse_coo_tensor(indices, edges, (num_nodes, num_nodes))
    return A
# A = generate_graph(15,3,5).to_dense()

# Now maybe it's useful to ensure it's connected...
def generate_connected_graph(num_nodes, average_out_degree, total_edge_classes, embedding_size):
    edges_set = set()
    num_edges = average_out_degree * num_nodes

    def sample_random_edge_class():
        return torch.randint(0, total_edge_classes, (1,)).item()
    
    def generate_random_edge():
        u, v = torch.randint(0, num_nodes, (2,)).tolist()
        #no self loops
        while u == v:
            u, v = torch.randint(0, num_nodes, (2,)).tolist()
        return (u, v)
    
    def generate_random_node():
        return torch.randint(0, num_nodes, (1,)).item()
    
    def sample_random_node(A):
        connected_nodes = A._indices()[1].tolist() #select from incoming to...
        if not connected_nodes:
            return generate_random_node()
        return connected_nodes[torch.randint(0, len(connected_nodes), (1,)).item()]#torch.nonzero(A)
    
    initial_edge = generate_random_edge()
    edges_set.add(initial_edge)
    indices = torch.tensor([[initial_edge[0]], [initial_edge[1]]], dtype=torch.long, device=device)
    edges = torch.ones(1, device=device)
    A = torch.sparse_coo_tensor(indices, edges, (num_nodes, num_nodes), device=device)
    A_list = []
    for edge_class in range(total_edge_classes):
        for _ in range(num_edges - 1):
            u = sample_random_node(A)
            v = generate_random_node()
            while u == v:
                v = generate_random_node()
            new_edge = (u, v)#torch.cat([u, v], dim=0).unsqueeze(1)
            if new_edge in edges_set:
                continue
            edges_set.add(new_edge)
            edge_tensor = torch.tensor([[u], [v]], dtype=torch.long, device=device)
            indices = torch.cat([indices, edge_tensor], dim=1)
            edges = torch.ones(indices.size(1), device=device)
            A = torch.sparse_coo_tensor(indices, edges, (num_nodes, num_nodes), device=device)
            # indices = torch.stack((indices,torch.cat([u,v]).unsqueeze(1)),dim=0)
        # print(edges_set)
        A_list.append(A)
    random_labels = torch.rand((num_nodes, embedding_size), device=device)
    return A_list, random_labels 

A, X = generate_connected_graph(num_nodes,average_out_degree,total_edge_classes, embedding_size)

# Now per edge type? or randomly assign edges...? maybe multinomial_sample(1/k)^k?

In [2]:
device

device(type='cuda')

In [5]:
# print(torch.cuda.is_available())
print(torch.cuda.device_count())

1


In [None]:
torch.cuda.current_device

# GCN

In [3]:
import torch.nn as nn
import torch.functional as f

k_hop = 3

def normalize_adjacency(A, self=True): # Self-loop doesn't work with R-GCN
    size = A.size()[0]
    A = torch.add(torch.eye(size,device=device).to_sparse(), A)
    degree = torch.sparse.sum(A, dim=1).to_dense()
    # print(degree.size())
    d_inv_sqrt = degree.pow(-0.5)
    D_inv_sqrt = torch.diag(d_inv_sqrt)
    A = A.to_dense()
    normalized_A = D_inv_sqrt @ A @ D_inv_sqrt
    return normalized_A
A_prime = normalize_adjacency(A)


class GCNLayer(nn.Module):
    def __init__(self, x_dim, y_dim):
        super(GCNLayer, self).__init__()
        self.x_dim = x_dim
        self.y_dim = y_dim
        # self.A_norm = A_norm

        self.lin = nn.Linear(x_dim, y_dim)

    def forward(self, A_norm, X):
        device = next(self.parameters()).device
        # print(A_norm, X)
        transformed = self.lin(X)
        aggregated = torch.matmul(A_norm, transformed)
        return aggregated


class GCN(nn.Module):
    def __init__(self, x_dim, h_dim, y_dim, max_k_hop):
        super(GCN, self).__init__()
        self.max_k_hop =max_k_hop
        self.x_dim = x_dim
        self.gcns = nn.ModuleList([GCNLayer(x_dim, h_dim) for _ in range(max_k_hop)])
        self.final_gcn = GCNLayer(x_dim, y_dim) 
        self.act = nn.ReLU()
        self.drop = nn.Dropout(p=0.2)
        # self.norm = nn.LayerNorm(y_dim)
    
    def forward(self, A, X):
        # outer layers
        device = next(self.parameters()).device
        for i in range(self.max_k_hop):
            H = self.gcns[i](A, X)
            H = self.act(H)
            H = self.drop(H)
        Y = self.final_gcn(A, H)
        out = self.act(Y)
        return out#nn.LogSoftmax(Y)
model = GCN(X.size(1), embedding_size, output_dim, k_hop).to(device)
out = model(A_prime, X)


AttributeError: 'list' object has no attribute 'size'

# R-GCN

### Block diagonal weight matrix (one is held in memory for each relational weight per layer, so block diagonal sparse matrices save some memory at the cost of some layer-level information flow)

In [4]:
block_size = embedding_size//4 #some partition... ensure it's a round number
relation_weights = [torch.randn(block_size, block_size), torch.randn(block_size, block_size)]

block_diag_matrix = torch.block_diag(*relation_weights).to_sparse()
block_diag_matrix.to_dense()

tensor([[ 1.5196, -1.4885,  1.5626,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.3460,  0.2630,  0.4484,  ...,  0.0000,  0.0000,  0.0000],
        [-1.5315, -0.1715, -2.4562,  ...,  0.0000,  0.0000,  0.0000],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ..., -0.0429,  1.1042,  0.8317],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.4913,  0.2087,  1.9447],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0329,  1.9868,  0.5162]])

In [5]:
print( embedding_size//4)

32


# R-GCN model:

In [6]:
def normalize_adjacency(A, self=True): # Self-loop doesn't work with R-GCN
    size = A.size()[0]
    A = torch.add(torch.eye(size,device=device).to_sparse(), A)
    degree = torch.sparse.sum(A, dim=1).to_dense()
    # print(degree.size())
    d_inv_sqrt = degree.pow(-0.5)
    D_inv_sqrt = torch.diag(d_inv_sqrt)
    A = A.to_dense()
    normalized_A = D_inv_sqrt @ A @ D_inv_sqrt
    return normalized_A
# A_prime = normalize_adjacency(A)

A_list = [normalize_adjacency(a, False) for a in A]


##save non-error version
def block_diag (weights):
    block_diag_matrix = torch.block_diag(*weights).to_sparse()
    return block_diag_matrix
    

class R_GCNLayer(nn.Module):
    def __init__(self, x_dim, y_dim, num_relations, block_split):
        super(R_GCNLayer, self).__init__()
        self.x_dim = x_dim
        self.y_dim = y_dim
        self.num_relations = num_relations
        gain = nn.init.calculate_gain('relu')
        max_block_size = 1
        for i in range(1,11):
            if x_dim % i == 0 and y_dim % i == 0:
                max_block_size +=1 
        x_block_size = x_dim // block_split
        y_block_size = y_dim // block_split
        self.W = nn.ParameterList()
        for _ in range(num_relations):
            wr = nn.ParameterList()
            # relation_weights = []
            for i in range(block_split):
                w = nn.Parameter(torch.randn(x_block_size, y_block_size)) 
                nn.init.kaiming_uniform_(w, a=gain)
                # relation_weights.append(w)
             #move blocks to params, not block diag
            # print(block_diag_matrix)
                wr.append(w)
                # self.Wr.append(wr)
            self.W.append(wr)

        self.bias = nn.Parameter(torch.zeros(y_dim))
        nn.init.zeros_(self.bias)

#dense version change forward loop such that weigted is matmul, etc of blockdiag(w)
# def block_diag (weights):
#     block_diag_matrix = torch.block_diag(*weights)
#     return block_diag_matrix
    

# class R_GCNLayer(nn.Module):
#     def __init__(self, x_dim, y_dim, num_relations, block_split):
#         super(R_GCNLayer, self).__init__()
#         self.x_dim = x_dim
#         self.y_dim = y_dim
#         self.num_relations = num_relations
#         gain = nn.init.calculate_gain('relu')
#         max_block_size = 1
#         for i in range(1,11):
#             if x_dim % i == 0 and y_dim % i == 0:
#                 max_block_size +=1 
#         x_block_size = x_dim // block_split
#         y_block_size = y_dim // block_split
#         self.W = nn.ParameterList()
#         for _ in range(num_relations):
#             # wr = nn.ParameterList()
#             relation_weights = []
#             for i in range(block_split):
#                 w = torch.randn(x_block_size, y_block_size)
#                 relation_weights.append(w)
#                 nn.init.kaiming_uniform_(block_diag_dense, a=gain)
#             block_diag_dense = block_diag(relation_weights)
#             # print(block_diag_dense)
#              #move blocks to params, not block diag
#             # print(block_diag_matrix)
#                 # wr.append(w)
#             self.W.append(block_diag_dense)
#             # self.W.append(wr)

#         self.bias = nn.Parameter(torch.zeros(y_dim))
#         nn.init.zeros_(self.bias)
        

    

    def forward(self, A, X):
        device = next(self.parameters()).device
        aggregated = torch.zeros((X.size(0), self.y_dim), device=device)
        # print(self.W[1], self.W[1].size())
        for r in range(self.num_relations):
            # print(block_diag(self.W[r]).to_dense())
            # print(X.size(), self.Wr[r])
            weighted = torch.matmul(X, block_diag(self.W[r]))  # (num_nodes, out_dim)
            # print(weighted.size())
            transformed = torch.sparse.mm(A[r], weighted)
            # aggregated_r = torch.matmul(A_norm, transformed)
            # print(aggregated.size(), transformed.size())
            aggregated += transformed
        aggregated += self.bias
        return aggregated


class R_GCN(nn.Module):
    def __init__(self, x_dim, h_dim, y_dim, max_k_hop, num_relations):
        super(R_GCN, self).__init__()
        self.max_k_hop =max_k_hop
        self.num_relations = num_relations
        self.x_dim = x_dim
        self.block_split = 2
        self.gcns = nn.ModuleList([R_GCNLayer(x_dim, h_dim, num_relations, self.block_split) for _ in range(max_k_hop)])
        self.final_r_gcn = R_GCNLayer(x_dim, y_dim,num_relations, self.block_split) 
        self.act = nn.ReLU()
        self.drop = nn.Dropout(p=0.2)
        # self.norm = nn.LayerNorm(y_dim)
        self.norm = nn.BatchNorm1d(y_dim)


    
        
    def forward(self, A, X):
        # outer layers
        device = next(self.parameters()).device
        for i in range(self.max_k_hop):
            H = self.gcns[i](A, X)
            H = self.act(H)
            H = self.drop(H)
        Y = self.final_r_gcn(A, H)
        out = self.act(Y)
        out = self.norm(out)
        return out#nn.LogSoftmax(Y)
model = R_GCN(X.size(1), embedding_size, output_dim, k_hop, total_edge_classes).to(device)
out = model(A_list, X)
out

tensor([[ 0.5920,  1.1350,  0.0000, -1.1087],
        [ 0.6620, -0.4539,  0.0000, -0.0728],
        [ 0.0807,  1.4868,  0.0000,  1.1986],
        [-0.6910, -0.4539,  0.0000, -1.5395],
        [ 0.5642, -0.4539,  0.0000, -0.0079],
        [ 1.0080, -0.0392,  0.0000, -0.8843],
        [ 0.8802, -0.4539,  0.0000, -0.5808],
        [-0.3409,  2.0548,  0.0000,  0.5090],
        [ 0.3340, -0.4431,  0.0000,  1.3803],
        [-1.4383, -0.4539,  0.0000, -0.4917],
        [ 0.7711,  4.2626,  0.0000,  1.4262],
        [ 1.4438,  0.7113,  0.0000,  1.2710],
        [-0.6211, -0.4539,  0.0000,  0.3680],
        [ 0.8349, -0.4539,  0.0000,  0.3027],
        [-0.5995,  1.3687,  0.0000, -1.4483],
        [ 1.0016,  0.2350,  0.0000, -0.5727],
        [-0.1088,  0.0843,  0.0000,  0.6895],
        [ 0.5223, -0.1109,  0.0000, -0.4441],
        [ 1.5128, -0.4539,  0.0000,  2.0340],
        [-1.3691, -0.4539,  0.0000, -0.5100],
        [-1.4959,  1.6615,  0.0000, -0.4773],
        [-0.0174, -0.4539,  0.0000

In [7]:
output_dim

4

# Train

In [8]:
from torchviz import make_dot
from tqdm import tqdm
criterion = nn.CrossEntropyLoss()#nn.NLLLoss()
params = list(model.parameters())
# params.extend(list(classifier.parameters()))
optimizer = torch.optim.Adam(params,lr=0.01)

num_epochs = 600

true_labels = torch.randint(0, output_dim, (num_nodes,), dtype=torch.long).to(device)

# print(true_labels)

def save_gradient_hook(grad):
    gradients.append(grad)


for epoch in tqdm(range(num_epochs)):#, desc = f"epoch {epoch}/{num_epochs}"):
    model.train()
    epoch_loss = 0.0
    correct = 0
    total = 0
    i = 0

    predicted_labels = model(A_list, X)
    
    loss = criterion(predicted_labels, true_labels)

    # for param in params:
    #     param.register_hook(save_gradient_hook)
    
    # if i % batch_size == 0:
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    i += 1
    epoch_loss += loss.item()

    _, predicted = torch.max(predicted_labels.data, 1)
    total += true_labels.size(0)
    correct += (predicted == true_labels).sum().item()
    avg_loss = epoch_loss
    accuracy = 100 * correct / total
    if epoch % 5 == 0:
        print(f"epoch: {epoch}\navg loss: {avg_loss} accuracy: {accuracy}")
        # make_dot(predicted_labels, params=dict(list(model.named_parameters()))).render("r_gcn_torchviz", format="png")


           

  0%|                                           | 1/600 [00:00<01:34,  6.32it/s]

epoch: 0
avg loss: 1.5931912660598755 accuracy: 24.0


  1%|▌                                          | 7/600 [00:01<02:08,  4.62it/s]

epoch: 5
avg loss: 1.3879225254058838 accuracy: 38.0


  2%|▊                                         | 12/600 [00:02<01:15,  7.83it/s]

epoch: 10
avg loss: 1.3931773900985718 accuracy: 30.0


  3%|█                                         | 16/600 [00:02<01:09,  8.40it/s]

epoch: 15
avg loss: 1.3721710443496704 accuracy: 38.0


  4%|█▍                                        | 21/600 [00:04<02:22,  4.06it/s]

epoch: 20
avg loss: 1.381373405456543 accuracy: 33.0


  4%|█▊                                        | 26/600 [00:05<02:34,  3.70it/s]

epoch: 25
avg loss: 1.3520781993865967 accuracy: 34.0


  5%|██▏                                       | 31/600 [00:06<02:32,  3.74it/s]

epoch: 30
avg loss: 1.3763445615768433 accuracy: 31.0


  6%|██▌                                       | 36/600 [00:08<02:34,  3.65it/s]

epoch: 35
avg loss: 1.380155086517334 accuracy: 33.0


  7%|██▊                                       | 41/600 [00:09<02:29,  3.73it/s]

epoch: 40
avg loss: 1.3595530986785889 accuracy: 30.0


  8%|███▏                                      | 46/600 [00:10<02:28,  3.74it/s]

epoch: 45
avg loss: 1.3274924755096436 accuracy: 37.0


  8%|███▌                                      | 51/600 [00:12<02:30,  3.66it/s]

epoch: 50
avg loss: 1.360823631286621 accuracy: 36.0


  9%|███▉                                      | 56/600 [00:13<02:30,  3.61it/s]

epoch: 55
avg loss: 1.333591341972351 accuracy: 38.0


 10%|████▎                                     | 61/600 [00:14<02:27,  3.66it/s]

epoch: 60
avg loss: 1.3560552597045898 accuracy: 33.0


 11%|████▌                                     | 66/600 [00:16<02:31,  3.54it/s]

epoch: 65
avg loss: 1.3014674186706543 accuracy: 38.0


 12%|████▉                                     | 71/600 [00:17<02:22,  3.72it/s]

epoch: 70
avg loss: 1.2964564561843872 accuracy: 40.0


 13%|█████▍                                    | 77/600 [00:18<01:12,  7.19it/s]

epoch: 75
avg loss: 1.3276289701461792 accuracy: 39.0


 14%|█████▋                                    | 82/600 [00:19<00:59,  8.67it/s]

epoch: 80
avg loss: 1.3033359050750732 accuracy: 37.0


 14%|██████                                    | 86/600 [00:19<01:22,  6.24it/s]

epoch: 85
avg loss: 1.281274437904358 accuracy: 43.0


 15%|██████▍                                   | 92/600 [00:20<01:18,  6.51it/s]

epoch: 90
avg loss: 1.3038192987442017 accuracy: 43.0


 16%|██████▊                                   | 97/600 [00:21<00:59,  8.50it/s]

epoch: 95
avg loss: 1.2798079252243042 accuracy: 45.0


 17%|██████▉                                  | 102/600 [00:21<00:55,  8.95it/s]

epoch: 100
avg loss: 1.2892463207244873 accuracy: 40.0


 18%|███████▎                                 | 107/600 [00:22<00:55,  8.95it/s]

epoch: 105
avg loss: 1.2962119579315186 accuracy: 44.0


 19%|███████▋                                 | 112/600 [00:23<00:54,  8.99it/s]

epoch: 110
avg loss: 1.2696088552474976 accuracy: 41.0


 20%|███████▉                                 | 117/600 [00:23<01:18,  6.14it/s]

epoch: 115
avg loss: 1.2839549779891968 accuracy: 47.0


 20%|████████▎                                | 121/600 [00:24<01:43,  4.65it/s]

epoch: 120
avg loss: 1.2907065153121948 accuracy: 41.0


 21%|████████▋                                | 127/600 [00:26<01:27,  5.38it/s]

epoch: 125
avg loss: 1.2852177619934082 accuracy: 40.0


 22%|█████████                                | 132/600 [00:26<00:58,  7.97it/s]

epoch: 130
avg loss: 1.2596861124038696 accuracy: 45.0


 23%|█████████▎                               | 137/600 [00:27<00:51,  8.91it/s]

epoch: 135
avg loss: 1.2619649171829224 accuracy: 43.0


 24%|█████████▋                               | 141/600 [00:28<01:26,  5.32it/s]

epoch: 140
avg loss: 1.2644131183624268 accuracy: 45.0


 24%|█████████▉                               | 146/600 [00:29<02:01,  3.75it/s]

epoch: 145
avg loss: 1.2360329627990723 accuracy: 53.0


 25%|██████████▎                              | 151/600 [00:30<02:03,  3.64it/s]

epoch: 150
avg loss: 1.1861118078231812 accuracy: 47.0


 26%|██████████▋                              | 156/600 [00:32<02:00,  3.69it/s]

epoch: 155
avg loss: 1.2239594459533691 accuracy: 43.0


 27%|███████████                              | 162/600 [00:33<01:30,  4.85it/s]

epoch: 160
avg loss: 1.3009917736053467 accuracy: 36.0


 28%|███████████▍                             | 167/600 [00:34<00:54,  7.94it/s]

epoch: 165
avg loss: 1.28175687789917 accuracy: 40.0


 29%|███████████▊                             | 172/600 [00:34<00:53,  8.05it/s]

epoch: 170
avg loss: 1.311435580253601 accuracy: 40.0


 30%|████████████                             | 177/600 [00:35<00:55,  7.64it/s]

epoch: 175
avg loss: 1.2474011182785034 accuracy: 42.0


 30%|████████████▍                            | 182/600 [00:35<00:48,  8.61it/s]

epoch: 180
avg loss: 1.265802025794983 accuracy: 38.0


 31%|████████████▋                            | 186/600 [00:36<00:48,  8.57it/s]

epoch: 185
avg loss: 1.252300500869751 accuracy: 43.0


 32%|█████████████                            | 192/600 [00:37<00:59,  6.82it/s]

epoch: 190
avg loss: 1.279752492904663 accuracy: 41.0


 33%|█████████████▍                           | 197/600 [00:38<00:46,  8.63it/s]

epoch: 195
avg loss: 1.2423733472824097 accuracy: 42.0


 34%|█████████████▊                           | 202/600 [00:38<00:44,  9.01it/s]

epoch: 200
avg loss: 1.2703264951705933 accuracy: 39.0


 34%|██████████████▏                          | 207/600 [00:39<00:43,  9.10it/s]

epoch: 205
avg loss: 1.2157301902770996 accuracy: 48.0


 35%|██████████████▍                          | 211/600 [00:39<01:03,  6.17it/s]

epoch: 210
avg loss: 1.2505460977554321 accuracy: 50.0


 36%|██████████████▊                          | 216/600 [00:41<01:37,  3.94it/s]

epoch: 215
avg loss: 1.2124401330947876 accuracy: 50.0


 37%|███████████████                          | 221/600 [00:42<01:29,  4.24it/s]

epoch: 220
avg loss: 1.2335026264190674 accuracy: 45.0


 38%|███████████████▍                         | 226/600 [00:43<01:41,  3.69it/s]

epoch: 225
avg loss: 1.205970048904419 accuracy: 47.0


 39%|███████████████▊                         | 232/600 [00:44<00:59,  6.19it/s]

epoch: 230
avg loss: 1.1566271781921387 accuracy: 53.0


 40%|████████████████▏                        | 237/600 [00:45<00:53,  6.84it/s]

epoch: 235
avg loss: 1.2285709381103516 accuracy: 44.0


 40%|████████████████▌                        | 242/600 [00:46<00:43,  8.28it/s]

epoch: 240
avg loss: 1.2213845252990723 accuracy: 44.0


 41%|████████████████▉                        | 247/600 [00:46<00:40,  8.81it/s]

epoch: 245
avg loss: 1.1644874811172485 accuracy: 50.0


 42%|█████████████████▏                       | 252/600 [00:47<00:41,  8.48it/s]

epoch: 250
avg loss: 1.096948266029358 accuracy: 57.0


 43%|█████████████████▌                       | 257/600 [00:47<00:38,  8.86it/s]

epoch: 255
avg loss: 1.3200058937072754 accuracy: 33.0


 44%|█████████████████▉                       | 262/600 [00:48<00:37,  8.90it/s]

epoch: 260
avg loss: 1.3156260251998901 accuracy: 38.0


 44%|██████████████████▏                      | 267/600 [00:49<00:36,  9.10it/s]

epoch: 265
avg loss: 1.2527694702148438 accuracy: 40.0


 45%|██████████████████▌                      | 272/600 [00:49<00:35,  9.16it/s]

epoch: 270
avg loss: 1.206191062927246 accuracy: 53.0


 46%|██████████████████▉                      | 277/600 [00:50<00:36,  8.94it/s]

epoch: 275
avg loss: 1.2287808656692505 accuracy: 42.0


 47%|███████████████████▎                     | 282/600 [00:50<00:35,  9.08it/s]

epoch: 280
avg loss: 1.206937313079834 accuracy: 45.0


 48%|███████████████████▌                     | 287/600 [00:51<00:34,  9.16it/s]

epoch: 285
avg loss: 1.185381293296814 accuracy: 52.0


 49%|███████████████████▉                     | 292/600 [00:51<00:33,  9.22it/s]

epoch: 290
avg loss: 1.1275010108947754 accuracy: 52.0


 50%|████████████████████▎                    | 297/600 [00:52<00:32,  9.24it/s]

epoch: 295
avg loss: 1.303713321685791 accuracy: 38.0


 50%|████████████████████▋                    | 302/600 [00:52<00:32,  9.17it/s]

epoch: 300
avg loss: 1.2996249198913574 accuracy: 37.0


 51%|████████████████████▉                    | 307/600 [00:53<00:32,  9.11it/s]

epoch: 305
avg loss: 1.3181191682815552 accuracy: 33.0


 52%|█████████████████████▎                   | 312/600 [00:53<00:31,  9.18it/s]

epoch: 310
avg loss: 1.358932614326477 accuracy: 36.0


 53%|█████████████████████▋                   | 317/600 [00:54<00:30,  9.19it/s]

epoch: 315
avg loss: 1.3484885692596436 accuracy: 32.0


 54%|██████████████████████                   | 322/600 [00:55<00:30,  9.23it/s]

epoch: 320
avg loss: 1.2953979969024658 accuracy: 36.0


 55%|██████████████████████▎                  | 327/600 [00:55<00:30,  8.87it/s]

epoch: 325
avg loss: 1.327468752861023 accuracy: 34.0


 55%|██████████████████████▋                  | 332/600 [00:56<00:31,  8.62it/s]

epoch: 330
avg loss: 1.2585242986679077 accuracy: 42.0


 56%|███████████████████████                  | 337/600 [00:56<00:28,  9.07it/s]

epoch: 335
avg loss: 1.2836192846298218 accuracy: 41.0


 57%|███████████████████████▎                 | 342/600 [00:57<00:28,  9.03it/s]

epoch: 340
avg loss: 1.3168702125549316 accuracy: 39.0


 58%|███████████████████████▋                 | 347/600 [00:57<00:31,  8.03it/s]

epoch: 345
avg loss: 1.3157339096069336 accuracy: 38.0


 59%|████████████████████████                 | 352/600 [00:58<00:28,  8.62it/s]

epoch: 350
avg loss: 1.28369140625 accuracy: 40.0


 60%|████████████████████████▍                | 357/600 [00:59<00:26,  9.08it/s]

epoch: 355
avg loss: 1.2548044919967651 accuracy: 39.0


 60%|████████████████████████▋                | 362/600 [00:59<00:27,  8.66it/s]

epoch: 360
avg loss: 1.2455413341522217 accuracy: 35.0


 61%|█████████████████████████                | 366/600 [01:00<00:27,  8.64it/s]

epoch: 365
avg loss: 1.3257793188095093 accuracy: 43.0


 62%|█████████████████████████▍               | 372/600 [01:01<00:49,  4.64it/s]

epoch: 370
avg loss: 1.3691078424453735 accuracy: 35.0


 63%|█████████████████████████▊               | 377/600 [01:02<00:30,  7.34it/s]

epoch: 375
avg loss: 1.3542070388793945 accuracy: 31.0


 64%|██████████████████████████               | 381/600 [01:02<00:38,  5.75it/s]

epoch: 380
avg loss: 1.4102771282196045 accuracy: 30.0


 64%|██████████████████████████▍              | 387/600 [01:04<00:35,  6.08it/s]

epoch: 385
avg loss: 1.355613350868225 accuracy: 30.0


 65%|██████████████████████████▋              | 391/600 [01:05<00:50,  4.11it/s]

epoch: 390
avg loss: 1.3184797763824463 accuracy: 33.0


 66%|███████████████████████████              | 396/600 [01:06<00:45,  4.47it/s]

epoch: 395
avg loss: 1.3756871223449707 accuracy: 35.0


 67%|███████████████████████████▍             | 401/600 [01:07<00:44,  4.51it/s]

epoch: 400
avg loss: 1.2785409688949585 accuracy: 41.0


 68%|███████████████████████████▋             | 406/600 [01:08<00:51,  3.78it/s]

epoch: 405
avg loss: 1.2995092868804932 accuracy: 36.0


 68%|████████████████████████████             | 411/600 [01:09<00:39,  4.75it/s]

epoch: 410
avg loss: 1.309653878211975 accuracy: 42.0


 70%|████████████████████████████▍            | 417/600 [01:11<00:40,  4.54it/s]

epoch: 415
avg loss: 1.3073431253433228 accuracy: 39.0


 70%|████████████████████████████▊            | 422/600 [01:11<00:22,  7.79it/s]

epoch: 420
avg loss: 1.3481391668319702 accuracy: 33.0


 71%|█████████████████████████████▏           | 427/600 [01:12<00:19,  8.97it/s]

epoch: 425
avg loss: 1.3344523906707764 accuracy: 31.0


 72%|█████████████████████████████▌           | 432/600 [01:13<00:18,  9.06it/s]

epoch: 430
avg loss: 1.3168660402297974 accuracy: 38.0


 73%|█████████████████████████████▊           | 437/600 [01:13<00:18,  8.96it/s]

epoch: 435
avg loss: 1.2968922853469849 accuracy: 41.0


 74%|██████████████████████████████▏          | 442/600 [01:14<00:17,  8.95it/s]

epoch: 440
avg loss: 1.3241184949874878 accuracy: 32.0


 74%|██████████████████████████████▌          | 447/600 [01:14<00:17,  8.94it/s]

epoch: 445
avg loss: 1.3012275695800781 accuracy: 36.0


 75%|██████████████████████████████▉          | 452/600 [01:15<00:16,  8.82it/s]

epoch: 450
avg loss: 1.2847884893417358 accuracy: 43.0


 76%|███████████████████████████████▏         | 457/600 [01:15<00:15,  9.09it/s]

epoch: 455
avg loss: 1.2644374370574951 accuracy: 40.0


 77%|███████████████████████████████▌         | 462/600 [01:16<00:15,  9.12it/s]

epoch: 460
avg loss: 1.3073806762695312 accuracy: 41.0


 78%|███████████████████████████████▉         | 467/600 [01:16<00:14,  9.14it/s]

epoch: 465
avg loss: 1.3118581771850586 accuracy: 39.0


 79%|████████████████████████████████▎        | 472/600 [01:17<00:14,  9.12it/s]

epoch: 470
avg loss: 1.314601182937622 accuracy: 37.0


 80%|████████████████████████████████▌        | 477/600 [01:18<00:13,  8.91it/s]

epoch: 475
avg loss: 1.3044672012329102 accuracy: 34.0


 80%|████████████████████████████████▊        | 481/600 [01:18<00:15,  7.71it/s]

epoch: 480
avg loss: 1.4269262552261353 accuracy: 44.0


 81%|█████████████████████████████████▎       | 487/600 [01:20<00:24,  4.66it/s]

epoch: 485
avg loss: 1.285253643989563 accuracy: 39.0


 82%|█████████████████████████████████▌       | 491/600 [01:21<00:27,  3.97it/s]

epoch: 490
avg loss: 1.2641642093658447 accuracy: 43.0


 83%|█████████████████████████████████▉       | 497/600 [01:22<00:20,  5.04it/s]

epoch: 495
avg loss: 1.2692644596099854 accuracy: 42.0


 84%|██████████████████████████████████▏      | 501/600 [01:23<00:18,  5.35it/s]

epoch: 500
avg loss: 1.2946690320968628 accuracy: 45.0


 84%|██████████████████████████████████▌      | 506/600 [01:24<00:24,  3.77it/s]

epoch: 505
avg loss: 1.2675445079803467 accuracy: 43.0


 85%|██████████████████████████████████▉      | 511/600 [01:25<00:16,  5.50it/s]

epoch: 510
avg loss: 1.453860878944397 accuracy: 35.0


 86%|███████████████████████████████████▎     | 516/600 [01:26<00:22,  3.79it/s]

epoch: 515
avg loss: 1.3832271099090576 accuracy: 40.0


 87%|███████████████████████████████████▌     | 521/600 [01:28<00:22,  3.57it/s]

epoch: 520
avg loss: 1.3401455879211426 accuracy: 40.0


 88%|████████████████████████████████████     | 527/600 [01:29<00:17,  4.07it/s]

epoch: 525
avg loss: 1.3338743448257446 accuracy: 41.0


 89%|████████████████████████████████████▎    | 532/600 [01:30<00:09,  7.20it/s]

epoch: 530
avg loss: 1.3001843690872192 accuracy: 44.0


 89%|████████████████████████████████████▋    | 536/600 [01:31<00:09,  6.69it/s]

epoch: 535
avg loss: 1.361900806427002 accuracy: 36.0


 90%|████████████████████████████████████▉    | 541/600 [01:32<00:15,  3.83it/s]

epoch: 540
avg loss: 1.2537102699279785 accuracy: 45.0


 91%|█████████████████████████████████████▎   | 546/600 [01:33<00:14,  3.75it/s]

epoch: 545
avg loss: 1.3000102043151855 accuracy: 37.0


 92%|█████████████████████████████████████▋   | 551/600 [01:35<00:10,  4.47it/s]

epoch: 550
avg loss: 1.2917653322219849 accuracy: 39.0


 93%|█████████████████████████████████████▉   | 556/600 [01:36<00:11,  3.74it/s]

epoch: 555
avg loss: 1.2859292030334473 accuracy: 42.0


 94%|██████████████████████████████████████▎  | 561/600 [01:37<00:11,  3.54it/s]

epoch: 560
avg loss: 1.277706265449524 accuracy: 44.0


 94%|██████████████████████████████████████▋  | 566/600 [01:39<00:09,  3.62it/s]

epoch: 565
avg loss: 1.3203575611114502 accuracy: 47.0


 95%|███████████████████████████████████████  | 572/600 [01:40<00:05,  4.84it/s]

epoch: 570
avg loss: 1.278803825378418 accuracy: 42.0


 96%|███████████████████████████████████████▍ | 577/600 [01:41<00:03,  6.60it/s]

epoch: 575
avg loss: 1.2727084159851074 accuracy: 40.0


 97%|███████████████████████████████████████▊ | 582/600 [01:42<00:02,  6.71it/s]

epoch: 580
avg loss: 1.2281014919281006 accuracy: 47.0


 98%|████████████████████████████████████████ | 587/600 [01:42<00:01,  8.49it/s]

epoch: 585
avg loss: 1.2286983728408813 accuracy: 50.0


 99%|████████████████████████████████████████▍| 592/600 [01:43<00:00,  8.94it/s]

epoch: 590
avg loss: 1.237452507019043 accuracy: 42.0


100%|████████████████████████████████████████▊| 597/600 [01:43<00:00,  8.45it/s]

epoch: 595
avg loss: 1.2395005226135254 accuracy: 48.0


100%|█████████████████████████████████████████| 600/600 [01:44<00:00,  5.73it/s]


# I'm confused about the rdf stuff. I don't see any multimodal data.

In [53]:
true_labels

tensor([3, 3, 0, 3, 2, 3, 2, 0, 1, 1, 2, 3, 1, 1, 2, 0, 1, 1, 3, 0, 2, 2, 1, 3,
        1, 1, 3, 2, 0, 3, 1, 1, 3, 0, 2, 2, 0, 2, 2, 1, 2, 2, 3, 1, 3, 1, 3, 2,
        1, 2, 1, 2, 2, 3, 2, 1, 0, 1, 3, 0, 1, 3, 1, 2, 3, 0, 2, 2, 0, 1, 1, 0,
        3, 2, 1, 2, 0, 0, 0, 1, 2, 0, 0, 2, 3, 1, 1, 2, 3, 1, 3, 3, 3, 3, 3, 0,
        0, 1, 1, 0])

In [56]:
predicted

tensor([3, 3, 0, 3, 0, 3, 1, 0, 1, 1, 1, 3, 1, 3, 1, 0, 1, 3, 3, 1, 1, 1, 1, 3,
        1, 1, 3, 1, 0, 3, 3, 1, 3, 1, 1, 1, 0, 0, 1, 1, 1, 1, 3, 1, 1, 1, 3, 3,
        1, 1, 0, 1, 1, 3, 1, 1, 0, 1, 3, 1, 1, 3, 0, 1, 3, 1, 3, 0, 1, 1, 0, 0,
        3, 3, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 3, 1, 0, 0, 3, 0, 3, 3, 1, 3, 3, 0,
        0, 1, 1, 0])

# Create graph from n-triple file:

In [1]:
import rdflib
from rdflib import Graph
import logging
import os
from IPython.display import clear_output


data_loc = './Downloads/ml4g/dmg/mmkg/dmg/scripts/dmg777k_stripped.nt'
folder = './Downloads/ml4g'

def create_new_graph(path, batch_size = 1000, test=True):
    logging.basicConfig(
    filename='rdf_parsing_errors.log',
    filemode='w')
    
    graph = Graph()
    # graph = graph.parse(, format='nt')
    batch_num = 0
    i = 0
    with open(path, 'r', encoding='utf-8') as f:
        while True:
            batch = []
            try:
                [batch.append(next(f)) for j in range(batch_size)]
                i += j
            except:
                pass
            if not batch:
                break
            batch_num += 1
            nt_string = ''.join(batch)
            try:
                graph.parse(data=nt_string, format='nt')
                if test:
                    graph = Graph()
            except ParseError as e:
                logging.error(f"in batch: {batch_num}:\npproblematic data:\n\n{batch}\n\n")
                check(batch, batch_num, test=test)

            if batch_num == 5:
                # print(batch)
                pass
            if batch_num % 10 == 0:
                # clear_output()
                
                pass
                # print(f"{batch_num}/:o?")

    return graph

def check(batch, batch_num, test = True):
    graph = Graph()
    for i, line in enumerate(batch):
        try:
            graph.parse(line)
        except Exception as e:
            logging.error(f'in line: {i}:\n{line}\n{e}')
graph=create_new_graph(data_loc, test=False)
# graph = Graph()
# graph.parse(data_loc)

In [2]:
# encoder_map

In [3]:
from datetime import datetime
from typing import defaultdict
node_set = set()
http_set_s = set()
http_set_m = set()
http_set_l = set()

edge_set = set()
string_set_s = set()
string_set_m = set()
string_set_l = set()

image_set = set()
num_set = set()
poly_set = set()
datetime_set = set()
date_set = set()
year_set = set()
point_set = set()


text_edge_set_s = set()
text_edge_set_m = set()
text_edge_set_l = set()

image_edge_set = set()
num_edge_set = set()
spatial_edge_set = set()
temporal_edge_set = set()
encoder_map = defaultdict(list)

i = 0

dtype_set = set()

def is_date(date_string):
    try:
        # datetime.strptime(date_string, '%Y-%m-%d')
        rdflib.term.parse_datetime(date_string)
        return 'datetime'
    except ValueError:
        try:
            rdflib.term.parse_xsd_gyear(date_string)
            return 'year'
        except:
            try:
                rdflib.term.parse_xsd_date(date_string)
                return 'date'
            except:
                return False

def add_str_to_set(string,edge):
    if len(string) < 20:
        string_set_s.add(string)
        text_edge_set_s.add(edge)
    elif len(string) < 50:
        string_set_m.add(string)
        text_edge_set_m.add(edge)
    else:
        string_set_l.add(string)
        text_edge_set_l.add(edge)

for s,p,o in graph:
    i+=1
    pi = p.identifier
    for node in [s,o]:
        ni = node.identifier
        node_set.add(ni)
        try:
            dtype = node.datatype.identifier
            dtype_set.add(dtype)
        except AttributeError:
            dtype = ''
        # if 'http' in ni[:200] and 'geonames' not in ni: #just add geonames to node set I think...
        if 'http' in ni[:200]: #200, because images sometimes have kgbench url attached
            if len(ni) < 20:
                http_set_s.add(''.join(ni.split(':')[1].split('/')[:-1]))
            elif len(ni) < 50:
                http_set_m.add(''.join(ni.split(':')[1].split('/')[:-1]))
            else:
                http_set_l.add(''.join(ni.split(':')[1].split('/')[:-1]))

        else:
            if is_date(ni) and ('Year' in dtype or 'date' in dtype):
                date_type = is_date(ni)
                if date_type == 'datetime':
                    datetime_set.add(ni)
                elif date_type == 'year':
                    year_set.add(ni)
                elif date_type == 'date':
                    date_set.add(ni)
                    
            elif node.isalnum():
                if node.isnumeric():
                    if node.isdigit():
                        num_set.add(int(node.identifier))
                        num_edge_set.add(pi)
                    else:
                        num_set.add(float(node.identifier))
            elif ni.startswith('POINT') or ni.startswith('Point'): #didn't see any points, but according to the paper they can be included.
                point_set.add(ni)
                spatial_edge_set.add(pi)
            elif node.isalpha(): #maybe elif maybe not dunno if it filters out strings with numbers
                add_str_to_set(ni.pi)
            elif ni.startswith('_9j_'):
                image_set.add(ni) #might want to load this to hard drive if memory becomes an issue.
                image_edge_set.add(pi)
                # print(s,p,o) if i < 100 else 0
            elif ni.startswith('POLYGON') or ni.startswith('Polygon'):
                poly_set.add(ni)
                spatial_edge_set.add(pi)
                # print(s,p,o) if i < 100 else 0
                
                temporal_edge_set.add(pi)
            elif ni.lower().startswith('multipolygon'):
                poly_set.add(ni)
                # print(s,p,o) if i < 100 else 0
            elif ni.isascii():
                add_str_to_set(ni,pi)
                # print(s,p,o) if i < 100 else 0
            elif ni.isprintable():
                add_str_to_set(ascii(ni),pi) #don't know if it's necessary, but it probably can't hurt
                # print(s,p,o) if i < 100 else 0
            else: #all that's left seems to be monument stories and property description related text. 
                            #If there's an error later it's probably from here
                add_str_to_set(ascii(ni),pi)
                # print(s,p,o) if i < 100 else 0
                
                # print(ni.isalpha(),ni)
                

    edge_set.add(pi)

    
i

777124

In [4]:
dtypes

NameError: name 'dtypes' is not defined

### maybe coalesce features on literal predicates...

In [75]:
# doubles = []
# literals = set()
# decimals = set()

# # think about how to properly create the feature matrix...
# for s,p,o in graph:
#     si,pi,oi = s.identifier, p.identifier, o.identifier
#     if isinstance(o, rdflib.term.Literal):
#         literals.add(o)
#     if "http://purl.org/dc/terms/" in p:
#         decimals.add((p, o))
#     if s.isnumeric():
#         decimals.append(s)
#     if 'XSD.doubles' in pi:
#         doubles.append(oi)
#     elif 'XSD.float' in pi:
#         floats.append(oi)
#     elif 'XSD.decimal' in pi:
#         # decimals.append(oi)
#         pass
#     elif 'XSD.boolean' in pi:
#         bools.append(oi)# representation handled differently from reals
#     elif 'XSD.time' in pi:
#         times.append(oi)
#     elif 'XSD.date' in pi:
#         dates.append(oi)
#     elif 'XSD.gMonth' in pi:
#         months.append(oi)
        
# print(len(doubles))

0


In [5]:
# literals

# Redo of graph parsing to align with paper method...

### rdflib parsing seems highly inconsistent.

In [135]:
# dtypes = set()
# node_set = set()
# edge_set = set()

# node_dtypes = set()
# instance_types = set()

# #literals:
# point_set = set()
# poly_set = set()
# string_set_s = set()
# string_set_m = set()
# string_set_l = set()
# img_set = set()
# max_str_len = 1000 # for example, lets see what happens to memory...
# bool_set = set()
# num_set = set()
# date_set = set()
# year_set = set()
# uri_set = set() #maybe use a different encoder for uri if possible... 
#                 # in the paper it's not done this way, but I think using bert
#                 # for non-uri strings makes sense, and the proposed method for uri strings...
#                 # also the dimensionality of the uri-safe character vocab is lower than that of any text.
# year_set_parsed = set()

# def validate_type(func, string):
#     try:
#         func(string)
#         return True
#     except:
#         return False

# for s,p,o in graph:
#     si,pi,oi = s.identifier, p.identifier, o.identifier #isinstance(node, rdflib.terms.Literal) vs Uriref does not split literals from nodes
#     for node in [s,o]: #I think only o can be literal

#         literal = node
#         try:
#             dtype = literal.datatype.identifier
#         except:
#             pass #bypass random meaningless error
#         dtypes.add(dtype)
#         li = literal.identifier
#         if dtype == 'http://www.w3.org/2001/XMLSchema#string':
#             if len(li) < 20:
#                 string_set_s.add(li)
#             elif len(li) < 50:
#                 string_set_m.add(li)
#             else:
#                 string_set_l.add(li[:max_str_len])
        # elif dtype =='http://www.opengis.net/ont/geosparql#wktLiteral' or pi == 'http://data.pdok.nl/def/pdok#asWKT-RD': #poly, point
        #     if 'point' in li.lower():
        #         point_set.add(li)
        #     elif 'polygon' in li.lower():
        #         poly_set.add(li)
        # elif ('_9j_' in li): #dtype == 'http://kgbench.info/dt#base64Image' and # seems redundant
        #     img_set.add(li)
        # elif dtype == 'http://www.w3.org/2001/XMLSchema#anyURI' and 'http' in li:
        #     uri_set.add(li)
        # elif dtype == 'http://www.w3.org/2001/XMLSchema#boolean':
        #     bool_set.add(li)
        # elif dtype == 'http://www.w3.org/2001/XMLSchema#date' and validate_type(rdflib.term.parse_xsd_date, li):
        #     date_set.add(li)
        # elif dtype == 'http://www.w3.org/2001/XMLSchema#gYear' and validate_type(rdflib.term.parse_xsd_gyear, li):
        #     year_set.add(li)
        #     # except:
        #     #     print((pi,li,dtype))
        #     # 
        # elif dtype in ['http://www.w3.org/2001/XMLSchema#nonNegativeInteger', 
        #                'http://www.w3.org/2001/XMLSchema#positiveInteger']:
        #     num_set.add(li)
        # else:
        #     node_set.add(li)
        

In [166]:
# bool_set

set()

In [None]:
instance_types

In [None]:
dtypes

### relations mapping literals to nodes

##### http://dbpedia.org/ontology/thumbnail --> img
##### http://www.opengis.net/ont/geosparql#asWKT --> spatial 
##### http://purl.org/dc/terms/created --> temp 
##### http://www.w3.org/2006/vcard/ns#postal-code  --> str  postal code #postal codes don't have a seperate encoder yet
##### http://www.w3.org/2006/vcard/ns#street-address  --> str adress
##### https://data.labs.pdok.nl/rce/def/techniek --> str (technical photo description)
##### http://purl.org/dc/terms/isPartOf --> str rare and unclear what it means
##### https://data.labs.pdok.nl/rce/def/fotograaf --> str personal name
##### http://purl.org/dc/terms/description --> str image description


In [7]:
import torchvision.transforms as trv


# for i, modality_edge_set in enumerate([text_edge_set_s, image_edge_set, num_edge_set, spatial_edge_set, temporal_edge_set]):
#     for edge in modality_edge_set:
#         encoder_map[edge] = i 

# map to encoders later
#^this logic doesn't necessarily work with other datasets, but I just don't understand how to do it differently.

transform_temp = trv.Compose([
    trv.Resize((224, 224)),
    trv.ToTensor(), trv.Normalize(
        mean=[0.5, 0.5, 0.5], 
        std=[0.3, 0.3, 0.3])]) 

In [8]:
import math
import torch
from torch.utils.data import Dataset, DataLoader
from string import printable


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

node_map = {i:node for i,node in enumerate(node_set)}
inv_node_map = {node:i for i,node in enumerate(node_set)}

edge_map = {i:edge for i,edge in enumerate(edge_set)}
inv_edge_map = {edge:i for i,edge in enumerate(edge_set)}


def tokenize_string(s, character_map, max_len=5000):
    #preprocess for text encoder[1](? layer temporal cnn -->embedding_dim)
    s = s[:max_len]
    #character level embeddings
    tokens = [character_map[char] for char in s]
    return tokens #

def encode_image(encoded_str, transform):
    #preprocess for encoder[2](2 layer cnn -->embedding_dim)
    image, img_bytes = decode_base64_image(encoded_str)
    return transform(image), img_bytes

def encode_num(n, max_num, log=False):
    
    # read how it's done exactly.. cat(e(num),e(bool))
    # maybe add log scale if max_num is really high...
    
    v = n/max_num
    v = math.log(v) if log else v
    return torch.tensor((v),dtype=torch.float32, device=device)

def encode_polygon(poly, global_mean_x, global_mean_y, x_max, y_max):
    #preprocess for spatial encoder[3](? layer temporal cnn -->embedding_dim) 
    poly_tensor_x = (global_mean_x-torch.tensor([ point[0] for point in poly], 
                                                dtype=torch.float32, device=device))/x_max
    poly_tensor_y = (global_mean_y-torch.tensor([ point[1] for point in poly], 
                                                dtype=torch.float32, device=device))/y_max
    # print(poly_tensor_x)
    return torch.stack((poly_tensor_x,poly_tensor_y),dim=0)

def encode_point(point, max_x, max_y):
    #preprocess for spatial encoder[3](? layer temporal cnn -->embedding_dim)
    
    point = torch.tensor(point,dtype=torch.float32, device=device)
    div = torch.tensor((max_x,max_y),dtype=torch.float32, device=device)
    return point/div

def encode_date(date):
    #preprocess for temporal encoder[4](? layer ffnn -->embedding_dim)
    def cyclical(num, max_num, epsilon = 1e-8):
        # cyclical: [sine((2pi * X)/max_num_of_cycle) cos((2pi * X)/max_num_of_cycle)]
        return torch.tensor([math.sin((2 * math.pi * num)/max_num)+epsilon, 
                             math.cos((2 * math.pi * num)/max_num)+epsilon],
                            dtype=torch.float32, device=device)
        
    def norm_cent(num):
        # non-cyclical only centuries: normalized from -99 to 99 (-9999 bc to 9999 ac)
        return torch.tensor((num + 99)/198,dtype=torch.float32, device=device).unsqueeze(0)
        
    split_str = date.split('-')
    years_str = split_str[0]
    month_str = split_str[1]
    day_str = split_str[2]
    centuries = norm_cent(int(years_str[:-2]))
    decades = cyclical(int(years_str[-2]), 10)
    years = cyclical(int(years_str[-1]), 10)
    months = cyclical(int(month_str), 12)
    days = cyclical(int(day_str), 31)
    # print((centuries, decades, years, months, days))
    # print(decades,years)
    return torch.cat((centuries, decades, years, months, days), dim=0)

def encode_year(year):
    #preprocess for temporal encoder[4](? layer ffnn -->embedding_dim)
    def cyclical(num, max_num, epsilon = 1e-8):
        # cyclical: [sine((2pi * X)/max_num_of_cycle) cos((2pi * X)/max_num_of_cycle)]
        return torch.tensor([math.sin((2 * math.pi * num)/max_num)+epsilon, 
                             math.cos((2 * math.pi * num)/max_num)+epsilon],
                            dtype=torch.float32, device=device)
        
    def norm_cent(num):
        # non-cyclical only centuries: normalized from -99 to 99 (-9999 bc to 9999 ac)
        return torch.tensor((num + 99)/198,dtype=torch.float32, device=device).unsqueeze(0)
    centuries = norm_cent(int(years_str[:-2]))
    decades = cyclical(int(years_str[-2]), 10)
    years = cyclical(int(years_str[-1]), 10)
    return torch.cat((centuries, decades, years), dim=0)



class TextDataset(Dataset):
    def __init__(self, strings, character_map, max_length=5000):
        self.strings = strings
        self.character_map = character_map
        self.max_length = max_length
        self.pad = False

    def __len__(self):
        return len(self.strings)

    def __getitem__(self, i, pad=True):
        s = self.strings[i]
        tokens = tokenize_string(s, self.character_map, max_len=self.max_length)

        return torch.tensor(tokens, dtype=torch.long, device=device)


class ImageDataset(Dataset):
    def __init__(self, encoded_images, transform):
        self.encoded_images = encoded_images
        self.transform = transform

    def __len__(self):
        return len(self.encoded_images)

    def __getitem__(self, i):
        encoded_str = self.encoded_images[i]
        image, _ = decode_base64_image(encoded_str, log_note=f"inside generator, image nr: {i}")
        image = self.transform(image)
        return image

class NumericalDataset(Dataset):
    def __init__(self, numbers, log_scale=False):
        self.numbers = numbers
        self.log_scale = log_scale
        self.max_num = max(numbers)

    def __len__(self):
        return len(self.numbers)

    def __getitem__(self, i):
        n = self.numbers[i]
        norm = n / self.max_num
        norm = math.log(norm + 1e-8) if self.log_scale else norm
        return torch.tensor([norm], dtype=torch.float32, device=device)

class SpatialDataset(Dataset):
    def __init__(self, spatial_data, global_mean_x, global_mean_y, x_max, y_max):
        self.spatial_data = spatial_data
        self.global_mean_x = global_mean_x
        self.global_mean_y = global_mean_y
        self.x_max = x_max
        self.y_max = y_max
        self.max_len = max([len(data) for data in spatial_data])

    def __len__(self):
        return len(self.spatial_data)

    def __getitem__(self, i):
        spatial = self.spatial_data[i]
        if isinstance(spatial, list):
            x_coords = [(x - self.global_mean_x) / self.x_max for x, y in spatial]
            y_coords = [(y - self.global_mean_y) / self.y_max for x, y in spatial]
            spatial_tensor = torch.tensor(list(zip(x_coords, y_coords)), dtype=torch.float32, device=device)
        elif isinstance(spatial, tuple):
            x, y = spatial
            spatial_tensor = torch.tensor([(x / self.x_max, y / self.y_max)], dtype=torch.float32, device=device)
        else:
            spatial_tensor = torch.zeros((1, 2), dtype=torch.float32, device=device)
        return spatial_tensor


class TemporalDataset(Dataset):
    # todo: check if there are other temporal feature types in the data. i.e. just years etc.
    def __init__(self, dates):
        
        self.dates = dates#torch.stack([get_dates(date) for date in dates], dim=0)
        

    def __len__(self):
        return len(self.dates)

    def __getitem__(self, i):
        date = self.dates[i]
        date_feature = encode_date(date, max_len=500)
        return date_feature

def process_point(point_str, highest_x, highest_y):
    if 'POINT' in point_str:
        point_str = point_str.split('POINT(')[1].split('))')[0]
    elif 'Point' in point_str:
        point_str = point_str.split('Point(')[1].split('))')[0]
    point_list = point_str.strip(')').strip('(').split()
    point = tuple([float(coord) for coord in point_list])
    point_x,point_y = point
    highest_x = point_x if point_x > highest_x else highest_x
    highest_y = point_y if point_y > highest_y else highest_y
    return point, highest_x, highest_y
    

def get_num_data(poly_str, max_x, y_max):
    # if 'POLYGON' in poly_str:
    #     poly_str = poly_str.split('POLYGON ((')[1].split('))')[0]
    # elif 'Polygon' in poly_str:
    #     poly_str = poly_str.split('Polygon ((')[1].split('))')[0]
    # elif 'MULTIPOLYGON' in poly_str:
    #     poly_str = poly_str.split('MULTIPOLYGON (((')[1].split('))')[0]
    # elif 'Multipolygon' in poly_str:
    #     poly_str = poly_str.split('Multipolygon (((')[1].split('))')[0]
    poly_str = poly_str.split('(')[-1].split(')')[0]
    poly_combi_str_list = [poly for poly in poly_str.split(',')]
    try:
        poly_tupled = [(float(poly.split()[0]),float(poly.split()[1])) 
                       for poly in poly_combi_str_list]
    except ValueError:
        poly_tupled = [(float(poly.split()[0].strip(')').strip('(')),
                     float(poly.split()[1].strip(')').strip('('))) 
                    for poly in poly_combi_str_list]
    x_max, y_max = max([x for x,y in poly_tupled]), max([y for x, y in poly_tupled])
    x_max =  x_max if x_max > max_x else max_x
    y_max =  y_max if y_max > max_y else max_y
    x_mean = sum([tup[0] for tup in poly_tupled])/len(poly_tupled)
    y_mean = sum([tup[1] for tup in poly_tupled])/len(poly_tupled)
    return poly_tupled, x_mean, y_mean, x_max, y_max

global_mean_x = 0
global_mean_y = 0

# print(point)
max_x = 0
max_y = 0

points_tupled = []
for i, point in enumerate(point_set):
    point_tupled,max_x,max_y = process_point(point,max_x,max_y)
    points_tupled.append(point_tupled)


polys_tupled = []
x_max, y_max = 0,0
for i, poly in enumerate(poly_set):
    # if i < 100:
    poly_tupled, x_mean, y_mean, x_max, y_max = get_num_data(poly, 
                                                             x_max, y_max)
    global_mean_x += 1
    global_mean_y += 1
    polys_tupled.append(poly_tupled)
i += 1
print(poly_tupled[0])
global_mean_x, global_mean_y = global_mean_x/i, global_mean_y/i


character_map = {char:i for i,char in enumerate(printable)}
character_map['\x7f'] = 101

text_dataset_s = TextDataset(list(string_set_s), character_map)
text_dataset_m = TextDataset(list(string_set_m), character_map)
text_dataset_l = TextDataset(list(string_set_l), character_map)


image_dataset = ImageDataset(list(image_set), transform_temp)

numerical_dataset = NumericalDataset(num_set, log_scale=True)

spatial_dataset = SpatialDataset(polys_tupled + points_tupled, global_mean_x, global_mean_y, x_max, y_max)

temporal_dataset = TemporalDataset(date_set)



  return torch._C._cuda_getDeviceCount() > 0


(3.9192972649193174, 51.64911756496414)


In [30]:
import base64
from PIL import Image
from io import BytesIO

byte_str = """_9j_4AAQSkZJRgABAQAAAQABAAD_2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL_2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL_wAARCADIAMYDASIAAhEBAxEB_8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL_8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4-Tl5ufo6erx8vP09fb3-Pn6_8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL_8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3-Pn6_9oADAMBAAIRAxEAPwCprVoYUDmLGeoYjIA9hWBHbLOwCRlSGBJPrXea6kQtGYtiQ8Llep5zgGuXdLxpIwwGwbQ2ByR_n-dAHuelqPsUYZgW2jJ7nAq2QAQTwSKpacFFlGTkcA8_SrDNkjJyBmgCpdOoYnOSR2NY05XyJGOSCMDPrV-4fbkBSQQeO5rJuZWNm25SCG5HoKAPPvF00kFhIyShCXA4GSfb2HSuStNd1C3Kxm5MqZIYE5yD1Fdb4vgFzp7omWYygqAOSP6VwLWDxZAjJweSD0oA7Ky1xL6QCX5cgABiARgngH0res9TidnCbgFIBU8ZA64_GvKBJNbSBgoGCRhhkHn-VdBpniWBGVZ0EJGASASPrxzQB61azpLaI0irJGOAc4I9jj0p1xZJMQNxKgcIQRg8etcrpXiSyRZJSzJGynCA7txJx2zg8-oq_wD8JLbIzBTK4BG1wMjp69aAJLiyltc4BZTgYx07ZH481C4LEA8kgEEGtq31KKeMAYdmAyufu57ntVTUEBuA6qFUAnBA4PtigCmHBBCtgAgDIOcYwMfrUlvKDqAXAII2hSCSMY5yDUTqyKDgHcwOPTNOtCZZyxKoRnGBjOBg9-en60Aa0wICFAMAEnH-fapLZmcgkAYAHOe57flUaxBII95IYgkHcDk5B59OaW2YNcADKgEnk-h60AOmJNsGOWJzk9-uetcdrj7JyMndnccDggdK7PG5QoAA5GB0_wA81yevRIt0wODwcd8UAGm3oRyobI8ssQTnJ_zj8qivEMkiYYH5QcgAc4GR_n0rKtWaEs4LbwflIIBAJx_WtX5nRmAYkLgcYPAwcfiD-dAE-naY76hbEszRZ3tkcE4P5V1kduiXKFUUHBIwQNvXqa57RbgGOMHJG3kknAORgc9_8K1Uu5FuRIylQRgnA4OB3H4UAdCoZgUds4OT06_5NFZ0d-H3bNxxjIooAzL-3kn8lQqyEsCy4Bz659O3NcncQONRk5JjVwAFOCef5cela_8AwkFoQWt5ltiyhSuAcnjJPt7VWs7yB7gKLhZVZsvgAsR2Oeg7UAer2ZBtVXp8ox-VTFCTkjGPf2pLJM24YDGQB9OKlQKM7iSTnmgDLuBtBLcnnGKxZ2L20gALAHJFbl8QvBxxnk1izyKljIWcBC2Dgc0AcNrYLxNgEAMBgEA_XmuUFtOGkxMucZIYZGOeP89q63UozcbgoPUYOcAjOP51jX2nsqloywjZcYAxzwPrgmgDmJ8u7EwxkcE8cD6f_XrPn2rlRCpkJyACQAPfNdNbwJEHViTIAcjsD2P5fzqr_ZrvJ5rKWDHO4nt1HXmgDHtoNRQh4Qy5BJUcD8a7HQor29CG6XYgx0AJPbNX9O0xBEiqoGBnGOhPbPX8637OzEAUbQQDk57DigC5bW8VtDtyST6HI6UStvUDIyTkA8ggdKmyHIXJKjoM85qKQA7ARgAkD86AKhGXJfHyYOMc9-1Fkn-kKpcE4J4OM8_T3qaQHLYOGxgEZOf85NNjQxXEbksSCSBwSBQBo7chVA4UnJI6f_XqRIsThy4y3I54HbH54pzozRgHAUHJA5yfT-tMHDAZJAwcnvzQBHI5QMSCcA4zxk1wuoXbvdNwc9eDxjH-NdxOn3hwck8Z6Vw2rWrrcbzjBzyB6mgBkE0krBlKhVbGfUn1_KtyyjJt7gKATjjJwDyelYWnKoLAkABgQCePX866nSlIt5CVAzntwOucdaAOft9WFuk0E2NhIAOf0_L-VbI1RbqERKoIGQcnkDoffNZlzpyT3JthuxI4w6kgryDjHfua2G0qOwmjRGIBTcWducnqMeg5NAE1o-IF-c7sc7TtI-vBopthG8YO4qAckHkcZ_H_ACKKAPLriweQhrS9iJCgnLgZP0NWbeW6tLu2AuFk3SqCAMHJI7irN14ahQhopmjJzwRjAziorLRNShvrdo5VeMTLnAzxkelAH0fZA_YQAcEgEflSsCGGBnA6VPbbBahRxwD-lNc4J2gE0AZU4ad2DAAEYGayLu3AtGBGRuzjGefpW1dy-VC77SccgDrWPJKstmxUkFjkjuPagDidUPkAnIBzxkkj6_8A1vWs4XaSRRqZA5IJIzgD_wCv9PWpPGtwYLcusY2BwMEnuPauStNWuVDMkWRjHCk9e59DyeaAOgjgtGuCxMjEEq2D1HYZ9aswWk8sgiYkorDGeSBwM-9Q6FCb2RNwdCSNzEYU8dgev1roruJrNS6qpjYAZBGRxnnvQBHDETIqIxIB6AEYx-H1rSm2IwYtsA55_wA9DVe2urRCzLMuABg8c-9aA1XS1jBleDkDO4D8snqOOg6UAMidHUMpBBOMgfnTZcBeSc5yABVK78R6NEpAmAkxkBFIB9sHj16d6gTxLpM8YZ5wpBGBtJIx1J_L3oAunCyKWHGM5zzj0qzbIjT5JAAHUnr16EcfyrBfxJYvIxaUshBw4UYB9wKINatyGf7UUyAOAcMO-ff8aAOtX94MA4Ge3YAdB2obbwrEqCQRz29KwV8TafF5cTSFVB3b8_eH8s8frVhNZ0y6mVVvFQE5JckD_P6UAajRCQkEdSc_zrE1TTRKEKKFAJJweuMf4Vt27xyoxilVweFZSMNzjP6U6RGdSuSMjBxQBw6WgEgVVJDNk7TyQSAfrXQ2YP2aRQAQHIx0yeMfTtUMtuttIrlc4JA-mc_jVuBBBb5YnJYsOSOgx0oAoFFGtWyPlQ7EjAyc44wPwH4Vf1uymub6NAFCqocYGSQc5HH41Lp0cLX7zsrb1UnLjhSDjIz35rUkNtLKJDPGqBSFIccj1x6f40AYltMqWy-ZIFUcBgaK6SCOzWBX8xNrc7lIBJ_PpRQB5VqEzSlsNkkAZPYDpUOnorXtv5mQ3mKARxxkVau1XzioAJPGe461Lp8KfaoAACxkXPHTkdKAPa1GYwqnAAA-tWpAI4wAAKhjQJGCMYwOnrSPMWG1h0PBoAzZwSzK2cAE1z8rgI4AIBY4OO1dHclRLIpODt4Getc1cYS2dt2FBJOfSgDifEkDX6GCJwp8wEE9wO3esCDw46c3V6ozwQpyRn8a7DUY1eFGiBVg3XHXOMACkGnLcG2ntUMwEoaVGYKAARwc9e3NAFeDwJaqimeeQqD124BGO-DmnHwJYOw8u-ZI2OAGGST6A556V03203NvMFAQo-0FwMHHfmqxhUoAQTj5sEcAjg4_P9KAMaLwLYzBdt6mSeSYjg_gDxXFap4A8Tf8JC9vBqKG1yGV1kICqTgHaT146Z5PevXLWCRIgzgjJwATz-P4VBMxGvi3U4Row7HHoCcfSgDkLH4XQxkNqGt3N2cYBWMKB-BJz-NaD_C_w9LIHW5vwx4-SRVH5BcV2JQRgEZzjgdv8iobncYSqAgjGUI9PTvQByjfCrRCcLc3wToQsqjJz3G3rU5-GWhNOHie7j2oAUMoZTjjJBGcn2NdGZ1ghBLgA4GSDnJ4xjFSoMxl5M4PAwefxoA5-P4daa4wLmUDoVxkfzqMfDCyG4rfTD0BX_69dZbzqkUjg7huIwDz_npUH2p9rEsSxPHPSgDC0zwuNOMkUWo3AIblSAFyeM9xUkr6vbTlZ4Z5yACro-VYZPAAAAPscVrQvvkk3Pk5HfHGBV6aQCNMHr69DQBw95PrETq4sBKgbJjBKsD9STn8qq3Or-Jb0quk-GCrIAGmuJQFUkc4wRmvQTtBQ4BBOMEdsdRVC9uGhuAUyAy5II47frQByg8MeJtSEY1PxDHCCc-XFESFPcA5HPH6VcT4eCQBLnxJfsDztQAD8yTWnHqEZwC-9xIMKR1GOxJ9-aspeIhZi6tjg44yMDjAzjigCrB8N9IjT97eX0oPTdKAf0FFbUV-HiDJgg_3T_nvmigDy-WTzDuAGCeuK19JjUGKYgMWdQMgYGCOayo1IlZwgKrnI64Jqzp-rG3vhbvEDG0q4GeQSQOD_SgD2UuyxgYyCB0qKUuHO0fU1dRR5CMQAcDpUEhIZjkdOM0AY-oiVpiFIAx1z1rn5gXt5Y2GcHJz39s1vX0reYflBBBHT2rGlRZLVwFIyCRkfrQBgy4EpgdwiPgnknAGD2pNLuUUtbAqxWQggk8qec4P1pFHnapBEMqCwBIGTkketaP9mwW801wmOUCjB6kHv-A_SgBt5P5csazkBpQAY15x6HPvj-VWYYgzqwbCAYCEDj2PtWPqSQ31xHHvKoWUA9SOfXj863Ft_L2YOQDjr79eaALsmSEA7Hg1HDaB_FqSFSwMPzDB44AAz78_lVTxB4gsvCmjm-uwJpHGIYRwWP8AQV5HFr_jrxffzXOlzPaQ_dLRHy0AHQZOSSM__qoA9-lEMShWBJYbcAZ59M-tY98EjkVULgHklxk88YyK8ug1_wCIXht0k1J11azBy8bkMwHcggZBx3Oa9JtNVsPFHhyHU9McgD5JEcYZGGMqR2I-voe9AFc3RRmLIWC5wPX3_Ko7fWXM7RtCTAYwxQnlST9fcH2p0iskTyl8KoJJGMgen1zx-Nc5bKZL5353bCRg9QD0OPb-VAHZhgbePa2QcnGeuKbzk5GCCcdwKhH7uCIAEfLkgY_z61Ij5GCeoIBI6GgB1q5F04JwBjGRzjFXZ2KwgnHHfHNZdo-bpwAcAAdfatCVQYgCuR0x2xigB7znfGuVwzYyTz0zxWdrhEXlkE52gEg4xxTbmWQavYqFzGWAJHqRj-WTTPE52CNcgZAPPIPAoA560lVZi2CXVjh-R6Dj6D3q7e3bKCY5C5B4wACQexwfX-tVFiURs6AEnABBzg9Tmob1VWUMpYBlDKD1PIyD7jHb696ANqyuQjFWkwhX5doyRz3_ADorMtL3zIy7oEQ9CACepooAx21CWJCAgVGO_O3rx6n05_GqVoxfVLeRzwJVPTknIqzNPcyr5TsPLjXy1BAHHfHrz361UtgF1W2GcKZVJyOnIoA-hoXzaq2OwFRyYJOcY9KISGt-CMAA8VG7bcsBnAoAxtQAFyoySMnHoOKyZUcIz-dgg8Dtit6coSwYAEngmsS7jABI5OeAelAHNzvJBd-erhGJJOD1wRjp0rUjRRapKcgMMlWJ4J579uaxbl2efaACxBBA_qO4raeNliigLEBQMkYO7Hrn8aAKy2SGTc4HlnG1gDkY5AH9a6GzgErjYjMccDGM-9Z6OGZIVPAOSB2_Lt2q_YzeVcKpY89R2x6f59KAPJfinLLe-JIrTcfLiAjUHpkkD-ddpPZDw74WhNlbKQpMQHphWJJx1JCk_U5rJ8c6E11fSzRj94p3KRzwec_gea67wtqemeItF-w6gSk8ZBkQEho3Bzkd8Hkg4xgkHjqAc54duLx9cTTbpo51lI3FgDgEEjBAHcDg9Rnpjm3ounf2B8TL3SLZglvfW4uRGR8odSAePcN-g9K6210HQtIuf7TN79oaMloxhAqnBGTtABIBIyemTjFYmiRzav4yu9cmUiMx-TBkYO3IyfXB4A9QM96ANHXtHBicAMDISSVOcEcmua0-xeO4mTBXC9xkkD-XfvXb65LNFFK0eCVBwDnGeAOPbn86wrI4uPML7iRg44z60ARynBQAfwgZJwP88VJuGAeMd8GmXQAkTAHHJx25NSgLtB4CkYxQBDagfbSoBwGAOexwK13AIQLxwAfrzWRagC-K9Bv6gcHArXcAAgLjB_HvQBXESvPCWAGGJHA5IBx_OsnxakjtGMcAAjnrwK2clWjxwCSSTnGMGsPxVcmO9RGzhkGMDuABQBjwGQsWwQpYqDg4PAB6e1WtTslMQlDEKcYTGeP8emahsHWchSWBUnaM-uOfb_8AVW1d7TZEAZAJGc9T3z-tAHOW8RMRUOAQRnBI7fSipLC4WElQOSMsQepBI_xooAp3SIl0zQ7SgO4HHGM8fhxWed0uqI8vJMq8gY5JHarD7y5VlK7TyCc5IJpQmbqAngh1JGehzkZ9-KAPcICFtyMcEDBprbQ5Gc8cg0tuS1vk4JAH8qSYguMYzigDKvEY3AAUYDZ_Ssm9z5DErjkjmtuUk78kbh6Vzc4mfzlZiCSSvpigDmzg3Dy7gpXgBh1raQYjjV1AZUHAJJzgf_X_ADrFtrSe51YW2ThuWJPQZ7fhmulMAWVizKrAZAI569PwoAdYvFIuWARyOFyCce_61oJBGzld-w5GDtz2HSoEhiYq0eARjIBHH19KeAxnywYAjG7jGKAE1G0hlljUMWcDGD9O5_pWFd6Tp5u8snkzjhXTIb8CMED8a6SQK8m_aSFAwR7cislw82thMjYIy-Mck4wDQBBb6PLPIoupZnRSCN7EgD6Enn_Oa6nTESIiJRsVMEYI5Oe4x7Vn2xkCCQ5Jxzk9B9Kv6W58-QNg4GARwefb6UAalzGs4KlQT1yRwR6D8656a0SCVkVBz0JGD_P2rpRgq4wxIGAD0z64rGuFMmCSFIGTnJJPegDnr_jZgAAg8-hyT_WmW77jtA6HIB607VHCRpgA4bBHsf8A9VVLVyZDweBnI_D_AOvQBPA_-luTgEuSQR9OtbGcwgtyTjPGaw7cj7XLnA-bufYGt7JKxkYycAemT1NAEUxYKhUjKg8Z7YJrmfFrYuoSSNwjUknr0HH6V10sQ2gHAG04xxxgiuH8dSCPU4wBgKqjB6YwOaAMW2d3vgq5G7gEHpXTXl25sU5JB5OT1PTp-HSuQhn2SKwIBwOhzn1rXmuDNp5UMRkAggAj_PNAFMTyJIRtzwc4A65oqJXyhLEE8DBbGOvHQ0UAWnX5s55zj1z35pYQXuYycZLA8jBzxnH5VMyBQWBDAHJA9cf4VWtnH2uFg27LjcAehyRj6UAe2wACyDg8lQMH6VBO-CCOoB6VNAVFkMnOAP5VDOSq7woCkdRQBkMSZmkZiF5BB6A1kXTgMSXHzEkYPJArauCRHIpiIJ5VwM596569EZkO4t8nTA65oAztHM8uqzrE4RihwzDOMEehrdjsJWcG5vWZhk_IgAPtznA9-tYmkoBrkxVTtCYJB6ZNdGAThiAB2Gc5oAcYGQHynAU9HJyBgdcAe2MUrl1iYAxM4GRgEfiTz2poQiAgMwUAggcCo8Mcgc5wMYyfQUAPheZwN8CKmc5WTJ-uCB-VV7idLfWYgTgSxMqnryCM8-wq0nmCQKAy57YwK5Dx2jNc6SsMzROWkBIYgtkAY46igDrUJLIsYYBhknOf0q7o5jieYzykMOVDdCCfp-lVLfMcaErkkc-_FSAruQsuTxg9fb_P1oA1LW5K3bxlNoK5Uk5PXOcAfX8qhvJR5hCgg55BGR-FMYkSRys2ABgFQASc9-vHNSXUYIEoOVYc4GDk96AOZ1VCQwweDkkcDt_jVS0jIYE_ebAyTWlqSErnB7jHSqdsBhQMg5x1oAbbD_TpRkj5hkD6CtxcFYgRwrZI9cHNZFuP9KlbGMsMAjnPTNa0IBYAEAY4A6mgCad9zjgcAjr2xXBePoib5SQfmjB_QV3km0FQc7iME_nXG-Oj5t8owAVjAx3HT_EUAcUHYeWCDhVxkdK0hLm32oVIwCCT1rNMXzkkE7R0PQ-1Wo0cxAAZK98dOPT04oAmjiVnYM2G7lcjP5fU0U6CNmkK-aVOMkD5fSigCxM0sSBIvmLMcnpxgA5pbNSJIQUBHmAcd-eue_Jz-FWJY0L_ALzcGBJBXg4xjp3qIPCtxCCGG2QZA4HA_nmgD2uEAxbTzwP5VBeHYqKoBGe_Sp4zmJMAAlRx-FQ3MeWG7nHIAoAxWjeaVpDLtHKlc8dK5e9IgecB1YHqTzmuqzGu87dxBOQfpXLXEKSpO7AqQx474oAbozql5J8wIMeCcdDng1rM4RgY5CW6H0ye4rnNJB-3zAE4CEAE9eRWnI4QIoJJBz6_h9KANe0lRVCkkt0JxgGrGGLMMkDIIJOTWXASdhOBk5I61sI6GY_KR8o9hmgBlpKsVwzOGYkEcnt-Ncr4wSWbVNNmRFJjdjjgDAIODXUbQ84AAIBxisXxLKkUungKGaS7EPPoRg_XoKAN0wKIo0YcgAjHXpSOpxt3M3OASelXXjTgOSSBweuR9KhbETEsQT1475oACSIAwwcgEjPJFWY2D2xXqRyP8KqB9y5yCccKeePpmpreQGMrgLuGcY6Y7CgDJvo2JJY8AZwf6VSt0YKBzwSCR2rSvFUsrMpBAIHJz6-tUIgY3Y88HIyOvagBFYC5myQQGA4-mauQNsuY-Tg8ZxWci_vbgHgkg57cir6DLBsc8HpjGf8A9VAF-Rlfk8kZI_OuI8Zyg6gM8BlAPqOB_gK69nwSSTkdOn-e1cR4vJfVgoOCoHGOvGKAMD5CgUgZGSW5PpWlaRo0bBhnnAI6Gs_DNgE5ABBA4AHFa1ghMXABGAAfx45oAYYRnc77AMqOOvSitKWzZyGABJ5JooAydQlMd0CgySODjgA9s_nzTYVaQRSkciTIB6gcf1FToFeaQSncCOCQO2QP6frTo5RFIUCbiWAUk9OefxxQB7NAR9nRhwSgP44qNpQ5IxyOuaLUjyExk5QZJPtTZAr5U8E9cdaAMe4TEsjEggkYGOlc5dwStJMArNkZ4HQCuouUwrFmHtjvXO3LFJHbeQdpGAcD3FAGNpoQao42ksUIAz06VfYg8EqR39RWbpjH-3CCDjBAJ6cVpMMTMAc5OR1NAFqE4UAckDJ46-hrVspiYyhySQec81jRsRIrrkZAGen6VesmxICp55yT_OgDSjQPONwIUn5iQcjPcetYHjGREXSdsQZv7Rj5JwQCDzx6YHFdBnC5bI78d_wrnfGBfzNKRl-YXiHB9CD_AI0AdKjkRAMA2CRknJHNVp3KPhjuRgcEnOMdv0qxLF5WCDjjkjgZ71RdGAcL82CGXJ69jigCxEVOC3I6A44PH_66toFRCQcEcHPU1Tt8sRwQR68DHcGryKBEVPIxx6DtxQBl3ODGGIOAc5B_D-tVgOCSMEEE49P_ANdWpwDGQRjjn8OapuxABX7p7DpntQBHGm6-uABwApIPuBVxwEZgTgYAB_z06Vn6bIZNUuxnjC9T7f8A1qtzk7mBGCAPw6_40AWR84U5wD69-lcV40eNdVDJnBUZ-oArsUIKovOAuQSe3AriPGcmdUG5eABwPp_n86AMmOMtliGK8ngdPrWxpyM0YBO07gMcDjk4_X_OKw4JQIgNwBbIXJP-eK6HSc7kYuGAAGR0P50AbccYX5im4Hsxxg0VoW0e9dwxzzyfpRQByJhBt8gkkN1z04659v61TkRjeQoo4JyAByTyR_OtOLatoyBcsVBbkcYxge_X9KiEZgmt3GDtAc4HI6nj_PagD1WzBW2jDdQg_lTLhgGB3YIPNELs1rGxySYwSR34qKcEorEjk80AZN5cogJVizE845ArDAe4lkDKDgHknqTW3JGyysAi7MHIA5rI2souZApXaSc-g7frQBjWEZTVlRlO4FiDjqO_1rWnQlt2cEHr1rMsZ2l1mAjaWUMCN2DjFa1ywMhCqQcYxn2oAZEWwCSenPH9K2baJY4POdtxI6AdDWKSI488nJ6Z_CtGzuGnjCMc4GQQOvYZoA1YmV2BIJJBGMVzXihGN7pxUqQLkMQRk9COD6D-tdJDyuQSADkA9qxdfhL6vYKW5UEgcDOc9vagDoZAskRZlySAeQB9c1mzqABg4PYir28tEA2CRnPNVLrBgZtqgggnHUD1oAIHJUAkFhxx1I9frV23dslWyc5OSOlZyASmM8IwGeD1561owsdo3DJ6AgYNAFKcEF-gIzx7YrOBYRDCjIGRz6Vr3SBmkzj8Py_pWaqBFk5zgnnrnnrQBm6a5TXLsE9cdenQVozEgyMDgEA5-hHb6GsaFwmuXDA4BwAcHqFFakpxbswIIPHPXtk0ALbu_nxoSBlSST0xkcY_GuR8cFjq7ZIAUKMYwDkZrrrIJJcRluXVcjmuS8bqTrMuQRhEOc5zwBmgDAiVS6bipAY8Z5HGa6bSAGwAASCDjBPArmrcB2Vs4IPI9hjNdZpEStIhj3AAAMCfb_8AUMfWgDq7ZC0IG5RjHXiim2cohJy-PlA5OP8APSigDkZFMfMYP3CrAdzzz-ff2FSQJC88bOFCAqm0ZzjIyT-v50y3AMSkZACEHjp0Ax-RocBGSQAEA44OckduOnSgD1LYEKquAoXAFMkRgoJAx24pLN_Ns42Ay5QHk-1WhIRGA4BGOBigDBeNjcyYO3PHWuduWl-13ETMSCcnPANddOIDcFkBB7gniuS1AxDUXwWJI5A7HtQBiaaBH4ijCkHBYEEYHT9a6GdC0mQADjPWub04s_iVDllAZie-cCuokwXBPBJxwaAKDucELgBegx0_xrR05HMRcgBCCCc8_wCf8KoTc8KxOTkZFaOnkhNgAwBycHNAGvakhlU5AOMg85rJ1nP_AAkVjhTtAILE8DOcAe5wfyrat0AXKgk46kflWB4hZ01zSo0Bbe7HOem0d_wJoA2GJVAABg8jFRYbaXJ5x1Izn8KlcNgNjAwRnqPWow-FAPcccc-tAFcCKNwzkD_ZI4z7f5xV2MkxkRt3yAc8Zqu6biCuNwPGen-etLkxgsOOMjA7elAFmUZU46lckjpmsmciK4PAIPJxxkf5NalvKJY1KgYwRjr3_wD11m3ykOc8HHB6ZoA5l5MaxMpIBDEc8EgYH8hWmtyDYjdjJPp_n0rFu2KavIRk85HAOeAP8amWUG1KAgMoBweuCcn_AD70Aa2lSGSfJPCqeMc54zWF4yKNqcyk4HlKASehwa0tFlczOq5OUbAP1_xH61i-L2YavKgA-ZQRkY59c0AYFlKMgE5IBwQOT_nFdZo04VkGWwBk5xjJ_wD1CuOgRVjVsZCk5Oeo_wA5rf0kSKwdWG3oAQCD6YoA7SMlnATagC-nvRT9LhMkQJ2jg5785ooA5-MRSQF1XaMBeDx0yT9elVgpVgRkFSMn1yRRRQB6fpYzp8JOT-7Bz36U922IpJIzxRRQBm3ETPcBgxAPp1rn71YY744Rmkwdx7Z7GiigDA0_MevxsCDvLblHGODW3dTlGChhgDJBHXOKKKAIC5bZjjGOM9a1dMYZZTjJIyOgxRRQB0cO3aBtyAQcg859q53xLIqapo8-duZHUYzznA_XI_OiigDTYBosYPQdO1VnQh1-Y8DnB7UUUAPTJbBBOPTvU7LuRjjIAOOcGiigBli-xguCMEg5A5qrqkREm4kAEgAHnPrRRQBxGpuPtbsMZDkEk_jgn8aapYBSqkgxDg5IABzx-RH50UUAbOgBmm3YGAh-YHrzWX4xjaTWX2jBWJWGe4AB_wDrUUUAc_bxJuIJKgA844Pt7cGui0y0CSAFF2smAMHGcg5FFFAHV2rFYwoC9MjKnpRRRQB__9k=
https://data.labs.pdok.nl/rce/id/image/20324688 http://dbpedia.org/ontology/thumbnail _9j_4AAQSkZJRgABAQAAAQABAAD_2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL_2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL_wAARCADIAIIDASIAAhEBAxEB_8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL_8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4-Tl5ufo6erx8vP09fb3-Pn6_8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL_8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3-Pn6_9oADAMBAAIRAxEAPwD3FEwAD1pWjXHQVI4A5HWoyw6UAIqD0GRUyog5CgH1xUasM0_eMUAI6K4wyg_Wo1tYgPu5J9TT9-TTwc9KAK7QoGIAwDxTwFUBQOKkwST6U3ABoAjliVkwOD2qt5DknI4q8Oa5_wAT-IY9FtCiENdyDCJ6D1NAGL4u8QjTYGs7Vh9rcYJBzsB7_Wue8Eau1pfvYTufLuDuQt2f_wCuP1ArDmeS5neWZmeRzliTyc96fcSmWWGVVCvGoGV4JIyc_wAvyoA9cxTWqloOprq2lxzEgzKAsgHqO_49a0CtAEBFAXkE-tSFRnmpIkBOT0FADfLPoaKtbB6UUAX24UkmoAfmqVs4NNGDyRzQAxiAeKaXOetDkbqYTQAomHmlOdwUHOOMHI_pU6ydKy0nRtUnhAG-OKNmPf5i3f8A4DVwE0AWi9NDA1EDWNq3iKDSr6O3lzl03AgZA5xyaALmta1Do1kZXIaVuI0zyx_wryu-up768a5uD5kjnJOeB7D0q3qt_Lq2pPNI4ZQSEA6AVTRCJCGOADjpxQARPaxSFrmFnQA5VDg59RyOlS3UWlyRb7OabzMBvKlXsevPTjND20bxusrkIc4IGSPwqJ7aJFBjuUdVXBUghhn2P0oA0PDOo_2XqYVyRBOQrDPQ9j7V6QVyMgjB6Yrx58odygk9RXovhXVv7T0sRuf9IgG1gT1GODQBrkYpyvjtSsOTj1phGDQBLv8AeiouaKANQuCKj3EA4qEOxPFLlsZwcUAOYkmmcnik3c0uc96AOe06Yv411hQcqYYlA_3Qc_qxroxXH6BL5njDUW4xIJCPoGUD9K6q7uorG2aeZgqqMnPf2oAqa9qjaRpE11EqPOoHlRscBjkZHUdj_L1rzZr-bWZZL67eOOVl3LEWwxGOcDvUPiTXbrWdQUIMxB-ASCFUEEk-5yQPrmn2EELxQy3N15aALHwMlVCjnH-etADbdITDMZCQ-P3Z56_hUYDYwGIJA61PcW0UUzC2uVnjGMOBjI-lNiVsgEkL1JI7UASqG8vDEHtwKR4cZLKcEdaeXJbZEQoPcjJ_-tUU0k4IaZ_MQALuAwVwMD-VAEThcH0HtVrRNSbStWjmBJjYhXUdwf69D-FVZyQBggg9D61FII1t0K5EwYk8nHsaAPXg6SruRgwPORSEc1zfgiWafTrgzPI4DgrvJIAI7A9PwrpiDQA3FFPxRQAJOiziIn5ypYDHYED-oqwTkVyXizVrrRjBPabN5YKwdcgggk_qBWRB491EACW0t3HfGQf5mgD0Bl5z60ydjHbSuOCqE5-gJrkofHyEATaey-6Sg_oR_Wp5_G-nyWUyrDOJWQhVcDaSRjkjNAHN-FL9YvEcEjzjbIsvmEg9ME55HqB-VR-LPEc-q3H2W0BWEZwewHcn-grCjgFsW8rYbplAJycRqARk_mc9M_TObCWCz6cEtXkllZjuCqckg4P54NAEAt4YtOR4ZtzswJIPJ-Yd_ersSg2kZIOc9cegAqeLSEiso4bmZY9oAKINzcHp1wPz_CnSLFH5aQoyRjgZbJPueAM_QCgCvEuCxI4B6A9atZJUKOQeT_hUcIGCQAQDzxVhMCTAAwBnp7_zoAswQQOphLDfIvBzgA9h9c1XvLKeBSJ4yMA4AI5x1P0qSQw7WJA8zggY55681HJK11eqbp2CGPCkHABA6d-o_lQBQaIrHsyMHleenqKi2B5ER3CAsAXPQcjk1cn25jZCSucYPWqt2MxkgDBBHFAHpHhW3SDRUVZUlO4guhyCB0rZIwa5zwEAdClAGMTnjrjgV05Q0AR4op-2igDhPHLrJKsYIyrIcZ68NzXLrEDgj0rfvydTu577a6RqmcZAIUE8EEHnn8KxpLhIzkmEKThTIuCT2GaAGpbtKxWMAkEAD1JIGP1qaTS7yAgtAV5wC5AA9STSRkySoAEUlgFMRyWOQQAPTuT2HuQDYnS5KmBy4JwSHyDx7HmgCCOGytoJYipuJJANzZwM4OeepHOKkS4cRiKMLFHyAiDAI9-5_GqrQujEOxHepUQlgQSQKAJEQliSc47USg7o-gAJwKsJEAec5NR3AUGMA5O7GOlAC2ZwrEAHJPapG2-btUAEg4z9aZageUSOMseD9acQxugQMkqTx9aAElRngBXBkGcgHAxUAiLMglB2EjkAcHtmrCA5YFSMdhTST5ZOTgd8UAQXCAuEGQAfTt7VXuogISQQQKtS8mPJyeuc1FdAeQ5yOBQBueFdeXSAba5X_R5WDBx1UkAcjuOK9BjkjniWSJwyMMgg5BryLZmBDgnAGDWpouvXOkTbTuktiRujJ6e49DQB6ViistPFOklFPnNyPSigDymWdbmeTyw6BmBwGOADgkficn8aW6gD28YLkBW3YzyfYZ_nVK0LYG_gkAA9zg8Z-oIrWsoIk3TXBLuHICZyR6D2GcUAO0OwL3k0-zywWByuRjAB4PX0-pPNdNeWkA05btLtmugACPMyRzzg9ax4ZQ8QUgALkADjk5Ofrzj8BVOaV8RqowC2Rxzg0AaAub4glb6YqTxuCtx-INU77X20-VIp5Ecv3Nuhx6E8D9KtqcADPt7Vxnid2bX4QArAKvXHHNAHaLczlQfJs2BAPEJGR-DUzZJe3cNskEMchJwVY4PtyeKW3lP2ePGMbQP0qG4mZZ4njYqwYEEDkHNAE72ktjK0EwG8HJAORzz1qFiROpGeFIqxHcyzM7zyM7gkAtyeOlTJA9xMXjhdwFwSoyAaAKSSMC5GQDURci3Knvk1p_Y5gz7reQD12Gq5tmEDFoXBGeqmgClKcmIAdgDx7VFdoDA_GMDNW7hEiSNnBUDGSeAOKpX2oWsUDGOVHccgD5gSOe30oAtQQSvaoVQsAB0pkltcA5ETj8OlQ3uvaZcukqwvA4UBgo4JHcdKz5NegXlFuDj_AGiP60Aaeyb-63_fJorK_wCElH924_7-_wD1qKALUTxNexrtdcnchxgEYXBxj2PFW7VTJLMSSSZSSSeetZ1mf9NgYEABccdhhsfyq_Yjd5jYyTKRn8TQBYUqpKhg2CMbSCDwOQR1HHX3ps5AEBAxk8nHtT8AM6joHxj04qObpbjoQeKALSgEDBxnnr_9aqF_4XsdTvY72XU2hkVQCn2csOD6jrV5QOCMk1IBnqKADykhUIkgZFGA54BHrg1XlAaeJQRjI5z71l-KgRpqEMwO44wcdqzfCe5YZCzMx80HJJOOlAHSX88tjp9zcQpE8ykBBICVySOwIJ4z3qPw9qd5qcE5nWGJ1YbTBuXjnrknOeKh1tydLuF3hQXHJ6DkVW8Ms0UE4DhuRkg8d_6UAdSlyImKSXVwXUZYCQjA9etC6qvmrEtxc7nXcoM3UHoep9DXnfi-9uIL-MwyuhZQGw2Mj0NVVv4gHLGTdAoBOTnBOKAPQtW1kPpc5MskkZPlkFwec46Y_rXGtt-YADB6c4rmo7yU6oI1kbyXkyUPQ5710BcYOKAEYgKQDk-hHFViAepPPapGPOM_Wm5HX0oAbsHoKKdn_OKKANRbu7tLnIVJArZZSM8EnH8zW5pU9lPCC0jW7mTJyMqSSeB3FVobOe7BmW2IflSOhOMjn_Peon0S_WRJoIWR85IB4JoA35dNuI2kcJ5gMhAMZDAcDGcHIz71XktLl2hAtpjgnkRmq1omt2kUiokis7hmIYjP5Vau7zVnAjEtwgJAJLkdj05oAuJYXeM_Zpsf7hrkPEniWfRdQNsQE46OMEdPX61ufbdTRAGnmyc4XzjzjqaydT0q31W4W4vLOO4uCoyzuenvyKAOZu_Fg1KPyblwFJ42gZB9qbZa-mmLIiOMltwDDnHarOu6FY2ot5La0jgKgk7CTnPTqTTtF0aC9jE01rDN8wUmTqBj0_GgAbxKdVtJ7ZwpBAbI4III_wAa2_DJBtZSvcjnpk85_lVu10iHTj5lpY6coZQH3oCDg55BFXC8s2UtxaRqMcLGFye54HI5oA4vxmMalbjGQQKrGJGa7KgjcF_ABq6jWvCjamy3L30SOmAABkE1S1Cym0u0aUSQozZAZSGJODwAfYGgDjI3U6spA6ODz6YrolkycHtXIWyTTXjTyMW2tkkd-a6K3kZzkUAX-uDjikJwOhxTcsCOh9qUnjBzQA7cPX9KKblaKAO08L6sl5JNG42nIbGeeQB_MH866XCEYU5P1rza1uzaTRyQoqELgHkjtV2XxLqRbAuVUdchQf6UAd0yEkYOMg9Cev8AjWHq8WpowktWDquSQV56Vzra7qL4BvXyQORjAz7iom1e-wVa7kbkcgmgCGXUtSTAZ1V4ycAjoD2zVX7bqLoDvYk-g7Z6VbkuZXJYyMxOSSTn_PSkMvUKWPXJzigCrdtdT6ezXBYkHCk-lS6XpuoXdorWYcruOSGwAfrT7gySadMXlLKpJUHGFyBUNtaPfeXAuvPp6FQBCqg7jzk59f8ACgBmopq-nEi6L7CQAQ-QfyNZj6lcnK-c3UYOSMV1UVloWkwFLzXmnGcv5jA7j7gg1m3eqeBwSkazTSN0MOcg-2TigDEN3cOcNMxGPU0rTzOoDElSOh5qzNpccy-bYOzKRlUZgXH1Axn8KoeTdgMpjfC8HKkAfmKAAJEDnaAc9hjFIysDlWIx0xxTRIBkEnOfwp5KnBDcfWgB_wBquo8fMGwO4zT01GUYMkSse2DjFMY4YHaME_pTvlIJ6Ee1AE_9pr_zwP50VW2j1ooA2WJyGBwo4Hv-FRliELA4JPA9fwpuGUgoCCQCCACSP_retMZyCEAZieSO_vQBIJcggnAJOAO5wO341ds9Lvr6LfDGCnQknBrPwowQoBxznrxzz-dRRavfaVcma3nJUkgo4OGH07UAdInhvUT_AHE4xndSy-Hr6BQ4US8HIQ5Irlbnx5rIkKhmJIBOFOASAcDGOnSnaJ4l1nVNat4Z3mELP83UZA59aAN6ezuINLKso3MckdCB_jVP7LE8CSsAcHHB5B7c9utdD4gu4_KSFWUZALEn8hXMpcxI0kRlXaQTnPGf_rGgCCXTLCSRpHtQzdckZP61raXB4eEgikiWOboAQADj3xVFXdjgAs-B90Zz78VIdKmuUH-iSkg5BKnGPb079-9AG9LqnhzTfvPbKV7AAkfnWXefErSIQyQo056EAYFYU2gWxnP2m0IcHkSHt-PrUlrpOmiZVMSQ57sMj60ASSalaayvmNaRWhY5VkXOc-oH-FVJdGuFRnhAmB6GPnH4da6-28OWIVZC5kGMApgA_lWpbWVpbEGGBVI4DYyfzoA8ya3uo4g_lSFBnJZDgUxLklTlScDHB6V6vLLFEhMzRIncuQB-tctq8vhicmMgNMxwPs4wSf5GgDlvPH-TRVz-yIn-ZZE2nkZnjzj86KALDSsFZcgFfUdcd_T0qe202-njRxbOVkyc44x685qh5-7AALMDk4xz_hXY-Gb8XenrE4AkhJU_TjFAGOnhy_clmVQc5ALgAfgPqelTL4UncgvLApHbk5966onBIIwDQTkYNAHB6po39nygyhWBBKsBgEcf1q14aEUmvRAKuWJJ6-nT-tdRfW6Xdq0UyBlJGPUe4rndLK6HqxSdFHmMTFNj3ORQBF4js2GvzrvHl7VGO54rKgtogC7IGYHnJznFbmuzGfWZZdhwVXHIAyAB1JrKkkjiLM8kaqThhuyceowKAN3TvFdhcIkTMInHBBGPzH-FbSyiRQQxIPQg5FeTX9qnmu0Eu8KchhwRnpxWjoniC9sJkgnkEkfA3g8D2PvzQB6VJDDcx7JolkX0YZx9KxNR8LQ3KH7JII2AOEc8E5z1HNc_c_EEJuWJU4JAwCScfU1lzePb2WMGN2XJIwAByAP8aANQprWhOGUsY-CQSSpHsaq3_ifxFcsVt0WFT0xwT-XP5EVTi1me7UtLLI2AMgsSBVwSjaoUsRnGe2KAMKew1i7k3Xt84J9M5_U5qay0IW0nmmV3YnIJ9vp_nmt3JkUMSAAcMOo59KFAI3qxCk4GBx7nmgCn9k_2n_76P-FFWPtco43t-YooAikOxiyFQAcABeQB0z2yef8AJrQ8NX7Wmrlp2YJKh3E5I45zz7f4VUWeMRFCgZyQSNxHBHTHtWXKzITIoIYkkAZ9wKAPQZvFenLIQFkcAdVXGTn3xWbceO7OEnbCM9i8oU_kM157qBuJ5iFRgMKMcjOBjOPfGapf2VNKSz5GfbpQB3U_xBdyPKEIAP8ACpY_mSBWfPr9zqseDKSAd2NoXGT7ZrnItEmwmX4JwR71p2tmEDDcVx3J9-aALJLOS5ZndjnJPT-XXHpTzbkwlmAAOASSOOBx04_CrdvEpj-4pIIGM8nn_wCvUgjEjuu5SC2cdR0HrQBUSBZFjBViDzkDOfoe9R3Fovm8FVLKeoIBIHsME5B4AFXreBwdyHDR5XAOMjp_I0k6ANuUgkE5BPQd_wCdAGLJosAIf7xORtBIIx1GMU6DTVUiN4ioUkAquckjPOfpW-0Kzq4TLELkBAc5_Ciz0i7mYSLAFUvglztAIAzkn3zQBmQ2QQgsAoIyo6dO5q6AsSghxnHBwCCRnoe3_wBarcumWcU6G61K3jBTdiP5yORwPfmq9zdaRFGEt47q4lJ4LnCkZ6juD_KgBsaMY2TcGLAEk4BB6ke31GKuWelXckKPDauyAljlQFx1HzGov-EjngtyltaWtvhcfKm5umM5NZ1zqF5cqFnvJHwudhYhT2A44oA3ovD0vlJzb_dH_LYf40VymU_54v8An_8AXooAvqJTGQIYuCf4skA9xj0xj8KYEjSUFQrcYJOQPzP-BoooAJgouDgKobkjGVBz9OB3-tJsQxbAAzdz2B9MCiigBOpAIDKepORg446_54pYEXzHiYAAHJPcA9MD09_cUUUAaEdlJLEDFHJKSdo8pSeOxz0B-pqz_Yl6DMXEUaoASZnC8kHgY9gKKKAK9u-lW1w_2rUVKjokCknpjg8jt60y51fSYgVtNMuJyWDb5mxu9RgZoooAiHi3UVUxwrb2iAdI1BYAdOTk5rJuNTnnLuZ5JgWLFSxGSc5P1NFFAEUUpkAYxmMkZAB4x-FWnlbAYMAMYwTySaKKAIYovODBmcYySTgn271NJaKxwsoIGCAXzg_QgUUUAR7X_wCfmP8AMUUUUAf_2Q==
https://data.labs.pdok.nl/rce/id/image/20636163 http://dbpedia.org/ontology/thumbnail _9j_4AAQSkZJRgABAQAAAQABAAD_2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL_2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL_wAARCACFAMgDASIAAhEBAxEB_8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL_8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4-Tl5ufo6erx8vP09fb3-Pn6_8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL_8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3-Pn6_9oADAMBAAIRAxEAPwD1LXtJiS2-0pMIZywH-zk9AT_Wsmx8U3ViJLG72pPkFZZclc_7XsfUVxE_xCuNU0trK-G-QkbpVYgSBTkYHABz3796uX-ox6hp1hcxRkER4k5yUPZSfXr-dD0sNam9NIYbyXUruaJvlCxiLAGB2GOvOfzrmDGHkaRsEkk_SqQlLSccAVaEoVCxOABkmjd3C1h0pAQADLscKPU0kNv5KbSdxJyzepot1LMZnGGIwoP8K_4mrsaGRgoBNAFdisaF26Ac1XW4dnVxlFHbNa2t6DfWCQvcJiNgORyFb-6fQ1jbccDtUsD0yLU5NT0K3iEIeO5XypJFYDyyCN2QffkYqjoU872jWvntG9pNtli4IcZyOvTkEcVieGtVFqJbSVsI7Bkz2boR-I_lT59S-weIpZ0HyTBQ49Scc_p_OkBpWKw2-v39lIild3nwEj7ufT8D-lY_ioBta84f8tUBP1HB_kKh1TUGOqpcwSDKqB8v6iqd_ere3CyBSpA2896B2C0iE95DF2dwD9M810_iCZxDJIk8m24ZUEIPynA649cAD8a5a2lEVwkmSNp61fmvzdahb5-aOJgV9-mf5UnuFjqHv7jSNOltkiXyYoABKW_jKgEY9cng_WodCg0z-z2iv1RriQCUKT8wTBwR37ZNZOqX4uzDZBsBpA0h_l_M1e1a9SGwZIVHnTgQpjqF4yPyAFAalfTNFfWftMsEqxRI-2Pfzu__AFDk1kXsRsrxraV13p1weK6G3WPSrcztNJ5UMIDQhsLJJzgn8Sfwx6Vx9xI9xM8sjbnZizE9yeTQgLJPHWhR371SjLRFsAkHtV8KwVGZGXcAwyMZFMLHP6pZfZpfNjH7pz_3yfSqIkdSMOfpmurmjSaJo5BlGGDXK3Vs9pcNE_I6qcdR60xj1uZB1IPrkVPFOZGCFcE-lUlPvVqyGZ846A0AXwMCipQmeaKQHnUd432eSIyrtByEI579DXQW81za6faXdqA5EY3qDnIyeGHf2NcjFL-7lUSAZOdmOT15zWotxJClu8Mm1xbp368HqKqSvoSnbU723mt7yKG5UFlmAfg_MfUfnxn2qPbvmGT8iHJX1Nc5pmrzPeR2yRgecEO1CQCzHsOxJ9Dj1rfd4-VAZJVJ3K4Kt07jtg5ya5W5035G6UZI0o2LYHrXUaZpGowWserW8asyOCkbDJb6Dv8A5xWJ4a01tbvDbhxC4QuCw4I9K6U6ze2cQ0jUpRbrkLHcqnGFbpnt0xnt3FdMZJq6MWrGhb6wNdnlDxw-QYfLlt3OWJyefoP84rj9d0kabdZibdBITsz95fY-v1ro9b0_zW_tG0fyr1Bu3x9JfwHc-veucvNbmvbRop41Lk9QOAPp60CRkBijbgcEcinPLJMxaRizepppHzZOaBSKA-uaTvTsZFJ26UAO_GnAkU0dadQA5ZSrh-rDnmpTcM9ykrtjbjHtiq54ooAu31-9wgiDfuwc_U4qkBk0nO7mrtg0SXCySNtCcijYDV0_SRaWhv72BpXGPKt1GSefvMPQdcdT-la_iS8srxIbWCNnvEOMqMbB6Edz_Ks0apd6gwttOUx5-_M3BUf0_nWrpUEWiXJkMO6FI9z3TkD5s-h6DHQDJ657UhXsck8bRsVcFWBwQRgiql_ZrewbeBIvKH39K6HV5Bq13PeWVpIsKAGR8Egn1PpWP04NMZyG0pIVYEMDgj0NX9OQmRz7AVb1Wy3j7TGPmH3wO49abpKZjckdWxQBdCYHSipioVSxIAAzk0UCPF4n_cSL5nf7m32POf6e9bQ8qSG2R4wSYUw6HDA7fyNYMD_6PIDI3f5NvHQ85_z1rajliSG3LpJvWFTlSCD8vGQen505CEtDINYjQJJNho1CR_fIA3Y_nXSJeLM5WN2lAPMU5xKn41yYmMGpGUsyhWU7lYgj93jORyK0vtXnqGZxOM5_e8kfSReR-NCV9RtnY6VrdxptyJbSRg6MGZG4f_6_Wunt_Fmgzae9vqBuWkmdpJFaNpCGP8QYDg_lXmC3bKgDMSv8InP_AKDIOPzp9xNHNEfOUlgpG2UgFh3AbofwOanlt8I733O9s9cCtImnXck9nFIU2yIVwcZxgjIqvJI0sjO5yzHJNcJHqB0m7BgJKEbtrfxgdQ3qcdG68YOa7WN1kjSRCSjqHU46g9KE7q4WtoSZowfWgc8UZxTAPpRSZpryRxrukcIOmTnr2HFAEg6U7GajZ1jRndtqryWPanBwRwfccUDHY5o-lIG9BThjHWgACgDNHQ4oyTzSHnmgDVsdTjsIfkiJcn5ucfrWnb21zq7LcX8mIRzHbocA_X0_n9K5mN9sikqCB2Per7a1dBWjhPlg8Ar94D60ibHVjV49D0v7HeLA8hDfuYznOScEjGF49Sfxri2lWVyyrtUk4X09qgYMzEsTk8n1NSQD5iKYyYAsvQVWtYUiaZFGAsh_kKugcdKghGJ7j_rp_wCyigDM8S3RtNFl2th5iIgfr1_TNFZHjOcvdWlqCflUufqTgfoDRUvcpbHnMT5tnXe_BJ2bfl-ufWtOZkEcY6P5KYJ7fIO9ZUD5tHG9zjPy7fl-ufWt1ZpQkMJkJj8lRtIBGPLz36VbdmZvYm0eQR66CAW2QpuC4J-6fXj860Ne0uFLZLuFFhmWQqfKj2FgecfUD-dZVgAdfkB2YadlYPnaR5Z4OOa27-8gOmT7HVkjAbCswwAwCnaevBOSK55NqomvI1STiznZZbq1mYJJ5sZAZXxy6kcH_wDXmmm_Cff4XG4xAfKfQkdPyAq2lndSJvSDMZz8qkHrzkDPQ_l9DWfe6XK5UFJIyDkFkPy-v1Fb7qzM9BTPLqd8iRLl3-VF9Se9enQR-TbwxAcRxqmc9SBivP8ASb5NDWUiO3nlY_60gggYHFdVoevRaqHV_LjmDfLGCcsuOvNGmyA289qXOenQCoycDjNVtQuY7TTLmeX7ixHIzjPHT8aCij4j18aRbCOAq105BEZ5AUHknuB_OuK1PV9R1GSGK9lliil2uFCFUCHo2BywrNVzdNP5807zttKYG4uSeQxPPTp74rrtJ0a3s2tLq7aO5sZ8RONxKwSfwhvX0OehPSqtYnc5cRyTTGGa5KwjdtkIdg-MlQF6jPH0zWpYa_rMFs4geSWGPCt5qbhHngAN19cD2rdFhYN_ZkdzDCsVmLh7klAMiM7Bu7nkg1i63pMNhFFdhzDJcsXWyJLMi9VJ78d89CeOlG4zutM1CLUrKOZJC7YCyBsbg3oavqe1eV2OpRwa5byuZPJXYJw-Bk_xdOoB5H0r1Mc47j1FIY_oOlGMmlAp1IBm0Egmpo4WkYKoyTTAMGrVrM8UihE3-3rQBbg0G7mG4tBGvUl5OlS3ugTaZax3LyJIkpwpRWA6Z7jmpjHd3sYX-ynYKQwO_uDkHpyKua1e6jdWcIvLWKKMNlChyWOPqaEIwAOKrxDE9x_vj_0EVaFQRD_SZx6uP_QRQI4LxLNu1-5kOMQjA5_ur_jRVDXZd76pLjO53HT_AGsUVFrmmxycB_0RwHkwOSpHydu_rW4MGSL2hU_-QqxIGxYuoeTGOhHydR0PrWzGQXQE9IP_AGlWkjIfBhdbmwRlblyAR1IQ8VaIxHeo8-5TA2Fbt90g8D8PSq9sGfxFKikgm5bkf7lXLyxng06-YOpLR7RhWwcFQTnAA6fzrCT1t6Gq2IdFuDstl86zjzC6lpl6KGGAeepzwcZ61sLdsnS4sOfSVlrFs7q0tYVTcWKp5YHlA9DksPc-_b0qdtZgjwRE57D5VXP6VsjO5cuZopELtJZ7x3FwOfrleRWBcSCCaG6tiI2I3jYcgMO4PpVu71WQ5QRMo7gOMn24FR6Xplxrt0rlfLtUO1m7Af3R74pbu6Dod_DKZoYpCNpdA2PTIzWD40hEmlwuXkG2ZUwG4IOe31rocAbVAwAABj0rI8T6dJqOkP5buGhHmiMdHIz198ZqijjfD8clzqlutqbWCWEkmSQFhJtywLc9sAcY6CtrxDqMltZysGsvtM37uV7SYsHHfehGPoc5Fc9ps0wnikaEXdtZAERuFxtZvu475JPrziuunt31jTLdJvKQ3Zxa2lvwkfq7kfeKjtwAcDrTYjhIdQu4ZhcLOzuWDHc27JBzyD15wea7-wnjlRwl1pkE064me4laWdsjndnb69OlYVp4VRp42SQTK5mMKMMCQxsBtPoGGenStm_voIdE8_EN7ayKyRw3QBmgcD7uT94KeoPI9TQwOGuHEZ8jZEzbw_nj75GMAZz07_WvVdCjEWi2YUsd0KMSWJySB615oILmQpp-0BZZEkjiyGwWAAOR3xj9a9R0-zFhZRWoleRY1CgueeBjj2pMC-KXHrTRTu9IYDrVy0uPJkA8vf8ATrVIHmr1ncpAw3qSM9aGI05LmG4jVHtrtcMG-VRg4OcH2q7rGr_2lawxfZ5IvLbPzE-mMAdqrPqVtPB5cd_NbvkchG7HpwfwrQ1bVbK70aKCI_vVkBICYGMHocA96LCOc4FQRD_Sp_8AeX_0EVOOtV4uLuf_AHl_9BFAHnOt2cYs7-UO_wB8nHb79FQa00uzUoyxyrtxn0ailco5OM_6ERmX6EfJ_D0PrWxcXUyx-WZpfLWJRt3fLjaOKyVBFkcmTGehHy9unvWxLbkqWMsI-QEJncx-Udu3TvVO3Uh3sWNLiM2r3ahWeR4wy7W2ney9j2rZ1hiumEgbg8ijmQsTjoPTOePauZtrm5ttVma2ZPMLlAXGQFC__Xq5c29zcGOTUbs5GduQE6-mRuP4LWPs25qRpzWVihMyRSsGILISp29M98ew6e9U7qOa5I2htuMk_wB729hXQR2FtEoYwkHs052BvoDlj-AFPmtTFEbgqke3ndINit_wHlm_HFa6ogzdBvYLGf7Pf2kLws3zM65aPtn3HqK9Ftra3tlK28SRqxyQgwCfWvONTVWuV2JjKEke3-cV6LYq8dlbpJw6xqG-oAzSTurhazsWiARimkFgVORkYpw5FKR3FMo838QeGDpSm5iLSWxP-sP3o29D7e9V9P1W406eKS3aSNpYxG7SkFSxPJB7DOD1zxXpc0EVzC0U0YdDjKsOODWJqnhWyu4HNpH9nmPPyHCn6jp-VO4jnLfX7qKO00-BrbzbJnKTO3ythWzyTgg5OPU4rO89LqSW4vVmjM7q29SMMf4sjHfjpwMd66d_AkIhbZeOXAO390Ov51s6f4b0-0jDSQiecj55Jfn59uwFDAydB8OF2i1KYmBlwbeNR0Hq2fUf412AHoKYkYjRVQYUDAGamUcc0tgDFGcUv1pOlAADyMVes5oUdfMAznrjpVIDLUuCDQB0-2K8hC297ZxyZH3yBn25FaGtwWCaPFJBDDHNvAOzk9D3BINcSD0qaFiW_CgRa6moE5up_wDgP_oNTA8VAn_H1P8A8B_lQB574gtyNX1KED_WFiPxGaK0PF8Ai1uKY5CzRA591OP5YoqG7Gi1R58ATZYzJwTwfu_w9Pety5t3R5JHUJGIzguQN3yY4HU1klQLPGXzzwR8o6dD6-v4VoX277RMFHzcAevQVo1czZHYgjWmbZIw84hhFJsc7lwMGuhFn9mkO4LbOeqQ_vJT9Xasmzt86qHmin8oPEwMY-bKrxj3zW4qTMzAKYEzyBy5Puf_ANdZOajuVyt7DUEVsxKx7ZG998jfUmi40XXdRtxPZWD-Q2cNlclh35OT_Kus8GaBZandyCdyiQKZZHYgBl-p78g1sjUJpUGn6aDczktmcqFCLnjjp06ngegoUnLXoDXLoec6N4dliuHn1KJlnjcbVYg7iB944J6HOB0FdIBgc8Vt3GjLaQtLPNvbGWPbPrWMwFWhCjnAp4P1qIH3pwJJz-FMB-KMHoKB70uaABetSbc0xTUgPtQAoAxQGIozRn3pAO6cik6nim556VLGjO2EGTTAcgwA5XKg_ga37TT7S_tt5GCeGKn5lNRaJNA8n2W5CKTjZuGMn8e9aNzpItbjzdOnSG4IOYHYASAdQM_yotclsxr_AEK5so_OUCa27Sp0H19KoRIVY_kK6_StS0_T7KdJ7Zo7sFmZSDlyT0z1H49q5uVxJM74A3HOFGAKBkYJ9KgQ_wCmT_8AAP5VNJIkMTSOcIoyazbK7NxcTuRjJGB6CgCh4wtfO02K5ABaCTBPs3H88UVsXkCXtnNbP0kQqf6H86KTQ00ePOh-yoP3n8XUfL26f59K3L7ZDdOtvF5k7vgE9j6f559cdKWXR7uOOD7TCyNIwCQlSGwf4sdcV0S6fFYys8mZ7kjDk8DHovpTmC1KtrZC1iwELTAAyOOXZh6-g9PTFTKr3JwzfIv8_pU0cm8ZXGTnPGTyMU0IbaXzSSYzgSA849GrBUW3eRbqaWRdg4TyxwmMYFd61notr4btLi3upYVLK8jK_wA7kdVK9D3AHvn3riFQcFa1dG-xHUrcairPahuVBwPx9umfatzMusk-uyB2U21gpyiZ-Z_c-v16elZuqW8FmViRSG65rpddudN0S-uJrVEkupVUJGv3EA_iI6d-B6DsK4uaaSeZ5pXLuxyWNKwIibg8U5eFPapLeF7h9qAnHX2qR4Skhj-81AyIE4ozinmMqMEEEdjTMHNADxThwOlIik4qdIiSAByfWgCMUuwmrCwMsyxsDk46e9Wbmya12SEYjJAz6GmBnlMdantZzbzKx5Tv7ite-0v_AER54xlowGOO696xCPejcR1dxbWF1pf2mR9sajPnJ1X_AD6VLpt6bC-g_tJDIqqUjudp3bDg4OR2wO2R0rm7DUHsZGXAeGUbZYn-64_x9DXRaxr0V_ptvbQIRj5nLDkH0H9aBWKmv6gmo6g0kSqI1-VSBgsPU1kj65pc1lavfC3i8iIgSuOcfwj_AOvTAp6rffaJfJjP7pD1Hc-tM02TFww9VrMBJq3ZttukP1FBR0Aeiq6uKKRJv-MtPgHiXT4Npx5zzlsncxIAwT6DHFc_FaC-nYlgpYkdM4AoopRfur-urB7sz7i0FnfCNW3BuemPSp9g2kYoopjI7JirSwdVj27fYEdPwq6ODkUUUCILvO4Pnr2qDdyB60UUhnZ6JZRDSopOrTMxJ-h2ioNPtYrq_u5ZBzG-FHp1H9KKKlbkvqVLm2V9W8o_dJHGKp30SQ3JVFxgZooqikOsY993EueCf6VrXdrHDe2rDnzGww_ED-tFFIZa1a3SOySdeHjfj8j_AIVeuoUudOuUZcYh80H0IAP9aKKaJZV0W4nkmjDsjW6osLxFeX3EjOc8Y2-lYN7AtrfTwKcrHIyAnuAaKKbEiKJBIzE9FPQVaXAAoopFMSd_JheQDJVc4rjZXeaRpZG3OxyTRRTBAo_SpYOJkI67hRRQBrA9aKKKAP_Z
"""

def decode_base64_image(encoded_str,log_note='pass values to decode_base_64_jpg'): # - to +, _ to /
    """
    encoded_str: url safe base 64 jpg string --> image bytes string
    """
    try:
        image_bytes = base64.urlsafe_b64decode(encoded_str)
        image_obj = Image.open(BytesIO(image_bytes))
        image = Image.open(BytesIO(image_bytes)).convert('RGB')
        return image, image_obj
    except Exception as e:
        logging.error(f"{e} error encoding image at {log_note}")
        print(e)
        return None, None

def save_bytes_to_jpg(image_bytes, item_num=0,folder='Downloads/ml4g/image_data/',name='Fred'):
    filename = f'{folder}{name}_{item_num}.jpg'
    with open(filename, 'wb') as img_file:
        img_file.write(image_bytes)


image, image_bytes = encode_image(byte_str, transform_temp)
save_img(image_bytes)
image

AttributeError: 'Tensor' object has no attribute 'save'

In [11]:
image

tensor([[[-0.1895, -0.0327, -0.1373,  ...,  1.0784,  1.1307,  0.7908],
         [ 0.0850,  0.1373, -0.0327,  ...,  1.0784,  1.1307,  0.8562],
         [ 0.0065,  0.0196, -0.0850,  ...,  0.9216,  0.9739,  0.8170],
         ...,
         [ 0.0327,  0.2549,  0.1503,  ...,  0.6993,  0.7516,  0.8301],
         [-0.0196,  0.2157,  0.1111,  ...,  0.5686,  0.6078,  0.7386],
         [-0.1242,  0.0719, -0.0065,  ...,  0.6471,  0.6732,  0.8301]],

        [[-0.1895, -0.0327, -0.1373,  ...,  1.0784,  1.1307,  0.7908],
         [ 0.0850,  0.1373, -0.0327,  ...,  1.0784,  1.1307,  0.8562],
         [ 0.0065,  0.0196, -0.0850,  ...,  0.9216,  0.9739,  0.8170],
         ...,
         [ 0.0327,  0.2549,  0.1503,  ...,  0.6993,  0.7516,  0.8301],
         [-0.0196,  0.2157,  0.1111,  ...,  0.5686,  0.6078,  0.7386],
         [-0.1242,  0.0719, -0.0065,  ...,  0.6471,  0.6732,  0.8301]],

        [[-0.1895, -0.0327, -0.1373,  ...,  1.0784,  1.1307,  0.7908],
         [ 0.0850,  0.1373, -0.0327,  ...,  1

# Convert raw values to consistent feature vectors for the encoders:

In [13]:
# feature_dict = defaultdict(lambda: {'text': [], 'image': [], 'num': [], 'spatial': [], 'temporal': []})

# for s, p, o in graph:
#     try:
#         edge_type = inv_edge_map[p.identifier]
#         node_id = inv_node_map[s.identifier]
#         feature_id = inv_node_map[o.identifier]
#     except KeyError:
#         logging.error(f'Error in edge map creation: no matching key for triple {s.identifier}, {p.identifier}, {o.identifier} in node/edge maps' )
    
#     if edge_map[edge_type] == 'text':
#         feature_dict[node_id]['text'].append(feature_id)
#     elif edge_map[edge_type] == 'image':
#         feature_dict[node_id]['image'].append(feature_id)
#     elif edge_map[edge_type] == 'num':
#         feature_dict[node_id]['num'].append(feature_id)
#     elif edge_map[edge_type] == 'spatial':
#         feature_dict[node_id]['spatial'].append(feature_id)
#     elif edge_map[edge_type] == 'temporal':
#         feature_dict[node_id]['temporal'].append(feature_id)

In [None]:
# inv_edge_map

In [None]:
# spatial_dataset.max_len

# 

images.memorix.nlrcedownloadfullsize
bag.basisregistraties.overheid.nlbagidgeometry

In [14]:
from torch.utils.data import DataLoader

def collate_spatial(batch):
   padded = nn.utils.rnn.pad_sequence(batch, batch_first=True)
   padded = padded.permute(0, 2, 1)
   return padded

def collate_text(batch, min_size=12):
    if batch[0].size(0) < min_size:
        pad_num = min_size - batch[0].size(0)
        pads = torch.zeros(pad_num, dtype=torch.long, device=device)
        batch[0] = torch.cat([batch[0], pads], dim=0)
    return nn.utils.rnn.pad_sequence(batch, batch_first=True)

batch_size = 32

spatial_dataloader = DataLoader(spatial_dataset, batch_size=batch_size, collate_fn=collate_spatial)
text_dataloader_s = DataLoader(text_dataset_s, batch_size=batch_size, collate_fn=collate_text)
text_dataloader_m = DataLoader(text_dataset_m, batch_size=batch_size, collate_fn=collate_text)
text_dataloader_l = DataLoader(text_dataset_l, batch_size=batch_size, collate_fn=collate_text)
image_dataloader = DataLoader(image_dataset, batch_size=batch_size)


# edges --> adjacency matrix

In [None]:
# maxl = 0
# for string in string_set_l:
#     if len(string) > maxl:
#         maxl = len(string)
#         print(len(string))

In [None]:
# from typing import defaultdict
# adjacency = defaultdict(list)
# inv_edge_map

# for s, p, o in graph:
#     edge_id = inv_edge_map[p.identifier]
#     s_id = inv_node_map[s.identifier]
#     o_id = inv_node_map[o.identifier]
#     adjacency[edge_id].append([s_id, o_id])


# Set up encoders

In [108]:
import torch.functional as f
import torch.nn as nn

# ]1] temporal conv
# Layer Filters Kernel Padding Pool
# 1 64 7 3 max(2/2)
# 2 64 7 3 max(2/2)
# 3 64 7 3 -
# 4 64 7 2 max(·)
# Layer Dimensions
# 5 512
# 6 128
# 7 128
class TextEncoder(nn.Module):
    def __init__(self, embed_dim, vocab_size=102, dropout=0.4, size_type='medium'): #filters = 64
        super(TextEncoder, self).__init__()
        self.size_type = size_type.lower()
        self.mlp_hidden_dim = 1024 if self.size_type == 'large' else 512 if self.size_type == 'medium' else 256
        self.tcnn_hidden_dim = 128 if self.size_type == 'large' else 64 if self.size_type == 'medium' else 32

        
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.conv1 = nn.Conv1d(
            embed_dim, self.tcnn_hidden_dim , kernel_size=7, padding=3)
        
        self.norm1 = nn.BatchNorm1d(self.tcnn_hidden_dim )
        self.drop1 = nn.Dropout(dropout)
        self.pool1 = nn.MaxPool1d(2,2)
        self.conv2 = nn.Conv1d(
            self.tcnn_hidden_dim , self.tcnn_hidden_dim , kernel_size=7, padding=3)

        self.norm2 = nn.BatchNorm1d(self.tcnn_hidden_dim )
        self.drop2 = nn.Dropout(dropout)
        self.pool2 = nn.MaxPool1d(2,2)
        self.conv3 = nn.Conv1d(
            self.tcnn_hidden_dim , self.tcnn_hidden_dim , kernel_size=7, padding=3)

        self.norm3 = nn.BatchNorm1d(self.tcnn_hidden_dim )
        self.drop3 = nn.Dropout(dropout)
        self.conv4 = nn.Conv1d(
            self.tcnn_hidden_dim , self.tcnn_hidden_dim , kernel_size=7, padding=2)
        
        self.norm4 = nn.BatchNorm1d(self.tcnn_hidden_dim )
        self.drop4 = nn.Dropout(dropout)
        self.pool4 = nn.AdaptiveMaxPool1d(1)

        self.lin1 = nn.Linear(self.tcnn_hidden_dim, self.mlp_hidden_dim)
        self.lin2 = nn.Linear(self.mlp_hidden_dim, embed_dim)
        self.lin3 = nn.Linear(embed_dim , embed_dim)
        
        self.act = nn.ReLU()

    def forward(self, text):
        embedded = self.embedding(text).permute(0, 2, 1)
        residual = embedded
        #conv
        
        x = self.conv1(embedded)
        x = self.norm1(x)
        x = self.act(x)
        x = self.drop1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.norm2(x)
        x = self.act(x)
        x = self.drop2(x)
        x = self.pool2(x)
        x = self.conv3(x)
        x = self.norm3(x)
        x = self.act(x)
        x = self.drop3(x)
        x = self.conv4(x)
        x = self.norm4(x)
        x = self.act(x)
        x = self.drop4(x)
        # if residual.size(1) != x.size(1):
        #   residual = nn.utils.rnn.pad_sequence([residual, x], batch_first=True)
        x = self.pool4(x)
        x = x.view(x.size(0), -1)
        #ffnn
        x = self.lin1(x)
        x = self.lin2(x)
        # x += residual
        x = self.lin3(x)
        
        
            
        return x


# [2] MobileNet 
# Type / Stride Filter Shape Input Size
# Conv / s2 3 × 3 × 3 × 32 224 × 224 × 3
# Conv dw / s1 3 × 3 × 32 dw 112 × 112 × 32
# Conv / s1 1 × 1 × 32 × 64 112 × 112 × 32
# Conv dw / s2 3 × 3 × 64 dw 112 × 112 × 64
# Conv / s1 1 × 1 × 64 × 128 56 × 56 × 64
# Conv dw / s1 3 × 3 × 128 dw 56 × 56 × 128
# Conv / s1 1 × 1 × 128 × 128 56 × 56 × 128
# Conv dw / s2 3 × 3 × 128 dw 56 × 56 × 128
# Conv / s1 1 × 1 × 128 × 256 28 × 28 × 128
# Conv dw / s1 3 × 3 × 256 dw 28 × 28 × 256
# Conv / s1 1 × 1 × 256 × 256 28 × 28 × 256
# Conv dw / s2 3 × 3 × 256 dw 28 × 28 × 256
# Conv / s1 1 × 1 × 256 × 512 14 × 14 × 256
# 5×
# Conv dw / s1 3 × 3 × 512 dw 14 × 14 × 512
# Conv / s1 1 × 1 × 512 × 512 14 × 14 × 512
# Conv dw / s2 3 × 3 × 512 dw 14 × 14 × 512
# Conv / s1 1 × 1 × 512 × 1024 7 × 7 × 512
# Conv dw / s2 3 × 3 × 1024 dw 7 × 7 × 1024
# Conv / s1 1 × 1 × 1024 × 1024 7 × 7 × 1024
# Avg Pool / s1 Pool 7 × 7 7 × 7 × 1024
# FC / s1 1024 × 1000 1 × 1 × 1024
# Softmax / s1 Classifier 1 × 1 × 1000
# Table 2. Resource Per Layer Type
# Type Mult-Adds Parameters
# Conv 1 × 1 94.86% 74.59%
# Conv DW 3 × 3 3.06% 1.06%
# Conv 3 × 3 1.19% 0.02%
# Fully Connected 0.18% 24.33%

#idea: pass identity vector with batches to encoders (don't process, just pass back), use that to map embeddings to nodes later
class ImageEncoder(nn.Sequential):
    def __init__(self, embed_dim):
        super(ImageEncoder, self).__init__()
        

        self.image = MobileBlock(3, 32, normal_conv=True, stride=1, alpha=0.5)
        self.image2 = MobileBlock(32, 64, stride=1, alpha=0.5)
        self.image3 = MobileBlock(64, 128, stride=2, alpha=0.5)
        self.image4 = MobileBlock(128, 256, stride=1, alpha=0.5)
        self.image5 = MobileBlock(256, 256, stride=2, alpha=0.5)
        self.image6 = MobileBlock(256, 512, stride=1, alpha=0.5)
        self.image7 = MobileBlock(512, 512, stride=2, alpha=0.5)
        self.image8 = MobileBlock(512, 1024, stride=1, alpha=0.5)
        self.middle_conv = nn.Sequential(*[MobileBlock(
                1024, 1024, stride=1, alpha=0.5) for i in range(5)])
        
        self.image9 = MobileBlock(1024, 1024, stride=1, alpha=0.5)
        
        self.image10 = MobileBlock(1024, 2048, stride=2, alpha=0.5)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.lin1 = nn.Linear(1024,embed_dim)
        self.norm = nn.BatchNorm1d(embed_dim)
        self.act = nn.ReLU()
        # middle_conv = [MobileBlock() for i in range(5)]


    def forward(self, batch):
        x = self.image(batch)
        x = self.image2(x)
        x = self.image3(x)
        x = self.image4(x)
        x = self.image5(x)
        x = self.image6(x)
        x = self.image7(x)
        x = self.image8(x)
        x = self.middle_conv(x)
        x = self.image9(x)
        x = self.image10(x)
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        x = self.lin1(x)
        x = self.norm(x)
        x = self.act(x)
        return x

        

class MobileBlock(nn.Module):
    def __init__(self, in_channels, embedding_size, stride=1, normal_conv=False, alpha=0.5, dilation=1):
        super(MobileBlock, self).__init__()
        old_in_channels = in_channels
        in_channels = int(alpha * in_channels)
        embedding_size = int(alpha * embedding_size)
        # depthwise conv
        self.normal_conv = normal_conv
        if self.normal_conv:
            # in_channels = in_channels//2
            pass
        # 3x3x3x32
        self.conv = nn.Conv2d(old_in_channels, in_channels, 3, padding=1, stride=2, dilation=1)
        self.depthwise = nn.Conv2d(in_channels, in_channels, 3, stride=stride,
                                   padding=1, groups=in_channels)
        self.norm1 = nn.BatchNorm2d(in_channels)
        
        # pointwise conv
        self.pointwise = nn.Conv2d(in_channels, embedding_size, 1, stride=1, padding=0)
        self.norm2 = nn.BatchNorm2d(embedding_size)
        self.act = nn.ReLU(inplace=True)
        
    def forward(self, x):
        # print(self.conv.kernel_size)
        if self.normal_conv:
            x = self.conv(x)
        # print(x.size())
        x = self.depthwise(x)
        # print(x.size())
        x = self.norm1(x)
        x = self.act(x)
        
        x = self.pointwise(x)
        # print(x.size())
        x = self.norm2(x)
        return self.act(x)




# [3] temporal conv
# layer filters kernel padding pool
# 1 16 5 2 max(3/3)
# 2 32 5 2 -
# 3 64 5 2 avg(·)
# layer dimensions
# 4 512
# 5 128
# 6 128
class SpatialEncoder(nn.Module):
    def __init__(self, input_dim, embed_dim=128, dropout=0.2):
        super(SpatialEncoder, self).__init__()
        
        # temp cnn
        self.conv1 = nn.Conv1d(in_channels=input_dim, out_channels=16, kernel_size=5, padding=2)
        self.pool1 = nn.MaxPool1d(kernel_size=3, stride=3, ceil_mode=True)
        self.norm1 = nn.BatchNorm1d(16)
        self.drop1 = nn.Dropout(dropout)
        
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=5, padding=2)
        self.norm2 = nn.BatchNorm1d(32)
        self.drop2 = nn.Dropout(dropout)
        
        self.conv3 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=5, padding=2)
        self.pool3 = nn.AdaptiveAvgPool1d(1)
        self.norm3 = nn.BatchNorm1d(64)
        self.drop3 = nn.Dropout(dropout)
        
        # dense
        self.lin1 = nn.Linear(64, 512)
        self.lin2 = nn.Linear(512, 128)
        self.lin3 = nn.Linear(128, embed_dim)
        self.act = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, spatial):
        # x = self.pad(spatial, batch_first=True)
        x = self.conv1(spatial)
        x = self.pool1(x)
        x = self.norm1(x)
        x = self.act(x)
        x = self.drop1(x)

        
        x = self.conv2(x)
        x = self.norm2(x)
        x = self.act(x)
        x = self.drop2(x)
        
        x = self.conv3(x)
        x = self.pool3(x)
        x = self.norm3(x)
        x = self.act(x)
        x = self.drop3(x)
        
        x = x.view(x.size(0), -1) #flatten
        
        x = self.lin1(x)
        x = self.act(x)
        x = self.dropout(x)
        
        x = self.lin2(x)
        x = self.act(x)
        x = self.dropout(x)
        
        x = self.lin3(x)
        x = self.act(x)
        x = self.dropout(x)
        return x

# [4] mlp h == in dim 1st col, out 2nd col
# XSD:gYear 6 2
# XSD:date 10 4
# XSD:dateTime 14 6
class TemporalEncoder(nn.Module):
    def __init__(self, input_dim, dropout=0.2):
        super(TemporalEncoder, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = input_dim
        self.out_dim = 2 if input_dim <= 6 else 4 if input_dim <= 10 else 6

    def forward():
      pass

#[5] one to one encoding for numerical

In [18]:
# spatial_dataset.__getitem__(0).unsqueeze(0)

In [19]:
# for batch in text_dataloader:
#     print(batch.size())

In [20]:
text_encoder_s, text_encoder_m, text_encoder_l = TextEncoder(128, size_type='s'), TextEncoder(128, size_type='m'), TextEncoder(128, size_type='l')

In [35]:
# spatial = SpatialEncoder(2)
# batch = [spatial_dataset.__getitem__(i) for i in range(3)]
for i, batch in enumerate(text_dataloader_l):
    if i % 1000 == 0:
        print(batch.size())
    out = text_encoder_l(batch)
    sz = batch.size()
    if i % 1000 == 0:
        print(out.size())

# [spatial_dataset.__getitem__(i).size() for i in range(10)]

torch.Size([32, 5000])
torch.Size([32, 128])


KeyboardInterrupt: 

In [22]:
spatial = SpatialEncoder(2)
for i, batch in enumerate(spatial_dataloader):
    if i % 1000 == 0:
        print(batch.size())
    out = spatial(batch)
    sz = batch.size()
    if i % 1000 == 0:
        print(out.size())

torch.Size([32, 2, 53])
torch.Size([32, 128])


In [104]:
print(i)

0


In [None]:
image_encoder = ImageEncoder(128)
for i, batch in enumerate(image_dataloader):
    if i< 1000:
        out = image_encoder(batch)
    else:
        break

In [74]:
out.size()

torch.Size([32, 64, 56, 56])

In [None]:
# text_inputs = [string for string in ]

In [None]:
# sz

In [14]:
#todo: map encodings to graph

In [None]:
# A = []
# for i,edge in edge_map.items()[:2]:
#     edge_class_adjacency = adjacency[i]
#     edge_class_adjacency = torch.tensor(edge_class_adjacency, dtype=torch.long).t().to(device)
#     values = torch.ones(edge_class_adjacency.size(1), device=device)
#     A = torch.sparse_coo_tensor(edge_class_adjacency, values, (num_nodes, num_nodes), device=device)

#     A = normalize_adjacency(A)
#     A_list.append(A)

 'isalnum',
 'isalpha',
 'isascii',
 'isdecimal',
 'isdigit',
 'isidentifier',
 'islower',
 'isnumeric',
 'isprintable',
 'isspace',
 'istitle',
 'isupper',