<h1>Exploring the data</h1>

<h2>Loading the data</h2>

In [1]:
import os

# Load every instance file found in ./export into instance_dict, keyed by the
# instance name (first token of the first line of each file). Each value is the
# list of integer rows read from the file after its 9-line header.
data_files_list = [
    "./export/" + f
    for f in os.listdir("./export")
    # Skip sub-directories (e.g. notebook checkpoint folders): open() fails on them.
    if os.path.isfile("./export/" + f)
]
instance_dict = {}
for dir_str in data_files_list:
    # The context manager closes the file; no explicit close() is needed.
    with open(dir_str, 'r') as text_file:
        instance = ""
        for line_no, line in enumerate(text_file):
            if line_no == 0:
                # The first token of the first line names the instance.
                instance = line.split()[0]
                instance_dict[instance] = []
                continue
            if line_no < 9:
                # The remaining header lines (1-8) carry no row data.
                continue
            split_line = line.split()
            if not split_line:
                # A blank line would add an empty row and make the rows ragged,
                # which breaks the later conversion to a tensor.
                continue
            instance_dict[instance].append([int(i) for i in split_line])

# Parse ng_outs.csv into ng_dict: instance name -> list of 101-wide integer rows.
# The first row of every instance is all zeros; each CSV record then contributes
# one row made of a leading 0 followed by the record's fields 5..-2.
ng_dict = {}
with open("ng_outs.csv", 'r') as text_file:
    for line_no, line in enumerate(text_file):
        if line_no < 3:
            # The first three lines of the file are not data records.
            continue
        raw_line = line.strip()
        if not raw_line:
            # Blank lines (e.g. a trailing newline) have no fields to index.
            continue
        split_line_list = raw_line.split(sep=";")
        instance = split_line_list[3]
        # The last field is dropped by the slice, as in the original parsing.
        values = split_line_list[5:-1]
        if instance not in ng_dict:
            ng_dict[instance] = [[0 for i in range(101)]]
        ng_dict[instance].append([0] + [int(i) for i in values])
        if len(values) != 100:
            # Report records whose width differs from the expected 100 values.
            print("case found for instance "+instance)

<h2>Data preparation</h2>

In [7]:
from math import sqrt
import torch
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

In [3]:
# Directed edges of the complete graph on nodes 0..100 (no self-loops),
# enumerated source-major: [0,1], [0,2], ..., [100,99].
complete_graph_list = [
    [src, dst]
    for src in range(101)
    for dst in range(101)
    if src != dst
]
# Transpose the (n_edges, 2) pair list into a contiguous (2, n_edges) tensor.
edge_pairs = torch.tensor(complete_graph_list, dtype=torch.long)
edge_index = edge_pairs.t().contiguous()
n_edges = len(complete_graph_list)

In [4]:
# Force the diagonal of every instance's 101x101 matrix to zero.
for instance_name in ng_dict:
    matrix = ng_dict[instance_name]
    for k in range(101):
        matrix[k][k] = 0

In [5]:
# Build one torch_geometric Data object per instance present in ng_dict.
data_list = []
for instance_name in ng_dict:
    # Target matrix and raw node rows, both stored as double tensors.
    y = torch.tensor(ng_dict[instance_name], dtype=torch.double)
    node_rows = instance_dict[instance_name]
    x = torch.tensor(node_rows, dtype=torch.double)

    # Euclidean distance between every pair of rows, keyed by the pair of
    # first-column values; columns 1 and 2 are used as coordinates.
    loc_dict = {
        (a[0], b[0]): sqrt((a[1] - b[1]) ** 2 + (a[2] - b[2]) ** 2)
        for a in node_rows
        for b in node_rows
    }

    # Each edge attribute is [edge position, distance], filled in the same
    # source-major order used to enumerate the complete graph's edges.
    attr = [[edge_id] for edge_id in range(n_edges)]
    off_diagonal = (
        (src, dst)
        for src in range(101)
        for dst in range(101)
        if src != dst
    )
    for edge_id, (src, dst) in enumerate(off_diagonal):
        attr[edge_id].append(loc_dict[src, dst])
    attr = torch.tensor(attr, dtype=torch.double)

    # Node positions are columns 1 and 2 of each row.
    pos = torch.tensor([[row[1], row[2]] for row in node_rows], dtype=torch.double)

    data_list.append(Data(x=x, y=y, edge_index=edge_index, pos=pos, edge_attr=attr))

In [6]:
# Dense 101x101 adjacency matrix of the complete graph: start from all zeros
# and set a 1 for every (source, target) pair in the edge list.
complete_adj_matrix_list = [[0] * 101 for _ in range(101)]
for src, dst in complete_graph_list:
    complete_adj_matrix_list[src][dst] = 1

In [8]:
class Instances:
    """Holds a list of graph objects and exports slices of it in two formats.

    Each graph is expected to expose tensor attributes ``x``, ``y``, ``pos``
    and ``edge_attr`` (as produced when the data list is built).
    """

    def __init__(self, data_list):
        """Store the list of graph objects to export."""
        self.data_list = data_list

    def to_torch_geometric(self, start=0, end=-1, batch_size=1):
        """Return a DataLoader over ``self.data_list[start:end]``.

        NOTE: ``start``/``end`` follow slice semantics, so the default
        ``end=-1`` excludes the last stored graph. Pass ``end=None`` (or the
        list length) to include it.
        """
        # Slice the instance's own list rather than a notebook-level global.
        return DataLoader(self.data_list[start:end], batch_size=batch_size)

    @staticmethod
    def _empty_batch():
        """Return fresh, empty per-field accumulators for one batch."""
        return {
            "nodes": [],
            "nodes_coor": [],
            "nodes_timew": [],
            "x_edges": [],
            "x_edges_values": [],
            "y_edges": [],
        }

    @staticmethod
    def _stack_batch(batch):
        """Convert the accumulated Python lists of one batch into a tensor tuple."""
        return (
            torch.tensor(batch["x_edges"], dtype=torch.long),
            torch.tensor(batch["x_edges_values"], dtype=torch.float),
            torch.tensor(batch["nodes"], dtype=torch.long),
            torch.tensor(batch["nodes_coor"], dtype=torch.float),
            torch.tensor(batch["nodes_timew"], dtype=torch.long),
            torch.tensor(batch["y_edges"], dtype=torch.long),
        )

    def to_conv_nets(self, start=0, end=-1, batch_size=1):
        """Export ``self.data_list[start:end]`` as dense, batched tensors.

        Returns a list with one tuple per batch, in the order
        ``(x_edges, x_edges_values, nodes, nodes_coor, nodes_timew, y_edges)``:

        * ``x_edges``        -- adjacency matrix of the complete graph (long)
        * ``x_edges_values`` -- dense matrix filled from the second column of
          ``edge_attr`` (float)
        * ``nodes``          -- node indices ``0..n-1`` (long)
        * ``nodes_coor``     -- ``graph.pos`` (float)
        * ``nodes_timew``    -- columns 4 and 5 of ``graph.x`` (long);
          presumably the time-window bounds -- confirm against the data files
        * ``y_edges``        -- ``graph.y`` (long)

        Every batch, including a final partial one, is returned. The number of
        nodes is taken from the rows of ``graph.x``; ``edge_attr`` is assumed
        to list the off-diagonal entries in source-major order.

        NOTE: ``start``/``end`` follow slice semantics, so the default
        ``end=-1`` excludes the last stored graph. Pass ``end=None`` to
        include it.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        final_data = []
        batch = self._empty_batch()
        for graph in self.data_list[start:end]:
            x_raw = graph.x.tolist()
            n_nodes = len(x_raw)

            batch["nodes"].append(list(range(n_nodes)))
            batch["nodes_coor"].append(graph.pos.tolist())
            batch["nodes_timew"].append([[row[4], row[5]] for row in x_raw])

            # Complete graph without self-loops: 1 everywhere except the diagonal.
            batch["x_edges"].append(
                [[int(i != j) for j in range(n_nodes)] for i in range(n_nodes)]
            )

            # Scatter the flat per-edge values back into a dense matrix, walking
            # the off-diagonal cells in the same source-major order.
            dist_list = [dist for _, dist in graph.edge_attr.tolist()]
            dist_matrix = [[0] * n_nodes for _ in range(n_nodes)]
            pos_dist = 0
            for i in range(n_nodes):
                for j in range(n_nodes):
                    if i != j:
                        dist_matrix[i][j] = dist_list[pos_dist]
                        pos_dist += 1
            batch["x_edges_values"].append(dist_matrix)

            batch["y_edges"].append(graph.y.tolist()) #TODO: remove the transpose and also the contiguous when generating y

            if len(batch["nodes"]) == batch_size:
                final_data.append(self._stack_batch(batch))
                batch = self._empty_batch()

        # Flush the trailing batch; without this the last batch is never emitted.
        if batch["nodes"]:
            final_data.append(self._stack_batch(batch))
        return final_data

In [9]:
# Wrap the prepared graph list in the Instances export helper.
data_source = Instances(data_list)

In [10]:
# Request the slice [428:429] (a single graph) in the dense tuple format, batch size 1.
datatorch = data_source.to_conv_nets(start=428, end=429, batch_size=1)