# Generating data files (.pt)
#### Setup instructions
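Before running, ensure that the generated microstructure files `FeatureData_FakeMatl_<i>.csv` (one per index `i` in the configured range) exist in the `generated_microstructures` directory under `DIR_LOC`; the graphs are built from these files.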

In [1]:
import import_ipynb
from Visualize_Microstructures import gen_graph, DIR_LOC

import os

import numpy as np
from torch_geometric.utils import from_networkx
import torch
from torch_geometric.loader import DataLoader

# Common path prefix of the generated microstructure CSV files; the file
# index and the ".csv" extension are appended when each file is loaded.
GEN_STRUCTURES_FILE_BASE = os.path.join(
    DIR_LOC,
    "generated_microstructures",
    "FeatureData_FakeMatl_",
)

# Half-open range [START, END) of microstructure file indices to process.
NUM_MICROSTRUCTURES_START = 0  # first index, inclusive
NUM_MICROSTRUCTURES_END = 99  # stop index, exclusive

importing Jupyter notebook from Visualize_Microstructures.ipynb


In [2]:
def network_to_pyg_data(file, train_ratio=0.2, seed=None):
    """Load one microstructure file and convert it to a PyG data object.

    The graph returned by ``gen_graph(file)`` is converted with
    ``from_networkx``: the ``pos`` node attribute is grouped into ``x`` and
    the ``weight`` edge attribute into ``edge_attr``. The ``surfaceFeature``
    node attribute is moved to ``y`` as integer labels, and random,
    complementary ``train_mask`` / ``test_mask`` tensors are attached.

    Args:
        file: Path handed to ``gen_graph``.
        train_ratio: Fraction of nodes placed in the training mask; all
            remaining nodes go to the test mask. Must lie in ``[0, 1]``.
        seed: Optional seed for a reproducible split. When ``None`` the
            global NumPy random state is used (the previous behaviour).

    Returns:
        The converted data object with ``y``, ``train_mask`` and
        ``test_mask`` set.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be in [0, 1], got {train_ratio!r}")

    G = gen_graph(file)
    data = from_networkx(G, group_node_attrs=["pos"], group_edge_attrs=["weight"])

    # Move the surface label out of the generic attributes and into ``y``.
    data.y = data["surfaceFeature"].long()
    del data["surfaceFeature"]

    # Split the data
    num_nodes = data.x.shape[0]
    num_train = int(num_nodes * train_ratio)

    # Shuffle a plain list so that, without a seed, the draw is the same one
    # the global NumPy state would have produced before this parameter existed.
    rng = np.random if seed is None else np.random.default_rng(seed)
    order = list(range(num_nodes))
    rng.shuffle(order)
    # Explicit long dtype keeps an empty selection a valid index tensor.
    train_idx = torch.as_tensor(order[:num_train], dtype=torch.long)

    train_mask = torch.zeros_like(data.y, dtype=torch.bool)
    train_mask[train_idx] = True

    data.train_mask = train_mask
    # Every node not picked for training is a test node.
    data.test_mask = ~train_mask

    return data

In [3]:
if __name__ == "__main__":
    # Convert every microstructure file in [START, END) into a PyG data object.
    data_batch = []
    for i in range(NUM_MICROSTRUCTURES_START, NUM_MICROSTRUCTURES_END):
        file = f"{GEN_STRUCTURES_FILE_BASE}{i}.csv"
        print("Loading graph " + str(i) + "...")
        data_batch.append(network_to_pyg_data(file))

    if not data_batch:
        raise ValueError(
            "No microstructures to load: NUM_MICROSTRUCTURES_START must be "
            "smaller than NUM_MICROSTRUCTURES_END"
        )

    print(data_batch)
    # loader to combine data
    print("Combining data...")

    # Use a single batch that spans the whole list. A fixed batch_size of 32
    # would make next(iter(loader)) return only the first 32 graphs and
    # silently drop the rest from the combined dataset.
    loader = DataLoader(data_batch, batch_size=len(data_batch))
    data = next(iter(loader))

Loading graph 0...
[Data(edge_index=[2, 3406], rot=[244, 3], size=[244], x=[244, 3], edge_attr=[3406, 1], y=[244], train_mask=[244], test_mask=[244])]
Combining data...


In [4]:
# `data` is only created under the `__name__ == "__main__"` guard, so the save
# uses the same guard; otherwise importing this notebook raises NameError.
if __name__ == "__main__":
    # Make sure the output directory exists before writing the file.
    os.makedirs("datasets", exist_ok=True)
    torch.save(data, "datasets/data_test.pt")

In [5]:
# Bare expression as the last line of the cell: displays the combined batch.
data

DataBatch(edge_index=[2, 3406], rot=[244, 3], size=[244], x=[244, 3], edge_attr=[3406, 1], y=[244], train_mask=[244], test_mask=[244], batch=[244], ptr=[2])