In [5]:
import os.path as osp
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Linear

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data, HeteroData
from d2l import torch as d2l
from torch_geometric.loader import DataLoader
from torch_geometric.data import InMemoryDataset, download_url
from tqdm import tqdm

In [6]:
# Split each sweep CSV into a train file (80 %) and a test file (20 %).
# The two sweeps are handled identically, so they share one loop; after the
# loop `total_data`, `test_data` and `train_data` hold the Branchline frames,
# exactly as when the steps were written out twice.
for sweep_name in ("DoubleBox", "Branchline"):
    total_data = pd.read_csv(f"DoubleBox_Coupler/{sweep_name}_Sweep.csv")

    # Fixed random_state keeps the split reproducible across runs.
    test_data = total_data.sample(frac=0.2, axis=0, random_state=2)

    # Remove the sampled rows by index.  The previous
    # concat + drop_duplicates(keep=False) approach matched rows by value, so
    # any rows that were duplicated inside the sweep itself were silently
    # dropped from the training set as well.
    train_data = total_data.drop(index=test_data.index)

    # Every row must land in exactly one of the two splits.
    assert len(train_data) + len(test_data) == len(total_data)

    train_data.to_csv(f"DoubleBox_Coupler/{sweep_name}_train.csv", index=False)
    test_data.to_csv(f"DoubleBox_Coupler/{sweep_name}_test.csv", index=False)

In [8]:
# Define the dataset classes that convert the train/test CSV files into graph-format (HeteroData) datasets

class HybridTrainData(InMemoryDataset):
    """In-memory dataset of heterogeneous graphs built from the training CSVs.

    Every CSV row becomes one ``HeteroData`` graph with two node types:

    * ``'ML'`` - 11 nodes with 2 features each: a width column and either a
      length column or the constant ``_EDGE_LEN``.
    * ``'MT'`` - 6 nodes with 3 features each: three width columns.

    The values at column positions 11..50 of the row are stored as
    ``data['ML'].y``.  Rows from ``DoubleBox_train.csv`` and
    ``Branchline_train.csv`` use the same node features but different
    ``('MT', 'to', 'ML')`` connectivity; the DoubleBox graphs come first in
    the resulting dataset.

    Args:
        root: Root directory handed to ``InMemoryDataset``; the processed
            file is read from ``self.processed_paths[0]``.
        transform: Forwarded unchanged to ``InMemoryDataset``.
        pre_transform: Forwarded unchanged to ``InMemoryDataset``.
        pre_filter: Forwarded unchanged to ``InMemoryDataset``.
    """

    # Constant second feature for the 'ML' nodes that have no length column.
    _EDGE_LEN = 4

    # (width column, length column) for each 'ML' node, in node-index order.
    # ``None`` as the length column means "use _EDGE_LEN".
    # NOTE(review): node 2 is the only one paired with ``l3``; kept exactly as
    # in the original - confirm this is intended.
    _ML_FEATURES = (
        ("w1", None), ("w2", "l2"), ("w8", "l3"), ("w9", None),
        ("w3", "l1"), ("w7", "l1"), ("w10", "l1"),
        ("w4", None), ("w5", "l2"), ("w6", "l2"), ("w11", None),
    )

    # Three width columns for each 'MT' node, in node-index order.
    _MT_FEATURES = (
        ("w1", "w2", "w3"), ("w2", "w8", "w7"), ("w8", "w9", "w10"),
        ("w5", "w4", "w3"), ("w6", "w5", "w7"), ("w11", "w6", "w10"),
    )

    # Column *positions* (not names) of the target values.
    _TARGET_COLUMNS = slice(11, 51)

    # ('MT', 'to', 'ML') connectivity as (MT node indices, ML node indices).
    _DOUBLEBOX_EDGES = (
        (0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5),
        (0, 1, 4, 1, 2, 5, 2, 3, 6, 7, 8, 4, 8, 9, 5, 9, 10, 6),
    )
    # Same list without the two edges into 'ML' node 5 (MT 1 -> 5, MT 4 -> 5).
    _BRANCHLINE_EDGES = (
        (0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5),
        (0, 1, 4, 1, 2, 2, 3, 6, 7, 8, 4, 8, 9, 9, 10, 6),
    )

    def __init__(self, root, transform=None, pre_transform=None, pre_filter=None):
        super().__init__(root, transform, pre_transform, pre_filter)
        # NOTE(review): recent PyTorch releases default ``torch.load`` to
        # ``weights_only=True``; if loading the processed file fails there,
        # check the installed torch / torch_geometric versions.
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        """No raw files are tracked; the CSVs are read directly in ``process``."""
        return []

    @property
    def processed_file_names(self):
        """Name of the single processed file holding the collated graphs."""
        return ['DoubleBox_Coupler_Train.dataset']

    def download(self):
        """Nothing to download."""
        pass

    @classmethod
    def _graphs_from_csv(cls, csv_path, edge_index):
        """Build one undirected ``HeteroData`` graph per row of ``csv_path``.

        Args:
            csv_path: CSV file providing the ``w*``/``l*`` feature columns, a
                ``Seq`` column and the target columns.
            edge_index: Pair of equally long index sequences
                (MT node indices, ML node indices) for the
                ``('MT', 'to', 'ML')`` edges.

        Returns:
            List of ``HeteroData`` objects in CSV row order.
        """
        df = pd.read_csv(csv_path)
        # Renumber Seq exactly as the original code did.  The targets are
        # sliced by position, so the column contents must stay the same even
        # though the row loop below no longer groups on Seq.
        df["Seq"] = pd.RangeIndex(len(df["Seq"]))

        # Pull each referenced column out once instead of once per row.
        needed = {name for pair in cls._ML_FEATURES for name in pair if name is not None}
        needed.update(name for triple in cls._MT_FEATURES for name in triple)
        column = {name: df[name].to_numpy() for name in needed}
        targets = df.iloc[:, cls._TARGET_COLUMNS].to_numpy()

        to_undirected = T.ToUndirected()
        graphs = []
        # With Seq unique per row, the former groupby("Seq") yielded exactly
        # one row per group, i.e. it was a plain row iteration.
        for row in tqdm(range(len(df))):
            data = HeteroData()
            data['ML'].x = torch.tensor(
                [[column[width_col][row],
                  cls._EDGE_LEN if length_col is None else column[length_col][row]]
                 for width_col, length_col in cls._ML_FEATURES],
                dtype=torch.float)
            data['MT'].x = torch.tensor(
                [[column[name][row] for name in triple] for triple in cls._MT_FEATURES],
                dtype=torch.float)
            data['ML'].y = torch.tensor(targets[row], dtype=torch.float)
            # A fresh tensor per graph, as in the original.
            data['MT', 'to', 'ML'].edge_index = torch.tensor(edge_index, dtype=torch.long)

            graphs.append(to_undirected(data))
        return graphs

    def process(self):
        """Build all training graphs and save the collated result to disk."""
        data_list = [
            *self._graphs_from_csv("DoubleBox_Coupler/DoubleBox_train.csv", self._DOUBLEBOX_EDGES),
            *self._graphs_from_csv("DoubleBox_Coupler/Branchline_train.csv", self._BRANCHLINE_EDGES),
        ]

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])


# Dataset class that builds the testing graphs

class HybridTestData(InMemoryDataset):
    """In-memory dataset of heterogeneous graphs built from the testing CSVs.

    Every CSV row becomes one ``HeteroData`` graph with two node types:

    * ``'ML'`` - 11 nodes with 2 features each: a width column and either a
      length column or the constant ``_EDGE_LEN``.
    * ``'MT'`` - 6 nodes with 3 features each: three width columns.

    The values at column positions 11..50 of the row are stored as
    ``data['ML'].y``.  Rows from ``DoubleBox_test.csv`` and
    ``Branchline_test.csv`` use the same node features but different
    ``('MT', 'to', 'ML')`` connectivity; the DoubleBox graphs come first in
    the resulting dataset.

    Args:
        root: Root directory handed to ``InMemoryDataset``; the processed
            file is read from ``self.processed_paths[0]``.
        transform: Forwarded unchanged to ``InMemoryDataset``.
        pre_transform: Forwarded unchanged to ``InMemoryDataset``.
        pre_filter: Forwarded unchanged to ``InMemoryDataset``.
    """

    # Constant second feature for the 'ML' nodes that have no length column.
    _EDGE_LEN = 4

    # (width column, length column) for each 'ML' node, in node-index order.
    # ``None`` as the length column means "use _EDGE_LEN".
    # NOTE(review): node 2 is the only one paired with ``l3``; kept exactly as
    # in the original - confirm this is intended.
    _ML_FEATURES = (
        ("w1", None), ("w2", "l2"), ("w8", "l3"), ("w9", None),
        ("w3", "l1"), ("w7", "l1"), ("w10", "l1"),
        ("w4", None), ("w5", "l2"), ("w6", "l2"), ("w11", None),
    )

    # Three width columns for each 'MT' node, in node-index order.
    _MT_FEATURES = (
        ("w1", "w2", "w3"), ("w2", "w8", "w7"), ("w8", "w9", "w10"),
        ("w5", "w4", "w3"), ("w6", "w5", "w7"), ("w11", "w6", "w10"),
    )

    # Column *positions* (not names) of the target values.
    _TARGET_COLUMNS = slice(11, 51)

    # ('MT', 'to', 'ML') connectivity as (MT node indices, ML node indices).
    _DOUBLEBOX_EDGES = (
        (0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5),
        (0, 1, 4, 1, 2, 5, 2, 3, 6, 7, 8, 4, 8, 9, 5, 9, 10, 6),
    )
    # Same list without the two edges into 'ML' node 5 (MT 1 -> 5, MT 4 -> 5).
    _BRANCHLINE_EDGES = (
        (0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5),
        (0, 1, 4, 1, 2, 2, 3, 6, 7, 8, 4, 8, 9, 9, 10, 6),
    )

    def __init__(self, root, transform=None, pre_transform=None, pre_filter=None):
        super().__init__(root, transform, pre_transform, pre_filter)
        # NOTE(review): recent PyTorch releases default ``torch.load`` to
        # ``weights_only=True``; if loading the processed file fails there,
        # check the installed torch / torch_geometric versions.
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        """No raw files are tracked; the CSVs are read directly in ``process``."""
        return []

    @property
    def processed_file_names(self):
        """Name of the single processed file holding the collated graphs."""
        return ['DoubleBox_Coupler_Test.dataset']

    def download(self):
        """Nothing to download."""
        pass

    @classmethod
    def _graphs_from_csv(cls, csv_path, edge_index):
        """Build one undirected ``HeteroData`` graph per row of ``csv_path``.

        Args:
            csv_path: CSV file providing the ``w*``/``l*`` feature columns, a
                ``Seq`` column and the target columns.
            edge_index: Pair of equally long index sequences
                (MT node indices, ML node indices) for the
                ``('MT', 'to', 'ML')`` edges.

        Returns:
            List of ``HeteroData`` objects in CSV row order.
        """
        df = pd.read_csv(csv_path)
        # Renumber Seq exactly as the original code did.  The targets are
        # sliced by position, so the column contents must stay the same even
        # though the row loop below no longer groups on Seq.
        df["Seq"] = pd.RangeIndex(len(df["Seq"]))

        # Pull each referenced column out once instead of once per row.
        needed = {name for pair in cls._ML_FEATURES for name in pair if name is not None}
        needed.update(name for triple in cls._MT_FEATURES for name in triple)
        column = {name: df[name].to_numpy() for name in needed}
        targets = df.iloc[:, cls._TARGET_COLUMNS].to_numpy()

        to_undirected = T.ToUndirected()
        graphs = []
        # With Seq unique per row, the former groupby("Seq") yielded exactly
        # one row per group, i.e. it was a plain row iteration.
        for row in tqdm(range(len(df))):
            data = HeteroData()
            data['ML'].x = torch.tensor(
                [[column[width_col][row],
                  cls._EDGE_LEN if length_col is None else column[length_col][row]]
                 for width_col, length_col in cls._ML_FEATURES],
                dtype=torch.float)
            data['MT'].x = torch.tensor(
                [[column[name][row] for name in triple] for triple in cls._MT_FEATURES],
                dtype=torch.float)
            data['ML'].y = torch.tensor(targets[row], dtype=torch.float)
            # A fresh tensor per graph, as in the original.
            data['MT', 'to', 'ML'].edge_index = torch.tensor(edge_index, dtype=torch.long)

            graphs.append(to_undirected(data))
        return graphs

    def process(self):
        """Build all testing graphs and save the collated result to disk."""
        data_list = [
            *self._graphs_from_csv("DoubleBox_Coupler/DoubleBox_test.csv", self._DOUBLEBOX_EDGES),
            *self._graphs_from_csv("DoubleBox_Coupler/Branchline_test.csv", self._BRANCHLINE_EDGES),
        ]

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])


In [9]:
# Instantiate the training dataset; its processed file lives under this root.
dataset_train = HybridTrainData(root='DoubleBox_Branchline_data/train/')

Processing...
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2400/2400 [00:01<00:00, 1440.37it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2400/2400 [00:01<00:00, 1377.90it/s]
Done!


In [10]:
# Instantiate the testing dataset; its processed file lives under this root.
dataset_test = HybridTestData(root='DoubleBox_Branchline_data/test/')

Processing...
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 600/600 [00:00<00:00, 1435.34it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 600/600 [00:00<00:00, 1411.43it/s]
Done!


In [11]:
# Display the dataset repr (class name and number of graphs) as the cell output.
dataset_test

HybridTestData(1200)