In [None]:
# Environment setup. This cell must run before the cell that imports torch and
# the project modules (`data.*`, `networks.*`).
import sys
# Put the project source directory on the module search path (at index 1, i.e.
# right after the first entry) so the project-local imports below can resolve.
sys.path.insert(1, '../Codes_2Dsurface/')
import os
# Limit the CUDA devices visible to this process to GPUs 0-3. This is set
# before torch is imported in the next cell.
os.environ["CUDA_VISIBLE_DEVICES"]="0,1,2,3"

Load libraries

In [None]:
import torch
from torch_geometric.loader import DataLoader
from data.graph_dataset import TwoDDatasetBuilder, TwoDDatasetLoader, normalize
from networks.gcn import SurfaceRegresionNet
from data.utils import write_zone_to_tec

In [None]:
class objectview(object):
    """Expose the entries of a dict as attributes (``view.key``).

    The dict passed in is adopted as the instance namespace itself, not
    copied: attributes set on the view appear in the dict and vice versa.
    """

    def __init__(self, d) -> None:
        # Share the mapping directly so the view and the dict stay in sync.
        self.__dict__ = d

    def setattr(self, attr_name, attr_value):
        """Store ``attr_value`` under ``attr_name`` in the backing dict."""
        vars(self)[attr_name] = attr_value

# Hyper-parameters and run-time settings, exposed as attributes (args.lr, ...).
args = objectview(dict(
    # Network shape
    in_channels=10,
    out_channels=1,
    hidden_size=64,
    aggr='sum',
    act=torch.nn.functional.mish,
    # Run on the first visible GPU when CUDA is available, otherwise on CPU
    device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
    # Optimisation settings
    lr=5e-8,
    weight_decay=5e-3,
    epoch=1000,
    n_data_per_batch=10,
    criterion=torch.nn.MSELoss(),
))

Build dataset

In [None]:
# Build the dataset from the raw files in `raw_dir`; processed files go under
# `root_dir` in the 'processed' sub-directory.
# NOTE(review): presumably this only needs to run once per machine and the
# loader cell below can be used afterwards - confirm against TwoDDatasetBuilder.
dataset = TwoDDatasetBuilder(
    raw_dir = '/data1/tam/dataset_surface/yp', # Path to raw data files
    root_dir = '/data1/tam/download_dataset_surface_v1', # Path to store processed data files
    sub_dir = 'processed',
    subjects = 'all',
    time_names = 'all',
    data_type = torch.float32
)

Load dataset

In [None]:
# Load the processed (not yet normalized) dataset from the 'processed'
# sub-directory. This rebinds `dataset`, replacing the builder object created
# in the previous cell.
dataset = TwoDDatasetLoader(
    root_dir = '/data1/tam/download_dataset_surface_v1', # Path to store processed data files
    sub_dir = 'processed',
    subjects = 'all',
    time_names = 'all',
    data_type = torch.float32
)

In [None]:
# Normalize the loaded dataset, using the 'normalized' sub-directory.
# Only 'node_attr' is scaled (min-max); the 'output' target is deliberately
# left unscaled, so model predictions are in the original target units.
# NOTE(review): the meaning of the second list element (0) is not visible
# here - presumably the axis/dimension the scaler is fitted over; confirm.
dataset = normalize(
    dataset=dataset,
    sub_dir='normalized',
    scaler_dict = {
        'node_attr': ['minmax_scaler', 0],
        # 'output': ['minmax_scaler', 0]
    }
)

In [None]:
# Reload the dataset from the 'normalized' sub-directory. From here on,
# `dataset` refers to the normalized data used for training and evaluation.
dataset = TwoDDatasetLoader(
    root_dir = '/data1/tam/download_dataset_surface_v1', # Path to store processed data files
    sub_dir = 'normalized',
    subjects = 'all',
    time_names = 'all',
    data_type = torch.float32
)

Model initialization

In [None]:
# Build the regression network from the shared hyper-parameters and move it to
# the selected device.
# NOTE(review): the original comment described `hidden_channels` as
# "(hidden size, number of hidden layers)", but a single integer
# (args.hidden_size) is passed - confirm what SurfaceRegresionNet expects.
model = SurfaceRegresionNet(
    act=args.act,
    aggr=args.aggr,
    hidden_channels=args.hidden_size,
    out_channels=args.out_channels,
    in_channels=args.in_channels,
)
model.to(args.device)

# Adam with L2 weight decay over every model parameter.
optimizer = torch.optim.Adam(
    model.parameters(),
    weight_decay=args.weight_decay,
    lr=args.lr,
)

Training

In [None]:
def train(model, data, args, optimizer=None):
    """Run one optimisation step on a single batch and return its loss.

    Args:
        model: network called as ``model(x, edge_index)``.
        data: batch exposing ``node_attr``, ``edge_index`` and ``output``.
        args: settings object providing ``device`` and ``criterion``.
        optimizer: optimizer to step. When omitted, the notebook-level
            ``optimizer`` variable is used, as in the original code.

    Returns:
        The batch loss as a Python float.
    """
    if optimizer is None:
        # Backward-compatible fallback to the optimizer defined in the notebook.
        optimizer = globals()['optimizer']
    ##
    x = data.node_attr.float().to(args.device)
    edge_index = data.edge_index.to(args.device)
    # Targets are unsqueezed to a trailing dimension of size 1 before the loss.
    output = data.output.unsqueeze(1).float().to(args.device)
    ##
    model.train()
    # Clear gradients left over from the previous step; without this they
    # accumulate across calls and every update uses the running sum.
    optimizer.zero_grad()
    output_hat = model(x, edge_index)
    loss = args.criterion(output_hat, output)
    loss.backward()
    optimizer.step()
    return loss.item()

def eval(model, data, args):
    """Compute the loss and predictions for one batch without gradients.

    The model is switched to evaluation mode for the forward pass, so layers
    such as dropout or batch-norm behave deterministically, and its previous
    mode is restored afterwards so a following training step is unaffected.

    Note: the name shadows Python's built-in ``eval``; it is kept because
    other cells call this function by that name.

    Args:
        model: network called as ``model(x, edge_index)``.
        data: batch exposing ``node_attr``, ``edge_index`` and ``output``.
        args: settings object providing ``device`` and ``criterion``.

    Returns:
        Tuple ``(loss, output_hat)``: the batch loss as a float and the
        prediction tensor.
    """
    ##
    x = data.node_attr.float().to(args.device)
    edge_index = data.edge_index.to(args.device)
    output = data.output.unsqueeze(1).float().to(args.device)
    ##
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            output_hat = model(x, edge_index)
            loss = args.criterion(output_hat, output)
    finally:
        # Restore whichever mode the caller had the model in.
        model.train(was_training)
    return loss.item(), output_hat

In [None]:
# Subjects are assigned to the training or evaluation split by name.
train_subjects = ['in01ra_0050-0100m_yp', 'in01rb_0050-0100m_yp', 'is02ra_0050-0100m_yp',
                  'it01rc_0050-0100m_yp', 'it02ra_0050-0100m_yp', 'it02rc_0050-0100m_yp']
eval_subjects = ['in02ra_0050-0100m_yp', 'in02rb_0050-0100m_yp']


def _select_by_subjects(dataset, subjects):
    """Return the dataset samples whose processed file name contains a subject.

    Samples are grouped by subject (in the order given) and, within a subject,
    kept in dataset index order. The file-name list is fetched once instead of
    once per index.
    """
    file_names = dataset.processed_file_names()
    n_samples = dataset.len()
    return [
        dataset[i]
        for subject in subjects
        for i in range(n_samples)
        if subject in file_names[i]
    ]


##
train_set = _select_by_subjects(dataset, train_subjects)
eval_set = _select_by_subjects(dataset, eval_subjects)
##
# NOTE(review): the training loader does not shuffle; batches come in the same
# order every epoch. Left unchanged - confirm whether shuffle=True is wanted.
train_loader = DataLoader(train_set, batch_size=args.n_data_per_batch)
eval_loader = DataLoader(eval_set, batch_size=args.n_data_per_batch)

In [None]:
# Training
# Per-epoch mean losses, one entry per epoch.
total_train_loss = []
total_eval_loss = []

n_train_batches = len(train_loader)
n_eval_batches = len(eval_loader)
if n_train_batches == 0 or n_eval_batches == 0:
    # An empty loader means no file matched the subject names; fail with a
    # clear message instead of a ZeroDivisionError when averaging.
    raise ValueError(
        f'Empty data loader: {n_train_batches} train batches, {n_eval_batches} eval batches'
    )

for epoch in range(args.epoch):
    torch.cuda.empty_cache()

    # Iterate the loader directly so every batch is visited once per epoch.
    # `next(iter(loader))` builds a fresh iterator each time and, without
    # shuffling, returns the first batch over and over.
    train_loss = 0
    for data in train_loader:
        train_loss += train(model=model, data=data, args=args)
    train_loss /= n_train_batches
    total_train_loss.append(train_loss)

    eval_loss = 0
    for data in eval_loader:
        eval_loss += eval(model=model, data=data, args=args)[0]
    eval_loss /= n_eval_batches
    total_eval_loss.append(eval_loss)

    print(f'Epoch {epoch}: train loss = {train_loss}; eval loss = {eval_loss}')
    # if (epoch+1) % 20 == 0:
    #     torch.save(model.state_dict(), f'models/parc_test_epoch{epoch+1}.pth')

In [None]:
# raw_dataset = TwoDDatasetLoader(
#     root_dir = '/data1/tam/download_dataset_surface_v1', # Path to store processed data files
#     sub_dir = 'processed',
#     subjects = 'all',
#     time_names = 'all',
#     data_type = torch.float32
# )

# Subjects whose predictions are written out for visual inspection.
print_subjects = ['in02ra_0050-0100m_yp']
##
# Fetch the file-name list once rather than once per dataset index.
file_names = dataset.processed_file_names()
print_set = []
for subject in print_subjects:
    print_set += [dataset[i] for i in range(dataset.len()) if (subject in file_names[i])]

# The 'output' target is not scaled in the normalization cell, so predictions
# are already in target units. No scaler is loaded and no inverse transform is
# applied; re-enable both if output scaling is switched on.
pred_datas = []
was_training = model.training
model.eval()  # inference: disable dropout / batch-norm updates
try:
    for data in print_set:
        x = data.node_attr.float().to(args.device)
        edge_index = data.edge_index.to(args.device)
        with torch.no_grad():
            output = model(x, edge_index)
        # Overwrite the sample's target with the prediction (back on CPU, with
        # the trailing dimension at index 1 dropped) so the writer exports
        # predicted values.
        data.output = output.detach().cpu().squeeze(1)
        pred_datas.append(data)
finally:
    # Leave the model in whichever mode it was in before this cell.
    model.train(was_training)

write_zone_to_tec('test.dat', pred_datas)