In [1]:
import torch
import h5py
import os
import numpy as np
from torch import nn as nn
from torch.nn import functional as F
from torch import optim
from torch.utils.data import Dataset, DataLoader
import time
from d2l import torch as d2l
import random

In [2]:
# get the file name of path
def get_fname(path):
    files = os.listdir(path)
    files.sort()
    return files

In [3]:
data_dir = "../Datasets"

In [4]:
files = get_fname(data_dir)

In [5]:
files[0]

'snapshot_000.hdf5'

In [6]:
def get_fpath(path, fname):
    return os.path.join(path, fname)

In [7]:
f0 = h5py.File(get_fpath(data_dir, files[0]))

In [8]:
def get_hdf5_group(hdf5File):
    group = []
    for key in hdf5File:
        group.append(key)
    return group

In [9]:
get_hdf5_group(f0)[0]

'dataset'

In [10]:
DATA = f0[get_hdf5_group(f0)[0]]

In [11]:
DATA[0]

array([ 1.0000000e+00,  2.9456370e+21,  7.9943233e+00,  1.4509925e+07,
       -1.9568456e+11,  2.4651726e+07, -1.6436408e+08, -4.3989524e+07,
       -1.0778338e+07,  2.7425510e+02,  4.4113613e+04,  1.7417133e+02],
      dtype=float32)

In [12]:
type(DATA[0])

numpy.ndarray

In [13]:
int(len(DATA)*0.1)

24231

In [14]:
# This train test we just to use one snapshot just

In [15]:
# get train set and validation set from the DATA
def reorg_train_vaild(data, vaild_ratio):
    l = len(data)
    # data = random.shuffle(data)
    n = l-int(l*vaild_ratio)
    train_data = data[:n]
    vaild_data = data[n:]
    return train_data, vaild_data

In [16]:
train_data, vaild_data = reorg_train_vaild(DATA, 0.1)

In [17]:
len(train_data), len(vaild_data)

(218080, 24231)

In [18]:
(len(train_data) + len(vaild_data)) == len(DATA)

True

In [19]:
train_data[0]

array([ 1.0000000e+00,  2.9456370e+21,  7.9943233e+00,  1.4509925e+07,
       -1.9568456e+11,  2.4651726e+07, -1.6436408e+08, -4.3989524e+07,
       -1.0778338e+07,  2.7425510e+02,  4.4113613e+04,  1.7417133e+02],
      dtype=float32)

In [20]:
train_data.shape, vaild_data.shape

((218080, 12), (24231, 12))

In [21]:
train_data[:, -3:]

array([[ 2.7425510e+02,  4.4113613e+04,  1.7417133e+02],
       [ 2.8381116e+02,  4.4112398e+04,  1.5363177e+02],
       [ 2.7865091e+02,  4.4125488e+04,  1.6021362e+02],
       ...,
       [ 6.5299835e+01,  4.4108121e+04,  1.9768300e+02],
       [-1.7201406e+01,  4.4019441e+04,  1.9535860e+02],
       [-2.2663403e+01,  4.4019906e+04,  2.0022131e+02]], dtype=float32)

In [22]:
# physcial quantity squence is ids mass rho ie pot sl pos*3 vel*3
def chose_input_output_phy_qua(data, phy_qua):
    assert phy_qua == "mass" or phy_qua == "rho" or phy_qua == "ie" or phy_qua == "pot" or phy_qua == "sl" or phy_qua == "pos" or phy_qua == "vel"
    if phy_qua == "mass":
        return data[:, 1]
    if phy_qua == "rho":
        return data[:, 2]
    if phy_qua == "ie":
        return data[:, 3]
    if phy_qua == "pot":
        return data[:, 4]
    if phy_qua == "rho":
        return data[:, 5]
    if phy_qua == "pos":
        return data[:, -6:-3]
    if phy_qua == "vel":
        return data[:, -3:]

In [23]:
train_data_pos = chose_input_output_phy_qua(train_data, "pos")
train_data_pot = chose_input_output_phy_qua(train_data, "pot")

In [24]:
vaild_data_pos = chose_input_output_phy_qua(vaild_data, "pos")
vaild_data_pot = chose_input_output_phy_qua(vaild_data, "pot")

In [25]:
train_data_pos.shape, train_data_pot.shape

((218080, 3), (218080,))

In [26]:
vaild_data_pos.shape, vaild_data_pot.shape

((24231, 3), (24231,))

In [27]:
train_data_pot = train_data_pot.reshape(-1, 1)

In [28]:
vaild_data_pot = vaild_data_pot.reshape(-1, 1)

In [29]:
train_data_pot.shape, vaild_data_pot.shape

((218080, 1), (24231, 1))

In [30]:
type(train_data_pos), type(train_data_pot), type(vaild_data_pos), type(vaild_data_pot)

(numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray)

In [31]:
# norm function for numpy
def data_norm(data):
    mean = data.mean(axis=0, keepdims=True)
    std = data.std(axis=0, keepdims=True)
    return (data-mean)/std

In [32]:
train_data_pos_norm = data_norm(train_data_pos)
train_data_pot_norm = data_norm(train_data_pot)

In [33]:
train_data_pos_norm.shape, train_data_pot_norm.shape

((218080, 3), (218080, 1))

In [34]:
train_data_pos_norm.max(), train_data_pos_norm.min(), train_data_pot_norm.max(), train_data_pot_norm.min()

(3.0629683, -2.3754456, 2.1655304, -2.0422297)

In [35]:
vaild_data_pos_norm = data_norm(vaild_data_pos)
vaild_data_pot_norm = data_norm(vaild_data_pot)

In [36]:
vaild_data_pos_norm.shape, vaild_data_pot_norm.shape

((24231, 3), (24231, 1))

In [37]:
vaild_data_pos_norm.max(), vaild_data_pos_norm.min(), vaild_data_pot_norm.max(), vaild_data_pot_norm.min()

(2.5615253, -2.5647385, 1.7983176, -2.1992207)

In [38]:
# defined SPH output Dataset
class SPHDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels
    def __len__(self):
        return len(self.features)
    def __getitem__(self, item):
        return self.features[item], self.labels[item]

In [39]:
train_ds = SPHDataset(train_data_pos_norm, train_data_pot_norm)

In [40]:
len(train_ds), train_ds[0]

(218080,
 (array([-1.2238528 , -0.37931347, -0.07987393], dtype=float32),
  array([-0.6781142], dtype=float32)))

In [41]:
vaild_ds = SPHDataset(vaild_data_pos_norm, vaild_data_pot_norm)

In [42]:
len(vaild_ds), vaild_ds[0]

(24231,
 (array([ 0.00879525, -1.0692656 , -1.602093  ], dtype=float32),
  array([-1.3022025], dtype=float32)))

In [43]:
batch_size = 32

In [44]:
train_iter = DataLoader(train_ds, batch_size=batch_size, shuffle=True, drop_last=True)
vaild_iter = DataLoader(vaild_ds, batch_size=batch_size, shuffle=False, drop_last=False)

In [45]:
for i, (feature, label) in enumerate(train_iter):
    if(i < 1):
        print("input: ", feature, feature.shape)
        print("output: ", label, label.shape)
    else:
        break

input:  tensor([[ 7.2784e-02, -6.9264e-01, -2.0209e+00],
        [ 1.3619e+00, -5.3167e-01,  7.1643e-01],
        [-2.7818e-01,  8.1120e-01, -2.2547e-01],
        [-5.4424e-01, -1.1932e+00,  1.1267e+00],
        [-9.1771e-01,  3.2125e-01,  1.6513e+00],
        [-2.2987e+00,  2.6677e-01, -4.2079e-01],
        [-1.3040e+00, -1.8054e+00,  1.2028e-01],
        [-5.3721e-01,  1.2750e-01,  1.8284e+00],
        [ 1.0754e-01, -3.8399e-01, -1.2177e+00],
        [ 9.6819e-01, -1.3230e+00,  5.5644e-01],
        [-8.0251e-01, -9.1646e-01, -2.5240e-01],
        [-1.9497e+00, -6.0451e-01,  6.9974e-01],
        [-2.2589e+00,  4.9913e-01, -2.5884e-01],
        [-6.5395e-01, -6.6124e-01,  2.1727e+00],
        [ 8.3864e-05,  4.1491e-01,  1.8298e-01],
        [ 6.6842e-02,  1.1970e+00, -1.2999e+00],
        [-1.2292e+00, -1.1507e+00,  1.9986e-01],
        [ 1.3664e+00, -1.3482e+00,  1.3490e-01],
        [-6.5044e-01, -1.9805e-01, -1.0795e+00],
        [-2.4921e-02, -5.3118e-01,  6.0448e-01],
        [ 1.

In [46]:
for i, (feature, label) in enumerate(vaild_iter):
    if(i < 1):
        print("input: ", feature, feature.shape)
        print("output: ", label, label.shape)
        # print(label.shape[0])
    else:
        break

input:  tensor([[ 0.0088, -1.0693, -1.6021],
        [-0.0769, -1.0702, -1.4304],
        [-0.0567, -1.0745, -1.4163],
        [ 0.1248, -1.0745, -1.2706],
        [ 0.1246, -1.0745, -1.2705],
        [-0.1123, -1.1200, -1.0838],
        [-0.0707, -1.1223, -0.8347],
        [-0.0708, -1.1223, -0.8347],
        [-0.0536, -1.1079, -0.6372],
        [-0.0536, -1.1079, -0.6372],
        [-0.1204, -1.0929, -0.4649],
        [-0.0273, -1.0743, -0.2836],
        [-0.0932, -1.0950, -0.1286],
        [-0.0952, -1.0948, -0.1152],
        [-0.0620, -1.0956,  0.1140],
        [-0.0558, -1.0885,  0.0902],
        [-0.0421, -1.0774,  0.3002],
        [-0.0421, -1.0774,  0.3002],
        [-0.2098, -1.0637,  0.4722],
        [-0.1396, -1.1101,  0.6268],
        [-0.1395, -1.1101,  0.6268],
        [-0.2313, -1.0987,  0.7891],
        [-0.0563, -1.1016,  0.9311],
        [-0.1616, -1.1051,  1.1315],
        [-0.1599, -1.1048,  1.1294],
        [-0.1582, -1.1046,  1.1279],
        [-0.1330, -1.1142,  1.

In [47]:
len(train_iter), len(vaild_iter)

(6815, 758)

In [48]:
class MyNetwork(nn.Module):
    def __init__(self, input_size, output_size):
        super(MyNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, 10)
        self.fc2 = nn.Linear(10, 12)
        self.fc3 = nn.Linear(12, output_size)
        
    def forward(self, x):
        return self.fc3(F.tanh(self.fc2(F.tanh(self.fc1(x)))))

In [49]:
from visdom import Visdom

In [53]:
# def use_Visdom(visdom_ = True):
#     if visdom_:
#         viz = Visdom()
#         viz.line([0.], [0.], win='train_loss', opts=dict(title='train_loss'))
#         viz.line([[0.0, 1.0]], [0.], win='valid', opts=dict(title='valid_loss&acc.', legend=['loss', 'acc.']))

In [54]:
viz = Visdom()
viz.line([0.], [0.], win='train_loss', opts=dict(title='train_loss'))
viz.line([[0.0, 1.0]], [0.], win='valid', opts=dict(title='valid_loss&acc.', legend=['loss', 'acc.']))

Setting up a new session...


'valid'

In [55]:
global_step = 0. # 记录训练步长

In [56]:
# def get_gpu():
#     assert num_gpus() is not None
#     return gpu()

In [57]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')

Using cuda device


In [58]:
model = MyNetwork(3,1).to(device)

loss_fn = nn.MSELoss().to(device)

optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)

In [59]:
epochs = 10

In [60]:
target = 0.05

In [None]:
for epoch in range(epochs):
    model.train()  # train
    for batch_idx, (x, label) in enumerate(train_iter):
        x, label = x.to(device), label.to(device)
        y_hat = model(x)
        train_loss = loss_fn(y_hat, label)
        optimizer.zero_grad()
        train_loss.sum().backward()
        optimizer.step()
        loss = train_loss
        global_step += 1
        viz.line([loss.item()], [global_step], win='train_loss', update='append')
        if batch_idx % 200 == 0:    
            print(epoch, 'loss: {:.4f}'.format(loss.item()))
    model.eval()   # test
    with torch.no_grad():
        correct = 0
        test_loss = 0
        total_correct = 0
        total_num = 0
        test_loss_sum = 0
        for x, label in vaild_iter:
            x, label = x.to(device), label.to(device)
            logits = model(x)
            # label = torch.flatten(label, 1, -1)
            test_loss = loss_fn(logits, label)
            if test_loss < target:
                correct += 1
            test_loss_sum += test_loss 
            # correct = torch.eq(pred,label).float().sum().item()
            total_correct += correct
            total_num += x.size(0)
        test_loss_ = test_loss_sum / total_num
        acc = total_correct / total_num
        # print(type(test_loss), type(acc))
        # visdom 可视化
        viz.line([[test_loss_.item(), acc]], [global_step], win='valid', update='append')
            # if batch_idx % 100 == 0:
        print(epoch, 'test acc: {:.4f}%'.format(acc*100))
        print('test loss: {:.4f}'.format(test_loss))

0 loss: 0.0047
0 loss: 0.0044
0 loss: 0.0029
0 loss: 0.0051
0 loss: 0.0046
0 loss: 0.0025
0 loss: 0.0089
0 loss: 0.0052
0 loss: 0.0058
0 loss: 0.0052
0 loss: 0.0039
0 loss: 0.0055
0 loss: 0.0048
0 loss: 0.0032
0 loss: 0.0055
0 loss: 0.0052
0 loss: 0.0039
0 loss: 0.0037
0 loss: 0.0031
0 loss: 0.0045
0 loss: 0.0050
0 loss: 0.0037
0 loss: 0.0069
0 loss: 0.0036
0 loss: 0.0044
0 loss: 0.0032
0 loss: 0.0019
0 loss: 0.0042
0 loss: 0.0011
0 loss: 0.0031
0 loss: 0.0019
0 loss: 0.0040
0 loss: 0.0024
0 loss: 0.0036
0 loss: 0.0041
0 test acc: 41.9669%
test loss: 1.1648
1 loss: 0.0017
1 loss: 0.0051
1 loss: 0.0027
1 loss: 0.0044
1 loss: 0.0026
1 loss: 0.0039
1 loss: 0.0034
1 loss: 0.0025
1 loss: 0.0031
1 loss: 0.0026
1 loss: 0.0032
1 loss: 0.0023
1 loss: 0.0039
1 loss: 0.0032
1 loss: 0.0034
1 loss: 0.0020
1 loss: 0.0021
1 loss: 0.0017
1 loss: 0.0059
1 loss: 0.0021
1 loss: 0.0034
1 loss: 0.0035
1 loss: 0.0021
1 loss: 0.0016
1 loss: 0.0032
1 loss: 0.0025
1 loss: 0.0030
1 loss: 0.0035
1 loss: 0.0033
1