ここで学習を行う

In [1]:
# Import
import numpy as np
import os
import gc
import tensorflow as tf
import random
from tqdm.notebook import tqdm



In [2]:
# Configure the distribution strategy: use a TPU when one can be resolved and
# initialised, otherwise fall back to TensorFlow's default strategy.
tpu = None
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except Exception as tpu_error:
    # `Exception` (not a bare `except:`) so KeyboardInterrupt / SystemExit still
    # propagate; the reason is printed so a silent fallback cannot go unnoticed.
    print(f"TPU not available ({tpu_error!r}); using the default strategy.")
    strategy = tf.distribute.get_strategy()

## Constants

学習設定

In [3]:
# Training
validation_files_amount = 1
data_new_load_interval = 6      # Local Training: None
train_files_delta = 15         # Local Training: None
epochs = 75                    # Local Training: 30
batch_size = 8192               # Local Training: 2048
learning_rate = 0.0022          # Local Training: 0.0005
verbose = 0

# Training Batches
train_batch_id_min = 100
train_batch_id_max = 190
train_batch_ids = list(range(train_batch_id_min, train_batch_id_max + 1))

# Shuffle with a dedicated fixed-seed generator. This cell runs before the
# global seeds are set, so an unseeded shuffle would pick different validation
# files (the first `validation_files_amount` ids) on every kernel restart.
split_seed = 4242
np.random.default_rng(split_seed).shuffle(train_batch_ids)
print(train_batch_ids)

# Model Parameters
pulse_count = 96      # sequence length fed to the model
feature_count = 7     # features per pulse
lstm_units = 192      # units per recurrent direction
bin_num = 24          # bins per angle; the classifier has bin_num**2 classes

c_const = 0.299792458

# Data
base_dir = "/kaggle/input/lstmicecubedatasets/"
file_format = base_dir + 'pp_mpc96_n7_batch_{batch_id:d}.npz'

[127, 175, 184, 164, 189, 113, 152, 136, 185, 141, 130, 182, 190, 155, 163, 148, 178, 162, 166, 171, 138, 114, 108, 180, 172, 132, 140, 169, 128, 145, 137, 104, 129, 158, 121, 101, 173, 157, 170, 100, 107, 134, 176, 143, 150, 103, 133, 165, 160, 119, 186, 123, 117, 144, 120, 149, 183, 118, 142, 147, 159, 122, 125, 115, 153, 109, 167, 124, 110, 146, 135, 188, 187, 168, 106, 154, 139, 151, 126, 156, 131, 174, 181, 102, 111, 112, 177, 179, 161, 116, 105]


In [4]:
# Seed every random number source this notebook draws from (NumPy, the
# standard-library `random` module and TensorFlow) with one shared value.
seed = 4242
np.random.seed(seed)
random.seed(seed)
tf.random.set_seed(seed)

## Prepare Metric

誤差関数の設定。予測した方向ベクトル(azimuthとzenith)と真の方向ベクトルの内積をとって、その後角度にもどしたものの絶対値の平均をとっている。

In [5]:
def angular_dist_score(az_true, zen_true, az_pred, zen_pred):
    """
    Mean angular distance between true and predicted directions.

    Each (azimuth, zenith) pair is treated as a cartesian unit vector
    (x = sin(zen)*cos(az), y = sin(zen)*sin(az), z = cos(zen)). The scalar
    product of the true and predicted vectors is the cosine of the angle
    between them; its arccos is the angular distance.

    Parameters
    ----------
    az_true : float (or array thereof)
        true azimuth value(s) in radian
    zen_true : float (or array thereof)
        true zenith value(s) in radian
    az_pred : float (or array thereof)
        predicted azimuth value(s) in radian
    zen_pred : float (or array thereof)
        predicted zenith value(s) in radian

    Returns
    -------
    float
        mean over the angular distance(s) in radian

    Raises
    ------
    ValueError
        if any input contains a non-finite value (NaN or infinity)
    """
    # Reject NaN / inf up front so they cannot silently poison the mean.
    for angles in (az_true, zen_true, az_pred, zen_pred):
        if not np.all(np.isfinite(angles)):
            raise ValueError("All arguments must be finite")

    # Sines and cosines of the true direction ...
    sin_az_t, cos_az_t = np.sin(az_true), np.cos(az_true)
    sin_zen_t, cos_zen_t = np.sin(zen_true), np.cos(zen_true)

    # ... and of the predicted direction.
    sin_az_p, cos_az_p = np.sin(az_pred), np.cos(az_pred)
    sin_zen_p, cos_zen_p = np.sin(zen_pred), np.cos(zen_pred)

    # Scalar product of the two unit vectors, factored over the zenith sines.
    cos_angle = sin_zen_t*sin_zen_p*(cos_az_t*cos_az_p + sin_az_t*sin_az_p) + (cos_zen_t*cos_zen_p)

    # Finite precision can push the product marginally outside [-1, 1], which
    # would make arccos return NaN, so clamp it first.
    cos_angle = np.clip(cos_angle, -1, 1)

    # Back to an angle in radian, averaged over all inputs.
    return np.average(np.abs(np.arccos(cos_angle)))

## Define Azimuth and Zenith Bins

zenithとazimuthは連続値だが、回帰問題ではなく分類問題として扱うため、２つの値を離散化（ビン分割）している。

In [6]:
# Azimuth edges: bin_num equal-width bins spanning [0, 2*pi].
azimuth_edges = np.linspace(0, 2 * np.pi, bin_num + 1)
print(azimuth_edges)

# Zenith edges: start at 0; each following edge lowers cos(zenith) by
# 2 / bin_num relative to the previous edge, and the final edge is set to pi.
zenith_edges = [0]
while len(zenith_edges) < bin_num:
    previous_edge = zenith_edges[-1]
    zenith_edges.append(np.arccos(np.cos(previous_edge) - 2 / (bin_num)))
zenith_edges.append(np.pi)
zenith_edges = np.array(zenith_edges)
print(zenith_edges)

[0.         0.26179939 0.52359878 0.78539816 1.04719755 1.30899694
 1.57079633 1.83259571 2.0943951  2.35619449 2.61799388 2.87979327
 3.14159265 3.40339204 3.66519143 3.92699082 4.1887902  4.45058959
 4.71238898 4.97418837 5.23598776 5.49778714 5.75958653 6.02138592
 6.28318531]
[0.         0.41113786 0.58568554 0.72273425 0.84106867 0.94796974
 1.04719755 1.1410209  1.23095942 1.31811607 1.40334825 1.48736624
 1.57079633 1.65422641 1.73824441 1.82347658 1.91063324 2.00057176
 2.0943951  2.19362291 2.30052398 2.41885841 2.55590711 2.73045479
 3.14159265]


## Supporting Functions

pred_to_angleは、モデルの出力層が出力する24^2個の成分（各ビンの重み）を使って、離散化された各ビンの方向ベクトルを加重平均した合成ベクトルをつくり、それをazimuthとzenithに直して最終的な予測値とするときに使う。

y_to_angle_codeは、azimuthとzenithのペアと一対一対応した整数を生成する関数。

In [7]:
# Lower (0) and upper (1) edges of every (azimuth, zenith) bin, flattened so
# that flat index = bin_num * azimuth_index + zenith_index.
angle_bin_azimuth0 = np.repeat(azimuth_edges[:-1], bin_num)
angle_bin_azimuth1 = np.repeat(azimuth_edges[1:], bin_num)
angle_bin_zenith0 = np.tile(zenith_edges[:-1], bin_num)
angle_bin_zenith1 = np.tile(zenith_edges[1:], bin_num)

# Azimuth width of each bin, and the zenith factor that the x and y
# components share.
_azimuth_span = angle_bin_azimuth1 - angle_bin_azimuth0
_zenith_xy_term = (angle_bin_zenith1 - angle_bin_zenith0) / 2 - (np.sin(2 * angle_bin_zenith1) - np.sin(2 * angle_bin_zenith0)) / 4

# Per-bin area and per-bin component sums of the unit direction vector.
angle_bin_area = _azimuth_span * (np.cos(angle_bin_zenith0) - np.cos(angle_bin_zenith1))
angle_bin_vector_sum_x = (np.sin(angle_bin_azimuth1) - np.sin(angle_bin_azimuth0)) * _zenith_xy_term
angle_bin_vector_sum_y = (np.cos(angle_bin_azimuth0) - np.cos(angle_bin_azimuth1)) * _zenith_xy_term
angle_bin_vector_sum_z = _azimuth_span * ((np.cos(2 * angle_bin_zenith0) - np.cos(2 * angle_bin_zenith1)) / 4)

# Dividing each sum by the bin area gives the mean direction vector per bin.
angle_bin_vector_mean_x = angle_bin_vector_sum_x / angle_bin_area
angle_bin_vector_mean_y = angle_bin_vector_sum_y / angle_bin_area
angle_bin_vector_mean_z = angle_bin_vector_sum_z / angle_bin_area

# Shape (1, bin_num * bin_num, 3): the leading axis broadcasts over samples.
angle_bin_vector = np.stack(
    [angle_bin_vector_mean_x, angle_bin_vector_mean_y, angle_bin_vector_mean_z],
    axis=-1,
)[np.newaxis, :, :]

def pred_to_angle(pred, epsilon=1e-8):
    """Convert per-bin prediction weights into azimuth / zenith angles.

    The weights are used to form a weighted sum of the per-bin mean direction
    vectors (`angle_bin_vector`); the sum is normalised to unit length and
    converted to angles.

    Parameters
    ----------
    pred : np.ndarray
        Prediction weights; reshaped internally to
        (-1, bin_num * bin_num, 1), so one row per sample.
    epsilon : float
        Summed vectors with a norm below this threshold are replaced by
        <1, 0, 0> instead of being normalised.

    Returns
    -------
    (azimuth, zenith) : tuple of np.ndarray
        Azimuth shifted into [0, 2*pi) and zenith in [0, pi], in radian.
    """
    # convert prediction to vector
    pred_vector = (pred.reshape((-1, bin_num * bin_num, 1)) * angle_bin_vector).sum(axis=1)

    # normalize; near-zero norms are set to 1 so the division below is safe
    pred_vector_norm = np.sqrt((pred_vector**2).sum(axis=1))
    mask = pred_vector_norm < epsilon
    pred_vector_norm[mask] = 1

    # assign <1, 0, 0> to very small vectors (badly predicted)
    pred_vector /= pred_vector_norm.reshape((-1, 1))
    pred_vector[mask] = np.array([1., 0., 0.])

    # convert to angle
    azimuth = np.arctan2(pred_vector[:, 1], pred_vector[:, 0])
    azimuth[azimuth < 0] += 2 * np.pi
    # Clip before arccos: rounding in the normalisation can leave |z| a hair
    # above 1, for which arccos would return NaN.
    zenith = np.arccos(np.clip(pred_vector[:, 2], -1.0, 1.0))

    return azimuth, zenith

def y_to_angle_code(batch_y):
    """Map (azimuth, zenith) labels to a single integer class code.

    Parameters
    ----------
    batch_y : np.ndarray
        2-D array whose column 0 is azimuth and column 1 is zenith.

    Returns
    -------
    np.ndarray
        One integer per row: bin_num * azimuth_bin + zenith_bin, always within
        [0, bin_num * bin_num).
    """
    # A value's bin index is the number of upper bin edges it strictly exceeds.
    azimuth_code = (batch_y[:, 0] > azimuth_edges[1:].reshape((-1, 1))).sum(axis=0)
    zenith_code = (batch_y[:, 1] > zenith_edges[1:].reshape((-1, 1))).sum(axis=0)

    # Clamp to the last bin. A label above the top edge (possible e.g. if the
    # labels were stored as float32, where 2*pi and pi round upwards — TODO
    # confirm the label dtype) would otherwise get index bin_num and produce a
    # class code that aliases another bin or falls outside the output layer.
    azimuth_code = np.minimum(azimuth_code, bin_num - 1)
    zenith_code = np.minimum(zenith_code, bin_num - 1)

    angle_code = bin_num * azimuth_code + zenith_code

    return angle_code

## Data Loading

In [8]:
def normalize_data(data):
    """Scale the feature columns of `data` in place and return the same array.

    Along the last axis: column 0 (time) is divided by 1000, column 1 (charge)
    by 300, columns 3-5 (space) by 600 and column 6 (vel) by 60. Column 2 is
    left unchanged.
    """
    # (column selector on the last axis, divisor)
    scale_table = (
        (0, 1000),            # time
        (1, 300),             # charge
        (slice(3, 6), 600),   # space
        (6, 60),              # vel
    )
    for columns, divisor in scale_table:
        data[:, :, columns] /= divisor

    return data

def _load_batches(batch_ids, feature_columns=(0, 1, 2, 3, 4, 5, 7)):
    """Load the "x" / "y" arrays of the given batch files and stack them.

    Parameters
    ----------
    batch_ids : iterable of int
        Batch ids substituted into `file_format` to build each file path.
    feature_columns : sequence of int
        Indices kept along the last axis of "x".

    Returns
    -------
    (x, y) : tuple of np.ndarray
        Arrays of all files concatenated along axis 0, in `batch_ids` order.

    Raises
    ------
    ValueError
        If `batch_ids` is empty.
    """
    batch_ids = list(batch_ids)
    if not batch_ids:
        raise ValueError("No batch files selected to load")

    x_parts = []
    y_parts = []
    for batch_id in tqdm(batch_ids):
        # The context manager closes the .npz archive even if a read fails.
        with np.load(file_format.format(batch_id = batch_id)) as data_file:
            x_parts.append(data_file["x"][:, :, list(feature_columns)])
            y_parts.append(data_file["y"])
        _ = gc.collect()

    # A single file needs no extra copy.
    if len(x_parts) == 1:
        return x_parts[0], y_parts[0]

    # One concatenate per array instead of re-copying the growing array on
    # every np.append call.
    x_all = np.concatenate(x_parts, axis = 0)
    x_parts.clear()
    y_all = np.concatenate(y_parts, axis = 0)
    y_parts.clear()
    _ = gc.collect()

    return x_all, y_all

def prep_validation_data(validation_files_amount):
    """Load and normalise the fixed validation set.

    The first `validation_files_amount` entries of `train_batch_ids` are used.

    Returns
    -------
    (val_x, val_y) : tuple of np.ndarray
        Normalised features and the raw "y" array of the files.
    """
    print("Processing Validation Data...")

    # Summary
    validation_ids = train_batch_ids[:validation_files_amount]
    print(validation_ids)

    val_x, val_y = _load_batches(validation_ids)

    # Normalize Data
    val_x = normalize_data(val_x)

    # Shape Summary
    print(val_x.shape)

    return val_x, val_y

def prep_training_data(start_batch, end_batch=None):
    """Load, normalise and label-encode a set of training files.

    Parameters
    ----------
    start_batch : int
        First position in `train_batch_ids` that may be used for training.
    end_batch : int, optional
        Exclusive end position in `train_batch_ids`; None means "to the end".

    Candidate files are `train_batch_ids[start_batch:end_batch]`. When
    `train_files_delta` is set, that many files are drawn at random from the
    candidates; when it is None, all candidates are loaded.

    Returns
    -------
    (train_x, trn_y_anglecode) : tuple of np.ndarray
        Normalised features and the class codes from `y_to_angle_code`.
    """
    print("Processing Training Data...")

    # Summary
    candidate_ids = train_batch_ids[start_batch:end_batch]
    if train_files_delta is None:
        train_ids = list(candidate_ids)
    else:
        train_ids = random.sample(candidate_ids, train_files_delta)
    print(train_ids)

    train_x, train_y = _load_batches(train_ids)

    # Normalize data
    train_x = normalize_data(train_x)

    # Shape Summary
    print(train_x.shape)

    # Output Encoding
    trn_y_anglecode = y_to_angle_code(train_y)

    return train_x, trn_y_anglecode

## Model

In [9]:
def create_model():
    """Build, compile and summarise the bidirectional-GRU classifier.

    Architecture: Masking (mask value 0) -> three Bidirectional GRU layers of
    `lstm_units` units (only the last one collapses the sequence) ->
    Dense(256, relu) -> Dense(bin_num**2, softmax). The model is created inside
    `strategy.scope()` and compiled with sparse categorical cross-entropy,
    Adam at `learning_rate`, and an accuracy metric.

    Returns
    -------
    tf.keras.Model
        The compiled model (its summary is printed as a side effect).
    """
    layers = tf.keras.layers
    sequence_shape = (pulse_count, feature_count)

    with strategy.scope():
        inputs = layers.Input(sequence_shape)

        x = layers.Masking(mask_value = 0., input_shape = sequence_shape)(inputs)

        # The first two recurrent layers return the full sequence; the third
        # returns only its final output.
        for return_sequences in (True, True, False):
            recurrent = layers.GRU(lstm_units, return_sequences = return_sequences)
            x = layers.Bidirectional(recurrent)(x)

        x = layers.Dense(256, activation = 'relu')(x)
        outputs = layers.Dense(bin_num**2, activation = 'softmax')(x)

        # Finalize Model
        model = tf.keras.models.Model(inputs = inputs, outputs = outputs)

        # Compile model
        optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate)
        model.compile(loss = 'sparse_categorical_crossentropy',
                      optimizer = optimizer,
                      metrics = ['accuracy'])

        # Show Model Summary
        model.summary()

        return model

## Train Model

In [None]:
# Get Fixed Validation Dataset
val_x, val_y = prep_validation_data(validation_files_amount)

# Create Model
model = create_model()

# The two data-loading settings must be configured together: either both None
# (load everything once) or both set (reload a random subset periodically).
if (data_new_load_interval is None) != (train_files_delta is None):
    raise ValueError("data_new_load_interval and train_files_delta must both be None or both be set")

# Placeholders so the previous training arrays can be released before a reload
trn_x = None
trn_y_anglecode = None

# For training other than Kaggle environment...provided enough RAM...Load all data
if data_new_load_interval is None and train_files_delta is None:
    print('\nLoading All Train Data')
    start_batch = validation_files_amount
    end_batch = start_batch + (len(train_batch_ids) - validation_files_amount)
    # NOTE(review): this call passes an explicit end index, so
    # prep_training_data must accept a second positional argument.
    trn_x, trn_y_anglecode = prep_training_data(start_batch, end_batch)

# Epoch Loop
for e in range(epochs):
    print(f'=========== EPOCH: {e}')

    # Load new random batch of training files .. delta wise .. on Kaggle or Colab with limited RAM.
    if data_new_load_interval is not None and train_files_delta is not None and e % data_new_load_interval == 0:
        print(f'\nLoading Train Data at epoch: {e}')
        # Drop the old arrays first so old and new data never coexist in RAM.
        trn_x = None
        trn_y_anglecode = None
        gc.collect()
        trn_x, trn_y_anglecode = prep_training_data(validation_files_amount)

    # Number of batches (a trailing partial batch is skipped)
    batch_count = trn_x.shape[0] // batch_size

    # Random Shuffle each epoch: only the row order is shuffled and rows are
    # gathered per batch, which avoids copying the whole training array.
    indices = np.arange(trn_x.shape[0])
    np.random.shuffle(indices)

    # Placeholder
    losses = []
    accuracy = []

    # Batch Loop
    for batch_index in tqdm(range(batch_count), total = batch_count):
        batch_rows = indices[batch_index * batch_size: batch_index * batch_size + batch_size]
        b_train_x = trn_x[batch_rows]
        b_train_y = trn_y_anglecode[batch_rows]

        metrics = model.train_on_batch(b_train_x, b_train_y)
        losses.append(metrics[0])
        accuracy.append(metrics[1])

    # Save Model
    model.save(f'tpu_pp96_n{feature_count}_bin{bin_num}_batch{batch_size}_epoch{e}.h5')

    # Metrics: mean angular error of the decoded predictions on the validation set
    valid_pred = model.predict(val_x, batch_size = batch_size, verbose = verbose)
    valid_pred_azimuth, valid_pred_zenith = pred_to_angle(valid_pred)
    mae = angular_dist_score(val_y[:, 0], val_y[:, 1], valid_pred_azimuth, valid_pred_zenith)
    print(f'Total Train Loss: {np.mean(losses):.4f}   Accuracy: {np.mean(accuracy):.4f}  MAE: {mae:.5f}')

    # Memory Cleanup
    gc.collect()

Processing Validation Data...
[127]


  0%|          | 0/1 [00:00<?, ?it/s]

(200000, 96, 7)
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 96, 7)]           0         
                                                                 
 masking (Masking)           (None, 96, 7)             0         
                                                                 
 bidirectional (Bidirectiona  (None, 96, 384)          231552    
 l)                                                              
                                                                 
 bidirectional_1 (Bidirectio  (None, 96, 384)          665856    
 nal)                                                            
                                                                 
 bidirectional_2 (Bidirectio  (None, 384)              665856    
 nal)                                                            
                                             

  0%|          | 0/15 [00:00<?, ?it/s]

(3000000, 96, 7)


  0%|          | 0/366 [00:00<?, ?it/s]

(3000000, 96, 7)


  0%|          | 0/366 [00:00<?, ?it/s]

Total Train Loss: 5.2196   Accuracy: 0.1110  MAE: 1.05390


  0%|          | 0/366 [00:00<?, ?it/s]

Total Train Loss: 5.1638   Accuracy: 0.1191  MAE: 1.04928


  0%|          | 0/366 [00:00<?, ?it/s]

Total Train Loss: 5.1436   Accuracy: 0.1233  MAE: 1.04079


  0%|          | 0/366 [00:00<?, ?it/s]

Total Train Loss: 5.1215   Accuracy: 0.1269  MAE: 1.03982


  0%|          | 0/366 [00:00<?, ?it/s]

Total Train Loss: 5.0844   Accuracy: 0.1328  MAE: 1.03979


  0%|          | 0/366 [00:00<?, ?it/s]

Total Train Loss: 5.0549   Accuracy: 0.1369  MAE: 1.03969

Loading Train Data at epoch: 30
Processing Training Data...
[149, 184, 189, 187, 151, 177, 106, 169, 158, 105, 115, 128, 133, 129, 114]


  0%|          | 0/15 [00:00<?, ?it/s]

(3000000, 96, 7)


  0%|          | 0/366 [00:00<?, ?it/s]

Total Train Loss: 5.0958   Accuracy: 0.1311  MAE: 1.03605


  0%|          | 0/366 [00:00<?, ?it/s]

Total Train Loss: 5.0346   Accuracy: 0.1407  MAE: 1.03666


  0%|          | 0/366 [00:00<?, ?it/s]

Total Train Loss: 5.0381   Accuracy: 0.1404  MAE: 1.03409


  0%|          | 0/366 [00:00<?, ?it/s]

  0%|          | 0/366 [00:00<?, ?it/s]

  0%|          | 0/366 [00:00<?, ?it/s]

Total Train Loss: 5.0543   Accuracy: 0.1381  MAE: 1.03332


  0%|          | 0/366 [00:00<?, ?it/s]

Total Train Loss: 5.0723   Accuracy: 0.1352  MAE: 1.03346


  0%|          | 0/366 [00:00<?, ?it/s]

Total Train Loss: 5.0099   Accuracy: 0.1455  MAE: 1.03261


  0%|          | 0/366 [00:00<?, ?it/s]

  0%|          | 0/366 [00:00<?, ?it/s]

Total Train Loss: 5.2494   Accuracy: 0.1065  MAE: 1.05844


  0%|          | 0/366 [00:00<?, ?it/s]