In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

import tensorflow as tf
from tensorflow import keras
tf.config.run_functions_eagerly(False)

from tensorflow.keras.optimizers.schedules import CosineDecay
from sklearn.model_selection import GroupShuffleSplit
from tensorflow.keras import layers, models, losses, metrics
from tensorflow.keras import regularizers, callbacks
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LeakyReLU, Reshape, Conv1D, MaxPooling1D, LSTM, Dense, Dropout, BatchNormalization, Conv2D, MaxPooling2D, Flatten, Activation, Add, Input, GlobalAveragePooling1D, Bidirectional
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import MeanAbsoluteError
from tensorflow.keras.utils import plot_model

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def add_remaining_useful_life(df):
    # Get the total number of cycles for each unit
    grouped_by_unit = df.groupby(by="unit_nr")
    max_cycle = grouped_by_unit["time_cycles"].max()

    # Merge the max cycle back into the original frame
    result_frame = df.merge(max_cycle.to_frame(name='max_cycle'), left_on='unit_nr', right_index=True)

    # Calculate remaining useful life for each row
    remaining_useful_life = result_frame["max_cycle"] - result_frame["time_cycles"]
    result_frame["RUL"] = remaining_useful_life

    # Drop max_cycle as it's no longer needed
    result_frame = result_frame.drop("max_cycle", axis=1)
    return result_frame

def add_operating_condition(df):
    df_op_cond = df.copy()

    df_op_cond['setting_1'] = abs(df_op_cond['setting_1'].round())
    df_op_cond['setting_2'] = abs(df_op_cond['setting_2'].round(decimals=2))

    # Converting settings to string and concatanating makes the operating condition into a categorical variable
    df_op_cond['op_cond'] = df_op_cond['setting_1'].astype(str) + '_' + \
                        df_op_cond['setting_2'].astype(str) + '_' + \
                        df_op_cond['setting_3'].astype(str)

    return df_op_cond

def condition_scaler(df_train, df_test, sensor_names):
    # Apply operating condition specific scaling
    scaler = StandardScaler()
    for condition in df_train['op_cond'].unique():
        scaler.fit(df_train.loc[df_train['op_cond']==condition, sensor_names])
        df_train.loc[df_train['op_cond']==condition, sensor_names] = scaler.transform(df_train.loc[df_train['op_cond']==condition, sensor_names])
        df_test.loc[df_test['op_cond']==condition, sensor_names] = scaler.transform(df_test.loc[df_test['op_cond']==condition, sensor_names])
    return df_train, df_test

def exponential_smoothing(df, sensors, n_samples, alpha=0.4):
    df = df.copy()
    # Take the exponential weighted mean
    df[sensors] = df.groupby('unit_nr')[sensors].apply(lambda x: x.ewm(alpha=alpha).mean()).reset_index(level=0, drop=True)

    # Drop first n_samples of each unit_nr to reduce filter delay
    def create_mask(data, samples):
        result = np.ones_like(data)
        result[0:samples] = 0
        return result

    mask = df.groupby('unit_nr')['unit_nr'].transform(create_mask, samples=n_samples).astype(bool)
    df = df[mask]

    return df

def gen_train_data(df, sequence_length, columns):
    data = df[columns].values
    num_elements = data.shape[0]

    for start, stop in zip(range(0, num_elements-(sequence_length-1)), range(sequence_length, num_elements+1)):
        yield data[start:stop, :]

def gen_data_wrapper(df, sequence_length, columns, unit_nrs=np.array([])):
    if unit_nrs.size <= 0:
        unit_nrs = df['unit_nr'].unique()

    data_gen = (list(gen_train_data(df[df['unit_nr']==unit_nr], sequence_length, columns))
               for unit_nr in unit_nrs)
    data_array = np.concatenate(list(data_gen)).astype(np.float32)
    return data_array

def gen_labels(df, sequence_length, label):
    data_matrix = df[label].values
    num_elements = data_matrix.shape[0]

    return data_matrix[sequence_length-1:num_elements, :]

def gen_label_wrapper(df, sequence_length, label, unit_nrs=np.array([])):
    if unit_nrs.size <= 0:
        unit_nrs = df['unit_nr'].unique()

    label_gen = [gen_labels(df[df['unit_nr']==unit_nr], sequence_length, label)
                for unit_nr in unit_nrs]
    label_array = np.concatenate(label_gen).astype(np.float32)
    return label_array

def gen_test_data(df, sequence_length, columns, mask_value):
    if df.shape[0] < sequence_length:
        data_matrix = np.full(shape=(sequence_length, len(columns)), fill_value=mask_value)
        idx = data_matrix.shape[0] - df.shape[0]
        data_matrix[idx:,:] = df[columns].values
    else:
        data_matrix = df[columns].values

    stop = data_matrix.shape[0]
    start = stop - sequence_length
    for i in list(range(1)):
        yield data_matrix[start:stop, :]


def get_data(dataset, sensors, sequence_length, alpha, threshold):
	dir_path = './CMAPSS/'
	train_file = 'train_'+dataset+'.txt'
	test_file = 'test_'+dataset+'.txt'

	index_names = ['unit_nr', 'time_cycles']
	setting_names = ['setting_1', 'setting_2', 'setting_3']
	sensor_names = ['s_{}'.format(i+1) for i in range(0,21)]
	col_names = index_names + setting_names + sensor_names

	train = pd.read_csv((dir_path+train_file), sep=r'\s+', header=None,
					 names=col_names)
	test = pd.read_csv((dir_path+test_file), sep=r'\s+', header=None,
					 names=col_names)
	y_test = pd.read_csv((dir_path+'RUL_'+dataset+'.txt'), sep=r'\s+', header=None,
					 names=['RemainingUsefulLife'])

	train = add_remaining_useful_life(train)
	train['RUL'].clip(upper=threshold, inplace=True)

  #Dropping sensors
	drop_sensors = [element for element in sensor_names if element not in sensors]

  # Scale with respect to the operating condition
	X_train_pre = add_operating_condition(train.drop(drop_sensors, axis=1))
	X_test_pre = add_operating_condition(test.drop(drop_sensors, axis=1))
	X_train_pre, X_test_pre = condition_scaler(X_train_pre, X_test_pre, sensors)

  # Exponential smoothing
	X_train_pre= exponential_smoothing(X_train_pre, sensors, 0, alpha)
	X_test_pre = exponential_smoothing(X_test_pre, sensors, 0, alpha)

  # Train/Validation split
	gss = GroupShuffleSplit(n_splits=1, train_size=0.80, random_state=42)

	for train_unit, val_unit in gss.split(X_train_pre['unit_nr'].unique(), groups=X_train_pre['unit_nr'].unique()):
		train_unit = X_train_pre['unit_nr'].unique()[train_unit]  # gss returns indexes and index starts at 1
		val_unit = X_train_pre['unit_nr'].unique()[val_unit]

		x_train = gen_data_wrapper(X_train_pre, sequence_length, sensors, train_unit)
		y_train = gen_label_wrapper(X_train_pre, sequence_length, ['RUL'], train_unit)

		x_val = gen_data_wrapper(X_train_pre, sequence_length, sensors, val_unit)
		y_val = gen_label_wrapper(X_train_pre, sequence_length, ['RUL'], val_unit)

	# Create sequences for test
	test_gen = (list(gen_test_data(X_test_pre[X_test_pre['unit_nr']==unit_nr], sequence_length, sensors, -99.))
			   for unit_nr in X_test_pre['unit_nr'].unique())
	x_test = np.concatenate(list(test_gen)).astype(np.float32)
	test_unit_ids = X_test_pre['unit_nr'].unique()

	return x_train, y_train, x_val, y_val, x_test, y_test['RemainingUsefulLife'], test_unit_ids

In [4]:
# Choose the subset (FD001, FD002, FD003, FD004)
dataset = 'FD002'

# Sensors to use; sensor 13 is dropped from FD002 and FD004
if(dataset == 'FD001' or 'FD003'):
  sensors = ['s_2', 's_3', 's_4', 's_7', 's_8', 's_9', 's_11', 's_12', 's_13', 's_14', 's_15', 's_17', 's_20', 's_21']
else:
  sensors = ['s_2', 's_3', 's_4', 's_7', 's_8', 's_9', 's_11', 's_12', 's_14', 's_15', 's_17', 's_20', 's_21']

sequence_length = 30
alpha = 0.3
rul_clip_threshold = 125

# Load and process the data
x_train, y_train, x_val, y_val, x_test, y_test, test_unit_ids = get_data(
    dataset=dataset,
    sensors=sensors,
    sequence_length=sequence_length,
    alpha=alpha,
    threshold=rul_clip_threshold
)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train['RUL'].clip(upper=threshold, inplace=True)
  2.68156725]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df_train.loc[df_train['op_cond']==condition, sensor_names] = scaler.transform(df_train.loc[df_train['op_cond']==condition, sensor_names])
 -0.1674041 ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df_test.loc[df_test['op_cond']==condition, sensor_names] = scaler.transform(df_test.loc[df_test['op_cond']==condition, sensor_names])


In [None]:
def create_lstm_model(input_shape):
    model = models.Sequential([
        layers.Input(shape=input_shape),
        layers.Masking(mask_value=0.0),
        layers.LSTM(64, return_sequences=True),
        layers.LSTM(32),
        layers.Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError()])
    return model

rul_model = create_lstm_model(input_shape=(sequence_length, len(sensors)))

history = rul_model.fit(
    x_train, y_train,
    validation_data=(x_val, y_val),
    batch_size=64,
    epochs=20
)

predictions = rul_model.predict(x_test).flatten()
rmse = np.sqrt(np.mean((predictions - y_test)**2))
print(f"Test RMSE: {rmse:.2f}")

Epoch 1/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 55ms/step - loss: 6817.8452 - root_mean_squared_error: 82.5035 - val_loss: 4461.7427 - val_root_mean_squared_error: 66.7963
Epoch 2/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 53ms/step - loss: 4305.6074 - root_mean_squared_error: 65.5862 - val_loss: 2885.5969 - val_root_mean_squared_error: 53.7178
Epoch 3/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 52ms/step - loss: 2795.3992 - root_mean_squared_error: 52.8506 - val_loss: 1843.4581 - val_root_mean_squared_error: 42.9355
Epoch 4/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 53ms/step - loss: 1804.5582 - root_mean_squared_error: 42.4540 - val_loss: 1165.2659 - val_root_mean_squared_error: 34.1360
Epoch 5/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 53ms/step - loss: 1122.4259 - root_mean_squared_error: 33.4884 - val_loss: 763.6387 - val_root_mean_squared_error:

In [None]:
class PositionalEncodingLayer(tf.keras.layers.Layer):
    def call(self, x):
        seq_len = tf.shape(x)[1]
        d_model = tf.shape(x)[2]
        d_model_int = x.shape[-1]  # Static shape for slicing

        position = tf.cast(tf.range(seq_len)[:, tf.newaxis], dtype=tf.float32)
        div_term = tf.exp(
            tf.cast(tf.range(0, d_model_int, 2), tf.float32) *
            -(tf.math.log(10000.0) / tf.cast(d_model_int, tf.float32))
        )
        angle_rads = position * div_term

        sines = tf.sin(angle_rads)
        cosines = tf.cos(angle_rads)

        # Interleave sines and cosines
        pos_encoding = tf.reshape(
            tf.stack([sines, cosines], axis=-1),
            (seq_len, -1)
        )

        # Slice in case of dimension mismatch
        pos_encoding = pos_encoding[:, :d_model_int]

        pos_encoding = pos_encoding[tf.newaxis, ...]  # (1, seq_len, d_model)
        return x + pos_encoding

    def compute_output_shape(self, input_shape):
        return input_shape



# Self-attention block
def self_attention_block(x, num_heads=2, key_dim=32):
    attn_output = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)(x, x)
    x = layers.Add()([x, attn_output])
    x = layers.LayerNormalization()(x)
    return x

# Final model
def create_advanced_cnn_model(input_shape):
    inputs = layers.Input(shape=input_shape)

    # Add positional encoding
    x = PositionalEncodingLayer()(inputs)

    # Dilated Convolutional Layers
    x = layers.Conv1D(128, kernel_size=3, dilation_rate=1, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv1D(128, kernel_size=3, dilation_rate=2, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv1D(128, kernel_size=3, dilation_rate=4, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)

    # Self-Attention
    x = self_attention_block(x)

    # Global pooling + Dense layers
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(64, activation='relu')(x)
    x = layers.Dropout(0.2)(x)

    outputs = layers.Dense(1)(x)

    model = models.Model(inputs, outputs)
    model.compile(
        optimizer='adam',
        loss=losses.Huber(),
        metrics=[metrics.RootMeanSquaredError()]
    )
    return model


In [None]:
cnn_model = create_advanced_cnn_model(input_shape=(x_train.shape[1], x_train.shape[2]))

history = cnn_model.fit(
    x_train, y_train,
    validation_data=(x_val, y_val),
    epochs=20,
    batch_size=64,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
    ]
)

predictions = cnn_model.predict(x_test).flatten()
test_rmse = np.sqrt(np.mean((predictions - y_test) ** 2))
print(f"Test RMSE: {test_rmse:.2f}")

Epoch 1/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 60ms/step - loss: 84.2407 - root_mean_squared_error: 111.7238 - val_loss: 31.0359 - val_root_mean_squared_error: 38.0031
Epoch 2/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 55ms/step - loss: 30.7822 - root_mean_squared_error: 39.1894 - val_loss: 20.6228 - val_root_mean_squared_error: 26.0661
Epoch 3/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 55ms/step - loss: 23.1302 - root_mean_squared_error: 29.4339 - val_loss: 13.2701 - val_root_mean_squared_error: 19.0806
Epoch 4/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 53ms/step - loss: 16.5938 - root_mean_squared_error: 21.9889 - val_loss: 12.4040 - val_root_mean_squared_error: 16.7813
Epoch 5/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 53ms/step - loss: 14.9373 - root_mean_squared_error: 20.0033 - val_loss: 11.5782 - val_root_mean_squared_error: 17.2036
Epoch 6/2

In [None]:
def residual_block(x, filters, kernel_size, dilation_rate=1):
    shortcut = x
    # If the input and output shapes are different, apply a 1x1 convolution to match the dimensions
    if x.shape[-1] != filters:
        shortcut = layers.Conv1D(filters, kernel_size=1, padding='same')(shortcut)

    # Convolution layers
    x = layers.Conv1D(filters, kernel_size, padding='same', dilation_rate=dilation_rate, activation='relu')(x)
    x = layers.LayerNormalization()(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Conv1D(filters, kernel_size, padding='same', dilation_rate=dilation_rate)(x)
    x = layers.LayerNormalization()(x)

    # Add the residual (skip connection)
    x = layers.Add()([shortcut, x])
    x = layers.Activation('relu')(x)
    return x


def transformer_block(x, num_heads=2, key_dim=32):
    attn_output = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)(x, x)
    x = layers.Add()([x, attn_output])
    x = layers.LayerNormalization()(x)

    ff = layers.Dense(x.shape[-1] * 4, activation='relu')(x)
    ff = layers.Dropout(0.2)(ff)
    ff = layers.Dense(x.shape[-1])(ff)

    x = layers.Add()([x, ff])
    x = layers.LayerNormalization()(x)
    return x

def create_cnn_transformer_model(input_shape):
    inputs = layers.Input(shape=input_shape)

    # CNN
    x = residual_block(inputs, 64, kernel_size=3)
    x = residual_block(x, 128, kernel_size=3, dilation_rate=2)
    x = residual_block(x, 128, kernel_size=3, dilation_rate=4)

    # Transformer
    x = transformer_block(x, num_heads=4, key_dim=32)

    # Output
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(64, activation='relu')(x)
    x = layers.Dropout(0.2)(x)
    outputs = layers.Dense(1)(x)

    model = models.Model(inputs, outputs)
    model.compile(
        optimizer='adam',
        loss=losses.Huber(),
        metrics=[metrics.RootMeanSquaredError()]
    )
    return model

In [None]:
cnn_model = create_cnn_transformer_model(input_shape=(x_train.shape[1], x_train.shape[2]))

history = cnn_model.fit(
    x_train, y_train,
    validation_data=(x_val, y_val),
    epochs=20,
    batch_size=64,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
    ]
)

predictions = cnn_model.predict(x_test).flatten()
test_rmse = np.sqrt(np.mean((predictions - y_test) ** 2))
print(f"Test RMSE: {test_rmse:.2f}")

Epoch 1/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 145ms/step - loss: 30.7924 - root_mean_squared_error: 41.4033 - val_loss: 12.4341 - val_root_mean_squared_error: 17.0099
Epoch 2/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 134ms/step - loss: 13.0117 - root_mean_squared_error: 17.9854 - val_loss: 10.3699 - val_root_mean_squared_error: 16.5411
Epoch 3/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 131ms/step - loss: 11.9797 - root_mean_squared_error: 16.8088 - val_loss: 11.0178 - val_root_mean_squared_error: 16.8527
Epoch 4/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 132ms/step - loss: 11.4345 - root_mean_squared_error: 16.1369 - val_loss: 11.8163 - val_root_mean_squared_error: 18.1172
Epoch 5/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 128ms/step - loss: 10.9938 - root_mean_squared_error: 15.4612 - val_loss: 10.3930 - val_root_mean_squared_error: 16.5231
Epoc

In [None]:
# Temporal Convolutional Block (with residual connections)
def tcn_block(x, filters, kernel_size, dilation_rate):
    shortcut = x
    # If the number of filters in the input doesn't match the output, apply a 1x1 convolution
    if x.shape[-1] != filters:
        shortcut = layers.Conv1D(filters, 1, padding='same')(x)

    # Apply the dilated convolution
    x = layers.Conv1D(filters, kernel_size, padding='causal', dilation_rate=dilation_rate, activation='relu')(x)
    x = layers.LayerNormalization()(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Conv1D(filters, kernel_size, padding='causal', dilation_rate=dilation_rate)(x)
    x = layers.LayerNormalization()(x)

    # Residual connection
    x = layers.Add()([shortcut, x])
    x = layers.Activation('relu')(x)
    return x

# TCN Model (without Time2Vec)
def create_tcn_model(input_shape):
    inputs = layers.Input(shape=input_shape)

    # Apply TCN Blocks
    x = tcn_block(inputs, 64, kernel_size=3, dilation_rate=1)
    x = tcn_block(x, 128, kernel_size=3, dilation_rate=2)
    x = tcn_block(x, 128, kernel_size=3, dilation_rate=4)

    # Global pooling and output layers
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(64, activation='relu')(x)
    x = layers.Dropout(0.2)(x)
    outputs = layers.Dense(1)(x)

    model = models.Model(inputs, outputs)
    model.compile(
        optimizer='adam',
        loss=losses.Huber(),
        metrics=[metrics.RootMeanSquaredError()]
    )
    return model


In [None]:
cnn_model = create_tcn_model(input_shape=(x_train.shape[1], x_train.shape[2]))

# Training the model
history = cnn_model.fit(
    x_train, y_train,
    validation_data=(x_val, y_val),
    epochs=20,
    batch_size=64,
    callbacks=[tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)]
)

# Prediction
predictions = cnn_model.predict(x_test).flatten()
test_rmse = np.sqrt(np.mean((predictions - y_test) ** 2))
print(f"Test RMSE: {test_rmse}")


Epoch 1/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 77ms/step - loss: 26.0872 - root_mean_squared_error: 35.6328 - val_loss: 13.0142 - val_root_mean_squared_error: 18.8114
Epoch 2/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 70ms/step - loss: 13.2673 - root_mean_squared_error: 18.6690 - val_loss: 12.7163 - val_root_mean_squared_error: 19.2424
Epoch 3/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 69ms/step - loss: 11.5683 - root_mean_squared_error: 16.4082 - val_loss: 14.1488 - val_root_mean_squared_error: 21.3448
Epoch 4/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 68ms/step - loss: 10.8874 - root_mean_squared_error: 15.5387 - val_loss: 12.6933 - val_root_mean_squared_error: 19.0816
Epoch 5/20
[1m585/585[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 70ms/step - loss: 10.1450 - root_mean_squared_error: 14.4437 - val_loss: 12.9155 - val_root_mean_squared_error: 18.9751
Epoch 6/20