In [162]:
import numpy as np

# Read the two input arrays from the Kaggle dataset directory:
#   valid_periods - per-series pair of indices (used below as [:, 0] and [:, 1])
#   data          - the series matrix, one row per series
valid_periods = np.load('/kaggle/input/training/valid_periods.npy')
data = np.load('/kaggle/input/training/training_data.npy')

# Last expression of the cell: display the matrix dimensions
data.shape

(48000, 2776)

In [164]:
# Length (in samples) of every window cut out of a series.
WINDOW_LENGTH = 209

# Per-series count of valid samples (both period bounds counted, hence the +1).
# Not used in this cell; kept because other cells may read it.
num_valid_points = valid_periods[:, 1] - valid_periods[:, 0] + 1

# Cut each series into consecutive, non-overlapping windows of WINDOW_LENGTH
# samples, starting at the first valid index. A window is kept only while
# start + WINDOW_LENGTH does not exceed the second period bound, so every
# window lies inside the valid period.
# NOTE(review): if the second bound is inclusive, a window ending exactly on it
# is dropped by this condition - confirm the bound convention before relaxing it.
new_data = []
for idx in range(len(data)):
    start_idx = valid_periods[idx][0]
    last_valid = valid_periods[idx][1]

    while start_idx + WINDOW_LENGTH <= last_valid:
        new_data.append(data[idx, start_idx:start_idx + WINDOW_LENGTH])
        start_idx += WINDOW_LENGTH

# Stack the windows. The reshape keeps the result 2-D, shape (0, WINDOW_LENGTH),
# even when no series is long enough to yield a window.
# NOTE: this overwrites `data`, so the cell is not idempotent - reload the raw
# arrays before re-running it. After this cell `data` holds one row per window,
# so its rows no longer line up one-to-one with the rows of `valid_periods`.
data = np.array(new_data).reshape(-1, WINDOW_LENGTH)

# Check the shape of the new data
print(data.shape)

(25156, 209)


In [153]:
# Number of valid points of each series (both period bounds counted, hence +1)
num_valid_points = valid_periods[:, 1] - valid_periods[:, 0] + 1

# Keep only the series with at least 210 valid points
keep = num_valid_points >= 210
filtered_data = data[keep]

# FIX: apply the same mask to the periods (and the counts). Filtering `data`
# alone left `valid_periods` with its original rows, so row i of `data` was
# paired with the period of an unrelated series in every later cell.
# Done after `data[keep]` so nothing is rebound if that indexing fails.
valid_periods = valid_periods[keep]
num_valid_points = num_valid_points[keep]

# Check the shape of the filtered data
print(filtered_data.shape)
data = filtered_data

(21970, 2776)


In [165]:
SPLIT_SEED = 42  # fixed seed so the train/validation split is reproducible
K = 10000        # number of rows held out for validation

n_rows = data.shape[0]
if K >= n_rows:
    raise ValueError(f"K={K} leaves no training rows (data has {n_rows} rows)")

# Shuffling "in unison" is only meaningful when `valid_periods` has exactly one
# row per row of `data`. When `data` has been re-cut into fixed-length windows
# the row counts differ, and indexing `valid_periods` with a permutation of
# `data` rows would pair every window with the period of an unrelated series.
# In that case fall back to a period spanning every column of each row.
# NOTE(review): the fallback assumes each row of `data` is entirely valid, which
# holds for windows cut strictly inside the valid period - confirm for other uses.
if valid_periods.shape[0] == n_rows:
    aligned_periods = valid_periods
else:
    print(
        f"valid_periods has {valid_periods.shape[0]} rows but data has {n_rows}; "
        "treating every column of each row as valid"
    )
    aligned_periods = np.zeros((n_rows, 2), dtype=int)
    aligned_periods[:, 1] = data.shape[1] - 1

# Shuffle the datasets in unison
perm = np.random.default_rng(SPLIT_SEED).permutation(n_rows)
data_shuffled = data[perm]
valid_periods_shuffled = aligned_periods[perm]

# Split into validation (first K shuffled rows) and training (the rest)
validation_data = data_shuffled[:K]
validation_periods = valid_periods_shuffled[:K]

training_data = data_shuffled[K:]
training_periods = valid_periods_shuffled[K:]

In [166]:
def apply_valid_periods(data, valid_periods):
    """
    Return a copy of 'data' in which everything outside each row's valid
    period is set to zero.

    Parameters:
    data (numpy.ndarray): 2-D array, shape (n_samples, n_features).
    valid_periods (numpy.ndarray): One (left, right) index pair per row of
        'data'; both bounds are treated as inclusive column indices.

    Returns:
    numpy.ndarray: New array with the shape and dtype of 'data'. Row i equals
        data[i] on columns left..right and is zero elsewhere. 'data' itself is
        not modified.
    """
    n_rows, _ = data.shape
    masked = np.zeros_like(data)

    for row in range(n_rows):
        first, last = valid_periods[row]
        # Inclusive upper bound, hence the +1 on the slice stop
        keep = slice(first, last + 1)
        masked[row, keep] = data[row, keep]

    return masked

# Zero out everything outside the valid period, for both splits.
# (No test split is processed here.)
modified_training_data, modified_validation_data = (
    apply_valid_periods(training_data, training_periods),
    apply_valid_periods(validation_data, validation_periods),
)


In [167]:
def extract_fixed_intervals(data, interval_length=209):
    """
    Build a fixed-width array by taking one interval of 'interval_length'
    columns from every row of 'data'.

    For a row with at least 'interval_length' nonzero entries, a start column
    is drawn at random (via np.random.choice, so it follows the global NumPy
    seed) among the nonzero positions that still have at least
    'interval_length - 1' nonzero entries after them; the 'interval_length'
    consecutive columns from that start are copied. The copied interval may
    itself contain zeros.

    For a row with fewer nonzero entries, only its nonzero values are copied,
    packed at the beginning of the output row (zeros between them are dropped),
    and the remainder is left at zero.

    Parameters:
    data (numpy.ndarray): 2-D array, shape (n_samples, n_features).
    interval_length (int): Number of columns in the output.

    Returns:
    numpy.ndarray: Float array of shape (n_samples, interval_length).
    """
    n_samples, _ = data.shape
    modified_data = np.zeros((n_samples, interval_length))

    for i in range(n_samples):
        nonzero_indices = np.nonzero(data[i])[0]
        n_nonzero = nonzero_indices.size

        if n_nonzero >= interval_length:
            # Admissible starts: every nonzero position followed by at least
            # interval_length - 1 further nonzero positions. An explicit stop
            # index is used because the negative-stop form
            # `[:-interval_length + 1]` turns into `[:0]` (empty) when
            # interval_length == 1, which made np.random.choice raise.
            candidates = nonzero_indices[:n_nonzero - interval_length + 1]
            start_idx = np.random.choice(candidates)
            modified_data[i] = data[i, start_idx:start_idx + interval_length]
        else:
            # Not enough points: copy the nonzero values, zero-pad the rest
            modified_data[i, :n_nonzero] = data[i, nonzero_indices]

    return modified_data

# Reduce both splits to fixed-width rows (default interval length).
# (No test split is processed here.)
final_training_data, final_validation_data = (
    extract_fixed_intervals(modified_training_data),
    extract_fixed_intervals(modified_validation_data),
)


In [168]:
# Sanity check: (number of training rows, interval_length used by extract_fixed_intervals)
final_training_data.shape

(15156, 209)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, Attention, LayerNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, TerminateOnNaN, LearningRateScheduler

# Number of trailing samples of each window that the model has to predict.
HORIZON = 9

# Each row of final_training_data / final_validation_data is one fixed-length
# window: the last HORIZON columns are the targets, everything before them is
# the model input.
X_train = final_training_data[:, :-HORIZON]
y_train = final_training_data[:, -HORIZON:]

X_val = final_validation_data[:, :-HORIZON]
y_val = final_validation_data[:, -HORIZON:]

# Hyperparameters
d_model = 128   # Width of the first hidden layer
num_heads = 64  # Currently unused: the network below has no attention layers
ff_dim = 128    # Width of the second hidden layer

# Input layer: one flat feature vector per window
input_layer = Input(shape=(X_train.shape[1],))

# Two fully connected hidden layers with dropout. Despite the variable names,
# no attention / transformer layers are used: this is a plain MLP.
x = Dense(d_model, activation='relu')(input_layer)
x = Dropout(0.1)(x)

x = Dense(ff_dim, activation='relu')(x)
x = Dropout(0.1)(x)

# Add layer normalization
x = LayerNormalization()(x)

# Output layer: one linear unit per predicted sample
output_layer = Dense(HORIZON)(x)

# Create and compile the model
model = Model(inputs=input_layer, outputs=output_layer)
# FIX: `lr` is the deprecated spelling and is rejected by recent Keras
# releases; `learning_rate` is the supported argument name.
optimizer = Adam(learning_rate=0.01)  # Initial learning rate
model.compile(optimizer=optimizer, loss='mse')

# Callbacks
# FIX: the model is compiled with a loss only (no `metrics=`), so the training
# logs contain `loss` / `val_loss` but never `val_mse`. Monitoring `val_mse`
# meant early stopping and best-weight restoration never triggered.
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
terminate_on_nan = TerminateOnNaN()

# Learning rate scheduler
def lr_scheduler(epoch, lr):
    """
    Schedule for LearningRateScheduler: hold the learning rate for the first
    100 epochs, then multiply it by exp(-0.00002) at every later epoch.

    Parameters:
    epoch (int): Index of the epoch about to start (0-based).
    lr (float): Learning rate currently in use.

    Returns:
    float: Learning rate to use for this epoch. A plain Python float is
        returned (not a tf.Tensor), which is what the callback expects.
    """
    if epoch < 100:
        return lr
    # NOTE(review): exp(-0.00002) is ~0.99998, i.e. a 0.002% decay per epoch -
    # confirm this near-constant schedule is intended.
    return float(lr * np.exp(-0.00002))

# Wrap the schedule function in its Keras callback
learning_rate_scheduler = LearningRateScheduler(schedule=lr_scheduler)

# Fit on the training windows and evaluate on the validation windows after
# every epoch; the three callbacks handle early stopping, NaN losses and the
# learning-rate schedule.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=256,
    epochs=300,
    callbacks=[early_stopping, terminate_on_nan, learning_rate_scheduler],
    validation_data=(X_val, y_val),
)


Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [None]:
from sklearn.metrics import mean_squared_error

# Predict on the validation inputs.
# FIX: the model was built with a 2-D input (n_samples, n_features), so X_val
# is passed unchanged. The previous reshape to (n_samples, n_features, 1) fed a
# 3-D tensor and only worked if Keras silently squeezed the trailing axis.
predictions = model.predict(X_val)

# Mean squared error over all validation rows and predicted steps
mse = mean_squared_error(y_val, predictions)
print(f"Mean Squared Error: {mse}")

In [None]:
# Persist the trained model as an HDF5 file named after its validation MSE
checkpoint_path = f'{mse}.h5'
model.save(checkpoint_path)
