In [None]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Bidirectional
from sklearn.preprocessing import MinMaxScaler
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.regularizers import l2
from kerastuner.tuners import RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters

# Read the training and test tables from the working directory.
train_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")

# The raw "sequence" string is the only input feature; "target" is the
# regression label and "id" keys the rows of the output file.
X_train, y_train = train_data["sequence"].values, train_data["target"].values
X_test, test_ids = test_data["sequence"].values, test_data["id"].values

# Character-level vocabulary, learned from the training sequences only.
tokenizer = Tokenizer(char_level=True)
tokenizer.fit_on_texts(X_train)

# Map every character of every sequence to its integer index.
X_train_tokenized, X_test_tokenized = (
    tokenizer.texts_to_sequences(texts) for texts in (X_train, X_test)
)

# Pad both splits to the longest sequence found in either of them so that
# train and test end up with the same number of columns.
longest_train = max(map(len, X_train_tokenized))
longest_test = max(map(len, X_test_tokenized))
max_sequence_length = max(longest_train, longest_test)
X_train_padded = pad_sequences(X_train_tokenized, maxlen=max_sequence_length)
X_test_padded = pad_sequences(X_test_tokenized, maxlen=max_sequence_length)

# Min-max scale every column; the scaler is fitted on the training split
# only and then applied unchanged to the test split.
scaler = MinMaxScaler().fit(X_train_padded)
X_train_scaled = scaler.transform(X_train_padded)
X_test_scaled = scaler.transform(X_test_padded)

# Insert a length-1 time axis for the LSTM:
# (samples, features) -> (samples, 1, features).
X_train_reshaped = np.expand_dims(X_train_scaled, axis=1)
X_test_reshaped = np.expand_dims(X_test_scaled, axis=1)

# Model builder handed to Keras Tuner; it is invoked with a fresh `hp` per trial.
def build_model(hp):
    """Build and compile a stacked bidirectional-LSTM regressor for one trial.

    The input shape is read from the module-level ``X_train_reshaped`` array,
    so that array must exist before the tuner calls this function.

    Args:
        hp: Keras Tuner hyperparameter container. The following values are
            sampled from it: ``units_lstm1``..``units_lstm3`` (64-512, step
            32), ``dropout1``..``dropout3`` (0.2-0.6, step 0.1) and
            ``learning_rate`` (one of 1e-2, 1e-3, 1e-4).

    Returns:
        A compiled ``Sequential`` model with one output unit, optimised with
        Adam against mean-squared-error loss.
    """
    input_shape = (X_train_reshaped.shape[1], X_train_reshaped.shape[2])

    model = Sequential()

    # `input_shape` has to be given to the Bidirectional wrapper: Sequential
    # only inspects the outermost first layer, so a shape set on the wrapped
    # LSTM is ignored and the model would stay unbuilt until the first fit.
    model.add(Bidirectional(
        LSTM(units=hp.Int('units_lstm1', min_value=64, max_value=512, step=32),
             return_sequences=True, kernel_regularizer=l2(0.001)),
        input_shape=input_shape))
    model.add(Dropout(hp.Float('dropout1', min_value=0.2, max_value=0.6, step=0.1)))

    # Middle recurrent layer still emits the full sequence for the next LSTM.
    model.add(Bidirectional(
        LSTM(units=hp.Int('units_lstm2', min_value=64, max_value=512, step=32),
             return_sequences=True, kernel_regularizer=l2(0.001))))
    model.add(Dropout(hp.Float('dropout2', min_value=0.2, max_value=0.6, step=0.1)))

    # Last recurrent layer returns only its final output, feeding the Dense head.
    model.add(Bidirectional(
        LSTM(units=hp.Int('units_lstm3', min_value=64, max_value=512, step=32),
             kernel_regularizer=l2(0.001))))
    model.add(Dropout(hp.Float('dropout3', min_value=0.2, max_value=0.6, step=0.1)))

    # Single linear unit: the model predicts one continuous target value.
    model.add(Dense(units=1))

    # Compiling the model
    optimizer = Adam(learning_rate=hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4]))
    model.compile(optimizer=optimizer, loss='mse')
    return model

# Random-search tuner: 20 hyperparameter configurations, each trained twice,
# ranked by validation loss. Results are written under
# keras_tuner_results/hyperparameter_tuning.
tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=20,
    executions_per_trial=2,
    directory='keras_tuner_results',
    project_name='hyperparameter_tuning',
)

# Run the search, holding out 20% of the training data for validation.
search_options = {"epochs": 100, "validation_split": 0.2, "verbose": 1}
tuner.search(X_train_reshaped, y_train, **search_options)

# Retrieve the top-ranked model found by the search.
top_models = tuner.get_best_models(num_models=1)
best_model = top_models[0]

# Predict the target for every test row.
predictions = best_model.predict(X_test_reshaped)

# Pair each test id with its (flattened) prediction and write the result
# to predictions.csv without the DataFrame index.
prediction_df = pd.DataFrame({"id": test_ids, "target": predictions.ravel()})
prediction_df.to_csv("predictions.csv", index=False)

Trial 20 Complete [00h 11m 04s]
val_loss: 18.942358016967773

Best val_loss So Far: 18.63950538635254
Total elapsed time: 02h 42m 22s
