In [None]:
# Feed-forward regression network: batch-normalised inputs, three ReLU hidden
# layers (256 -> 128 -> 64) and a single linear output unit.
network_layers = [
    layers.Input(shape=(input_shape,)),
    layers.BatchNormalization(),
    # The two widest hidden layers carry an L1+L2 kernel penalty and are each
    # followed by dropout; the 64-unit layer is left unregularised.
    layers.Dense(
        256,
        activation='relu',
        kernel_regularizer=tf.keras.regularizers.l1_l2(l1=0.001, l2=0.001),
    ),
    layers.Dropout(0.4),
    layers.Dense(
        128,
        activation='relu',
        kernel_regularizer=tf.keras.regularizers.l1_l2(l1=0.001, l2=0.001),
    ),
    layers.Dropout(0.3),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='linear'),
]
model = keras.Sequential(network_layers)

# MSE is the training objective; MAE and R2 are reported alongside it, so
# model.evaluate() yields [loss, mae, r2] in that order.
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(
    optimizer=adam_optimizer,
    loss='mse',
    metrics=['mae', tf.keras.metrics.R2Score()],
)

# Both callbacks track the same validation quantity.
monitored_quantity = 'val_loss'

# Stop after 10 epochs without improvement and roll back to the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor=monitored_quantity, patience=10, restore_best_weights=True
)

# Write the model to disk only when the monitored quantity improves.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='my_model.keras', monitor=monitored_quantity, save_best_only=True
)

In [None]:
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.model_selection import KFold

# K-Fold Cross-Validation
def cross_validate_model(X, y, input_shape, n_splits=5):
    """Estimate the validation R2 of the notebook-level ``model`` with K-fold CV.

    The function trains the global ``model`` (with the global ``early_stopping``
    and ``model_checkpoint`` callbacks) once per fold and scores it on that
    fold's held-out part.

    Every fold starts from the weights ``model`` had when this function was
    called. Without that reset, a fold would begin from weights already fitted
    on rows that are now in its validation split, which leaks information and
    inflates the reported score.

    Args:
        X: Feature matrix; anything ``np.asarray`` can turn into an array whose
            first axis indexes samples.
        y: Targets aligned with ``X`` along the first axis.
        input_shape: Not used by this function; kept so existing calls keep
            working.
        n_splits: Number of folds passed to ``KFold``.

    Returns:
        Tuple ``(mean, std)`` of the per-fold validation R2 values.

    Side effects:
        ``model`` is left holding the weights produced by the last fold, and
        ``model_checkpoint`` may write its checkpoint file during training.
    """
    # Plain arrays so the positional indices from KFold can be used directly.
    features = np.asarray(X)
    targets = np.asarray(y)

    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    # Snapshot taken once, restored before each fold (see docstring).
    # NOTE(review): optimizer state is not reset here, and the shared callbacks
    # are reused across folds; ModelCheckpoint may keep its best-so-far value
    # between fit() calls - confirm whether per-fold callbacks are wanted.
    initial_weights = model.get_weights()
    cv_scores = []

    for train_index, val_index in kfold.split(features):
        model.set_weights(initial_weights)

        X_fold_train, X_fold_val = features[train_index], features[val_index]
        y_fold_train, y_fold_val = targets[train_index], targets[val_index]

        model.fit(
            X_fold_train, y_fold_train,
            validation_data=(X_fold_val, y_fold_val),
            epochs=100,
            batch_size=32,
            callbacks=[early_stopping, model_checkpoint],
            verbose=1
        )

        # Position 2 is taken as the R2 score, i.e. evaluate() is assumed to
        # return [loss, mae, r2] for the way ``model`` was compiled.
        val_r2 = model.evaluate(X_fold_val, y_fold_val)[2]
        cv_scores.append(val_r2)

    return np.mean(cv_scores), np.std(cv_scores)
# Cross-validate on the training split (default 5 folds) and report the
# mean and spread of the per-fold R2.
cv_summary = cross_validate_model(X_train, y_train, input_shape)
mean_cv_score, std_cv_score = cv_summary
print(f"Cross-Validation R2: {mean_cv_score:.4f} ± {std_cv_score:.4f}")


In [None]:
# Score the model on the held-out test split.
X_test_np = np.array(X_test)
y_test_np = np.array(y_test)

# Capture the result instead of discarding it. The unpacking assumes
# evaluate() returns exactly [loss, mae, r2], matching the index used for R2
# in cross_validate_model.
test_loss, test_mae, test_r2 = model.evaluate(X_test_np, y_test_np)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test MAE: {test_mae:.4f}")
print(f"Test r_score: {test_r2:.4f}")


In [None]:
# Predict the whole test set with one batched model.predict call rather than
# one call per sample, which is far slower for the same result.
X_test_rows = np.asarray(X_test)
predictions = []
if len(X_test_rows):
    # Flatten each sample to one row, mirroring the former per-sample
    # reshape(1, -1).
    batch_output = model.predict(X_test_rows.reshape(len(X_test_rows), -1))
    for row in batch_output:
        # Keep one (1, k) array per sample so `item[0][0]` indexing still
        # works, and keep `prediction` bound to the last one as before.
        prediction = row.reshape(1, -1)
        predictions.append(prediction)

In [None]:
# Predict the whole training set with one batched model.predict call.
# This cell deliberately does not touch `predictions`: appending the last test
# prediction again would make that list one element longer than `y_test` and
# break the correlation computed from the two.
X_train_rows = np.asarray(X_train)
predictions_on_training = []
if len(X_train_rows):
    # Flatten each sample to one row, mirroring a per-sample reshape(1, -1).
    batch_output = model.predict(X_train_rows.reshape(len(X_train_rows), -1))
    for row in batch_output:
        # One (1, k) array per sample so `item[0][0]` indexing still works.
        prediction = row.reshape(1, -1)
        predictions_on_training.append(prediction)

In [None]:
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
def plot_distribution(values, title, bins=5):
    """Draw one histogram of ``values`` under ``title`` and show it.

    Args:
        values: Sequence of numbers to bin.
        title: Text placed above the plot.
        bins: Number of histogram bins.
    """
    plt.hist(values, bins=bins, color='skyblue', edgecolor='black')
    plt.title(title)
    plt.show()


# `item[0][0]` pulls the scalar out of each stored per-sample prediction.
predictions_list = [item[0][0] for item in predictions]
# Not used in this cell; kept because other cells may read it.
predictions_list_divided_by_two = [value / 2.0 for value in predictions_list]
prediction_on_training_list = [item[0][0] for item in predictions_on_training]

true_values_list = y_test.tolist()
true_values_on_training_list = y_train.tolist()
all_sensitivities = y.tolist()

# Predicted vs. true values on the test split, titled with Pearson's r.
r, p_value = pearsonr(true_values_list, predictions_list)
plt.title(f'Scatter Plot with R={r:.2f}')
plt.scatter(true_values_list, predictions_list)
plt.xlabel('True value')
plt.ylabel('Predicted value')
plt.show()

# Same histogram styling for every distribution, in the original order.
for values, title in [
    (true_values_list, "True values distribution"),
    (predictions_list, "Prediction distribution"),
    (true_values_on_training_list, "True values on training distribution"),
    (prediction_on_training_list, "Prediction on training distribution"),
    (all_sensitivities, "True All sensitivities distribution"),
]:
    plot_distribution(values, title)

In [None]:
# Distribution of the training targets.
train_fig, train_ax = plt.subplots()
train_ax.hist(y_train, bins=20, edgecolor='black')
train_ax.set_title('Histogram of training data')
train_ax.set_xlabel('Value')
train_ax.set_ylabel('Frequency')
plt.show()


In [None]:
# Distribution of the test targets.
test_fig, test_ax = plt.subplots()
test_ax.hist(y_test, bins=20, edgecolor='black')
test_ax.set_title('Histogram of test data')
test_ax.set_xlabel('Value')
test_ax.set_ylabel('Frequency')
plt.show()
