In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.utils import resample

# Load the base model
model_path = r'c:\Users\RDITLADI\Documents\AMPL\code\all_run_dir\moment_run_dir\results_moment_coefficient\saved_models\Cm_moment_estimate_top_0_model.keras'
base_model = tf.keras.models.load_model(model_path)

# Load dataset
data_path = r'c:\Users\RDITLADI\Documents\AMPL\code\all_run_dir\moment_run_dir\moment_data\sc1095_30AUG2023_noZeros.moment.csv'
data = pd.read_csv(data_path)

In [2]:
# Separate features and target variable
X = data[['Temp', 'LWC', 'Mach_env', 'AoA_env', 'Mach_c81', 'AoA_c81']]
y = data['Cm']

In [3]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)


In [4]:
# Convert to numpy arrays for model compatibility
X_train = X_train.to_numpy()
y_train = y_train.to_numpy()
X_test = X_test.to_numpy()
y_test = y_test.to_numpy()

In [5]:
# Number of models for the Random Forest ensemble
n_rf_models = 10
n_features = X_train.shape[1]

In [6]:
# Store models
models = []

In [7]:
for i in range(n_rf_models):
    # Randomly select features for each model
    selected_features = np.random.choice(n_features, size=int(np.sqrt(n_features)), replace=False)

In [8]:
    # Resample the training data
X_train_resampled, y_train_resampled = resample(X_train[:, selected_features], y_train, n_samples=len(y_train), random_state=i, replace=True)

In [9]:
    # Define the model architecture to match the number of selected features
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(len(selected_features),)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')



In [10]:
# Train the model on the resampled data
model.fit(X_train_resampled, y_train_resampled, epochs=50, verbose=0)

<keras.src.callbacks.history.History at 0x2e077cd2cf0>

In [11]:
# Store the trained model and the selected features
models.append((model, selected_features))

In [12]:
# Ensemble predictions on test data
ensemble_pred = np.zeros(len(y_test))
for model, selected_features in models:
    ensemble_pred += model.predict(X_test[:, selected_features]).flatten()


[1m408/408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


In [13]:
ensemble_pred /= n_rf_models

In [14]:
# Evaluate the ensemble
mse = mean_squared_error(y_test, ensemble_pred)
print(f'Moment Random Forest Ensemble MSE: {mse}')

Ensemble MSE: 0.8469419506860143
