In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from sklearn.linear_model import LinearRegression

# Load the base model
model_path = r'C:\Users\RDITLADI\Documents\AMPL\code\all_run_dir\lift_run_dir\results_lift_coefficient\saved_models\Cl_lift_estimate_top_0_model.keras'
base_model = tf.keras.models.load_model(model_path)

# Load dataset
data_path = r'c:\Users\RDITLADI\Documents\AMPL\code\all_run_dir\lift_run_dir\lift_data\sc1095_30AUG2023_noZeros.lift.csv'
data = pd.read_csv(data_path)

In [2]:
# Separate features and target variable
X = data[['Temp', 'LWC', 'Mach_env', 'AoA_env', 'Mach_c81', 'AoA_c81']]
y = data['Cl']

In [3]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)


In [4]:
# Convert to numpy arrays for model compatibility
X_train = X_train.to_numpy()
y_train = y_train.to_numpy()
X_test = X_test.to_numpy()
y_test = y_test.to_numpy()

In [5]:
# Number of models for the Stacking ensemble
n_stacking_models = 10
n_features = X_train.shape[1]

# Store models
models = []

In [6]:
for i in range(n_stacking_models):
    # Randomly select features for each model
    selected_features = np.random.choice(n_features, size=int(np.sqrt(n_features)), replace=False)


In [7]:
# Resample the training data based on selected features
X_train_resampled, y_train_resampled = resample(X_train[:, selected_features], y_train, n_samples=len(y_train), random_state=i, replace=True)

In [8]:
# Define the model architecture to match the number of selected features
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(len(selected_features),)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')



In [9]:
 # Train the model on the resampled data
model.fit(X_train_resampled, y_train_resampled, epochs=50, verbose=0)

<keras.src.callbacks.history.History at 0x1dd188750a0>

In [10]:
# Generate predictions from each model
meta_features_train = np.zeros((X_train.shape[0], n_stacking_models))
meta_features_test = np.zeros((X_test.shape[0], n_stacking_models))

In [11]:
for i, (model, selected_features) in enumerate(models):
    meta_features_train[:, i] = model.predict(X_train[:, selected_features]).flatten()
    meta_features_test[:, i] = model.predict(X_test[:, selected_features]).flatten()


In [12]:
# Train a meta-model on the predictions
meta_model = LinearRegression()
meta_model.fit(meta_features_train, y_train)

In [13]:
# Generate final predictions using the meta-model
final_predictions = meta_model.predict(meta_features_test)

In [14]:
# Evaluate the ensemble
mse = mean_squared_error(y_test, final_predictions)
print(f'Ensemble MSE: {mse}')

Ensemble MSE: 0.8438903188747219
