In [None]:
import pandas as pd
import numpy as np
# Load the raw dataset from a CSV file in the current working directory.
data=pd.read_csv("wind_data.csv")

In [None]:
# Show the first rows, then the (rows, columns) shape of the raw frame.
for summary in (data.head(), data.shape):
    print(summary)

In [None]:
# Number of missing values in each column.
data.isna().sum()

In [None]:
# Remove every row that has at least one missing value, then re-count to confirm.
data = data.dropna(axis=0, how="any")
data.isna().sum()

In [None]:
# (rows, columns) of the frame at this point in the notebook.
data.shape

In [None]:
# Split the 'Tmstamp' strings on ':' into two numeric columns, then drop the
# original text column.
time_parts = data['Tmstamp'].str.split(':', expand=True)
data[['hour', 'minute']] = time_parts.apply(pd.to_numeric)
data = data.drop(columns=['Tmstamp'])
data.info()

In [None]:
# Column layout: identifier and time fields first, measurements after,
# with 'Patv' as the last column.
id_and_time_columns = ['TurbID', 'Day', 'hour', 'minute']
measurement_columns = [
    'Wspd', 'Wdir', 'Etmp', 'Itmp', 'Ndir',
    'Pab1', 'Pab2', 'Pab3', 'Prtv', 'Patv',
]
desired_order = id_and_time_columns + measurement_columns
data = data.reindex(columns=desired_order)
print(data)

In [None]:
import pandas as pd
import numpy as np
def drop_outliers_iqr(dataframe, threshold=1.5):
    """Drop rows that contain an IQR outlier in any numeric column.

    For each numeric column the fences are ``Q1 - threshold * IQR`` and
    ``Q3 + threshold * IQR``. A row is removed when at least one of its
    numeric values lies strictly outside its column's fences.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Input frame. Non-numeric columns are ignored when computing fences
        but are kept in the returned frame.
    threshold : float, default 1.5
        Multiplier applied to the IQR when building the fences.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the rows without outliers; the original
        index labels are preserved.
    """
    # Restrict the statistics to numeric columns: quantiles and ordering
    # comparisons are not defined for text columns.
    numeric = dataframe.select_dtypes(include="number")
    if numeric.empty:
        # Nothing to measure (no numeric columns or no rows).
        return dataframe.copy()

    q1 = numeric.quantile(0.25)
    q3 = numeric.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - threshold * iqr
    upper_bound = q3 + threshold * iqr

    outliers = ((numeric < lower_bound) | (numeric > upper_bound)).any(axis=1)
    return dataframe[~outliers]
# Filter the outlier rows and print the frame before and after for comparison.
cleaned_df = drop_outliers_iqr(data)
for title, frame in (
    ("Original DataFrame:", data),
    ("\nDataFrame after dropping outliers:", cleaned_df),
):
    print(title)
    print(frame)

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
# Rescale every column with MinMaxScaler and wrap the resulting array back
# into a DataFrame with the same column names (the row index is not carried over).
# NOTE(review): the scaler is fit on all rows here, before the train/test split
# performed later in the notebook — confirm this is intended.
scaler = MinMaxScaler()
cleaned_df_normalized = scaler.fit(cleaned_df).transform(cleaned_df)
cleaned_df_normalized_df = pd.DataFrame(
    data=cleaned_df_normalized,
    columns=cleaned_df.columns,
)
print(cleaned_df_normalized_df)

In [None]:
# Features are every column except 'Patv'; 'Patv' is the regression target.
x = cleaned_df_normalized_df.drop('Patv', axis=1)
y = cleaned_df_normalized_df.loc[:, "Patv"]

In [None]:
# wrapper method
# Recursive Feature Elimination (RFE)
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
# Rank features with a linear model and keep the six that RFE retains.
model = LinearRegression()
rfe = RFE(estimator=model, n_features_to_select=6).fit(x, y)
# Boolean mask over x's columns marking the retained features.
selected_features = x.columns[rfe.get_support()]
df_reduced = x.loc[:, selected_features]
print(df_reduced)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 10% of the rows as a test set; the seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    df_reduced,
    y,
    test_size=0.1,
    random_state=42,
)
x_train.shape, y_test.shape

# VAE-LSTM

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models 
import numpy as np
latent_dim = 6
input_shape = 6
conv_input_shape = (input_shape, 1)

# Intermediate tensors are named `enc` / `dec` rather than `x`, so the feature
# DataFrame bound to `x` by the feature-selection cells is not overwritten by a
# Keras tensor (which would break a re-run of those cells).

# Encoder: three Conv1D layers over the 6 features, flattened into a dense
# stack that ends in the two latent-parameter heads.
encoder_inputs = layers.Input(shape=conv_input_shape)
enc = layers.Conv1D(128, kernel_size=3, activation='elu', padding='same')(encoder_inputs)
enc = layers.Conv1D(64, kernel_size=3, activation='elu', padding='same')(enc)
enc = layers.Conv1D(32, kernel_size=3, activation='elu', padding='same')(enc)
enc = layers.Flatten()(enc)
enc = layers.Dense(256, activation=tf.nn.leaky_relu)(enc)  # Leaky ReLU activation
enc = layers.Dense(128, activation=tf.nn.leaky_relu)(enc)  # Leaky ReLU activation
enc = layers.Dense(64, activation=tf.nn.leaky_relu)(enc)  # Leaky ReLU activation
z_mean = layers.Dense(latent_dim)(enc)
z_log_var = layers.Dense(latent_dim)(enc)

# Sampler
def sampling(args):
    """Draw z = mean + exp(0.5 * log_var) * eps with eps ~ N(0, I).

    `args` is the pair (z_mean, z_log_var); the noise has the same shape
    as z_mean.
    """
    z_mean, z_log_var = args
    epsilon = tf.random.normal(shape=tf.shape(z_mean))
    return z_mean + tf.exp(0.5 * z_log_var) * epsilon
z = layers.Lambda(sampling)([z_mean, z_log_var])

# Decoder: mirrors the encoder and ends in a sigmoid Conv1D that is flattened
# back to `input_shape` values per row.
decoder_inputs = layers.Input(shape=(latent_dim,))
dec = layers.Dense(64, activation=tf.nn.leaky_relu)(decoder_inputs)  # Leaky ReLU activation
dec = layers.Dense(128, activation=tf.nn.leaky_relu)(dec)  # Leaky ReLU activation
dec = layers.Dense(256, activation=tf.nn.leaky_relu)(dec)  # Leaky ReLU activation
dec = layers.Dense(input_shape * 32, activation=tf.nn.leaky_relu)(dec)  # To match the flattened Conv1D output
dec = layers.Reshape((input_shape, 32))(dec)
dec = layers.Conv1D(32, kernel_size=3, activation='elu', padding='same')(dec)
dec = layers.Conv1D(64, kernel_size=3, activation='elu', padding='same')(dec)
dec = layers.Conv1D(128, kernel_size=3, activation='elu', padding='same')(dec)
decoder_outputs = layers.Conv1D(1, kernel_size=3, activation='sigmoid', padding='same')(dec)
decoder_outputs = layers.Flatten()(decoder_outputs)

encoder = models.Model(encoder_inputs, [z_mean, z_log_var, z], name='encoder')
decoder = models.Model(decoder_inputs, decoder_outputs, name='decoder')

# Call the encoder once and keep all three outputs, so the KL term below is
# computed from the same forward pass that feeds the decoder.
vae_z_mean, vae_z_log_var, vae_z = encoder(encoder_inputs)
outputs = decoder(vae_z)
vae = models.Model(encoder_inputs, outputs, name='vae')

# KL divergence between the encoder's Gaussian and the unit Gaussian, averaged
# over latent dimensions and batch. It is attached with add_loss because it
# depends on the encoder outputs, which a (y_true, y_pred) loss function
# never receives.
kl_loss = -0.5 * tf.reduce_mean(
    1 + vae_z_log_var - tf.square(vae_z_mean) - tf.exp(vae_z_log_var)
)
vae.add_loss(kl_loss)

# Loss
def vae_loss(x, outputs):
    """Reconstruction part of the VAE objective.

    Parameters
    ----------
    x : tensor
        The original inputs (Keras passes them as y_true).
    outputs : tensor
        The decoder reconstruction (Keras passes it as y_pred).

    Returns
    -------
    Scalar tensor: mean squared reconstruction error over the batch.
    The KL term is not computed here; it is registered on the model
    through ``vae.add_loss``.
    """
    return tf.reduce_mean(tf.keras.losses.mean_squared_error(x, outputs))

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

vae.compile(optimizer=optimizer, loss=vae_loss)

vae.fit(x_train, x_train, epochs=200, batch_size=64, validation_data=(x_test, x_test))

# The encoder returns [z_mean, z_log_var, z]; keep only the sampled z vectors.
encoded_train = encoder.predict(x_train)
latent_train = encoded_train[2]
# Decode those latents back to feature space for the later error computation.
reconstructed_data = decoder.predict(latent_train)
latent_train_array = np.array(latent_train)
# Second axis of the latent array = number of latent values per row.
sequence_length = latent_train_array.shape[1]
print("Sequence Length:", sequence_length)

In [None]:
# reconstruction loss

In [None]:
def _reconstruct_with_errors(frame):
    """Run the VAE on `frame`; return (reconstruction, per-row mean squared error)."""
    reconstruction = vae.predict(frame)
    row_errors = np.mean(np.square(frame - reconstruction), axis=1)
    return reconstruction, row_errors


reconstructed_test, reconstruction_errors_test = _reconstruct_with_errors(x_test)
print(f"Mean Reconstruction Error for test data: {np.mean(reconstruction_errors_test):.4f}")
reconstructed_train, reconstruction_errors_train = _reconstruct_with_errors(x_train)
print(f"Mean Reconstruction Error for train data: {np.mean(reconstruction_errors_train):.4f}")

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error
# Compare the training features with their decoded reconstruction,
# element by element, after flattening both to 1-D.
original_data_array = x_train.to_numpy()
original_data_flat = original_data_array.reshape(-1)
reconstructed_data_flat = reconstructed_data.reshape(-1)
mse = mean_squared_error(y_true=original_data_flat, y_pred=reconstructed_data_flat)
rmse = np.sqrt(mse)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)

In [None]:
# Append a trailing axis of length 1: (rows, latent) -> (rows, latent, 1).
latent_train_reshaped = np.expand_dims(latent_train_array, axis=-1)
print(latent_train_reshaped.shape)

In [None]:
# Encode the held-out rows, keep the sampled z (third encoder output) and add
# the trailing axis of length 1: (rows, latent) -> (rows, latent, 1).
encoded_test = encoder.predict(x_test)
latent_test = encoded_test[2]
latent_test_array = np.array(latent_test)
latent_test_reshaped = np.expand_dims(latent_test_array, axis=-1)
print(latent_test_reshaped.shape)

In [None]:

from sklearn.model_selection import KFold
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Prepare data
# Stack the train and test latents (and their targets) so the 5-fold split
# runs over every encoded row.
latent_data = np.concatenate([latent_train_reshaped, latent_test_reshaped], axis=0)
target_data = np.concatenate([y_train, y_test], axis=0)

kf = KFold(n_splits=5, shuffle=True, random_state=42)

for fold, (train_index, test_index) in enumerate(kf.split(latent_data), start=1):
    print(f"Training on Fold {fold}...")

    x_train_fold = latent_data[train_index]
    x_test_fold = latent_data[test_index]
    y_train_fold = target_data[train_index]
    y_test_fold = target_data[test_index]

    # A new two-layer LSTM regressor is created for every fold.
    vae_lstm = Sequential([
        LSTM(units=150, activation='relu', return_sequences=True, input_shape=(6, 1)),
        Dropout(0.2),
        LSTM(units=150, activation='relu'),
        Dense(units=1),
    ])

    optimizer = Adam(learning_rate=0.0001)
    vae_lstm.compile(optimizer=optimizer, loss='mean_squared_error')

    # Fit on the training part of the fold.
    vae_lstm.fit(x_train_fold, y_train_fold, epochs=10, batch_size=8, verbose=1)

    # Loss on the held-out part of the fold.
    loss = vae_lstm.evaluate(x_test_fold, y_test_fold)
    print(f"Fold {fold} - Test Loss: {loss}")

    # Held-out predictions and regression metrics.
    y_pred = vae_lstm.predict(x_test_fold)
    mse = mean_squared_error(y_test_fold, y_pred)
    mae = mean_absolute_error(y_test_fold, y_pred)
    r2 = r2_score(y_test_fold, y_pred)

    print(f"Fold {fold} - Mean Squared Error (MSE):", mse)
    print(f"Fold {fold} - Mean Absolute Error (MAE):", mae)
    print(f"Fold {fold} - R-squared (R2) Score:", r2)


In [None]:
# Persist the model currently bound to `vae_lstm` (the one from the last
# cross-validation fold) to an HDF5 file.
vae_lstm.save('vae_lstm.h5')
# from tensorflow.keras.models import load_model
# vae_lstm = load_model('vae_lstm.h5')

# LSTM

In [None]:
# Convert the hold-out split to NumPy arrays and give the features a trailing
# axis of length `feature`, i.e. (rows, columns) -> (rows, columns, feature).
# np.asarray accepts both pandas objects and arrays, so re-running this cell
# is harmless (`.values` would fail once the names already hold ndarrays).
feature = 1
x_train = np.asarray(x_train)
y_train = np.asarray(y_train)
x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], feature))
print(x_train.shape)
x_test = np.asarray(x_test)
y_test = np.asarray(y_test)
x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], feature))

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
# Dropout is used below but is absent from this cell's own import line; import
# it here so the cell does not depend on an earlier cell having been run.
from tensorflow.keras.layers import Dropout
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Baseline: the same two-layer LSTM regressor, trained directly on the
# selected features instead of on VAE latents.
lstm = Sequential()
lstm.add(LSTM(units=150, activation='relu', return_sequences=True, input_shape=(6,1)))
lstm.add(Dropout(0.2))
lstm.add(LSTM(units=150, activation='relu'))
lstm.add(Dense(units=1))
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
lstm.compile(optimizer=optimizer, loss='mean_squared_error')
lstm.fit(x_train, y_train, epochs=10, batch_size=8)

# Hold-out loss and regression metrics.
loss = lstm.evaluate(x_test, y_test)
print("Test Loss:", loss)
y_pred = lstm.predict(x_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error (MSE):", mse)
mae = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error (MAE):", mae)
r2 = r2_score(y_test, y_pred)
print("R-squared (R2) Score:", r2)

In [None]:
# Persist the baseline LSTM model to an HDF5 file.
lstm.save('lstm.h5')
# from tensorflow.keras.models import load_model
# lstm = load_model('lstm.h5')

# Theoharidis et al., 2023 (VAE-ConvLSTM)

In [None]:
import numpy as np
import pandas as pd
from tensorflow.keras.layers import Input, Dense, Lambda, Conv1D, Flatten, Reshape
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.losses import mean_squared_error
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression

input_shape = (6, 1)
latent_dim = 16

def build_vae(input_shape, latent_dim):
    """Assemble a convolutional VAE and return ``(encoder, decoder, vae)``.

    The encoder maps an input of shape `input_shape` to
    ``[z_mean, z_log_var, z]``; the decoder maps a `latent_dim` vector back
    to `input_shape`. The combined model is compiled with Adam and carries
    its loss (mean squared reconstruction error plus a mean-reduced KL term)
    through ``add_loss``.
    """
    inputs = Input(shape=input_shape)

    # Encoder: two same-padded convolutions, flattened into the latent heads.
    hidden = inputs
    for n_filters in (32, 64):
        hidden = Conv1D(n_filters, 3, activation='relu', padding='same')(hidden)
    hidden = Flatten()(hidden)

    z_mean = Dense(latent_dim, name='z_mean')(hidden)
    z_log_var = Dense(latent_dim, name='z_log_var')(hidden)

    def sampling(args):
        """Draw a latent sample from the (mean, log-variance) pair."""
        mean, log_var = args
        noise = K.random_normal(shape=(K.shape(mean)[0], latent_dim), mean=0., stddev=1.0)
        return mean + K.exp(0.5 * log_var) * noise

    z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')

    # Decoder: dense projection reshaped to the input shape, then two convolutions.
    latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
    decoded = Dense(np.prod(input_shape))(latent_inputs)
    decoded = Reshape(input_shape)(decoded)
    decoded = Conv1D(64, 3, activation='relu', padding='same')(decoded)
    decoded = Conv1D(1, 3, activation='sigmoid', padding='same')(decoded)

    decoder = Model(latent_inputs, decoded, name='decoder')

    outputs = decoder(z)

    vae = Model(inputs, outputs, name='vae')

    # Loss: reconstruction error plus KL divergence, both mean-reduced.
    reconstruction_loss = K.mean(K.square(inputs - outputs))
    kl_loss = -0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var))
    vae.add_loss(reconstruction_loss + kl_loss)
    vae.compile(optimizer='adam')

    return encoder, decoder, vae

# Built once here only to display the architecture; every fold below trains
# its own freshly initialised copy.
encoder, decoder, vae = build_vae(input_shape, latent_dim)
vae.summary()

X = df_reduced.values.reshape((df_reduced.shape[0], 6, 1))
y = cleaned_df_normalized_df['Patv'].values

kf = KFold(n_splits=5, shuffle=True, random_state=42)
mse_scores, mae_scores, r2_scores = [], [], []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Re-create the VAE for each fold. Reusing one model across folds would let
    # weights trained on earlier folds (which contained this fold's test rows)
    # carry over, and would give later folds more training epochs than earlier ones.
    encoder, decoder, vae = build_vae(input_shape, latent_dim)
    vae.fit(X_train, X_train, epochs=50, batch_size=32, verbose=1)

    # Third encoder output = sampled latent vector.
    X_train_encoded = encoder.predict(X_train)[2]
    X_test_encoded = encoder.predict(X_test)[2]

    # Small dense regressor from latent vector to the target.
    conv_input = Input(shape=(X_train_encoded.shape[1],))
    dense = Dense(64, activation='relu')(conv_input)
    output = Dense(1)(dense)
    conv_model = Model(conv_input, output)
    conv_model.compile(optimizer='adam', loss='mse')

    conv_model.fit(X_train_encoded, y_train, epochs=50, batch_size=32, verbose=1)

    predictions = conv_model.predict(X_test_encoded)
    mse_scores.append(mean_squared_error(y_test, predictions))
    mae_scores.append(mean_absolute_error(y_test, predictions))
    r2_scores.append(r2_score(y_test, predictions))

print("Mean MSE across folds:", np.mean(mse_scores))
print("Mean MAE across folds:", np.mean(mae_scores))
print("Mean R2 across folds:", np.mean(r2_scores))


# Kaur et al., 2023 (Bayesian BiLSTM)

In [None]:
import numpy as np
import pandas as pd
from tensorflow.keras.layers import Input, Dense, Lambda, Conv1D, Flatten, Reshape, Bidirectional, LSTM
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.losses import mean_squared_error
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression

input_shape = (6, 1)
latent_dim = 2

# VAE model definition
def sampling(args):
    """Draw z = mean + exp(0.5 * log_var) * eps with eps ~ N(0, 1).

    `args` is the pair (z_mean, z_log_var). The noise takes its shape from
    z_mean itself, so the function does not depend on the module-level
    `latent_dim`, which other cells of the notebook rebind.
    """
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=K.shape(z_mean), mean=0., stddev=1.0)
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

def build_vae(input_shape, latent_dim):
    """Assemble a small convolutional VAE and return ``(encoder, decoder, vae)``.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, e.g. ``(6, 1)``.
    latent_dim : int
        Size of the latent vector.

    Returns
    -------
    encoder : Model mapping inputs to ``[z_mean, z_log_var, z]``.
    decoder : Model mapping a latent vector back to `input_shape`.
    vae : encoder + decoder, compiled with Adam; its loss (reconstruction
        error scaled by the number of input elements, plus the summed KL
        term) is attached with ``add_loss``.
    """
    inputs = Input(shape=input_shape)

    # Encoder
    x = Conv1D(32, kernel_size=3, activation='relu', padding='same')(inputs)
    x = Flatten()(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)

    z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')

    # Decoder
    latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
    x = Dense(np.prod(input_shape), activation='relu')(latent_inputs)
    x = Reshape(input_shape)(x)
    x = Conv1D(32, kernel_size=3, activation='relu', padding='same')(x)
    outputs = Dense(1, activation='sigmoid')(x)

    decoder = Model(latent_inputs, outputs, name='decoder')

    # VAE model
    outputs = decoder(encoder(inputs)[2])
    vae = Model(inputs, outputs, name='vae')

    # Loss calculation.
    # The reconstruction error is written with backend ops on purpose: in this
    # cell the name `mean_squared_error` refers to scikit-learn's function (its
    # import comes after the Keras one), and that function cannot take Keras tensors.
    squared_error = K.square(K.flatten(inputs) - K.flatten(outputs))
    reconstruction_loss = K.mean(squared_error) * float(np.prod(input_shape))
    kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)

    vae.compile(optimizer=Adam())

    return encoder, decoder, vae

# Build the VAE model
encoder, decoder, vae = build_vae(input_shape, latent_dim)

# Prepare data for training
X = df_reduced.values.reshape((df_reduced.shape[0], 6, 1))
y = cleaned_df_normalized_df['Patv'].values


def build_bayesian_bilstm(input_shape):
    """Return a compiled two-layer bidirectional LSTM regressor.

    NOTE(review): despite the name, the network built here is a plain
    BiLSTM followed by a linear Dense unit — confirm whether a Bayesian
    component was intended.
    """
    inputs = Input(shape=input_shape)
    x = Bidirectional(LSTM(64, return_sequences=True))(inputs)
    x = Bidirectional(LSTM(32))(x)
    outputs = Dense(1, activation='linear')(x)
    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss='mse')
    return model


# 5-Fold Cross Validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

mae_scores = []
mse_scores = []
r2_scores = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Re-create the VAE for each fold. Reusing one model across folds would let
    # weights trained on earlier folds (which contained this fold's test rows)
    # carry over into this fold.
    encoder, decoder, vae = build_vae(input_shape, latent_dim)
    vae.fit(X_train, X_train, epochs=50, batch_size=32, verbose=1)

    # First encoder output = z_mean, used as the latent features.
    z_train, _, _ = encoder.predict(X_train)
    z_test, _, _ = encoder.predict(X_test)

    bilstm = build_bayesian_bilstm((latent_dim, 1))

    # Reshape latent vectors for BiLSTM input: (rows, latent_dim, 1).
    z_train_bilstm = z_train.reshape(z_train.shape[0], latent_dim, 1)
    z_test_bilstm = z_test.reshape(z_test.shape[0], latent_dim, 1)

    # Train BiLSTM
    bilstm.fit(z_train_bilstm, y_train, epochs=50, batch_size=32, verbose=1)

    # Evaluate the model
    y_pred = bilstm.predict(z_test_bilstm)

    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    mae_scores.append(mae)
    mse_scores.append(mse)
    r2_scores.append(r2)

    print(f"Fold - MAE: {mae}, MSE: {mse}, R2: {r2}")

# Mean evaluation metrics
print(f"Average MAE: {np.mean(mae_scores)}, Average MSE: {np.mean(mse_scores)}, Average R2: {np.mean(r2_scores)}")



# Harror et al., 2022 (GAHD-VAE)

In [None]:
import numpy as np
import pandas as pd
from tensorflow.keras.layers import Input, Dense, Lambda, Conv1D, Flatten, Reshape, Bidirectional, LSTM, Attention, Multiply
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.losses import mean_squared_error
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression


input_shape = (6, 1)
latent_dim = 2

def sampling(args):
    """Draw z = mean + exp(0.5 * log_var) * eps with eps ~ N(0, 1).

    `args` is the pair (z_mean, z_log_var). The noise takes its shape from
    z_mean itself, so the function does not depend on the module-level
    `latent_dim`, which other cells of the notebook rebind.
    """
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=K.shape(z_mean), mean=0., stddev=1.0)
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

def attention_block(inputs):
    """Gate `inputs` by its own self-attention output.

    An Attention layer is applied with `inputs` as both query and value,
    and the result is multiplied element-wise with `inputs`.
    """
    attended = Attention()([inputs, inputs])
    gated = Multiply()([inputs, attended])
    return gated

def build_gahd_vae(input_shape, latent_dim):
    """Assemble the attention/BiLSTM VAE and return ``(encoder, decoder, vae)``.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, e.g. ``(6, 1)``.
    latent_dim : int
        Size of the latent vector.

    Returns
    -------
    encoder : Model mapping inputs to ``[z_mean, z_log_var, z]``
        (Conv1D followed by an attention block).
    decoder : Model mapping a latent vector back to `input_shape`
        (BiLSTM followed by an attention block).
    vae : encoder + decoder, compiled with Adam; its loss (reconstruction
        error scaled by the number of input elements, plus the summed KL
        term) is attached with ``add_loss``.
    """
    inputs = Input(shape=input_shape)

    # Encoder with attention
    x = Conv1D(32, kernel_size=3, activation='relu', padding='same')(inputs)
    x = attention_block(x)
    x = Flatten()(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)

    z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')

    # Decoder with BiLSTM
    latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
    x = Dense(np.prod(input_shape), activation='relu')(latent_inputs)
    x = Reshape(input_shape)(x)
    x = Bidirectional(LSTM(64, return_sequences=True))(x)
    x = attention_block(x)
    outputs = Dense(1, activation='sigmoid')(x)

    decoder = Model(latent_inputs, outputs, name='decoder')

    # VAE model
    outputs = decoder(encoder(inputs)[2])
    vae = Model(inputs, outputs, name='vae')

    # The reconstruction error is written with backend ops on purpose: in this
    # cell the name `mean_squared_error` refers to scikit-learn's function (its
    # import comes after the Keras one), and that function cannot take Keras tensors.
    squared_error = K.square(K.flatten(inputs) - K.flatten(outputs))
    reconstruction_loss = K.mean(squared_error) * float(np.prod(input_shape))
    kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    vae_loss = K.mean(reconstruction_loss + kl_loss)

    vae.add_loss(vae_loss)
    vae.compile(optimizer=Adam())

    return encoder, decoder, vae

encoder, decoder, vae = build_gahd_vae(input_shape, latent_dim)

X = df_reduced.values.reshape((df_reduced.shape[0], 6, 1))
y = cleaned_df_normalized_df['Patv'].values


def build_bayesian_bilstm(input_shape):
    """Return a compiled two-layer bidirectional LSTM regressor.

    NOTE(review): despite the name, the network built here is a plain
    BiLSTM followed by a linear Dense unit — confirm whether a Bayesian
    component was intended.
    """
    inputs = Input(shape=input_shape)
    x = Bidirectional(LSTM(64, return_sequences=True))(inputs)
    x = Bidirectional(LSTM(32))(x)
    outputs = Dense(1, activation='linear')(x)
    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss='mse')
    return model


kf = KFold(n_splits=5, shuffle=True, random_state=42)

mae_scores = []
mse_scores = []
r2_scores = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Re-create the VAE for each fold. Reusing one model across folds would let
    # weights trained on earlier folds (which contained this fold's test rows)
    # carry over into this fold.
    encoder, decoder, vae = build_gahd_vae(input_shape, latent_dim)
    vae.fit(X_train, X_train, epochs=50, batch_size=32, verbose=1)

    # First encoder output = z_mean, used as the latent features.
    z_train, _, _ = encoder.predict(X_train)
    z_test, _, _ = encoder.predict(X_test)

    bilstm = build_bayesian_bilstm((latent_dim, 1))

    # (rows, latent_dim) -> (rows, latent_dim, 1) for the recurrent layers.
    z_train_bilstm = z_train.reshape(z_train.shape[0], latent_dim, 1)
    z_test_bilstm = z_test.reshape(z_test.shape[0], latent_dim, 1)

    bilstm.fit(z_train_bilstm, y_train, epochs=50, batch_size=32, verbose=1)

    y_pred = bilstm.predict(z_test_bilstm)

    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    mae_scores.append(mae)
    mse_scores.append(mse)
    r2_scores.append(r2)

    print(f"Fold - MAE: {mae}, MSE: {mse}, R2: {r2}")

print(f"Average MAE: {np.mean(mae_scores)}, Average MSE: {np.mean(mse_scores)}, Average R2: {np.mean(r2_scores)}")


# Atashfaraz et al., 2022 (V-LSTM-MLP)

In [None]:
import numpy as np
import pandas as pd
from tensorflow.keras.layers import Input, Dense, Lambda, Conv1D, Flatten, Reshape, LSTM, Bidirectional
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.losses import mean_squared_error
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression

input_shape = (6, 1)
latent_dim = 2

def sampling(args):
    """Draw z = mean + exp(0.5 * log_var) * eps with eps ~ N(0, 1).

    `args` is the pair (z_mean, z_log_var). The noise takes its shape from
    z_mean itself, so the function does not depend on the module-level
    `latent_dim`, which other cells of the notebook rebind.
    """
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=K.shape(z_mean), mean=0., stddev=1.0)
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

def build_v_lstm_mlp(input_shape, latent_dim, kl_weight=1.0):
    """Assemble the variational LSTM-MLP regressor.

    The encoder maps an input of shape `input_shape` to
    ``[z_mean, z_log_var, z]``. The decoder (Dense -> BiLSTM -> Dense(1))
    maps a latent vector to ONE scalar per sample, so the combined model is
    a regressor and must be fitted against a scalar target, not against its
    own input.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, e.g. ``(6, 1)``.
    latent_dim : int
        Size of the latent vector.
    kl_weight : float, default 1.0
        Multiplier on the KL regularisation term.

    Returns
    -------
    encoder, decoder, vae : the three Keras models; `vae` is compiled with
        Adam and an MSE prediction loss, plus the KL term attached with
        ``add_loss``.
    """
    # Encoder (VAE)
    inputs = Input(shape=input_shape)
    x = Conv1D(32, kernel_size=3, activation='relu', padding='same')(inputs)
    x = Flatten()(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)

    z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')

    # Decoder (LSTM-MLP)
    latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
    x = Dense(np.prod(input_shape), activation='relu')(latent_inputs)
    x = Reshape(input_shape)(x)
    x = Bidirectional(LSTM(64, return_sequences=False))(x)
    outputs = Dense(1, activation='linear')(x)

    decoder = Model(latent_inputs, outputs, name='decoder')

    # Full model: one encoder pass supplies both the decoder input and the
    # statistics for the KL term.
    enc_mean, enc_log_var, enc_z = encoder(inputs)
    outputs = decoder(enc_z)
    vae = Model(inputs, outputs, name='v_lstm_mlp')

    # Loss: MSE between the scalar prediction and the target (through compile)
    # plus the KL divergence to the unit Gaussian (through add_loss). The
    # previous reconstruction term compared the 6-value input with the 1-value
    # output, which cannot work shape-wise.
    kl_loss = -0.5 * K.sum(1 + enc_log_var - K.square(enc_mean) - K.exp(enc_log_var), axis=-1)
    vae.add_loss(kl_weight * K.mean(kl_loss))
    vae.compile(optimizer=Adam(), loss='mse')

    return encoder, decoder, vae

# Prepare data for training
X = df_reduced.values.reshape((df_reduced.shape[0], 6, 1))
y = cleaned_df_normalized_df['Patv'].values

# 5-Fold Cross Validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

mae_scores = []
mse_scores = []
r2_scores = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Fresh model per fold, so no weights trained on earlier folds (which
    # contained this fold's test rows) carry over.
    encoder, decoder, vae = build_v_lstm_mlp(input_shape, latent_dim)

    # Train V-LSTM-MLP against the target: the model's output is a scalar
    # prediction, so it is fitted on y_train rather than on X_train.
    vae.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1)

    # Predict from the deterministic latent mean (first encoder output).
    z_test, _, _ = encoder.predict(X_test)
    y_pred = decoder.predict(z_test)

    # Evaluate the model
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    mae_scores.append(mae)
    mse_scores.append(mse)
    r2_scores.append(r2)

    print(f"Fold - MAE: {mae}, MSE: {mse}, R2: {r2}")

# Mean evaluation metrics
print(f"Average MAE: {np.mean(mae_scores)}, Average MSE: {np.mean(mse_scores)}, Average R2: {np.mean(r2_scores)}")
