In [None]:
pip install -U kaleido

In [None]:
# Mount Google Drive into the Colab runtime; the dataset and the result folder are both read/written under 'drive/MyDrive'.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd

# Load the training workbook from the mounted Drive. The real column names sit in the first data row and are promoted to the header in the next cell.
df_final = pd.read_excel('drive/MyDrive/Tugas Akhir/Dataset/Per_30_Mins_Data Tidal Wave Surabaya for Training.xlsx')

In [None]:
# The workbook's real column names live in its first data row, so promote that
# row to the header. The guard keeps the cell idempotent: without it, every
# re-run would discard one more row of real data and install it as the header.
if "Time (UTC)" not in df_final.columns and df_final.index.name != "Time (UTC)":
    new_header = df_final.iloc[0]  # grab the first row for the header
    df_final = df_final[1:]  # take the data less the header row
    df_final.columns = new_header  # set the header row as the df header

In [None]:
# Inspect the frame after the header promotion.
df_final

In [None]:
# Key every row by its timestamp, then snapshot the timestamps and the two
# sensor columns as plain Python lists. Only 'pr2(m)' and 'prs(m)' are used;
# an earlier three-column variant also included 'rad(m)'.
df_final = df_final.set_index("Time (UTC)")
main_data_index = df_final.index.tolist()
main_data = df_final[['pr2(m)', 'prs(m)']].values.tolist()

In [None]:
monthly = '5'

# Obtain the test set first: hold out the trailing `monthly` months.
# DataFrame.last() is deprecated (pandas 2.1), so the rule it applies is spelled
# out instead: step back `monthly` month-ends from the final timestamp and keep
# every row positioned after that cut-off.
test_start = df_final.index[-1] - pd.offsets.MonthEnd(int(monthly))
test_set = df_final.iloc[df_final.index.searchsorted(test_start, side='right'):]

# Plain-list copies of the two sensor columns and their timestamps.
test_data = test_set[['pr2(m)', 'prs(m)']].values.tolist()
test_data_index = test_set.index.tolist()

In [None]:
# Everything strictly before the first test timestamp is training data.
# A label slice (df_final[:max_date]) includes its end point, which would put
# the first test row into the training set as well.
max_date = test_set.index[0]
train_set = df_final.loc[df_final.index < max_date]

# Plain-list copies of the two sensor columns and their timestamps.
train_data = train_set[['pr2(m)', 'prs(m)']].values.tolist()
train_data_index = train_set.index.tolist()

<h2>Normalization and Windowing Technique</h2>

In [None]:
from sklearn.preprocessing import StandardScaler

# Normalize with standard scaler (per-column zero mean, unit variance).
# The statistics are estimated from the training rows only, so nothing about
# the held-out test period leaks into the preprocessing.
scaler = StandardScaler()
scaler.fit(train_data)

In [None]:
# Apply Normalization
# NOTE: both names are overwritten with their scaled versions, so running this
# cell a second time would scale data that is already scaled.
train_data = scaler.transform(train_data)
test_data = scaler.transform(test_data)

In [None]:
import numpy as np

# split a univariate sequence into samples
def split_sequence(sequence, n_steps, date):
    """Cut a sequence into sliding (window, next-value) training samples.

    Each sample pairs ``n_steps`` consecutive items of ``sequence`` with the
    single item that immediately follows them.

    Args:
        sequence: Indexable, sliceable series of observations.
        n_steps: Number of consecutive items in every input window.
        date: Labels aligned with ``sequence`` (one per item), or ``None``.

    Returns:
        A tuple ``(X, y, dates)`` of NumPy arrays: the input windows, the value
        following each window, and the label of that following value. ``dates``
        is empty when ``date`` is ``None``.
    """
    total = len(sequence)
    windows, targets, target_dates = [], [], []

    start = 0
    # A window is only usable while the item right after it still exists.
    while start < total and start + n_steps < total:
        stop = start + n_steps
        windows.append(sequence[start:stop])
        targets.append(sequence[stop])
        if date is not None:
            target_dates.append(date[stop])
        start += 1

    return np.array(windows), np.array(targets), np.array(target_dates)

# Window length, found by trial and error: the longer the look-back window, the more the model
# leans on data too far in the past; the shorter it is, the less context the model has.
n_steps=16
x_sequence_train, y_sequence_train, train_seq_date = split_sequence(train_data, n_steps=n_steps, date=train_data_index)

In [None]:
# Expected shape: (number of windows, n_steps, 2 sensor columns).
x_sequence_train.shape

In [None]:
# Export the normalized train and test arrays to Excel and offer them for download.
import pandas as pd

# Rebuild labelled frames: one column per sensor, rows keyed by their timestamps.
train_data_normalized_df = pd.DataFrame(
    data=train_data,
    index=train_data_index,
    columns=['pr2(m)_normalized', 'prs(m)_normalized'],
)
test_data_normalized_df = pd.DataFrame(
    data=test_data,
    index=test_data_index,
    columns=['pr2(m)_normalized', 'prs(m)_normalized'],
)

# One workbook per split, written with relative paths.
train_data_normalized_df.to_excel(excel_writer='normalized_train_data.xlsx', sheet_name='Train Data')
test_data_normalized_df.to_excel(excel_writer='normalized_test_data.xlsx', sheet_name='Test Data')

# Colab only: trigger a browser download of each workbook, train first.
from google.colab import files
for workbook_name in ('normalized_train_data.xlsx', 'normalized_test_data.xlsx'):
    files.download(workbook_name)

print("Train and test normalized data have been saved to Excel and are ready for download.")



<h2>Define and Train the Model</h2>

In [None]:
import tensorflow as tf

from tensorflow.keras.layers import Bidirectional

def create_model():
    """Build the (uncompiled) forecasting network.

    The network takes windows of shape ``(n_steps, 2)`` and returns 2 values per
    window. It stacks two bidirectional LSTM layers (128 units, then 256 units,
    each with an L1(1e-5) recurrent regularizer), followed by swish-activated
    dense layers of 128 and 64 units, with dropout in between.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    keras_layers = tf.keras.layers
    swish = tf.keras.activations.swish

    def bi_lstm(units, return_sequences):
        # Bidirectional LSTM with a light L1 penalty on the recurrent weights.
        return Bidirectional(
            keras_layers.LSTM(
                units,
                recurrent_regularizer=tf.keras.regularizers.L1(1e-5),
                return_sequences=return_sequences,
            )
        )

    stack = [
        keras_layers.Input(shape=(n_steps, 2)),
        # The first recurrent layer emits the full sequence for the second one.
        bi_lstm(128, True),
        keras_layers.Dropout(0.1),
        # The second recurrent layer collapses the sequence to a single vector.
        bi_lstm(256, False),
        keras_layers.Dropout(0.2),
        keras_layers.Dense(128, activation=swish),
        keras_layers.Dropout(0.1),
        keras_layers.Dense(64, activation=swish),
        # One output per sensor column.
        keras_layers.Dense(2),
    ]
    return tf.keras.Sequential(stack)


In [None]:
# Instantiate a fresh, untrained network.
model = create_model()

In [None]:
def scheduler(epoch:int) -> float:
    """Return the learning rate for a given epoch (step-wise decay).

    Args:
        epoch: Zero-based epoch number.

    Returns:
        0.01 for epochs below 4, 0.001 for epochs below 30, and 0.0001 afterwards.
    """
    # (exclusive upper epoch bound, learning rate), checked in ascending order.
    for upper_bound, rate in ((4, 0.01), (30, 0.001)):
        if epoch < upper_bound:
            return rate
    return 0.0001

# Callback wrapping the epoch -> learning-rate schedule; Keras only applies it
# when it is passed to model.fit(callbacks=[...]).
scheduler_callback = tf.keras.callbacks.LearningRateScheduler(scheduler)

# Adam starts at the schedule's epoch-0 rate; Huber is the training loss, and
# MAE and MSE are tracked as additional metrics.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=scheduler(0)),
    loss=tf.keras.losses.Huber(),
    metrics=['mae', 'mse']
)

In [None]:
# Train on the windowed training set. The scheduler callback has to be passed
# here; without it the learning rate stays at the compile-time value for every
# epoch and the schedule is never applied.
model_history = model.fit(
    x_sequence_train,
    y_sequence_train,
    epochs=5,
    verbose=1,
    callbacks=[scheduler_callback],
)

<h2>Evaluate</h2>

In [None]:
# Metrics recorded for the last training epoch.
final_loss, final_mae, final_mse = (
    model_history.history[key][-1] for key in ('loss', 'mae', 'mse')
)

print("\n".join([
    "Model Loss : {}".format(final_loss),
    "Final MAE : {}".format(final_mae),
    "Final MSE : {}".format(final_mse),
]))

In [None]:
# Prediction
# Build the evaluation series: the last n_steps training rows supply the
# history for the first test targets, followed by every test row.
evaluation_list = list(train_data[-n_steps:]) + list(test_data)
evaluation_list_data_index = list(train_data_index[-n_steps:]) + list(test_data_index)

# Window the series the same way as the training data, keeping the target dates.
x_sequence_eval, y_sequence_eval, date_seq_eval = split_sequence(
    evaluation_list,
    n_steps=n_steps,
    date=evaluation_list_data_index,
)

In [None]:
# Score the model on the test windows (values come back as loss, MAE, MSE, the order set in
# model.compile), then collect its predictions, which are still in the scaler's units.
final_loss_pred, final_mae_pred, final_mse_pred = model.evaluate(x_sequence_eval, y_sequence_eval)
predicted_result = model.predict(x_sequence_eval)

In [None]:
# Report the test-period metrics returned by model.evaluate().
print("Model Loss : {}".format(final_loss_pred))
print("Final MAE : {}".format(final_mae_pred))
print("Final MSE : {}".format(final_mse_pred))

In [None]:
# Reverse Transform
# Map the model output back to the original units.
# `predicted_result` holds the scaled output, `predict_result` the rescaled one.
predict_result = scaler.inverse_transform(predicted_result)
print(len(predicted_result))

# Split the rescaled predictions into one list per sensor column.
# third_sensors stays empty because only two sensors are modelled.
first_sensors = list(predict_result[:, 0])
second_sensors = list(predict_result[:, 1])
third_sensors = []

In [None]:
# Raw (unscaled) readings of the whole dataset, used as the reference series in the plots.
index_1 = df_final[['pr2(m)', 'prs(m)']].values.tolist()  # one [pr2, prs] pair per row
value_2 = np.array(df_final[['pr2(m)']].values.tolist()).ravel()  # pr2(m) as a flat 1-D array
value_3 = np.array(df_final[['prs(m)']].values.tolist()).ravel()  # prs(m) as a flat 1-D array
y = list(range(len(value_3)))  # positional row numbers 0..n-1

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

In [None]:
def make_plot(column_name, datetime_main, datetime_pred, value_main, value_pred, save_path:str):
    """Plot measured values against predicted values for one sensor column.

    Args:
        column_name: Sensor name shown in the legend entries and the title.
        datetime_main: X values of the measured series.
        datetime_pred: X values of the predicted series.
        value_main: Measured values, drawn as markers.
        value_pred: Predicted values, drawn as a line.
        save_path: Image file to write the figure to; an empty string skips saving.
    """
    fig = make_subplots(rows=1, cols=1)

    # Measured data first, prediction on top of it.
    fig.add_trace(
        go.Scatter(
            x=datetime_main,
            y=value_main,
            mode='markers',
            name='Data Real {}'.format(column_name),
        )
    )
    fig.add_trace(
        go.Scatter(
            x=datetime_pred,
            y=value_pred,
            mode='lines',
            name='Data Prediksi {}'.format(column_name),
        )
    )

    fig.update_layout(
        title='Prediction of {}'.format(column_name),
        xaxis_title='Tanggal',
        yaxis_title='Ketinggian air laut',
    )

    fig.show()

    # Nothing to write when no destination was given.
    if save_path == '':
        return
    fig.write_image(save_path)

<h2>Plot and document result</h2>

In [None]:
import os

# Saving Name Folder
saving_dir_name = 'drive/MyDrive/Tugas Akhir/Result'

# Initialize Directory: create it together with any missing parent folder;
# an already existing folder is not an error.
os.makedirs(saving_dir_name, exist_ok=True)

# Document the data: training metrics from the last epoch, test metrics from model.evaluate().
final_txt_eval = "train_loss = {}\ntrain_mse = {}\ntrain_mae = {}\ntest_loss = {}\ntest_mse = {}\ntest_mae = {}".format(
    final_loss,
    final_mse,
    final_mae,
    final_loss_pred,
    final_mse_pred,
    final_mae_pred
)

# Save evaluation result
final_saving_txt_path = os.path.join(saving_dir_name, "plain_evaluation.txt")
with open(final_saving_txt_path, 'w') as f:
    f.write(final_txt_eval)

In [None]:
import matplotlib.pyplot as plt

# Training curves: one line per tracked quantity, saved next to the other results.
fig_model_loss = plt.gcf()
for curve_name in ('mae', 'mse', 'loss'):
    plt.plot(model_history.history[curve_name])
plt.legend(['mae', 'mse', 'loss'], loc='upper left')
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
# Save before show(), while the figure is still the current one.
plt.savefig(os.path.join(saving_dir_name, 'loss.jpg'))
plt.show()

In [None]:
# First Sensor: measured pr2(m) over the whole dataset versus the test-period predictions.
make_plot(
    column_name = 'pr2(m)',
    datetime_main = main_data_index,
    datetime_pred = test_data_index,
    value_main=value_2,
    value_pred=first_sensors,
    save_path=os.path.join(saving_dir_name, 'pr2.jpg')
)

In [None]:
# Second Sensor: measured prs(m) over the whole dataset versus the test-period predictions.
make_plot(
    column_name = 'prs(m)',
    datetime_main = main_data_index,
    datetime_pred = test_data_index,
    value_main=value_3,
    value_pred=second_sensors,
    save_path=os.path.join(saving_dir_name, 'prs.jpg')
)

In [None]:
# # Third Sensor
# make_plot(
#     column_name = 'rad(m)',
#     datetime_main = main_data_index,
#     datetime_pred = test_data_index,
#     value_main=value_4,
#     value_pred=third_sensors,
#     save_path=os.path.join(saving_dir_name, 'rad.jpg')
# )

<h2>Second Round Evaluation</h2>

In [None]:


def separate_eval(month: int, year: int, save_name: str, evaluate_5_months: bool = False):
    """Evaluate the trained model on one calendar month, or a five-month span, of ``df_final``.

    The selected rows are preceded by ``n_steps`` rows of earlier history so the
    first selected row already has a complete input window. Every prediction is
    plotted at the timestamp of the row it predicts.

    Relies on notebook globals: ``df_final``, ``scaler``, ``model``, ``n_steps``,
    ``saving_dir_name``, ``split_sequence`` and ``make_plot``.

    Args:
        month: Calendar month (1-12) to evaluate; with ``evaluate_5_months`` it
            is the last month of the span.
        year: Calendar year belonging to ``month``.
        save_name: Prefix of the files written into ``saving_dir_name``
            (``<save_name>_evaluation.txt``, ``<save_name>_pr2.jpg``,
            ``<save_name>_prs.jpg``).
        evaluate_5_months: When True, evaluate the five months ending with
            ``month`` instead of that single month.

    Raises:
        ValueError: If no rows fall into the requested period, or if there are
            too few rows to build a single ``n_steps``-long window.
    """
    sensor_columns = ['pr2(m)', 'prs(m)']

    if evaluate_5_months:
        # Five calendar months ending with, and including, the requested month.
        anchor = pd.Timestamp(year=year, month=month, day=1)
        start_date = anchor - pd.DateOffset(months=4)
        end_date = anchor + pd.DateOffset(months=1)
        in_period = (df_final.index >= start_date) & (df_final.index < end_date)
    else:
        # Specific month evaluation.
        in_period = (df_final.index.month == month) & (df_final.index.year == year)
    selected_row_data = df_final.loc[in_period, sensor_columns]

    if selected_row_data.empty:
        raise ValueError(
            "No data available for month={}, year={} (evaluate_5_months={})".format(
                month, year, evaluate_5_months
            )
        )

    # Context rows: the n_steps rows strictly before the first selected row.
    # An inclusive label slice would duplicate the first selected row, and any
    # count other than n_steps would shift the predictions against their dates.
    first_timestamp = selected_row_data.index[0]
    append_data = df_final.loc[df_final.index < first_timestamp, sensor_columns].iloc[-n_steps:]

    # Combine the context and the selected rows in chronological order.
    merged_dataframe = pd.concat([append_data, selected_row_data]).sort_index()
    if len(merged_dataframe) <= n_steps:
        raise ValueError(
            "Need more than {} rows to build a window, got {}".format(
                n_steps, len(merged_dataframe)
            )
        )

    # Scale exactly like the training data, then cut into windows.
    selected_row_data_final = scaler.transform(merged_dataframe.values.tolist())
    x_sequence_eval, y_sequence_eval, date_seq_eval = split_sequence(
        selected_row_data_final, n_steps=n_steps, date=None
    )

    # Window k predicts row k + n_steps of the merged frame, so these are the
    # timestamps the predictions belong to. They equal the selected rows
    # whenever a full n_steps of context was available.
    prediction_dates = merged_dataframe.index.tolist()[n_steps:]

    # Prediction (converted back to original units) and evaluation (scaled units).
    predicted_value = model.predict(x_sequence_eval)
    predicted_value = scaler.inverse_transform(predicted_value)
    final_loss_pred, final_mae_pred, final_mse_pred = model.evaluate(x_sequence_eval, y_sequence_eval)

    print("Model Loss : {}".format(final_loss_pred))
    print("Final MAE : {}".format(final_mae_pred))
    print("Final MSE : {}".format(final_mse_pred))

    # Save the evaluation results to a file.
    final_txt_eval = "test_loss = {}\ntest_mse = {}\ntest_mae = {}".format(
        final_loss_pred, final_mse_pred, final_mae_pred
    )
    final_saving_txt_path = os.path.join(saving_dir_name, "{}_evaluation.txt".format(save_name))
    with open(final_saving_txt_path, 'w') as f:
        f.write(final_txt_eval)

    # One prediction series per sensor column.
    first_sensors = [row[0] for row in predicted_value]
    second_sensors = [row[1] for row in predicted_value]
    evaluation_list_data_index = selected_row_data.index.tolist()

    # Plot sensor pr2
    make_plot(
        column_name='pr2(m)',
        datetime_main=evaluation_list_data_index,
        datetime_pred=prediction_dates,
        value_main=selected_row_data['pr2(m)'].values.tolist(),
        value_pred=first_sensors,
        save_path=os.path.join(saving_dir_name, '{}_pr2.jpg'.format(save_name))
    )

    # Plot sensor prs
    make_plot(
        column_name='prs(m)',
        datetime_main=evaluation_list_data_index,
        datetime_pred=prediction_dates,
        value_main=selected_row_data['prs(m)'].values.tolist(),
        value_pred=second_sensors,
        save_path=os.path.join(saving_dir_name, '{}_prs.jpg'.format(save_name))
    )


In [None]:
# Five-month window ending April 2024 (December 2023 through April 2024).
# NOTE(review): same month/year as the "evaluasi_5_bulan_keempat" call further down, and this
# save_name is reused by the next cell, which overwrites this run's output files. Confirm this cell is still needed.
separate_eval(month=4, year=2024, save_name="evaluasi_5_bulan_pertama", evaluate_5_months=True)

In [None]:
# First five-month window: September 2022 through January 2023.
separate_eval(month=1, year=2023, save_name="evaluasi_5_bulan_pertama", evaluate_5_months=True)

In [None]:
# Second five-month window: February 2023 through June 2023.
separate_eval(month=6, year=2023, save_name="evaluasi_5_bulan_kedua", evaluate_5_months=True)

In [None]:
# Third five-month window: July 2023 through November 2023.
separate_eval(month=11, year=2023, save_name="evaluasi_5_bulan_ketiga", evaluate_5_months=True)

In [None]:
# Fourth five-month window: December 2023 through April 2024.
separate_eval(month=4, year=2024, save_name="evaluasi_5_bulan_keempat", evaluate_5_months=True)

In [None]:
# Month 21 (September 2023), evaluated on its own.
separate_eval(month=9, year=2023, save_name="bulan_21")

In [None]:
# Months 21-25 cover 09/2023 through 01/2024.

# Month 22 (October 2023)
separate_eval(month=10, year=2023, save_name="bulan_22")

In [None]:
# Month 23 (November 2023)
separate_eval(month=11, year=2023, save_name="bulan_23")

In [None]:
# Month 24 (December 2023)
separate_eval(month=12, year=2023, save_name="bulan_24")

In [None]:
# Month 25 (January 2024)
separate_eval(month=1, year=2024, save_name="bulan_25")

In [None]:
# Ensure df_final is defined (for example, from a CSV file).
# Only load when the notebook has not built df_final already: loading
# unconditionally would replace the time-indexed dataset prepared above with
# the placeholder CSV or the three-row sample frame.
if 'df_final' not in globals():
    try:
        # Replace 'your_data_file.csv' with the actual data file name
        df_final = pd.read_csv('your_data_file.csv')
        print("df_final loaded successfully. Columns available:", df_final.columns)
    except FileNotFoundError:
        print("File 'your_data_file.csv' not found. Please upload the file or check the path.")
        # Example fallback definition for demonstration purposes
        data = {'pr2(m)': [0.1, 0.2, 0.3], 'prs(m)': [0.4, 0.5, 0.6]}
        df_final = pd.DataFrame(data)
        print("Sample df_final created:", df_final)
else:
    print("df_final is already defined; keeping the existing data.")


In [None]:
# Export the test-period predictions to Excel, one column per model output.
# NOTE(review): `predicted_result` is the raw model output, i.e. still in the
# scaler's standardised units; the inverse-transformed values live in
# `predict_result`. Confirm which of the two is meant to be exported.
predictions_df = pd.DataFrame(predicted_result, columns=['Predicted Value 1', 'Predicted Value 2'])

# Save to Excel (without the positional row index)
predictions_df.to_excel('predicted_results.xlsx', index=False)
print("Predicted results have been saved to 'predicted_results.xlsx'")

In [None]:

from google.colab import files

# Colab only: trigger a browser download of the workbook written in the previous cell.
files.download('predicted_results.xlsx')
