In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pykalman import KalmanFilter
from sklearn.preprocessing import StandardScaler
from scipy.signal import detrend
from sklearn.preprocessing import MinMaxScaler
import keras
from sklearn.impute import KNNImputer
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix, recall_score, precision_score
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Activation, Embedding, Input
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
from keras.models import Model
from keras.optimizers import Adam
from keras.regularizers import l2
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from scikeras.wrappers import KerasRegressor, KerasClassifier
from sklearn.model_selection import GridSearchCV
from skopt import BayesSearchCV
from talos import Scan
import seaborn as sns
import openpyxl
from sklearn.model_selection import train_test_split
import random as rn
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from math import sqrt
import matplotlib.dates as mdates
# Setting seed for reproducability
np.random.seed(1234)  
PYTHONHASHSEED = 0
%matplotlib inline


Eksplorasi Awal

In [None]:
# Load the raw observations from the Excel workbook
data_path = 'G:/My Drive/Skripsi/Data Kotor.xlsx'

# Worksheet that holds the observations
sheet_name = 'Data + UPT Terdekat'

data = pd.read_excel(data_path, sheet_name=sheet_name)
data['Tanggal'] = pd.to_datetime(data['Tanggal'])

# Keep only the study period. .copy() makes `dt` an independent frame, so the
# later cells that replace values and add columns do not operate on a slice of `data`.
dt = data.loc[(data['Tanggal'] >= '2015-05-01') & (data['Tanggal'] <= '2024-04-30')].copy()
print("Start date is: ", dt['Tanggal'].min())
print("End date is: ", dt['Tanggal'].max())

In [None]:
# Summary statistics of the filtered data, before the 8888 codes are replaced
dt.describe()

In [None]:
# Count the missing (NaN) values in each column
jumlah_missing_data = dt.isnull().sum()
jumlah_missing_data

In [None]:
# Number of rows in the study period
len(dt['ss'])

In [None]:
# 8888 is treated as a missing-value code: turn it into NaN.
# Rebinding (instead of inplace=True) avoids mutating a frame that may be a
# slice of `data`, and keeps the cell safe to re-run.
dt = dt.replace(8888, np.nan)

# Count the missing values per column again, now including the former 8888 entries
jumlah_missing_data = dt.isnull().sum()
jumlah_missing_data

In [None]:
# Summary statistics after the 8888 codes were replaced by NaN
dt.describe()

In [None]:
# Show every row when printing; this display option stays in effect for the
# rest of the session, so later printed frames are not truncated either
pd.set_option('display.max_rows', None)

# Print all rows that contain at least one missing value
data_hilang = dt[dt.isnull().any(axis=1)]
print(data_hilang)

In [None]:
# Dates on which 'ss' is missing (only the 'Tanggal' and 'ss' columns are kept)
data_hilang_ss = dt.loc[dt['ss'].isnull(), ['Tanggal', 'ss']]
print(data_hilang_ss)

In [None]:
# Dates on which 'rr' is missing (only the 'Tanggal' and 'rr' columns are kept)
data_hilang_rr = dt.loc[dt['rr'].isnull(), ['Tanggal', 'rr']]
print(data_hilang_rr)


In [None]:

# Time series of 'ss' (top panel) and 'rr' (bottom panel), one tick per year
plt.figure(figsize=(14, 6))

# (column, panel title, extra keyword arguments for the line)
panels = [
    ('ss', 'Time Series Plot for ss', {}),
    ('rr', 'Time Series Plot for rr', {'color': 'orange'}),
]

for position, (column, panel_title, line_kwargs) in enumerate(panels, start=1):
    plt.subplot(2, 1, position)
    ax = plt.gca()
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)
    plt.plot(dt['Tanggal'], dt[column], label=column, **line_kwargs)
    plt.title(panel_title)
    plt.xlabel('Date')
    plt.ylabel(column)
    plt.legend()

plt.tight_layout()
plt.show()


In [None]:
# Add calendar year and month columns used for grouping.
# assign() builds a new frame, so the cell is safe to re-run and never writes
# into a slice of another DataFrame.
dt = dt.assign(Year=dt['Tanggal'].dt.year, Month=dt['Tanggal'].dt.month)

# One boxplot panel per year, one box per month
plt.figure(figsize=(30, 30))

years = dt['Year'].unique()
months = range(1, 13)

# Highlight colour for selected years, default colour for all others
colors = {year: '#FF6347' if year in [2012, 2013, 2014] else '#4682B4' for year in years}

for i, year in enumerate(sorted(years)):
    plt.subplot(len(years), 1, i + 1)
    year_data = dt[dt['Year'] == year]

    # order= keeps all 12 months on the x-axis even when a month has no rows.
    # This replaces the placeholder rows previously added with DataFrame.append,
    # which no longer exists in pandas 2.x. Every box in a panel has the same
    # colour, so color= is used instead of a one-element palette.
    sns.boxplot(x='Month', y='ss', data=year_data, order=list(months), color=colors[year], ax=plt.gca())
    plt.title(f'Durasi Sinar Matahari Tahun {year}')
    plt.suptitle('')
    plt.xlabel('Bulan')
    plt.ylabel('Durasi Sinar Matahari (jam)')

plt.tight_layout()
plt.show()

In [None]:
# Add calendar year and month columns used for grouping.
# assign() builds a new frame, so the cell is safe to re-run and never writes
# into a slice of another DataFrame.
dt = dt.assign(Year=dt['Tanggal'].dt.year, Month=dt['Tanggal'].dt.month)

# One boxplot panel per year, one box per month
plt.figure(figsize=(30, 30))

years = dt['Year'].unique()
months = range(1, 13)

# Highlight colour for selected years, default colour for all others
colors = {year: '#FF6347' if year in [2012, 2013, 2014] else '#4682B4' for year in years}

for i, year in enumerate(sorted(years)):
    plt.subplot(len(years), 1, i + 1)
    year_data = dt[dt['Year'] == year]

    # order= keeps all 12 months on the x-axis even when a month has no rows.
    # This replaces the placeholder rows previously added with DataFrame.append,
    # which no longer exists in pandas 2.x.
    sns.boxplot(x='Month', y='rr', data=year_data, order=list(months), color=colors[year], ax=plt.gca())
    # This figure plots 'rr', so it is labelled as rainfall; the copied labels
    # described sunshine duration.
    # NOTE(review): the unit is assumed to be millimetres - confirm against the data source.
    plt.title(f'Curah Hujan Tahun {year}')
    plt.suptitle('')
    plt.xlabel('Bulan')
    plt.ylabel('Curah Hujan (mm)')

plt.tight_layout()
plt.show()

In [None]:
# Add calendar year and month columns used for grouping.
# assign() builds a new frame, so the cell is safe to re-run and never writes
# into a slice of another DataFrame.
dt = dt.assign(Year=dt['Tanggal'].dt.year, Month=dt['Tanggal'].dt.month)

# One boxplot panel per month (3x4 grid), one box per year
plt.figure(figsize=(30, 30))

months = range(1, 13)
years = dt['Year'].unique()

# The x-axis order and the colour list are built from the same sorted list of
# years, so each colour stays attached to its year.
year_order = sorted(years)
colors = ['#FF6347' if year in [2012, 2013, 2014] else '#4682B4' for year in year_order]

for i, month in enumerate(months):
    plt.subplot(3, 4, i + 1)  # 3x4 grid for the 12 months
    month_data = dt[dt['Month'] == month]

    # order= keeps every year on the x-axis even when it has no rows for this
    # month. This replaces the placeholder rows previously added with
    # DataFrame.append, which no longer exists in pandas 2.x.
    sns.boxplot(x='Year', y='ss', data=month_data, order=year_order, palette=colors, ax=plt.gca())
    plt.title(f'Boxplot Durasi Sinar Matahari Bulan = {month}')
    plt.suptitle('')
    plt.xlabel('Tahun')
    plt.ylabel('Durasi Sinar Matahari (jam)')

plt.tight_layout()
plt.show()

In [None]:
# Add calendar year and month columns used for grouping.
# assign() builds a new frame, so the cell is safe to re-run and never writes
# into a slice of another DataFrame.
dt = dt.assign(Year=dt['Tanggal'].dt.year, Month=dt['Tanggal'].dt.month)

# One boxplot panel per month (3x4 grid), one box per year
plt.figure(figsize=(30, 30))

months = range(1, 13)
years = dt['Year'].unique()

# The x-axis order and the colour list are built from the same sorted list of
# years, so each colour stays attached to its year.
year_order = sorted(years)
colors = ['#FF6347' if year in [2012, 2013, 2014] else '#4682B4' for year in year_order]

for i, month in enumerate(months):
    plt.subplot(3, 4, i + 1)  # 3x4 grid for the 12 months
    month_data = dt[dt['Month'] == month]

    # order= keeps every year on the x-axis even when it has no rows for this
    # month. This replaces the placeholder rows previously added with
    # DataFrame.append, which no longer exists in pandas 2.x.
    sns.boxplot(x='Year', y='rr', data=month_data, order=year_order, palette=colors, ax=plt.gca())
    plt.title(f'Boxplot Curah Hujan Bulan = {month}')
    plt.suptitle('')
    plt.xlabel('Tahun')
    # Rainfall is not a duration, so the copied "(jam)" unit was wrong.
    # NOTE(review): the unit is assumed to be millimetres - confirm against the data source.
    plt.ylabel('Curah Hujan (mm)')

plt.tight_layout()
plt.show()

Interpolasi

In [None]:
# Load the data to be interpolated from the Excel workbook
data_path = 'G:/My Drive/Skripsi/Imputasi.xlsx'

# Worksheet that holds the data
sheet_name = 'Fix'

data = pd.read_excel(data_path, sheet_name=sheet_name)
data['Tanggal'] = pd.to_datetime(data['Tanggal'])

# Restrict to the study period. .copy() makes `dataf` an independent frame, so
# the replace / interpolate steps in later cells do not write into a slice of `data`.
dataf = data.loc[(data['Tanggal'] >= '2015-05-01') & (data['Tanggal'] <= '2024-04-30')].copy()
dataf.head()

In [None]:
# Summary statistics before the 8888 codes are replaced
dataf.describe()

In [None]:
# 8888 is treated as a missing-value code: turn it into NaN.
# Rebinding (instead of inplace=True) avoids mutating a frame that may be a
# slice of `data`, and keeps the cell safe to re-run.
dataf = dataf.replace(8888, np.nan)

# Boolean mask of the rows whose 'rr' value is missing
# (the mask is only computed here; this cell does not display it)
missing_rr = dataf['rr'].isnull()


In [None]:
# Summary statistics after the 8888 codes were replaced by NaN
dataf.describe()

In [None]:
# Count the missing values per column before interpolation
jumlah_missing_data = dataf.isnull().sum()
jumlah_missing_data

In [None]:
# Date range that is excluded from interpolation
start_date = '2020-09-15'
end_date = '2020-09-27'
dataf['Tanggal'] = pd.to_datetime(dataf['Tanggal'])

# Mask selecting the rows outside the excluded range
mask = ~dataf['Tanggal'].between(start_date, end_date)

# Linearly interpolate missing 'rr' values for the rows outside the excluded range.
# The interpolation runs on the masked subset only, so the excluded rows are
# neither filled nor used as interpolation points.
dataf.loc[mask, 'rr'] = dataf.loc[mask, 'rr'].interpolate(method='linear')

In [None]:

# Count the missing values per column after interpolation
jumlah_missing_data = dataf.isnull().sum()
jumlah_missing_data

In [None]:
# Summary statistics after interpolation
dataf.describe()

In [None]:
# Save the interpolated DataFrame to an Excel file.
# Forward slashes are used because '\M', '\S' and '\H' in a normal string
# literal are invalid escape sequences, which newer Python versions warn about.
# This also matches the path the next section reads from.
excel_path = 'G:/My Drive/Skripsi/Hasil Interpolasi.xlsx'  # output file name and location
dataf.to_excel(excel_path, sheet_name='sheet')

print("Data telah disimpan ke Excel di lokasi:", excel_path)

LSTM - Imputasi SS

In [None]:
# Load the interpolated data from the Excel file written by the interpolation section
data_path = 'G:/My Drive/Skripsi/Hasil Interpolasi.xlsx'

data1 = pd.read_excel(data_path)

data1['Tanggal'] = pd.to_datetime(data1['Tanggal'])

30

In [None]:
def to_sequences(x, y, seq_size=1):
    """Build sliding-window samples for supervised sequence learning.

    Sample ``i`` is the block of ``seq_size`` consecutive rows of ``x`` starting
    at position ``i``; its target is the element of ``y`` at position
    ``i + seq_size``. Both arguments are indexed positionally through ``.iloc``.

    Returns a tuple ``(windows, targets)`` of NumPy arrays, one entry per window
    (empty arrays when ``x`` is not longer than ``seq_size``).
    """
    n_windows = len(x) - seq_size
    windows = [x.iloc[start:start + seq_size].values for start in range(n_windows)]
    targets = [y.iloc[start + seq_size] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Sequence (window) length used by the cells below
seq_size = 30

In [None]:
# Split into the periods before (train) and after (test) the 2020-09-15..2020-09-27 gap.
# .copy() gives independent frames, so writing the scaled 'ss' column below does
# not operate on a slice of data1 (no SettingWithCopyWarning, data1 stays unscaled).
train_ss = data1.loc[(data1['Tanggal'] >= '2015-05-01') & (data1['Tanggal'] <= '2020-09-14')].copy()
test_ss = data1.loc[(data1['Tanggal'] >= '2020-09-28') & (data1['Tanggal'] <= '2024-04-30')].copy()

# Min-max scaling: fit on the training period only, then apply it to the test period
scaler_ss = MinMaxScaler()
train_ss['ss'] = scaler_ss.fit_transform(train_ss[['ss']])
test_ss['ss'] = scaler_ss.transform(test_ss[['ss']])

# Turn each series into (window, next value) pairs for the LSTM
trainX_ss, trainY_ss = to_sequences(train_ss[['ss']], train_ss['ss'], seq_size)
testX_ss, testY_ss = to_sequences(test_ss[['ss']], test_ss['ss'], seq_size)

# Reshape the inputs to [samples, time steps, features]
trainX_ss = np.reshape(trainX_ss, (trainX_ss.shape[0], seq_size, 1))
testX_ss = np.reshape(testX_ss, (testX_ss.shape[0], seq_size, 1))

In [None]:
from tensorflow.keras.layers import LSTM, Dropout, RepeatVector, TimeDistributed, Dense, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.activations import swish
import random

# Seed every random number generator involved so repeated runs are comparable
seed_value = 1
np.random.seed(seed_value)
random.seed(seed_value)
tf.random.set_seed(seed_value)

optimizer = Adam(learning_rate=0.0001)

# One LSTM layer followed by a Dense layer with one unit per input feature.
# With return_sequences=True the network emits a value for every time step, so
# predict() returns an array of shape (samples, time steps, features).
model = Sequential()
model.add(LSTM(50, activation='tanh', input_shape=(trainX_ss.shape[1], trainX_ss.shape[2]), return_sequences=True))
model.add(Dense(trainX_ss.shape[2]))

model.compile(optimizer=optimizer, loss='mse')

model.summary()

# Stop when val_loss has not improved for 5 epochs and restore the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# NOTE(review): the test split is also used as validation data, so early stopping
# is tuned on the same data the RMSE below is reported on.
history_ss = model.fit(trainX_ss, trainY_ss, epochs=50, batch_size=32, validation_data=(testX_ss, testY_ss), verbose=1, callbacks=[early_stopping])

# Training and validation loss per epoch
plt.figure(figsize=(7, 4))
plt.plot(history_ss.history['loss'], label='Training Loss', linewidth=2.5)
plt.plot(history_ss.history['val_loss'], label='Validation Loss', linewidth=2.5)
plt.xlabel('Epochs', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.legend(fontsize=20)
plt.show()

trainPredict_ss_1 = model.predict(trainX_ss)
testPredict_ss_1 = model.predict(testX_ss)


def calculate_rmse(y_true, y_pred):
    """Return the root mean squared error (RMSE) between y_true and y_pred."""
    mse = mean_squared_error(y_true, y_pred)
    return sqrt(mse)


# Reduce the per-time-step outputs to one prediction per sample by averaging
# over the time axis. (The earlier intermediate reshapes were overwritten before
# use and have been removed.)
testPredict_reshaped_ss_1 = np.mean(testPredict_ss_1, axis=1).reshape(-1)
# Targets as a flat 1-D array
testY_reshaped_ss_1 = testY_ss.flatten()

# RMSE on the test period; the values are the targets and predictions as passed
# to the model, i.e. without an inverse transform back to original units
rmse = calculate_rmse(testY_reshaped_ss_1, testPredict_reshaped_ss_1)
print("RMSE:", rmse)


In [None]:
from tensorflow.keras.layers import LSTM, Dropout, RepeatVector, TimeDistributed, Dense, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.activations import swish
import random

# Seed every random number generator involved so repeated runs are comparable
seed_value = 1
np.random.seed(seed_value)
random.seed(seed_value)
tf.random.set_seed(seed_value)

optimizer = Adam(learning_rate=0.0001)

# Same network as the no-dropout variant, with a Dropout(0.1) layer between the
# LSTM and the output layer. With return_sequences=True predict() returns an
# array of shape (samples, time steps, features).
model = Sequential()
model.add(LSTM(50, activation='tanh', input_shape=(trainX_ss.shape[1], trainX_ss.shape[2]), return_sequences=True))
model.add(Dropout(rate=0.1))
model.add(Dense(trainX_ss.shape[2]))

model.compile(optimizer=optimizer, loss='mse')

model.summary()

# Stop when val_loss has not improved for 5 epochs and restore the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# NOTE(review): the test split is also used as validation data, so early stopping
# is tuned on the same data the RMSE below is reported on.
history_ss = model.fit(trainX_ss, trainY_ss, epochs=50, batch_size=32, validation_data=(testX_ss, testY_ss), verbose=1, callbacks=[early_stopping])

# Training and validation loss per epoch
plt.figure(figsize=(7, 4))
plt.plot(history_ss.history['loss'], label='Training Loss', linewidth=2.5)
plt.plot(history_ss.history['val_loss'], label='Validation Loss', linewidth=2.5)
plt.xlabel('Epochs', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.legend(fontsize=20)
plt.show()

trainPredict_ss_1 = model.predict(trainX_ss)
testPredict_ss_1 = model.predict(testX_ss)


def calculate_rmse(y_true, y_pred):
    """Return the root mean squared error (RMSE) between y_true and y_pred."""
    mse = mean_squared_error(y_true, y_pred)
    return sqrt(mse)


# Reduce the per-time-step outputs to one prediction per sample by averaging
# over the time axis. (The earlier intermediate reshapes were overwritten before
# use and have been removed.)
testPredict_reshaped_ss_1 = np.mean(testPredict_ss_1, axis=1).reshape(-1)
# Targets as a flat 1-D array
testY_reshaped_ss_1 = testY_ss.flatten()

# RMSE on the test period; the values are the targets and predictions as passed
# to the model, i.e. without an inverse transform back to original units
rmse = calculate_rmse(testY_reshaped_ss_1, testPredict_reshaped_ss_1)
print("RMSE:", rmse)


90

In [None]:
def to_sequences(x, y, seq_size=1):
    """Build sliding-window samples for supervised sequence learning.

    Sample ``i`` is the block of ``seq_size`` consecutive rows of ``x`` starting
    at position ``i``; its target is the element of ``y`` at position
    ``i + seq_size``. Both arguments are indexed positionally through ``.iloc``.

    Returns a tuple ``(windows, targets)`` of NumPy arrays, one entry per window
    (empty arrays when ``x`` is not longer than ``seq_size``).
    """
    n_windows = len(x) - seq_size
    windows = [x.iloc[start:start + seq_size].values for start in range(n_windows)]
    targets = [y.iloc[start + seq_size] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Sequence (window) length
seq_size = 90

# Split into the periods before (train) and after (test) the 2020-09-15..2020-09-27 gap.
# .copy() gives independent frames, so writing the scaled 'ss' column below does
# not operate on a slice of data1 (no SettingWithCopyWarning, data1 stays unscaled).
train_ss = data1.loc[(data1['Tanggal'] >= '2015-05-01') & (data1['Tanggal'] <= '2020-09-14')].copy()
test_ss = data1.loc[(data1['Tanggal'] >= '2020-09-28') & (data1['Tanggal'] <= '2024-04-30')].copy()

# Min-max scaling: fit on the training period only, then apply it to the test period
scaler_ss = MinMaxScaler()
train_ss['ss'] = scaler_ss.fit_transform(train_ss[['ss']])
test_ss['ss'] = scaler_ss.transform(test_ss[['ss']])

# Turn each series into (window, next value) pairs for the LSTM
trainX_ss, trainY_ss = to_sequences(train_ss[['ss']], train_ss['ss'], seq_size)
testX_ss, testY_ss = to_sequences(test_ss[['ss']], test_ss['ss'], seq_size)

# Reshape the inputs to [samples, time steps, features]
trainX_ss = np.reshape(trainX_ss, (trainX_ss.shape[0], seq_size, 1))
testX_ss = np.reshape(testX_ss, (testX_ss.shape[0], seq_size, 1))

In [None]:
from tensorflow.keras.layers import LSTM, Dropout, RepeatVector, TimeDistributed, Dense, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.activations import swish
import random

# Seed every random number generator involved so repeated runs are comparable
seed_value = 1
np.random.seed(seed_value)
random.seed(seed_value)
tf.random.set_seed(seed_value)

optimizer = Adam(learning_rate=0.0001)

# One LSTM layer followed by a Dense layer with one unit per input feature.
# With return_sequences=True the network emits a value for every time step, so
# predict() returns an array of shape (samples, time steps, features).
model = Sequential()
model.add(LSTM(50, activation='tanh', input_shape=(trainX_ss.shape[1], trainX_ss.shape[2]), return_sequences=True))
model.add(Dense(trainX_ss.shape[2]))

model.compile(optimizer=optimizer, loss='mse')

model.summary()

# Stop when val_loss has not improved for 5 epochs and restore the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# NOTE(review): the test split is also used as validation data, so early stopping
# is tuned on the same data the RMSE below is reported on.
history_ss = model.fit(trainX_ss, trainY_ss, epochs=50, batch_size=32, validation_data=(testX_ss, testY_ss), verbose=1, callbacks=[early_stopping])

# Training and validation loss per epoch
plt.figure(figsize=(7, 4))
plt.plot(history_ss.history['loss'], label='Training Loss', linewidth=2.5)
plt.plot(history_ss.history['val_loss'], label='Validation Loss', linewidth=2.5)
plt.xlabel('Epochs', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.legend(fontsize=20)

# Number of epochs actually run (early stopping may end training before 50)
num_epochs = len(history_ss.history['loss'])

# Put the x-axis ticks at multiples of 5 up to the number of epochs
plt.xticks(np.arange(0, num_epochs + 1, step=5))

plt.show()

trainPredict_ss_1 = model.predict(trainX_ss)
testPredict_ss_1 = model.predict(testX_ss)


def calculate_rmse(y_true, y_pred):
    """Return the root mean squared error (RMSE) between y_true and y_pred."""
    mse = mean_squared_error(y_true, y_pred)
    return sqrt(mse)


# Reduce the per-time-step outputs to one prediction per sample by averaging
# over the time axis. (The earlier intermediate reshapes were overwritten before
# use and have been removed.)
testPredict_reshaped_ss_1 = np.mean(testPredict_ss_1, axis=1).reshape(-1)
# Targets as a flat 1-D array
testY_reshaped_ss_1 = testY_ss.flatten()

# RMSE on the test period; the values are the targets and predictions as passed
# to the model, i.e. without an inverse transform back to original units
rmse = calculate_rmse(testY_reshaped_ss_1, testPredict_reshaped_ss_1)
print("RMSE:", rmse)


In [None]:
from tensorflow.keras.layers import LSTM, Dropout, RepeatVector, TimeDistributed, Dense, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.activations import swish
import random

# Seed every random number generator involved so repeated runs are comparable
seed_value = 1
np.random.seed(seed_value)
random.seed(seed_value)
tf.random.set_seed(seed_value)

optimizer = Adam(learning_rate=0.0001)

# Same network as the no-dropout variant, with a Dropout(0.1) layer between the
# LSTM and the output layer. With return_sequences=True predict() returns an
# array of shape (samples, time steps, features).
model = Sequential()
model.add(LSTM(50, activation='tanh', input_shape=(trainX_ss.shape[1], trainX_ss.shape[2]), return_sequences=True))
model.add(Dropout(rate=0.1))
model.add(Dense(trainX_ss.shape[2]))

model.compile(optimizer=optimizer, loss='mse')

model.summary()

# Stop when val_loss has not improved for 5 epochs and restore the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# NOTE(review): the test split is also used as validation data, so early stopping
# is tuned on the same data the RMSE below is reported on.
history_ss = model.fit(trainX_ss, trainY_ss, epochs=50, batch_size=32, validation_data=(testX_ss, testY_ss), verbose=1, callbacks=[early_stopping])

# Training and validation loss per epoch
plt.figure(figsize=(7, 4))
plt.plot(history_ss.history['loss'], label='Training Loss', linewidth=2.5)
plt.plot(history_ss.history['val_loss'], label='Validation Loss', linewidth=2.5)
plt.xlabel('Epochs', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.legend(fontsize=20)

# Number of epochs actually run (early stopping may end training before 50)
num_epochs = len(history_ss.history['loss'])

# Put the x-axis ticks at multiples of 5 up to the number of epochs
plt.xticks(np.arange(0, num_epochs + 1, step=5))

plt.show()

trainPredict_ss_1 = model.predict(trainX_ss)
testPredict_ss_1 = model.predict(testX_ss)


def calculate_rmse(y_true, y_pred):
    """Return the root mean squared error (RMSE) between y_true and y_pred."""
    mse = mean_squared_error(y_true, y_pred)
    return sqrt(mse)


# Reduce the per-time-step outputs to one prediction per sample by averaging
# over the time axis. (The earlier intermediate reshapes were overwritten before
# use and have been removed.)
testPredict_reshaped_ss_1 = np.mean(testPredict_ss_1, axis=1).reshape(-1)
# Targets as a flat 1-D array
testY_reshaped_ss_1 = testY_ss.flatten()

# RMSE on the test period; the values are the targets and predictions as passed
# to the model, i.e. without an inverse transform back to original units
rmse = calculate_rmse(testY_reshaped_ss_1, testPredict_reshaped_ss_1)
print("RMSE:", rmse)


180

In [None]:
def to_sequences(x, y, seq_size=1):
    """Build sliding-window samples for supervised sequence learning.

    Sample ``i`` is the block of ``seq_size`` consecutive rows of ``x`` starting
    at position ``i``; its target is the element of ``y`` at position
    ``i + seq_size``. Both arguments are indexed positionally through ``.iloc``.

    Returns a tuple ``(windows, targets)`` of NumPy arrays, one entry per window
    (empty arrays when ``x`` is not longer than ``seq_size``).
    """
    n_windows = len(x) - seq_size
    windows = [x.iloc[start:start + seq_size].values for start in range(n_windows)]
    targets = [y.iloc[start + seq_size] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Sequence (window) length
seq_size = 180

# Split into the periods before (train) and after (test) the 2020-09-15..2020-09-27 gap.
# .copy() gives independent frames, so writing the scaled 'ss' column below does
# not operate on a slice of data1 (no SettingWithCopyWarning, data1 stays unscaled).
train_ss = data1.loc[(data1['Tanggal'] >= '2015-05-01') & (data1['Tanggal'] <= '2020-09-14')].copy()
test_ss = data1.loc[(data1['Tanggal'] >= '2020-09-28') & (data1['Tanggal'] <= '2024-04-30')].copy()

# Min-max scaling: fit on the training period only, then apply it to the test period
scaler_ss = MinMaxScaler()
train_ss['ss'] = scaler_ss.fit_transform(train_ss[['ss']])
test_ss['ss'] = scaler_ss.transform(test_ss[['ss']])

# Turn each series into (window, next value) pairs for the LSTM
trainX_ss, trainY_ss = to_sequences(train_ss[['ss']], train_ss['ss'], seq_size)
testX_ss, testY_ss = to_sequences(test_ss[['ss']], test_ss['ss'], seq_size)

# Reshape the inputs to [samples, time steps, features]
trainX_ss = np.reshape(trainX_ss, (trainX_ss.shape[0], seq_size, 1))
testX_ss = np.reshape(testX_ss, (testX_ss.shape[0], seq_size, 1))

In [None]:
from tensorflow.keras.layers import LSTM, Dropout, RepeatVector, TimeDistributed, Dense, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.activations import swish
import random

# Seed every random number generator involved so repeated runs are comparable
seed_value = 1
np.random.seed(seed_value)
random.seed(seed_value)
tf.random.set_seed(seed_value)

optimizer = Adam(learning_rate=0.0001)

# One LSTM layer followed by a Dense layer with one unit per input feature.
# With return_sequences=True the network emits a value for every time step, so
# predict() returns an array of shape (samples, time steps, features).
model = Sequential()
model.add(LSTM(50, activation='tanh', input_shape=(trainX_ss.shape[1], trainX_ss.shape[2]), return_sequences=True))
model.add(Dense(trainX_ss.shape[2]))

model.compile(optimizer=optimizer, loss='mse')

model.summary()

# Stop when val_loss has not improved for 5 epochs and restore the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# NOTE(review): the test split is also used as validation data, so early stopping
# is tuned on the same data the RMSE below is reported on.
history_ss = model.fit(trainX_ss, trainY_ss, epochs=50, batch_size=32, validation_data=(testX_ss, testY_ss), verbose=1, callbacks=[early_stopping])

# Training and validation loss per epoch
plt.figure(figsize=(7, 4))
plt.plot(history_ss.history['loss'], label='Training Loss', linewidth=2.5)
plt.plot(history_ss.history['val_loss'], label='Validation Loss', linewidth=2.5)
plt.xlabel('Epochs', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.legend(fontsize=20)
plt.show()

trainPredict_ss_1 = model.predict(trainX_ss)
testPredict_ss_1 = model.predict(testX_ss)


def calculate_rmse(y_true, y_pred):
    """Return the root mean squared error (RMSE) between y_true and y_pred."""
    mse = mean_squared_error(y_true, y_pred)
    return sqrt(mse)


# Reduce the per-time-step outputs to one prediction per sample by averaging
# over the time axis. (The earlier intermediate reshapes were overwritten before
# use and have been removed.)
testPredict_reshaped_ss_1 = np.mean(testPredict_ss_1, axis=1).reshape(-1)
# Targets as a flat 1-D array
testY_reshaped_ss_1 = testY_ss.flatten()

# RMSE on the test period; the values are the targets and predictions as passed
# to the model, i.e. without an inverse transform back to original units
rmse = calculate_rmse(testY_reshaped_ss_1, testPredict_reshaped_ss_1)
print("RMSE:", rmse)


In [None]:
from tensorflow.keras.layers import LSTM, Dropout, RepeatVector, TimeDistributed, Dense, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.activations import swish
import random

# Seed every random number generator involved so repeated runs are comparable
seed_value = 1
np.random.seed(seed_value)
random.seed(seed_value)
tf.random.set_seed(seed_value)

optimizer = Adam(learning_rate=0.0001)

# Same network as the no-dropout variant, with a Dropout(0.1) layer between the
# LSTM and the output layer. With return_sequences=True predict() returns an
# array of shape (samples, time steps, features).
model = Sequential()
model.add(LSTM(50, activation='tanh', input_shape=(trainX_ss.shape[1], trainX_ss.shape[2]), return_sequences=True))
model.add(Dropout(rate=0.1))
model.add(Dense(trainX_ss.shape[2]))

model.compile(optimizer=optimizer, loss='mse')

model.summary()

# Stop when val_loss has not improved for 5 epochs and restore the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# NOTE(review): the test split is also used as validation data, so early stopping
# is tuned on the same data the RMSE below is reported on.
history_ss = model.fit(trainX_ss, trainY_ss, epochs=50, batch_size=32, validation_data=(testX_ss, testY_ss), verbose=1, callbacks=[early_stopping])

# Training and validation loss per epoch
plt.figure(figsize=(7, 4))
plt.plot(history_ss.history['loss'], label='Training Loss', linewidth=2.5)
plt.plot(history_ss.history['val_loss'], label='Validation Loss', linewidth=2.5)
plt.xlabel('Epochs', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.legend(fontsize=20)
plt.show()

trainPredict_ss_1 = model.predict(trainX_ss)
testPredict_ss_1 = model.predict(testX_ss)


def calculate_rmse(y_true, y_pred):
    """Return the root mean squared error (RMSE) between y_true and y_pred."""
    mse = mean_squared_error(y_true, y_pred)
    return sqrt(mse)


# Reduce the per-time-step outputs to one prediction per sample by averaging
# over the time axis. (The earlier intermediate reshapes were overwritten before
# use and have been removed.)
testPredict_reshaped_ss_1 = np.mean(testPredict_ss_1, axis=1).reshape(-1)
# Targets as a flat 1-D array
testY_reshaped_ss_1 = testY_ss.flatten()

# RMSE on the test period; the values are the targets and predictions as passed
# to the model, i.e. without an inverse transform back to original units
rmse = calculate_rmse(testY_reshaped_ss_1, testPredict_reshaped_ss_1)
print("RMSE:", rmse)


In [None]:
import matplotlib.pyplot as plt

# Loss curves of the most recent training run stored in history_ss
plt.figure(figsize=(7, 4))

# Thicker lines for both curves
for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history_ss.history[history_key], label=curve_label, linewidth=2.5)

# Axis labels, tick labels and legend share one font size
plt.xlabel('Epochs', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.legend(fontsize=20)

plt.show()


In [None]:
# Save the model currently bound to `model` to an HDF5 file.
# NOTE(review): every training cell reassigns `model`, so the saved network is
# whichever training cell ran last - confirm it is the intended best configuration.
model.save('model_lstm_ss_best.h5')

Model terbaik untuk imputasi


In [None]:
# Reload the saved 'ss' model from disk
from tensorflow.keras.models import load_model
model_ss = load_model('model_lstm_ss_best.h5')

In [None]:
from tensorflow.keras.models import load_model
model_ss = load_model('model_lstm_ss_best.h5')

# Rows whose 'ss' values are to be imputed. .copy() so that writing the
# predictions below does not operate on a slice of data1.
missing_data = data1.loc[(data1['Tanggal'] >= '2020-09-15') & (data1['Tanggal'] <= '2020-09-27')].copy()

# Data before and after the missing period
data_before_missing = data1.loc[data1['Tanggal'] < '2020-09-15']
data_after_missing = data1.loc[data1['Tanggal'] > '2020-09-27']

# Before and after combined (kept for inspection; not used for the forecast)
data_combined = pd.concat([data_before_missing, data_after_missing])

# NOTE(review): seq_size should match the window length the saved model was
# trained with - confirm which training cell produced model_lstm_ss_best.h5.
seq_size = 90

# Seed window: the last seq_size observations directly BEFORE the gap.
# (Previously the seed was the last window of the whole concatenated series,
# i.e. the end of the data rather than the days leading up to the gap.)
seed_frame = data_before_missing[['ss']].tail(seq_size)
if len(seed_frame) < seq_size:
    raise ValueError(
        f"Need at least {seq_size} rows before the missing period, got {len(seed_frame)}"
    )

# The model was trained on MinMax-scaled values, so the seed must be scaled with
# the same scaler before it is fed to the network; the predictions are
# inverse-transformed further down.
future_sequence = scaler_ss.transform(seed_frame).reshape(1, seq_size, 1)

# Number of points to predict
n_points = len(missing_data)

# Recursive one-step-ahead forecast across the gap
predicted_missing_data = []
for _ in range(n_points):
    future_pred = model_ss.predict(future_sequence)
    # Use the output of the last time step as the next value
    next_step = future_pred[:, -1:, :]
    predicted_missing_data.append(next_step[0, 0, 0])

    # Slide the window: drop the oldest value, append the new prediction
    future_sequence = np.concatenate([future_sequence[:, 1:, :], next_step], axis=1)

# Transform the predictions back to the original scale
predicted_missing_data = scaler_ss.inverse_transform(np.array(predicted_missing_data).reshape(-1, 1))

# Write the predictions into the gap rows (flattened to 1-D for column assignment)
missing_data['ss'] = predicted_missing_data.ravel()

# Rebuild the full series with the imputed gap
data1_imputed = pd.concat([data_before_missing, missing_data, data_after_missing])
data1_imputed = data1_imputed.sort_values(by='Tanggal').reset_index(drop=True)
imputed_data_only = data1_imputed.loc[(data1_imputed['Tanggal'] >= '2020-09-15') & (data1_imputed['Tanggal'] <= '2020-09-27')]
print("Data yang diimputasi pada rentang tanggal 2020-09-15 hingga 2020-09-27:")
print(imputed_data_only)



In [None]:
# Save the series with the imputed 'ss' values to an Excel file.
# The frame written is data1_imputed (the imputation result); previously `data`,
# the unmodified input sheet, was written, so the imputed values never reached
# the file. Forward slashes avoid the invalid escape sequences ('\M', '\S',
# '\i') of the backslash path.
excel_path = 'G:/My Drive/Skripsi/imputasi_ss.xlsx'  # output file name and location
data1_imputed.to_excel(excel_path, sheet_name='Durasi Sinar Matahari')

print("Data telah disimpan ke Excel di lokasi:", excel_path)

LSTM - Imputasi RR

In [None]:
# Load the interpolated data from the Excel file again for the 'rr' model
data_path = 'G:/My Drive/Skripsi/Hasil Interpolasi.xlsx'

data1 = pd.read_excel(data_path)

data1['Tanggal'] = pd.to_datetime(data1['Tanggal'])

30

In [None]:
def to_sequences(x, y, seq_size=1):
    """Cut a series into overlapping windows plus the value following each window.

    Args:
        x: pandas object sliced positionally with ``.iloc``; each window is
            ``x.iloc[i:i + seq_size].values``.
        y: pandas object providing the targets; the target of window ``i`` is
            ``y.iloc[i + seq_size]``.
        seq_size: number of consecutive rows per window.

    Returns:
        Tuple ``(windows, targets)`` of NumPy arrays with ``len(x) - seq_size``
        entries each (both empty when ``x`` is not longer than ``seq_size``).
    """
    n_windows = len(x) - seq_size
    windows = [x.iloc[start:start + seq_size].values for start in range(n_windows)]
    targets = [y.iloc[start + seq_size] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Window length (number of consecutive observations per input sample)
seq_size = 30

# Train on 2015-05-01 .. 2020-09-14 and test on 2020-09-28 .. 2024-04-30.
# .copy() yields independent frames, so writing the scaled 'rr' column below does
# not raise SettingWithCopyWarning and clearly leaves data1 untouched.
train_rr = data1.loc[(data1['Tanggal'] >= '2015-05-01') & (data1['Tanggal'] <= '2020-09-14')].copy()
test_rr = data1.loc[(data1['Tanggal'] >= '2020-09-28') & (data1['Tanggal'] <= '2024-04-30')].copy()

# Fit the min-max scaler on the training period only, then reuse it for the test period
scaler_rr = MinMaxScaler()
train_rr['rr'] = scaler_rr.fit_transform(train_rr[['rr']])
test_rr['rr'] = scaler_rr.transform(test_rr[['rr']])

# Turn both splits into (window, next value) pairs for the LSTM
trainX_rr, trainY_rr = to_sequences(train_rr[['rr']], train_rr['rr'], seq_size)
testX_rr, testY_rr = to_sequences(test_rr[['rr']], test_rr['rr'], seq_size)

# Make the input layout explicit: [samples, time steps, features]
trainX_rr = np.reshape(trainX_rr, (trainX_rr.shape[0], seq_size, 1))
testX_rr = np.reshape(testX_rr, (testX_rr.shape[0], seq_size, 1))

In [None]:
# Optimizer
optimizer = Adam(learning_rate=0.0001)

# Baseline model: one (unidirectional) LSTM layer followed by a Dense(1) head.
# With return_sequences=True the LSTM hands every timestep to the Dense layer,
# so predictions come out as (samples, seq_size, 1).
model = Sequential()
model.add(LSTM(50, activation='tanh', input_shape=(trainX_rr.shape[1], trainX_rr.shape[2]), return_sequences=True))
model.add(Dense(1))

model.compile(optimizer=optimizer, loss='mse')
model.summary()

# Stop once val_loss has not improved for 5 epochs and restore the best weights.
# NOTE(review): the test windows double as validation data, so the RMSE printed
# below is not an independent estimate.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history_rr = model.fit(
    trainX_rr, trainY_rr,
    epochs=50, batch_size=32,
    validation_data=(testX_rr, testY_rr),
    verbose=1, callbacks=[early_stopping],
)

# Training vs. validation loss per epoch
plt.figure(figsize=(7, 4))
plt.plot(history_rr.history['loss'], label='Training Loss', linewidth=2.5)
plt.plot(history_rr.history['val_loss'], label='Validation Loss', linewidth=2.5)
plt.xlabel('Epochs', fontsize=20)
plt.ylabel('Loss', fontsize=20)

# One x tick every 5 epochs; positions and font size are set in one call so the
# size applies to the final set of ticks.
num_epochs = len(history_rr.history['loss'])
plt.xticks(np.arange(0, num_epochs + 1, step=5), fontsize=20)
plt.yticks(fontsize=20)
plt.legend(fontsize=20)
plt.show()

trainPredict_rr = model.predict(trainX_rr)
testPredict_rr = model.predict(testX_rr)


def calculate_rmse(y_true, y_pred):
    """Return the root mean squared error between y_true and y_pred."""
    return sqrt(mean_squared_error(y_true, y_pred))


# Average the per-timestep outputs into a single prediction per sample
testPredict_reshaped_rr = np.mean(testPredict_rr, axis=1).reshape(-1)
testY_reshaped_rr = testY_rr.flatten()
rmse = calculate_rmse(testY_reshaped_rr, testPredict_reshaped_rr)
print("RMSE:", rmse)

In [None]:
# Optimizer
optimizer = Adam(learning_rate=0.0001)

# Dropout variant: one (unidirectional) LSTM layer, Dropout(0.1), then a Dense(1) head.
# With return_sequences=True the LSTM hands every timestep to the following
# layers, so predictions come out as (samples, seq_size, 1).
model = Sequential()
model.add(LSTM(50, activation='tanh', input_shape=(trainX_rr.shape[1], trainX_rr.shape[2]), return_sequences=True))
model.add(Dropout(rate=0.1))
model.add(Dense(1))

model.compile(optimizer=optimizer, loss='mse')
model.summary()

# Stop once val_loss has not improved for 5 epochs and restore the best weights.
# NOTE(review): the test windows double as validation data, so the RMSE printed
# below is not an independent estimate.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history_rr = model.fit(
    trainX_rr, trainY_rr,
    epochs=50, batch_size=32,
    validation_data=(testX_rr, testY_rr),
    verbose=1, callbacks=[early_stopping],
)

# Training vs. validation loss per epoch
plt.figure(figsize=(7, 4))
plt.plot(history_rr.history['loss'], label='Training Loss', linewidth=2.5)
plt.plot(history_rr.history['val_loss'], label='Validation Loss', linewidth=2.5)
plt.xlabel('Epochs', fontsize=20)
plt.ylabel('Loss', fontsize=20)

# One x tick every 5 epochs; positions and font size are set in one call so the
# size applies to the final set of ticks.
num_epochs = len(history_rr.history['loss'])
plt.xticks(np.arange(0, num_epochs + 1, step=5), fontsize=20)
plt.yticks(fontsize=20)
plt.legend(fontsize=20)
plt.show()

trainPredict_rr = model.predict(trainX_rr)
testPredict_rr = model.predict(testX_rr)


def calculate_rmse(y_true, y_pred):
    """Return the root mean squared error between y_true and y_pred."""
    return sqrt(mean_squared_error(y_true, y_pred))


# Average the per-timestep outputs into a single prediction per sample
testPredict_reshaped_rr = np.mean(testPredict_rr, axis=1).reshape(-1)
testY_reshaped_rr = testY_rr.flatten()
rmse = calculate_rmse(testY_reshaped_rr, testPredict_reshaped_rr)
print("RMSE:", rmse)

In [None]:
# Optimizer
optimizer = Adam(learning_rate=0.0001)

# L2 variant: one (unidirectional) LSTM layer and a Dense(1) head, both with an
# L2 kernel penalty of 0.01.
# With return_sequences=True the LSTM hands every timestep to the Dense layer,
# so predictions come out as (samples, seq_size, 1).
model = Sequential()
model.add(LSTM(50, activation='tanh', input_shape=(trainX_rr.shape[1], trainX_rr.shape[2]), kernel_regularizer=l2(0.01), return_sequences=True))
model.add(Dense(1, kernel_regularizer=l2(0.01)))

model.compile(optimizer=optimizer, loss='mse')
model.summary()

# Stop once val_loss has not improved for 5 epochs and restore the best weights.
# NOTE(review): the test windows double as validation data, so the RMSE printed
# below is not an independent estimate.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history_rr = model.fit(
    trainX_rr, trainY_rr,
    epochs=50, batch_size=32,
    validation_data=(testX_rr, testY_rr),
    verbose=1, callbacks=[early_stopping],
)

# Training vs. validation loss per epoch
plt.figure(figsize=(7, 4))
plt.plot(history_rr.history['loss'], label='Training Loss', linewidth=2.5)
plt.plot(history_rr.history['val_loss'], label='Validation Loss', linewidth=2.5)
plt.xlabel('Epochs', fontsize=20)
plt.ylabel('Loss', fontsize=20)

# One x tick every 5 epochs; positions and font size are set in one call so the
# size applies to the final set of ticks.
num_epochs = len(history_rr.history['loss'])
plt.xticks(np.arange(0, num_epochs + 1, step=5), fontsize=20)
plt.yticks(fontsize=20)
plt.legend(fontsize=20)
plt.show()

trainPredict_rr = model.predict(trainX_rr)
testPredict_rr = model.predict(testX_rr)


def calculate_rmse(y_true, y_pred):
    """Return the root mean squared error between y_true and y_pred."""
    return sqrt(mean_squared_error(y_true, y_pred))


# Average the per-timestep outputs into a single prediction per sample
testPredict_reshaped_rr = np.mean(testPredict_rr, axis=1).reshape(-1)
testY_reshaped_rr = testY_rr.flatten()
rmse = calculate_rmse(testY_reshaped_rr, testPredict_reshaped_rr)
print("RMSE:", rmse)



Panjang sekuens (seq_size) = 90

In [None]:
def to_sequences(x, y, seq_size=1):
    """Cut a series into overlapping windows plus the value following each window.

    Args:
        x: pandas object sliced positionally with ``.iloc``; each window is
            ``x.iloc[i:i + seq_size].values``.
        y: pandas object providing the targets; the target of window ``i`` is
            ``y.iloc[i + seq_size]``.
        seq_size: number of consecutive rows per window.

    Returns:
        Tuple ``(windows, targets)`` of NumPy arrays with ``len(x) - seq_size``
        entries each (both empty when ``x`` is not longer than ``seq_size``).
    """
    n_windows = len(x) - seq_size
    windows = [x.iloc[start:start + seq_size].values for start in range(n_windows)]
    targets = [y.iloc[start + seq_size] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Window length (number of consecutive observations per input sample)
seq_size = 90

# Train on 2015-05-01 .. 2020-09-14 and test on 2020-09-28 .. 2024-04-30.
# .copy() yields independent frames, so writing the scaled 'rr' column below does
# not raise SettingWithCopyWarning and clearly leaves data1 untouched.
train_rr = data1.loc[(data1['Tanggal'] >= '2015-05-01') & (data1['Tanggal'] <= '2020-09-14')].copy()
test_rr = data1.loc[(data1['Tanggal'] >= '2020-09-28') & (data1['Tanggal'] <= '2024-04-30')].copy()

# Fit the min-max scaler on the training period only, then reuse it for the test period
scaler_rr = MinMaxScaler()
train_rr['rr'] = scaler_rr.fit_transform(train_rr[['rr']])
test_rr['rr'] = scaler_rr.transform(test_rr[['rr']])

# Turn both splits into (window, next value) pairs for the LSTM
trainX_rr, trainY_rr = to_sequences(train_rr[['rr']], train_rr['rr'], seq_size)
testX_rr, testY_rr = to_sequences(test_rr[['rr']], test_rr['rr'], seq_size)

# Make the input layout explicit: [samples, time steps, features]
trainX_rr = np.reshape(trainX_rr, (trainX_rr.shape[0], seq_size, 1))
testX_rr = np.reshape(testX_rr, (testX_rr.shape[0], seq_size, 1))

In [None]:
# Optimizer
optimizer = Adam(learning_rate=0.0001)

# Baseline model: one (unidirectional) LSTM layer followed by a Dense(1) head.
# With return_sequences=True the LSTM hands every timestep to the Dense layer,
# so predictions come out as (samples, seq_size, 1).
model = Sequential()
model.add(LSTM(50, activation='tanh', input_shape=(trainX_rr.shape[1], trainX_rr.shape[2]), return_sequences=True))
model.add(Dense(1))

model.compile(optimizer=optimizer, loss='mse')
model.summary()

# Stop once val_loss has not improved for 5 epochs and restore the best weights.
# NOTE(review): the test windows double as validation data, so the RMSE printed
# below is not an independent estimate.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history_rr = model.fit(
    trainX_rr, trainY_rr,
    epochs=50, batch_size=32,
    validation_data=(testX_rr, testY_rr),
    verbose=1, callbacks=[early_stopping],
)

# Training vs. validation loss per epoch
plt.figure(figsize=(7, 4))
plt.plot(history_rr.history['loss'], label='Training Loss', linewidth=2.5)
plt.plot(history_rr.history['val_loss'], label='Validation Loss', linewidth=2.5)
plt.xlabel('Epochs', fontsize=20)
plt.ylabel('Loss', fontsize=20)

# One x tick every 5 epochs; positions and font size are set in one call so the
# size applies to the final set of ticks.
num_epochs = len(history_rr.history['loss'])
plt.xticks(np.arange(0, num_epochs + 1, step=5), fontsize=20)
plt.yticks(fontsize=20)
plt.legend(fontsize=20)
plt.show()

trainPredict_rr = model.predict(trainX_rr)
testPredict_rr = model.predict(testX_rr)


def calculate_rmse(y_true, y_pred):
    """Return the root mean squared error between y_true and y_pred."""
    return sqrt(mean_squared_error(y_true, y_pred))


# Average the per-timestep outputs into a single prediction per sample
testPredict_reshaped_rr = np.mean(testPredict_rr, axis=1).reshape(-1)
testY_reshaped_rr = testY_rr.flatten()
rmse = calculate_rmse(testY_reshaped_rr, testPredict_reshaped_rr)
print("RMSE:", rmse)

In [None]:
# Optimizer
optimizer = Adam(learning_rate=0.0001)

# Dropout variant: one (unidirectional) LSTM layer, Dropout(0.2), then a Dense(1) head.
# With return_sequences=True the LSTM hands every timestep to the following
# layers, so predictions come out as (samples, seq_size, 1).
model = Sequential()
model.add(LSTM(50, activation='tanh', input_shape=(trainX_rr.shape[1], trainX_rr.shape[2]), return_sequences=True))
model.add(Dropout(rate=0.2))
model.add(Dense(1))

model.compile(optimizer=optimizer, loss='mse')
model.summary()

# Stop once val_loss has not improved for 5 epochs and restore the best weights.
# NOTE(review): the test windows double as validation data, so the RMSE printed
# below is not an independent estimate.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history_rr = model.fit(
    trainX_rr, trainY_rr,
    epochs=50, batch_size=32,
    validation_data=(testX_rr, testY_rr),
    verbose=1, callbacks=[early_stopping],
)

# Training vs. validation loss per epoch
plt.figure(figsize=(7, 4))
plt.plot(history_rr.history['loss'], label='Training Loss', linewidth=2.5)
plt.plot(history_rr.history['val_loss'], label='Validation Loss', linewidth=2.5)
plt.xlabel('Epochs', fontsize=20)
plt.ylabel('Loss', fontsize=20)

# One x tick every 5 epochs; positions and font size are set in one call so the
# size applies to the final set of ticks.
num_epochs = len(history_rr.history['loss'])
plt.xticks(np.arange(0, num_epochs + 1, step=5), fontsize=20)
plt.yticks(fontsize=20)
plt.legend(fontsize=20)
plt.show()

trainPredict_rr = model.predict(trainX_rr)
testPredict_rr = model.predict(testX_rr)


def calculate_rmse(y_true, y_pred):
    """Return the root mean squared error between y_true and y_pred."""
    return sqrt(mean_squared_error(y_true, y_pred))


# Average the per-timestep outputs into a single prediction per sample
testPredict_reshaped_rr = np.mean(testPredict_rr, axis=1).reshape(-1)
testY_reshaped_rr = testY_rr.flatten()
rmse = calculate_rmse(testY_reshaped_rr, testPredict_reshaped_rr)
print("RMSE:", rmse)

In [None]:
# Optimizer
optimizer = Adam(learning_rate=0.0001)

# L2 variant: one (unidirectional) LSTM layer and a Dense(1) head, both with an
# L2 kernel penalty of 0.01.
# With return_sequences=True the LSTM hands every timestep to the Dense layer,
# so predictions come out as (samples, seq_size, 1).
model = Sequential()
model.add(LSTM(50, activation='tanh', input_shape=(trainX_rr.shape[1], trainX_rr.shape[2]), kernel_regularizer=l2(0.01), return_sequences=True))
model.add(Dense(1, kernel_regularizer=l2(0.01)))

model.compile(optimizer=optimizer, loss='mse')
model.summary()

# Stop once val_loss has not improved for 5 epochs and restore the best weights.
# NOTE(review): the test windows double as validation data, so the RMSE printed
# below is not an independent estimate.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history_rr = model.fit(
    trainX_rr, trainY_rr,
    epochs=50, batch_size=32,
    validation_data=(testX_rr, testY_rr),
    verbose=1, callbacks=[early_stopping],
)

# Training vs. validation loss per epoch
plt.figure(figsize=(7, 4))
plt.plot(history_rr.history['loss'], label='Training Loss', linewidth=2.5)
plt.plot(history_rr.history['val_loss'], label='Validation Loss', linewidth=2.5)
plt.xlabel('Epochs', fontsize=20)
plt.ylabel('Loss', fontsize=20)

# One x tick every 5 epochs; positions and font size are set in one call so the
# size applies to the final set of ticks.
num_epochs = len(history_rr.history['loss'])
plt.xticks(np.arange(0, num_epochs + 1, step=5), fontsize=20)
plt.yticks(fontsize=20)
plt.legend(fontsize=20)
plt.show()

trainPredict_rr = model.predict(trainX_rr)
testPredict_rr = model.predict(testX_rr)


def calculate_rmse(y_true, y_pred):
    """Return the root mean squared error between y_true and y_pred."""
    return sqrt(mean_squared_error(y_true, y_pred))


# Average the per-timestep outputs into a single prediction per sample
testPredict_reshaped_rr = np.mean(testPredict_rr, axis=1).reshape(-1)
testY_reshaped_rr = testY_rr.flatten()
rmse = calculate_rmse(testY_reshaped_rr, testPredict_reshaped_rr)
print("RMSE:", rmse)



Panjang sekuens (seq_size) = 180

In [None]:
def to_sequences(x, y, seq_size=1):
    """Cut a series into overlapping windows plus the value following each window.

    Args:
        x: pandas object sliced positionally with ``.iloc``; each window is
            ``x.iloc[i:i + seq_size].values``.
        y: pandas object providing the targets; the target of window ``i`` is
            ``y.iloc[i + seq_size]``.
        seq_size: number of consecutive rows per window.

    Returns:
        Tuple ``(windows, targets)`` of NumPy arrays with ``len(x) - seq_size``
        entries each (both empty when ``x`` is not longer than ``seq_size``).
    """
    n_windows = len(x) - seq_size
    windows = [x.iloc[start:start + seq_size].values for start in range(n_windows)]
    targets = [y.iloc[start + seq_size] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Window length (number of consecutive observations per input sample)
seq_size = 180

# Train on 2015-05-01 .. 2020-09-14 and test on 2020-09-28 .. 2024-04-30.
# .copy() yields independent frames, so writing the scaled 'rr' column below does
# not raise SettingWithCopyWarning and clearly leaves data1 untouched.
train_rr = data1.loc[(data1['Tanggal'] >= '2015-05-01') & (data1['Tanggal'] <= '2020-09-14')].copy()
test_rr = data1.loc[(data1['Tanggal'] >= '2020-09-28') & (data1['Tanggal'] <= '2024-04-30')].copy()

# Fit the min-max scaler on the training period only, then reuse it for the test period
scaler_rr = MinMaxScaler()
train_rr['rr'] = scaler_rr.fit_transform(train_rr[['rr']])
test_rr['rr'] = scaler_rr.transform(test_rr[['rr']])

# Turn both splits into (window, next value) pairs for the LSTM
trainX_rr, trainY_rr = to_sequences(train_rr[['rr']], train_rr['rr'], seq_size)
testX_rr, testY_rr = to_sequences(test_rr[['rr']], test_rr['rr'], seq_size)

# Make the input layout explicit: [samples, time steps, features]
trainX_rr = np.reshape(trainX_rr, (trainX_rr.shape[0], seq_size, 1))
testX_rr = np.reshape(testX_rr, (testX_rr.shape[0], seq_size, 1))

In [None]:
# Optimizer
optimizer = Adam(learning_rate=0.0001)

# Baseline model: one (unidirectional) LSTM layer followed by a Dense(1) head.
# With return_sequences=True the LSTM hands every timestep to the Dense layer,
# so predictions come out as (samples, seq_size, 1).
model = Sequential()
model.add(LSTM(50, activation='tanh', input_shape=(trainX_rr.shape[1], trainX_rr.shape[2]), return_sequences=True))
model.add(Dense(1))

model.compile(optimizer=optimizer, loss='mse')
model.summary()

# Stop once val_loss has not improved for 5 epochs and restore the best weights.
# NOTE(review): the test windows double as validation data, so the RMSE printed
# below is not an independent estimate.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history_rr = model.fit(
    trainX_rr, trainY_rr,
    epochs=50, batch_size=32,
    validation_data=(testX_rr, testY_rr),
    verbose=1, callbacks=[early_stopping],
)

# Training vs. validation loss per epoch
plt.figure(figsize=(7, 4))
plt.plot(history_rr.history['loss'], label='Training Loss', linewidth=2.5)
plt.plot(history_rr.history['val_loss'], label='Validation Loss', linewidth=2.5)
plt.xlabel('Epochs', fontsize=20)
plt.ylabel('Loss', fontsize=20)

# One x tick every 5 epochs; positions and font size are set in one call so the
# size applies to the final set of ticks.
num_epochs = len(history_rr.history['loss'])
plt.xticks(np.arange(0, num_epochs + 1, step=5), fontsize=20)
plt.yticks(fontsize=20)
plt.legend(fontsize=20)
plt.show()

trainPredict_rr = model.predict(trainX_rr)
testPredict_rr = model.predict(testX_rr)


def calculate_rmse(y_true, y_pred):
    """Return the root mean squared error between y_true and y_pred."""
    return sqrt(mean_squared_error(y_true, y_pred))


# Average the per-timestep outputs into a single prediction per sample
testPredict_reshaped_rr = np.mean(testPredict_rr, axis=1).reshape(-1)
testY_reshaped_rr = testY_rr.flatten()
rmse = calculate_rmse(testY_reshaped_rr, testPredict_reshaped_rr)
print("RMSE:", rmse)

In [None]:
# Optimizer
optimizer = Adam(learning_rate=0.0001)

# Dropout variant: one (unidirectional) LSTM layer, Dropout(0.2), then a Dense(1) head.
# With return_sequences=True the LSTM hands every timestep to the following
# layers, so predictions come out as (samples, seq_size, 1).
model = Sequential()
model.add(LSTM(50, activation='tanh', input_shape=(trainX_rr.shape[1], trainX_rr.shape[2]), return_sequences=True))
model.add(Dropout(rate=0.2))
model.add(Dense(1))

model.compile(optimizer=optimizer, loss='mse')
model.summary()

# Stop once val_loss has not improved for 5 epochs and restore the best weights.
# NOTE(review): the test windows double as validation data, so the RMSE printed
# below is not an independent estimate.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history_rr = model.fit(
    trainX_rr, trainY_rr,
    epochs=50, batch_size=32,
    validation_data=(testX_rr, testY_rr),
    verbose=1, callbacks=[early_stopping],
)

# Training vs. validation loss per epoch
plt.figure(figsize=(7, 4))
plt.plot(history_rr.history['loss'], label='Training Loss', linewidth=2.5)
plt.plot(history_rr.history['val_loss'], label='Validation Loss', linewidth=2.5)
plt.xlabel('Epochs', fontsize=20)
plt.ylabel('Loss', fontsize=20)

# One x tick every 5 epochs; positions and font size are set in one call so the
# size applies to the final set of ticks.
num_epochs = len(history_rr.history['loss'])
plt.xticks(np.arange(0, num_epochs + 1, step=5), fontsize=20)
plt.yticks(fontsize=20)
plt.legend(fontsize=20)
plt.show()

trainPredict_rr = model.predict(trainX_rr)
testPredict_rr = model.predict(testX_rr)


def calculate_rmse(y_true, y_pred):
    """Return the root mean squared error between y_true and y_pred."""
    return sqrt(mean_squared_error(y_true, y_pred))


# Average the per-timestep outputs into a single prediction per sample
testPredict_reshaped_rr = np.mean(testPredict_rr, axis=1).reshape(-1)
testY_reshaped_rr = testY_rr.flatten()
rmse = calculate_rmse(testY_reshaped_rr, testPredict_reshaped_rr)
print("RMSE:", rmse)

In [None]:
# Optimizer
optimizer = Adam(learning_rate=0.0001)

# L2 variant: one (unidirectional) LSTM layer and a Dense(1) head, both with an
# L2 kernel penalty of 0.01.
# With return_sequences=True the LSTM hands every timestep to the Dense layer,
# so predictions come out as (samples, seq_size, 1).
model = Sequential()
model.add(LSTM(50, activation='tanh', input_shape=(trainX_rr.shape[1], trainX_rr.shape[2]), kernel_regularizer=l2(0.01), return_sequences=True))
model.add(Dense(1, kernel_regularizer=l2(0.01)))

model.compile(optimizer=optimizer, loss='mse')
model.summary()

# Stop once val_loss has not improved for 5 epochs and restore the best weights.
# NOTE(review): the test windows double as validation data, so the RMSE printed
# below is not an independent estimate.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history_rr = model.fit(
    trainX_rr, trainY_rr,
    epochs=50, batch_size=32,
    validation_data=(testX_rr, testY_rr),
    verbose=1, callbacks=[early_stopping],
)

# Training vs. validation loss per epoch
plt.figure(figsize=(7, 4))
plt.plot(history_rr.history['loss'], label='Training Loss', linewidth=2.5)
plt.plot(history_rr.history['val_loss'], label='Validation Loss', linewidth=2.5)
plt.xlabel('Epochs', fontsize=20)
plt.ylabel('Loss', fontsize=20)

# One x tick every 5 epochs; positions and font size are set in one call so the
# size applies to the final set of ticks.
num_epochs = len(history_rr.history['loss'])
plt.xticks(np.arange(0, num_epochs + 1, step=5), fontsize=20)
plt.yticks(fontsize=20)
plt.legend(fontsize=20)
plt.show()

trainPredict_rr = model.predict(trainX_rr)
testPredict_rr = model.predict(testX_rr)


def calculate_rmse(y_true, y_pred):
    """Return the root mean squared error between y_true and y_pred."""
    return sqrt(mean_squared_error(y_true, y_pred))


# Average the per-timestep outputs into a single prediction per sample
testPredict_reshaped_rr = np.mean(testPredict_rr, axis=1).reshape(-1)
testY_reshaped_rr = testY_rr.flatten()
rmse = calculate_rmse(testY_reshaped_rr, testPredict_reshaped_rr)
print("RMSE:", rmse)

In [None]:
# Persist the model currently bound to `model` in HDF5 format.
# NOTE(review): `model` is reassigned by every training cell, so this saves
# whichever cell ran last -- confirm that this is the intended "best" model.
model.save('model_lstm_rr_best.h5')

Model terbaik untuk imputasi

In [None]:
from tensorflow.keras.models import load_model

# Reload the persisted model and predict on the test windows
model_rr = load_model('model_lstm_rr_best.h5')
predictions_rr = model_rr.predict(testX_rr)


def calculate_rmse(y_true, y_pred):
    """Compute the Root Mean Square Error (RMSE) between y_true and y_pred."""
    return sqrt(mean_squared_error(y_true, y_pred))


# Score the predictions of the RELOADED model. The previous version averaged
# `testPredict_rr`, the output of whichever in-memory model was trained last, so
# `predictions_rr` was never actually evaluated.
# Average over the time axis to get one prediction per sample.
testPredict_reshaped_rr = np.mean(predictions_rr, axis=1).reshape(-1)
# Flatten the targets to 1-D so both arrays have the same layout
testY_reshaped_rr = testY_rr.flatten()
rmse = calculate_rmse(testY_reshaped_rr, testPredict_reshaped_rr)
print("RMSE:", rmse)


In [None]:
# Rows inside the gap that has to be imputed (2020-09-15 .. 2020-09-27).
# .copy() so the predictions can be written into this frame without
# SettingWithCopyWarning.
missing_data = data1.loc[(data1['Tanggal'] >= '2020-09-15') & (data1['Tanggal'] <= '2020-09-27')].copy()

# Observations before and after the gap
data_before_missing = data1.loc[data1['Tanggal'] < '2020-09-15']
data_after_missing = data1.loc[data1['Tanggal'] > '2020-09-27']

# Everything except the gap, in the original row order
data_combined = pd.concat([data_before_missing, data_after_missing])

# Window length; has to match the input length of the model that was saved
seq_size = 180

# Sliding windows over the gap-free series. They are kept under their original
# names X / y, but are no longer used to seed the forecast (see the seed below).
rr_values = data_combined['rr'].values
n_windows = len(rr_values) - seq_size
X = np.array([rr_values[i:i + seq_size] for i in range(n_windows)])
y = np.array([rr_values[i + seq_size] for i in range(n_windows)])
X = X.reshape((X.shape[0], X.shape[1], 1))

# Seed the forecast with the seq_size observations immediately before the gap.
# The previous seed, X[-1], was the last window of data_combined, i.e. it came
# from the end of the post-gap data rather than from just before the gap.
# The seed is also passed through scaler_rr: the model was trained on scaled
# values and its outputs are mapped back with scaler_rr.inverse_transform below.
seed_frame = data_before_missing[['rr']].tail(seq_size)
future_sequence = scaler_rr.transform(seed_frame).reshape(1, seq_size, 1)

# One forecast step per missing row
n_points = len(missing_data)

predicted_missing_data = []
for _ in range(n_points):
    # verbose=0 keeps the per-step progress bars out of the cell output
    future_pred = model_rr.predict(future_sequence, verbose=0)
    # The output at the last timestep is taken as the next value
    next_step = future_pred[:, -1:, :]
    predicted_missing_data.append(next_step[0, 0, 0])

    # Slide the window: drop the oldest step and append the new prediction
    future_sequence = np.concatenate([future_sequence[:, 1:, :], next_step], axis=1)

# Map the predictions back to the original scale
predicted_missing_data = scaler_rr.inverse_transform(np.array(predicted_missing_data).reshape(-1, 1))

# Write the predictions into the gap rows
missing_data['rr'] = predicted_missing_data.ravel()

# Reassemble the full series in chronological order and show the imputed rows
data1_imputed = pd.concat([data_before_missing, missing_data, data_after_missing])
data1_imputed = data1_imputed.sort_values(by='Tanggal').reset_index(drop=True)
imputed_data_only = data1_imputed.loc[(data1_imputed['Tanggal'] >= '2020-09-15') & (data1_imputed['Tanggal'] <= '2020-09-27')]
print("Data yang diimputasi pada rentang tanggal 2020-09-15 hingga 2020-09-27:")
print(imputed_data_only)



Eksplorasi Data setelah Imputasi

In [None]:
# Load the imputed dataset from the Excel workbook
data_path = 'G:/My Drive/Skripsi/Hasil Imputasi.xlsx'
data = pd.read_excel(data_path)

# Show the dtype pandas inferred for every column
print(data.dtypes)

In [None]:
# Parse the dates and keep the study period (2015-05-01 .. 2024-04-30).
# .copy() makes df an independent frame, so later column assignments on it do not
# raise SettingWithCopyWarning.
data['Tanggal'] = pd.to_datetime(data['Tanggal'])
df = data.loc[(data['Tanggal'] >= '2015-05-01') & (data['Tanggal'] <= '2024-04-30')].copy()
print("Start date is: ", df['Tanggal'].min())
# The maximum is the END of the period; the label used to say "Start date" too
print("End date is: ", df['Tanggal'].max())

In [None]:
# Summary statistics of df, the frame loaded from the imputed workbook
df.describe()

In [None]:
# Count the missing values per column of the imputed frame (df).
# This check used to read dt, the frame from before the imputation, so it could
# not show whether the imputation had left any gaps.
jumlah_missing_data = df.isnull().sum()
jumlah_missing_data

In [None]:
# Add year and month columns. assign() returns a new frame, which avoids the
# SettingWithCopyWarning raised when writing columns into dt (a slice of data).
# NOTE(review): this section is titled as post-imputation exploration but plots
# dt; confirm that df is not the intended input.
dt = dt.assign(Year=dt['Tanggal'].dt.year, Month=dt['Tanggal'].dt.month)

# Years present in the data and the twelve calendar months
years = dt['Year'].unique()
months = range(1, 13)

# Box colour per year, plus a highlight colour for September 2020
colors = {year: '#FF6347' if year in [2012, 2013, 2014] else '#4682B4' for year in years}
special_color = '#32CD32'  # special colour for September 2020

# One row of monthly boxplots per year
fig, axes = plt.subplots(len(years), 1, figsize=(20, 30), squeeze=False)

for ax, year in zip(axes[:, 0], sorted(years)):
    year_data = dt[dt['Year'] == year]

    # Colour of each of the 12 boxes for this year
    box_colors = [special_color if (year == 2020 and month == 9) else colors[year] for month in months]

    # order= keeps all 12 months on the axis even when a month has no rows. This
    # replaces padding the frame with NaN rows through DataFrame.append, which
    # was removed in pandas 2.0.
    sns.boxplot(x='Month', y='ss', data=year_data, order=list(months), palette=box_colors, ax=ax)
    ax.set_title(f'Durasi Sinar Matahari Tahun {year}')
    ax.set_xlabel('Bulan')
    ax.set_ylabel('Durasi Sinar Matahari (jam)')

plt.tight_layout()
plt.show()

In [None]:
# Add year and month columns. assign() returns a new frame, which avoids the
# SettingWithCopyWarning raised when writing columns into dt (a slice of data).
# NOTE(review): this section is titled as post-imputation exploration but plots
# dt; confirm that df is not the intended input.
dt = dt.assign(Year=dt['Tanggal'].dt.year, Month=dt['Tanggal'].dt.month)

# Years present in the data and the twelve calendar months
years = dt['Year'].unique()
months = range(1, 13)

# Box colour per year, plus a highlight colour for September 2020
colors = {year: '#FF6347' if year in [2012, 2013, 2014] else '#4682B4' for year in years}
special_color = '#32CD32'  # special colour for September 2020

# One row of monthly boxplots per year
fig, axes = plt.subplots(len(years), 1, figsize=(20, 30), squeeze=False)

for ax, year in zip(axes[:, 0], sorted(years)):
    year_data = dt[dt['Year'] == year]

    # Colour of each of the 12 boxes for this year
    box_colors = [special_color if (year == 2020 and month == 9) else colors[year] for month in months]

    # order= keeps all 12 months on the axis even when a month has no rows. This
    # replaces padding the frame with NaN rows through DataFrame.append, which
    # was removed in pandas 2.0.
    sns.boxplot(x='Month', y='rr', data=year_data, order=list(months), palette=box_colors, ax=ax)
    ax.set_title(f'Curah Hujan Tahun {year}')
    ax.set_xlabel('Bulan')
    ax.set_ylabel('Curah Hujan (mm)')

plt.tight_layout()
plt.show()

In [None]:
# Add year and month columns. assign() returns a new frame, which avoids the
# SettingWithCopyWarning raised when writing columns into dt (a slice of data).
# NOTE(review): this section is titled as post-imputation exploration but plots
# dt; confirm that df is not the intended input.
dt = dt.assign(Year=dt['Tanggal'].dt.year, Month=dt['Tanggal'].dt.month)

# The twelve calendar months and the years present in the data
months = range(1, 13)
years = dt['Year'].unique()

# Years in ascending order for the x axis, with one colour per year in that order
year_order = sorted(years)
colors = ['#FF6347' if year in [2012, 2013, 2014] else '#4682B4' for year in year_order]

# 3x4 grid: one panel per month, one box per year inside each panel
fig, axes = plt.subplots(3, 4, figsize=(30, 30))

for ax, month in zip(axes.flat, months):
    month_data = dt[dt['Month'] == month]

    # order= keeps every year on the axis even when it has no rows for this
    # month. This replaces padding the frame with NaN rows through
    # DataFrame.append, which was removed in pandas 2.0.
    sns.boxplot(x='Year', y='ss', data=month_data, order=year_order, palette=colors, ax=ax)
    ax.set_title(f'Boxplot Durasi Sinar Matahari Bulan = {month}')
    ax.set_xlabel('Tahun')
    ax.set_ylabel('Durasi Sinar Matahari (jam)')

plt.tight_layout()
plt.show()

In [None]:
# Add year and month columns. assign() returns a new frame, which avoids the
# SettingWithCopyWarning raised when writing columns into dt (a slice of data).
# NOTE(review): this section is titled as post-imputation exploration but plots
# dt; confirm that df is not the intended input.
dt = dt.assign(Year=dt['Tanggal'].dt.year, Month=dt['Tanggal'].dt.month)

# The twelve calendar months and the years present in the data
months = range(1, 13)
years = dt['Year'].unique()

# Years in ascending order for the x axis, with one colour per year in that order
year_order = sorted(years)
colors = ['#FF6347' if year in [2012, 2013, 2014] else '#4682B4' for year in year_order]

# 3x4 grid: one panel per month, one box per year inside each panel
fig, axes = plt.subplots(3, 4, figsize=(30, 30))

for ax, month in zip(axes.flat, months):
    month_data = dt[dt['Month'] == month]

    # order= keeps every year on the axis even when it has no rows for this
    # month. This replaces padding the frame with NaN rows through
    # DataFrame.append, which was removed in pandas 2.0.
    sns.boxplot(x='Year', y='rr', data=month_data, order=year_order, palette=colors, ax=ax)
    # This panel plots 'rr' (rainfall); the title and y label previously still
    # carried the sunshine-duration text of the 'ss' cell.
    ax.set_title(f'Boxplot Curah Hujan Bulan = {month}')
    ax.set_xlabel('Tahun')
    ax.set_ylabel('Curah Hujan (mm)')

plt.tight_layout()
plt.show()

In [None]:
# Add year and month columns to the pre-imputation frame (dt) and to the
# post-imputation frame (df). assign() returns new frames, which avoids the
# SettingWithCopyWarning raised when writing columns into a sliced frame.
dt = dt.assign(Year=dt['Tanggal'].dt.year, Month=dt['Tanggal'].dt.month)
df = df.assign(Year=df['Tanggal'].dt.year, Month=df['Tanggal'].dt.month)

# Years present in the data and the twelve calendar months
years = dt['Year'].unique()
months = range(1, 13)
month_order = list(months)

# Box colour per year for each side, plus a highlight colour for September 2020
colors_before = {year: '#FF6347' if year in [2012, 2013, 2014] else '#4682B4' for year in years}
colors_after = {year: '#FF6347' if year in [2012, 2013, 2014] else '#4682B4' for year in years}
special_color = '#32CD32'  # special colour for September 2020

# One row per year: left panel before imputation, right panel after imputation
fig, axes = plt.subplots(len(years), 2, figsize=(20, 60), squeeze=False)

for (ax_before, ax_after), year in zip(axes, sorted(years)):
    # Before imputation. order= keeps all 12 months on the axis even when a month
    # has no rows; this replaces padding the frame with NaN rows through
    # DataFrame.append, which was removed in pandas 2.0.
    year_data = dt[dt['Year'] == year]
    sns.boxplot(x='Month', y='ss', data=year_data, order=month_order, color=colors_before[year], ax=ax_before)
    ax_before.set_title(f'Durasi Sinar Matahari Tahun {year} (Sebelum Imputasi)')
    ax_before.set_xlabel('Bulan')
    ax_before.set_ylabel('Durasi Sinar Matahari (jam)')

    # After imputation, with September 2020 highlighted
    year_data = df[df['Year'] == year]
    box_colors = [special_color if (year == 2020 and month == 9) else colors_after[year] for month in months]
    sns.boxplot(x='Month', y='ss', data=year_data, order=month_order, palette=box_colors, ax=ax_after)
    ax_after.set_title(f'Durasi Sinar Matahari Tahun {year} (Setelah Imputasi)')
    ax_after.set_xlabel('Bulan')
    ax_after.set_ylabel('Durasi Sinar Matahari (jam)')

plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Add year and month columns to the data before imputation
# NOTE(review): if dt is still a slice of another frame, these assignments can
# emit SettingWithCopyWarning — confirm.
dt['Year'] = dt['Tanggal'].dt.year
dt['Month'] = dt['Tanggal'].dt.month

# Add year and month columns to the data after imputation
df['Year'] = df['Tanggal'].dt.year
df['Month'] = df['Tanggal'].dt.month

# Combined box-plot figure
plt.figure(figsize=(18, 18))

# Sorted list of years and the twelve calendar months
years = sorted(dt['Year'].unique())
months = range(1, 13)

# Colour scheme
colors_before = '#FF6347'  # Before imputation (tomato)
colors_after = '#4682B4'   # After imputation (steelblue)
special_color = '#32CD32'  # Highlight colour for September 2020

# Grid size: num_cols panels per row, two panels (before/after) per year
num_years = len(years)
num_cols = 4
num_rows = (num_years + num_cols - 1) // num_cols

for i, year in enumerate(years):
    # Panel before imputation. Year i takes the consecutive slots 2*i + 1 and
    # 2*i + 2 of the (num_rows * 2) x num_cols grid, so each before/after pair
    # sits side by side.
    plt.subplot(num_rows * 2, num_cols, 2 * i + 1)
    year_data = dt[dt['Year'] == year]

    # One palette entry per month, in month order; only September 2020 is highlighted
    box_colors_before = [special_color if (year == 2020 and month == 9) else colors_before for month in months]

    # Fixing the category order to all twelve months keeps an (empty) slot for
    # months that have no rows. This replaces padding the frame with NaN rows
    # through DataFrame.append, which no longer exists in pandas >= 2.0.
    sns.boxplot(x='Month', y='ss', data=year_data, order=list(months), palette=box_colors_before, ax=plt.gca())
    plt.title(f'{year} (Sebelum Imputasi)', fontsize=12)
    plt.xlabel('Bulan', fontsize=10)
    plt.ylabel('Durasi Sinar Matahari (jam)', fontsize=10)
    plt.xticks(fontsize=10)
    plt.yticks(fontsize=10)

    # Panel after imputation
    plt.subplot(num_rows * 2, num_cols, 2 * i + 2)
    year_data = df[df['Year'] == year]

    box_colors_after = [special_color if (year == 2020 and month == 9) else colors_after for month in months]
    sns.boxplot(x='Month', y='ss', data=year_data, order=list(months), palette=box_colors_after, ax=plt.gca())
    plt.title(f'{year} (Setelah Imputasi)', fontsize=12)
    plt.xlabel('Bulan', fontsize=10)
    plt.ylabel('Durasi Sinar Matahari (jam)', fontsize=10)
    plt.xticks(fontsize=10)
    plt.yticks(fontsize=10)

# Figure-level legend below the plots
import matplotlib.patches as mpatches

# Legend entries
before_patch = mpatches.Patch(color=colors_before, label='Sebelum Imputasi')
after_patch = mpatches.Patch(color=colors_after, label='Setelah Imputasi')
special_patch = mpatches.Patch(color=special_color, label='September 2020')

# Place the legend centred under the grid
plt.figlegend(handles=[before_patch, after_patch, special_patch], loc='lower center', ncol=3, fontsize=12, frameon=False, bbox_to_anchor=(0.5, -0.05))

# Tighten the layout, then set the bottom margin that separates plots and legend
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.subplots_adjust(bottom=0.18)

plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Add year and month columns to the data before imputation
# NOTE(review): if dt is still a slice of another frame, these assignments can
# emit SettingWithCopyWarning — confirm.
dt['Year'] = dt['Tanggal'].dt.year
dt['Month'] = dt['Tanggal'].dt.month

# Add year and month columns to the data after imputation
df['Year'] = df['Tanggal'].dt.year
df['Month'] = df['Tanggal'].dt.month

# Paper size (14 inch) and margin (433 pixel ≈ 1.52 inch)
paper_width = 14
paper_height = 14
margin_inches = 1.52

# Grid size: num_cols years per row, each year using two stacked rows
years = sorted(dt['Year'].unique())
num_years = len(years)
num_cols = 4
num_rows = (num_years + num_cols - 1) // num_cols

# Combined box-plot figure
fig, axes = plt.subplots(num_rows * 2, num_cols, figsize=(paper_width, paper_height))

# Colour scheme
colors_before = '#FF6347'  # Before imputation (tomato)
colors_after = '#4682B4'   # After imputation (steelblue)
special_color = '#32CD32'  # Highlight colour for September 2020

months = range(1, 13)

for i, year in enumerate(years):
    row = i // num_cols
    col = i % num_cols

    # Panel before imputation (upper row of the pair)
    ax_before = axes[2 * row, col]
    year_data_before = dt[dt['Year'] == year]

    # One palette entry per month, in month order; only September 2020 is highlighted
    box_colors_before = [special_color if (year == 2020 and month == 9) else colors_before for month in months]

    # Fixing the category order to all twelve months keeps an (empty) slot for
    # months that have no rows. This replaces padding the frame with NaN rows
    # through DataFrame.append, which no longer exists in pandas >= 2.0.
    sns.boxplot(x='Month', y='ss', data=year_data_before, order=list(months), palette=box_colors_before, ax=ax_before)
    ax_before.set_title(f'{year} (Sebelum Imputasi)', fontsize=10)
    ax_before.set_xlabel('Bulan')
    ax_before.set_ylabel('jam')

    # Panel after imputation (lower row of the pair)
    ax_after = axes[2 * row + 1, col]
    year_data_after = df[df['Year'] == year]

    box_colors_after = [special_color if (year == 2020 and month == 9) else colors_after for month in months]
    sns.boxplot(x='Month', y='ss', data=year_data_after, order=list(months), palette=box_colors_after, ax=ax_after)
    ax_after.set_title(f'{year} (Setelah Imputasi)', fontsize=10)
    ax_after.set_xlabel('Bulan')
    ax_after.set_ylabel('jam')

# When the number of years is not a multiple of num_cols, the last pair of
# rows has columns no year was drawn into; hide those empty frames.
used_in_last_row = num_years % num_cols or num_cols
for unused_ax in axes[-2:, used_in_last_row:].ravel():
    unused_ax.set_visible(False)

# Keep the subplots inside the page margins
plt.tight_layout(rect=[margin_inches/paper_width, margin_inches/paper_height, 1-margin_inches/paper_width, 1-margin_inches/paper_height])
plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Add year and month columns to the data before imputation
# NOTE(review): if dt is still a slice of another frame, these assignments can
# emit SettingWithCopyWarning — confirm.
dt['Year'] = dt['Tanggal'].dt.year
dt['Month'] = dt['Tanggal'].dt.month

# Add year and month columns to the data after imputation
df['Year'] = df['Tanggal'].dt.year
df['Month'] = df['Tanggal'].dt.month

# Paper size (14 inch) and margin (433 pixel ≈ 1.52 inch)
paper_width = 14
paper_height = 14
margin_inches = 1.52

# Grid size: every year needs two cells (before & after)
years = sorted(dt['Year'].unique())
num_years = len(years)
num_cols = 4
num_rows = (num_years * 2 + num_cols - 1) // num_cols

# Combined box-plot figure. squeeze=False keeps `axes` two-dimensional even
# when the grid collapses to a single row, so axes[row, col] always works.
fig, axes = plt.subplots(num_rows, num_cols, figsize=(paper_width, paper_height), squeeze=False)

# Colour scheme
colors_before = '#FF6347'  # Before imputation (tomato)
colors_after = '#4682B4'   # After imputation (steelblue)
special_color = '#32CD32'  # Highlight colour for September 2020

months = range(1, 13)

for i, year in enumerate(years):
    # num_cols // 2 years fit in one row; each takes two adjacent columns
    row = i // (num_cols // 2)
    col_before = (i % (num_cols // 2)) * 2
    col_after = col_before + 1

    # Panel before imputation
    ax_before = axes[row, col_before]
    year_data_before = dt[dt['Year'] == year]

    # One palette entry per month, in month order; only September 2020 is highlighted
    box_colors_before = [special_color if (year == 2020 and month == 9) else colors_before for month in months]

    # Fixing the category order to all twelve months keeps an (empty) slot for
    # months that have no rows. This replaces padding the frame with NaN rows
    # through DataFrame.append, which no longer exists in pandas >= 2.0.
    sns.boxplot(x='Month', y='ss', data=year_data_before, order=list(months), palette=box_colors_before, ax=ax_before)
    ax_before.set_title(f'{year} (Sebelum Imputasi)', fontsize=10)
    ax_before.set_xlabel('Bulan')
    ax_before.set_ylabel('jam')

    # Panel after imputation
    ax_after = axes[row, col_after]
    year_data_after = df[df['Year'] == year]

    box_colors_after = [special_color if (year == 2020 and month == 9) else colors_after for month in months]
    sns.boxplot(x='Month', y='ss', data=year_data_after, order=list(months), palette=box_colors_after, ax=ax_after)
    ax_after.set_title(f'{year} (Setelah Imputasi)', fontsize=10)
    ax_after.set_xlabel('Bulan')
    ax_after.set_ylabel('jam')

# Each year fills two consecutive row-major cells, so with an even num_cols
# every cell after the first 2 * num_years is unused; hide those empty frames.
for unused_ax in axes.ravel()[2 * num_years:]:
    unused_ax.set_visible(False)

# Keep the subplots inside the page margins
plt.tight_layout(rect=[margin_inches/paper_width, margin_inches/paper_height, 1-margin_inches/paper_width, 1-margin_inches/paper_height])
plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Add year and month columns to the data before imputation
# NOTE(review): if dt is still a slice of another frame, these assignments can
# emit SettingWithCopyWarning — confirm.
dt['Year'] = dt['Tanggal'].dt.year
dt['Month'] = dt['Tanggal'].dt.month

# Add year and month columns to the data after imputation
df['Year'] = df['Tanggal'].dt.year
df['Month'] = df['Tanggal'].dt.month

# Paper size (14 inch) and margin (433 pixel ≈ 1.52 inch)
paper_width = 14
paper_height = 7  # Reduced height because only a single year is shown
margin_inches = 1.52

# Only the year 2020 is plotted
year = 2020

# Side-by-side box plots for 2020: before (left) and after (right) imputation
fig, axes = plt.subplots(1, 2, figsize=(paper_width, paper_height))

# Colour scheme
colors_before = '#FF6347'  # Before imputation (tomato)
colors_after = '#4682B4'   # After imputation (steelblue)
special_color = '#32CD32'  # Highlight colour for September 2020

months = range(1, 13)

# Panel before imputation
ax_before = axes[0]
year_data_before = dt[dt['Year'] == year]

# One palette entry per month, in month order; September is highlighted
box_colors_before = [special_color if (month == 9) else colors_before for month in months]

# Fixing the category order to all twelve months keeps an (empty) slot for
# months that have no rows. This replaces padding the frame with NaN rows
# through DataFrame.append, which no longer exists in pandas >= 2.0.
sns.boxplot(x='Month', y='ss', data=year_data_before, order=list(months), palette=box_colors_before, ax=ax_before)
ax_before.set_xlabel('Bulan', fontsize=12)
ax_before.set_ylabel('Durasi Sinar Matahari (jam)', fontsize=13)
ax_before.tick_params(axis='both', which='major', labelsize=13)

# Panel after imputation
ax_after = axes[1]
year_data_after = df[df['Year'] == year]

box_colors_after = [special_color if (month == 9) else colors_after for month in months]
sns.boxplot(x='Month', y='ss', data=year_data_after, order=list(months), palette=box_colors_after, ax=ax_after)
ax_after.set_xlabel('Bulan', fontsize=12)
ax_after.set_ylabel('Durasi Sinar Matahari (jam)', fontsize=13)
ax_after.tick_params(axis='both', which='major', labelsize=13)

# Figure-level legend explaining the three box colours
handles = [
    plt.Line2D([0], [0], color=colors_before, lw=4, label='Sebelum Imputasi'),
    plt.Line2D([0], [0], color=special_color, lw=4, label='Data Hilang Berurutan'),
    plt.Line2D([0], [0], color=colors_after, lw=4, label='Setelah Imputasi')
]
fig.legend(handles=handles, loc='lower center', ncol=3, fontsize=13)

# Fit the panels inside the page margins, leaving room for the legend
plt.tight_layout(rect=[margin_inches/paper_width, margin_inches/paper_height, 1-margin_inches/paper_width, 1-(margin_inches+1)/paper_height])
plt.subplots_adjust(bottom=0.15)  # Reduce the space below the plots

plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Add year and month columns to the data before imputation
# NOTE(review): if dt is still a slice of another frame, these assignments can
# emit SettingWithCopyWarning — confirm.
dt['Year'] = dt['Tanggal'].dt.year
dt['Month'] = dt['Tanggal'].dt.month

# Add year and month columns to the data after imputation
df['Year'] = df['Tanggal'].dt.year
df['Month'] = df['Tanggal'].dt.month

# Paper size (14 inch) and margin (433 pixel ≈ 1.52 inch)
paper_width = 14
paper_height = 7  # Reduced height because only a single year is shown
margin_inches = 1.52

# Only the year 2020 is plotted
year = 2020

# Side-by-side box plots for 2020: before (left) and after (right) imputation
fig, axes = plt.subplots(1, 2, figsize=(paper_width, paper_height))

# Colour scheme
colors_before = '#FF6347'  # Before imputation (tomato)
colors_after = '#4682B4'   # After imputation (steelblue)
special_color = '#32CD32'  # Highlight colour for September 2020

months = range(1, 13)

# Panel before imputation
ax_before = axes[0]
year_data_before = dt[dt['Year'] == year]

# One palette entry per month, in month order; September is highlighted
box_colors_before = [special_color if (month == 9) else colors_before for month in months]

# Fixing the category order to all twelve months keeps an (empty) slot for
# months that have no rows. This replaces padding the frame with NaN rows
# through DataFrame.append, which no longer exists in pandas >= 2.0.
sns.boxplot(x='Month', y='rr', data=year_data_before, order=list(months), palette=box_colors_before, ax=ax_before)
ax_before.set_xlabel('Bulan', fontsize=12)
ax_before.set_ylabel('Curah Hujan (mm)', fontsize=13)
ax_before.tick_params(axis='both', which='major', labelsize=13)

# Panel after imputation
ax_after = axes[1]
year_data_after = df[df['Year'] == year]

box_colors_after = [special_color if (month == 9) else colors_after for month in months]
sns.boxplot(x='Month', y='rr', data=year_data_after, order=list(months), palette=box_colors_after, ax=ax_after)
ax_after.set_xlabel('Bulan', fontsize=12)
ax_after.set_ylabel('Curah Hujan (mm)', fontsize=13)
ax_after.tick_params(axis='both', which='major', labelsize=13)

# Figure-level legend explaining the three box colours
handles = [
    plt.Line2D([0], [0], color=colors_before, lw=4, label='Sebelum Imputasi'),
    plt.Line2D([0], [0], color=special_color, lw=4, label='Data Hilang Berurutan'),
    plt.Line2D([0], [0], color=colors_after, lw=4, label='Setelah Imputasi')
]
fig.legend(handles=handles, loc='lower center', ncol=3, fontsize=13)

# Fit the panels inside the page margins, leaving room for the legend
plt.tight_layout(rect=[margin_inches/paper_width, margin_inches/paper_height, 1-margin_inches/paper_width, 1-(margin_inches+1)/paper_height])
plt.subplots_adjust(bottom=0.15)  # Reduce the space below the plots

plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Add year and month columns to the data before imputation
# NOTE(review): if dt is still a slice of another frame, these assignments can
# emit SettingWithCopyWarning — confirm.
dt['Year'] = dt['Tanggal'].dt.year
dt['Month'] = dt['Tanggal'].dt.month

# Add year and month columns to the data after imputation
df['Year'] = df['Tanggal'].dt.year
df['Month'] = df['Tanggal'].dt.month

# Paper size (14 inch) and margin (433 pixel ≈ 1.52 inch)
paper_width = 14
paper_height = 14
margin_inches = 1.52

# Grid size: every year needs two cells (before & after)
years = sorted(dt['Year'].unique())
num_years = len(years)
num_cols = 4
num_rows = (num_years * 2 + num_cols - 1) // num_cols

# Combined box-plot figure. squeeze=False keeps `axes` two-dimensional even
# when the grid collapses to a single row, so axes[row, col] always works.
fig, axes = plt.subplots(num_rows, num_cols, figsize=(paper_width, paper_height), squeeze=False)

# Colour scheme
colors_before = '#FF6347'  # Before imputation (tomato)
colors_after = '#4682B4'   # After imputation (steelblue)
special_color = '#32CD32'  # Highlight colour for September 2020

months = range(1, 13)

for i, year in enumerate(years):
    # num_cols // 2 years fit in one row; each takes two adjacent columns
    row = i // (num_cols // 2)
    col_before = (i % (num_cols // 2)) * 2
    col_after = col_before + 1

    # Panel before imputation
    ax_before = axes[row, col_before]
    year_data_before = dt[dt['Year'] == year]

    # One palette entry per month, in month order; only September 2020 is highlighted
    box_colors_before = [special_color if (year == 2020 and month == 9) else colors_before for month in months]

    # Fixing the category order to all twelve months keeps an (empty) slot for
    # months that have no rows. This replaces padding the frame with NaN rows
    # through DataFrame.append, which no longer exists in pandas >= 2.0.
    sns.boxplot(x='Month', y='rr', data=year_data_before, order=list(months), palette=box_colors_before, ax=ax_before)
    ax_before.set_title(f'{year}', fontsize=10)
    ax_before.set_xlabel('Bulan')
    # Rainfall is labelled in mm, matching the "after" panel (was mislabelled as hours)
    ax_before.set_ylabel('Curah Hujan (mm)')

    # Panel after imputation
    ax_after = axes[row, col_after]
    year_data_after = df[df['Year'] == year]

    box_colors_after = [special_color if (year == 2020 and month == 9) else colors_after for month in months]
    sns.boxplot(x='Month', y='rr', data=year_data_after, order=list(months), palette=box_colors_after, ax=ax_after)
    ax_after.set_title(f'{year}', fontsize=10)
    ax_after.set_xlabel('Bulan')
    ax_after.set_ylabel('Curah Hujan (mm)')

# Each year fills two consecutive row-major cells, so with an even num_cols
# every cell after the first 2 * num_years is unused; hide those empty frames.
for unused_ax in axes.ravel()[2 * num_years:]:
    unused_ax.set_visible(False)

# Figure-level legend explaining the three box colours
handles = [
    plt.Line2D([0], [0], color=colors_before, lw=4, label='Sebelum Imputasi'),
    plt.Line2D([0], [0], color=colors_after, lw=4, label='Setelah Imputasi'),
    plt.Line2D([0], [0], color=special_color, lw=4, label='Data Hilang Berurutan')
]
fig.legend(handles=handles, loc='lower center', ncol=3, fontsize=12)

# Fit the panels inside the page margins, leaving room for the legend
plt.tight_layout(rect=[margin_inches/paper_width, margin_inches/paper_height, 1-margin_inches/paper_width, 1-(margin_inches+1)/paper_height])
plt.subplots_adjust(bottom=0.07)  # Reduce the space below the plots

plt.show()
