# Bawang Merah


In [3]:
import pandas as pd
import numpy as np
from statsmodels.tsa.api import VAR
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit

# Fungsi untuk memuat data train dan test
def load_data(train_path, test_path):
    df_train = pd.read_csv(train_path, parse_dates=["Date"], index_col="Date")
    df_test = pd.read_csv(test_path, parse_dates=["Date"], index_col="Date")
    return df_train, df_test

# Fungsi untuk preprocessing data, seperti menginterpolasi missing values
def preprocess_data(df_train, df_test):
    # Menginterpolasi missing values
    df_train.interpolate(method='linear', inplace=True)
    df_test.interpolate(method='linear', inplace=True)
    
    # Menghindari masalah kolom yang hilang
    df_test = df_test[df_train.columns]  # Menyusun ulang kolom pada df_test sesuai df_train

    # Log transformasi untuk menangani distribusi harga yang long-tail
    df_train_log = np.log1p(df_train)  # Log transform
    df_test_log = np.log1p(df_test)
    
    return df_train_log, df_test_log

# Fungsi untuk split data menjadi training dan validation
def split_train_validation(df_train, split_ratio=0.8):
    train_size = int(split_ratio * len(df_train))
    return df_train.iloc[:train_size], df_train.iloc[train_size:]

# Fungsi untuk melatih model VAR dengan lag optimal
def train_var_model(df_train_data, max_lag=15):
    model = VAR(df_train_data)
    lag_order = model.select_order(maxlags=max_lag).aic  # Pilih lag optimal berdasarkan AIC
    print(f"Optimal Lag Order: {lag_order}")
    model_fitted = model.fit(lag_order)
    return model_fitted

# Fungsi untuk melakukan evaluasi model pada data validation
def forecast_and_evaluate(model_fitted, df_train_data, df_val_data):
    lag_order = model_fitted.k_ar
    last_values = df_train_data.values[-lag_order:]
    pred_val = model_fitted.forecast(last_values, steps=len(df_val_data))
    mape = mean_absolute_percentage_error(df_val_data, pred_val)
    print(f"✅ MAPE pada validation set: {mape:.4f}")

# Fungsi untuk melakukan prediksi pada data test
def forecast_test(model_fitted, df_train, df_test):
    lag_order = model_fitted.k_ar
    pred_test = model_fitted.forecast(df_train.values[-lag_order:], steps=len(df_test))
    return pd.DataFrame(pred_test, index=df_test.index, columns=df_train.columns)

# Fungsi untuk menyimpan hasil prediksi dalam format submission
def save_submission(forecast_df, df_train, filename="submissionBawangMerah.csv"):
    ordered_provinces = df_train.columns.tolist()
    submission_results = []
    
    for date in forecast_df.index:
        for prov in ordered_provinces:
            submission_results.append({
                "id": f"Bawang Merah/{prov}/{date.date()}",
                "price": round(forecast_df.loc[date, prov], 2)
            })
    
    submission_df = pd.DataFrame(submission_results)
    submission_df.to_csv(filename, index=False)
    print(f"✅ Submission berhasil dibuat: {filename}")

# Pipeline Execution
train_path = "Harga Bahan Pangan/train/Bawang Merah.csv"
test_path = "Harga Bahan Pangan/test/Bawang Merah.csv"

df_train, df_test = load_data(train_path, test_path)

# Preprocessing data: Log transform dan interpolasi
df_train, df_test = preprocess_data(df_train, df_test)

# Membagi data menjadi train dan validation set
df_train_data, df_val_data = split_train_validation(df_train)

# Melatih model VAR
model_fitted = train_var_model(df_train_data, max_lag=15)

# Evaluasi model menggunakan validation set
forecast_and_evaluate(model_fitted, df_train_data, df_val_data)

# Melakukan prediksi pada data test
forecast_df = forecast_test(model_fitted, df_train, df_test)
forecast_df = np.expm1(forecast_df)

# Menyimpan hasil prediksi dalam format submission
save_submission(forecast_df, df_train)

  self._init_dates(dates, freq)


Optimal Lag Order: 2
✅ MAPE pada validation set: 0.0206
✅ Submission berhasil dibuat: submissionBawangMerah.csv


# Bawang Putih Bonggol

In [4]:
def save_submission(forecast_df, df_train, filename="submissionBawangPutihBonggol.csv"):
    ordered_provinces = df_train.columns.tolist()
    submission_results = []
    
    for date in forecast_df.index:
        for prov in ordered_provinces:
            submission_results.append({
                "id": f"Bawang Putih Bonggol/{prov}/{date.date()}",
                "price": round(forecast_df.loc[date, prov], 2)
            })
    
    submission_df = pd.DataFrame(submission_results)
    submission_df.to_csv(filename, index=False)
    print(f"✅ Submission berhasil dibuat: {filename}")

# Pipeline Execution
train_path = "Harga Bahan Pangan/train/Bawang Putih Bonggol.csv"
test_path = "Harga Bahan Pangan/test/Bawang Putih Bonggol.csv"

df_train, df_test = load_data(train_path, test_path)

# Preprocessing data: Log transform dan interpolasi
df_train, df_test = preprocess_data(df_train, df_test)

# Membagi data menjadi train dan validation set
df_train_data, df_val_data = split_train_validation(df_train)

# Melatih model VAR
model_fitted = train_var_model(df_train_data, max_lag=15)

# Evaluasi model menggunakan validation set
forecast_and_evaluate(model_fitted, df_train_data, df_val_data)

# Melakukan prediksi pada data test
forecast_df = forecast_test(model_fitted, df_train, df_test)
forecast_df = np.expm1(forecast_df)

# Menyimpan hasil prediksi dalam format submission
save_submission(forecast_df, df_train)


  self._init_dates(dates, freq)


Optimal Lag Order: 2
✅ MAPE pada validation set: 0.0056
✅ Submission berhasil dibuat: submissionBawangPutihBonggol.csv


# Beras Medium 

In [5]:
def save_submission(forecast_df, df_train, filename="submissionBerasMedium.csv"):
    ordered_provinces = df_train.columns.tolist()
    submission_results = []
    
    for date in forecast_df.index:
        for prov in ordered_provinces:
            submission_results.append({
                "id": f"Beras Medium/{prov}/{date.date()}",
                "price": round(forecast_df.loc[date, prov], 2)
            })
    
    submission_df = pd.DataFrame(submission_results)
    submission_df.to_csv(filename, index=False)
    print(f"✅ Submission berhasil dibuat: {filename}")

# Pipeline Execution
train_path = "Harga Bahan Pangan/train/Beras Medium.csv"
test_path = "Harga Bahan Pangan/test/Beras Medium.csv"

df_train, df_test = load_data(train_path, test_path)

# Preprocessing data: Log transform dan interpolasi
df_train, df_test = preprocess_data(df_train, df_test)

# Membagi data menjadi train dan validation set
df_train_data, df_val_data = split_train_validation(df_train)

# Melatih model VAR
model_fitted = train_var_model(df_train_data, max_lag=15)

# Evaluasi model menggunakan validation set
forecast_and_evaluate(model_fitted, df_train_data, df_val_data)

# Melakukan prediksi pada data test
forecast_df = forecast_test(model_fitted, df_train, df_test)
forecast_df = np.expm1(forecast_df)

# Menyimpan hasil prediksi dalam format submission
save_submission(forecast_df, df_train)


  self._init_dates(dates, freq)


Optimal Lag Order: 15
✅ MAPE pada validation set: 0.0141
✅ Submission berhasil dibuat: submissionBerasMedium.csv


# Beras Premium

In [6]:
def save_submission(forecast_df, df_train, filename="submissionBerasPremium.csv"):
    ordered_provinces = df_train.columns.tolist()
    submission_results = []
    
    for date in forecast_df.index:
        for prov in ordered_provinces:
            submission_results.append({
                "id": f"Beras Premium/{prov}/{date.date()}",
                "price": round(forecast_df.loc[date, prov], 2)
            })
    
    submission_df = pd.DataFrame(submission_results)
    submission_df.to_csv(filename, index=False)
    print(f"✅ Submission berhasil dibuat: {filename}")

# Pipeline Execution
train_path = "Harga Bahan Pangan/train/Beras Premium.csv"
test_path = "Harga Bahan Pangan/test/Beras Premium.csv"

df_train, df_test = load_data(train_path, test_path)

# Preprocessing data: Log transform dan interpolasi
df_train, df_test = preprocess_data(df_train, df_test)

# Membagi data menjadi train dan validation set
df_train_data, df_val_data = split_train_validation(df_train)

# Melatih model VAR
model_fitted = train_var_model(df_train_data, max_lag=15)

# Evaluasi model menggunakan validation set
forecast_and_evaluate(model_fitted, df_train_data, df_val_data)

# Melakukan prediksi pada data test
forecast_df = forecast_test(model_fitted, df_train, df_test)
forecast_df = np.expm1(forecast_df)

# Menyimpan hasil prediksi dalam format submission
save_submission(forecast_df, df_train)

  self._init_dates(dates, freq)


Optimal Lag Order: 15
✅ MAPE pada validation set: 0.0119
✅ Submission berhasil dibuat: submissionBerasPremium.csv


# Cabai Merah Keriting

In [7]:
def save_submission(forecast_df, df_train, filename="submissionCabaiMerahKeriting.csv"):
    ordered_provinces = df_train.columns.tolist()
    submission_results = []
    
    for date in forecast_df.index:
        for prov in ordered_provinces:
            submission_results.append({
                "id": f"Cabai Merah Keriting/{prov}/{date.date()}",
                "price": round(forecast_df.loc[date, prov], 2)
            })
    
    submission_df = pd.DataFrame(submission_results)
    submission_df.to_csv(filename, index=False)
    print(f"✅ Submission berhasil dibuat: {filename}")

# Pipeline Execution
train_path = "Harga Bahan Pangan/train/Cabai Merah Keriting.csv"
test_path = "Harga Bahan Pangan/test/Cabai Merah Keriting.csv"

df_train, df_test = load_data(train_path, test_path)

# Preprocessing data: Log transform dan interpolasi
df_train, df_test = preprocess_data(df_train, df_test)

# Membagi data menjadi train dan validation set
df_train_data, df_val_data = split_train_validation(df_train)

# Melatih model VAR
model_fitted = train_var_model(df_train_data, max_lag=15)

# Evaluasi model menggunakan validation set
forecast_and_evaluate(model_fitted, df_train_data, df_val_data)

# Melakukan prediksi pada data test
forecast_df = forecast_test(model_fitted, df_train, df_test)
forecast_df = np.expm1(forecast_df)

# Menyimpan hasil prediksi dalam format submission
save_submission(forecast_df, df_train)

  self._init_dates(dates, freq)


Optimal Lag Order: 15
✅ MAPE pada validation set: 0.0239
✅ Submission berhasil dibuat: submissionCabaiMerahKeriting.csv


# Cabai Rawit Merah

In [8]:
from sklearn.impute import KNNImputer
# Pipeline Execution
train_path = "Harga Bahan Pangan/train/Cabai Rawit Merah.csv"
test_path = "Harga Bahan Pangan/test/Cabai Rawit Merah.csv"

df_train, df_test = load_data(train_path, test_path)

# **🔹 Tangani missing value ekstrem hanya untuk Cabai Rawit Merah**
imputer = KNNImputer(n_neighbors=5)
df_train_imputed = pd.DataFrame(imputer.fit_transform(df_train), columns=df_train.columns)
df_test_imputed = pd.DataFrame(imputer.transform(df_test), columns=df_test.columns)

    # Pastikan tidak ada NaN atau Inf setelah imputasi
df_train_imputed.replace([np.inf, -np.inf], np.nan, inplace=True)
df_test_imputed.replace([np.inf, -np.inf], np.nan, inplace=True)

df_train.fillna(df_train.median(), inplace=True)
df_test.fillna(df_test.median(), inplace=True)

# **Cek apakah masih ada missing values**
if df_train.isnull().sum().sum() > 0 or df_test.isnull().sum().sum() > 0:
    print("⚠️ Warning: Masih ada missing values setelah preprocessing!")
    
def save_submission(forecast_df, df_train, filename="submissionCabaiRawitMerah.csv"):
    ordered_provinces = df_train.columns.tolist()
    submission_results = []
    
    for date in forecast_df.index:
        for prov in ordered_provinces:
            submission_results.append({
                "id": f"Cabai Rawit Merah/{prov}/{date.date()}",
                "price": round(forecast_df.loc[date, prov], 2)
            })
    
    submission_df = pd.DataFrame(submission_results)
    submission_df.to_csv(filename, index=False)
    print(f"✅ Submission berhasil dibuat: {filename}")
    
    df_train, df_test = load_data(train_path, test_path)

# Preprocessing data: Log transform dan interpolasi
df_train, df_test = preprocess_data(df_train, df_test)

# Membagi data menjadi train dan validation set
df_train_data, df_val_data = split_train_validation(df_train)

# Melatih model VAR
model_fitted = train_var_model(df_train_data, max_lag=15)

# Evaluasi model menggunakan validation set
forecast_and_evaluate(model_fitted, df_train_data, df_val_data)

# Melakukan prediksi pada data test
forecast_df = forecast_test(model_fitted, df_train, df_test)
forecast_df = np.expm1(forecast_df)

# Menyimpan hasil prediksi dalam format submission
save_submission(forecast_df, df_train)

  self._init_dates(dates, freq)


Optimal Lag Order: 15
✅ MAPE pada validation set: 0.0274
✅ Submission berhasil dibuat: submissionCabaiRawitMerah.csv


# Daging Ayam Ras

In [9]:
def save_submission(forecast_df, df_train, filename="submissionDagingAyamRas.csv"):
    ordered_provinces = df_train.columns.tolist()
    submission_results = []
    
    for date in forecast_df.index:
        for prov in ordered_provinces:
            submission_results.append({
                "id": f"Daging Ayam Ras/{prov}/{date.date()}",
                "price": round(forecast_df.loc[date, prov], 2)
            })
    
    submission_df = pd.DataFrame(submission_results)
    submission_df.to_csv(filename, index=False)
    print(f"✅ Submission berhasil dibuat: {filename}")

# Pipeline Execution
train_path = "Harga Bahan Pangan/train/Daging Ayam Ras.csv"
test_path = "Harga Bahan Pangan/test/Daging Ayam Ras.csv"

df_train, df_test = load_data(train_path, test_path)

# Preprocessing data: Log transform dan interpolasi
df_train, df_test = preprocess_data(df_train, df_test)

# Membagi data menjadi train dan validation set
df_train_data, df_val_data = split_train_validation(df_train)

# Melatih model VAR
model_fitted = train_var_model(df_train_data, max_lag=15)

# Evaluasi model menggunakan validation set
forecast_and_evaluate(model_fitted, df_train_data, df_val_data)

# Melakukan prediksi pada data test
forecast_df = forecast_test(model_fitted, df_train, df_test)
forecast_df = np.expm1(forecast_df)

# Menyimpan hasil prediksi dalam format submission
save_submission(forecast_df, df_train)

  self._init_dates(dates, freq)


Optimal Lag Order: 15
✅ MAPE pada validation set: 0.0101
✅ Submission berhasil dibuat: submissionDagingAyamRas.csv


# Daging Sapi Murni

In [10]:
def save_submission(forecast_df, df_train, filename="submissionDagingSapiMurni.csv"):
    ordered_provinces = df_train.columns.tolist()
    submission_results = []
    
    for date in forecast_df.index:
        for prov in ordered_provinces:
            submission_results.append({
                "id": f"Daging Sapi Murni/{prov}/{date.date()}",
                "price": round(forecast_df.loc[date, prov], 2)
            })
    
    submission_df = pd.DataFrame(submission_results)
    submission_df.to_csv(filename, index=False)
    print(f"✅ Submission berhasil dibuat: {filename}")

# Pipeline Execution
train_path = "Harga Bahan Pangan/train/Daging Sapi Murni.csv"
test_path = "Harga Bahan Pangan/test/Daging Sapi Murni.csv"

df_train, df_test = load_data(train_path, test_path)

# Preprocessing data: Log transform dan interpolasi
df_train, df_test = preprocess_data(df_train, df_test)

# Membagi data menjadi train dan validation set
df_train_data, df_val_data = split_train_validation(df_train)

# Melatih model VAR
model_fitted = train_var_model(df_train_data, max_lag=15)

# Evaluasi model menggunakan validation set
forecast_and_evaluate(model_fitted, df_train_data, df_val_data)

# Melakukan prediksi pada data test
forecast_df = forecast_test(model_fitted, df_train, df_test)
forecast_df = np.expm1(forecast_df)

# Menyimpan hasil prediksi dalam format submission
save_submission(forecast_df, df_train)

  self._init_dates(dates, freq)


Optimal Lag Order: 15
✅ MAPE pada validation set: 0.0026
✅ Submission berhasil dibuat: submissionDagingSapiMurni.csv


# Gula Konsumsi

In [11]:
def save_submission(forecast_df, df_train, filename="submissionGulaKonsumsi.csv"):
    ordered_provinces = df_train.columns.tolist()
    submission_results = []
    
    for date in forecast_df.index:
        for prov in ordered_provinces:
            submission_results.append({
                "id": f"Gula Konsumsi/{prov}/{date.date()}",
                "price": round(forecast_df.loc[date, prov], 2)
            })
    
    submission_df = pd.DataFrame(submission_results)
    submission_df.to_csv(filename, index=False)
    print(f"✅ Submission berhasil dibuat: {filename}")

# Pipeline Execution
train_path = "Harga Bahan Pangan/train/Gula Konsumsi.csv"
test_path = "Harga Bahan Pangan/test/Gula Konsumsi.csv"

df_train, df_test = load_data(train_path, test_path)

# Preprocessing data: Log transform dan interpolasi
df_train, df_test = preprocess_data(df_train, df_test)

# Membagi data menjadi train dan validation set
df_train_data, df_val_data = split_train_validation(df_train)

# Melatih model VAR
model_fitted = train_var_model(df_train_data, max_lag=15)

# Evaluasi model menggunakan validation set
forecast_and_evaluate(model_fitted, df_train_data, df_val_data)

# Melakukan prediksi pada data test
forecast_df = forecast_test(model_fitted, df_train, df_test)
forecast_df = np.expm1(forecast_df)

# Menyimpan hasil prediksi dalam format submission
save_submission(forecast_df, df_train)

  self._init_dates(dates, freq)


Optimal Lag Order: 15
✅ MAPE pada validation set: 0.0110
✅ Submission berhasil dibuat: submissionGulaKonsumsi.csv


# Minyak Goreng Curah

In [None]:
## coba nganu missing value pake knn

from sklearn.impute import KNNImputer

def save_submission(forecast_df, df_train, filename="submissionMinyakGorengCurah.csv"):
    ordered_provinces = df_train.columns.tolist()
    submission_results = []

    for date in forecast_df.index:
        for prov in ordered_provinces:
            submission_results.append({
                "id": f"Minyak Goreng Curah/{prov}/{date.date()}",
                "price": round(forecast_df.loc[date, prov], 2)
            })

    submission_df = pd.DataFrame(submission_results)
    submission_df.to_csv(filename, index=False)
    print(f"Submission berhasil dibuat: {filename}")

# Pipeline Execution
train_path = "Harga Bahan Pangan/train/Minyak Goreng Curah.csv"
test_path = "Harga Bahan Pangan/test/Minyak Goreng Curah.csv"

df_train, df_test = load_data(train_path, test_path)

# **Tangani missing value menggunakan KNN Imputer**
knn_imputer = KNNImputer(n_neighbors=5, weights="uniform")  
df_train[:] = knn_imputer.fit_transform(df_train)
df_test[:] = knn_imputer.transform(df_test)

# **Cek apakah masih ada missing values**
if df_train.isnull().sum().sum() > 0 or df_test.isnull().sum().sum() > 0:
    print("Warning: Masih ada missing values setelah preprocessing!")

df_train, df_test = preprocess_data(df_train, df_test)
df_train_data, df_val_data = split_train_validation(df_train)

model_fitted = train_var_model(df_train_data)
forecast_and_evaluate(model_fitted, df_train_data, df_val_data)

forecast_df = forecast_test(model_fitted, df_train, df_test)
save_submission(forecast_df, df_train)

# Minyak Gireng Kemasan Sederhana

In [12]:
def save_submission(forecast_df, df_train, filename="submissionMinyakGorengKemasanSederhana.csv"):
    ordered_provinces = df_train.columns.tolist()
    submission_results = []
    
    for date in forecast_df.index:
        for prov in ordered_provinces:
            submission_results.append({
                "id": f"Minyak Goreng Kemasan Sederhana/{prov}/{date.date()}",
                "price": round(forecast_df.loc[date, prov], 2)
            })
    
    submission_df = pd.DataFrame(submission_results)
    submission_df.to_csv(filename, index=False)
    print(f"✅ Submission berhasil dibuat: {filename}")

# Pipeline Execution
train_path = "Harga Bahan Pangan/train/Minyak Goreng Kemasan Sederhana.csv"
test_path = "Harga Bahan Pangan/test/Minyak Goreng Kemasan Sederhana.csv"

df_train, df_test = load_data(train_path, test_path)

# Preprocessing data: Log transform dan interpolasi
df_train, df_test = preprocess_data(df_train, df_test)

# Membagi data menjadi train dan validation set
df_train_data, df_val_data = split_train_validation(df_train)

# Melatih model VAR
model_fitted = train_var_model(df_train_data, max_lag=15)

# Evaluasi model menggunakan validation set
forecast_and_evaluate(model_fitted, df_train_data, df_val_data)

# Melakukan prediksi pada data test
forecast_df = forecast_test(model_fitted, df_train, df_test)
forecast_df = np.expm1(forecast_df)

# Menyimpan hasil prediksi dalam format submission
save_submission(forecast_df, df_train)

  self._init_dates(dates, freq)


Optimal Lag Order: 15
✅ MAPE pada validation set: 0.0086
✅ Submission berhasil dibuat: submissionMinyakGorengKemasanSederhana.csv


# Telur Ayam Ras

In [13]:
def save_submission(forecast_df, df_train, filename="submissionMinyakTelurAyamRas.csv"):
    ordered_provinces = df_train.columns.tolist()
    submission_results = []
    
    for date in forecast_df.index:
        for prov in ordered_provinces:
            submission_results.append({
                "id": f"Telur Ayam Ras/{prov}/{date.date()}",
                "price": round(forecast_df.loc[date, prov], 2)
            })
    
    submission_df = pd.DataFrame(submission_results)
    submission_df.to_csv(filename, index=False)
    print(f"✅ Submission berhasil dibuat: {filename}")

# Pipeline Execution
train_path = "Harga Bahan Pangan/train/Telur Ayam Ras.csv"
test_path = "Harga Bahan Pangan/test/Telur Ayam Ras.csv"

df_train, df_test = load_data(train_path, test_path)

# Preprocessing data: Log transform dan interpolasi
df_train, df_test = preprocess_data(df_train, df_test)

# Membagi data menjadi train dan validation set
df_train_data, df_val_data = split_train_validation(df_train)

# Melatih model VAR
model_fitted = train_var_model(df_train_data, max_lag=15)

# Evaluasi model menggunakan validation set
forecast_and_evaluate(model_fitted, df_train_data, df_val_data)

# Melakukan prediksi pada data test
forecast_df = forecast_test(model_fitted, df_train, df_test)
forecast_df = np.expm1(forecast_df)

# Menyimpan hasil prediksi dalam format submission
save_submission(forecast_df, df_train)

  self._init_dates(dates, freq)


Optimal Lag Order: 15
✅ MAPE pada validation set: 0.0049
✅ Submission berhasil dibuat: submissionMinyakTelurAyamRas.csv


# Tepung Terigu

In [14]:
def save_submission(forecast_df, df_train, filename="submissionMinyakTepungTerigu(Curah).csv"):
    ordered_provinces = df_train.columns.tolist()
    submission_results = []
    
    for date in forecast_df.index:
        for prov in ordered_provinces:
            submission_results.append({
                "id": f"Tepung Terigu (Curah)/{prov}/{date.date()}",
                "price": round(forecast_df.loc[date, prov], 2)
            })
    
    submission_df = pd.DataFrame(submission_results)
    submission_df.to_csv(filename, index=False)
    print(f"✅ Submission berhasil dibuat: {filename}")

# Pipeline Execution
train_path = "Harga Bahan Pangan/train/Tepung Terigu (Curah).csv"
test_path = "Harga Bahan Pangan/test/Tepung Terigu (Curah).csv"

df_train, df_test = load_data(train_path, test_path)

# Preprocessing data: Log transform dan interpolasi
df_train, df_test = preprocess_data(df_train, df_test)

# Membagi data menjadi train dan validation set
df_train_data, df_val_data = split_train_validation(df_train)

# Melatih model VAR
model_fitted = train_var_model(df_train_data, max_lag=15)

# Evaluasi model menggunakan validation set
forecast_and_evaluate(model_fitted, df_train_data, df_val_data)

# Melakukan prediksi pada data test
forecast_df = forecast_test(model_fitted, df_train, df_test)
forecast_df = np.expm1(forecast_df)

# Menyimpan hasil prediksi dalam format submission
save_submission(forecast_df, df_train)

  self._init_dates(dates, freq)


Optimal Lag Order: 15
✅ MAPE pada validation set: 0.0052
✅ Submission berhasil dibuat: submissionMinyakTepungTerigu(Curah).csv


In [15]:
import numpy as np

def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Pastikan df_val_data dan hasil prediksi sudah tersedia
y_true = df_val_data.values  # Data aktual
y_pred = model_fitted.forecast(df_train_data.values, steps=len(df_val_data))  # Prediksi

# Hitung MAPE untuk keseluruhan data
mape_score = mean_absolute_percentage_error(y_true, y_pred)

print(f"MAPE Keseluruhan: {mape_score:.2f}%")

MAPE Keseluruhan: 0.52%


In [17]:
import os
import pandas as pd

# Path folder tempat semua file submission disimpan
folder_path = "1submit"  # Ganti dengan path yang sesuai

# List semua file CSV dalam folder
csv_files = [f for f in os.listdir(folder_path) if f.endswith(".csv")]

# List untuk menyimpan data dari semua file
dataframes = []

# Loop melalui setiap file CSV dan baca ke dalam pandas
for file in csv_files:
    file_path = os.path.join(folder_path, file)
    df = pd.read_csv(file_path)
    dataframes.append(df)

# Gabungkan semua DataFrame menjadi satu
merged_df = pd.concat(dataframes, ignore_index=True)

# Simpan ke file CSV baru
merged_df.to_csv(os.path.join(folder_path, "submission_Final.1.csv"), index=False)

print("Semua file CSV berhasil digabungkan menjadi 'submission_Final.1.csv'")


Semua file CSV berhasil digabungkan menjadi 'submission_Final.1.csv'


In [18]:
sub = pd.read_csv("1submit/submission_Final.1.csv")
sub.duplicated().sum()

0