In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

def add_naive_forecast(df, target_col, months_back, start_date, horizon_months, text):
    """Add a flat naive-forecast column to ``df`` and return ``df``.

    The forecast is the ``target_col`` value observed ``months_back`` months
    before ``start_date``. That single value is written to every row whose
    index falls in the forecast window, which spans ``horizon_months``
    calendar months starting at ``start_date``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index is compared against dates; modified in place.
    target_col : str
        Column holding the observed values.
    months_back : int
        Lag, in months, between the forecast start and the observation used.
    start_date : str or datetime-like
        First date of the forecast window.
    horizon_months : int
        Number of months covered by the forecast window.
    text : str
        Free-text suffix used in the new column's name.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the column
        ``f"Naive: {months_back}-Month for {text}"`` added. The column is
        reset to NaN on every call and stays all-NaN when the reference
        date is not present in the index.
    """
    forecast_col = f"Naive: {months_back}-Month for {text}"
    df[forecast_col] = np.nan

    start = pd.to_datetime(start_date)
    end = start + pd.DateOffset(months=horizon_months - 1)
    reference_date = start - pd.DateOffset(months=months_back)

    # Use the observation at the reference date itself. Taking the last row
    # before `start` would ignore `months_back` and, for lags above one
    # month, leak data newer than the stated lag into the forecast.
    at_reference = df.index == reference_date
    if at_reference.any():
        # .iloc[-1] keeps the value scalar even if the date is duplicated.
        naive_value = df.loc[at_reference, target_col].iloc[-1]
        forecast_range = (df.index >= start) & (df.index <= end)
        df.loc[forecast_range, forecast_col] = naive_value

    return df


def getMetrics(df_d, df_r, date1, date2, forecast_type):
    """Record MAE and RMSE of one forecast column over a date window.

    Rows of ``df_d`` whose index lies in ``[date1, date2]`` are selected and
    the ``"Actual Rate"`` column is compared with the ``forecast_type``
    column. Only rows where both values are present are scored, so a window
    that is partly missing no longer makes the metric functions fail on NaN.

    Parameters
    ----------
    df_d : pandas.DataFrame
        Data frame containing ``"Actual Rate"`` and the forecast column.
    df_r : pandas.DataFrame
        Results frame; modified in place and returned.
    date1, date2 : str or datetime-like
        Inclusive bounds of the evaluation window.
    forecast_type : str
        Name of the forecast column. The text before the first ``" f"`` is
        used as the prefix of the result column labels.

    Returns
    -------
    pandas.DataFrame
        ``df_r`` with ``"<prefix>: MAE"`` and ``"<prefix>: RMSE"`` set on the
        row labelled with the month name of ``date1``. ``df_r`` is returned
        unchanged when the window has no row with both values present.
    """
    month_name = pd.to_datetime(date1).strftime('%B')
    mask = (df_d.index >= date1) & (df_d.index <= date2)
    y_true = df_d.loc[mask, "Actual Rate"]
    y_pred = df_d.loc[mask, forecast_type]

    # Positional boolean mask of rows usable for scoring; built from arrays
    # so it does not depend on index alignment.
    valid = y_true.notna().to_numpy() & y_pred.notna().to_numpy()
    if valid.any():
        label = forecast_type.split(" f")[0]
        y_true = y_true[valid]
        y_pred = y_pred[valid]
        df_r.loc[month_name, f"{label}: MAE"] = mean_absolute_error(y_true, y_pred)
        df_r.loc[month_name, f"{label}: RMSE"] = np.sqrt(mean_squared_error(y_true, y_pred))

    return df_r


In [3]:
import pandas as pd
from statsmodels.tsa.stattools import adfuller

# Read the cleaned dataset, parse the 'Date' column (e.g. "Jan-2020") into
# timestamps, and use it as a chronologically sorted index.
file_path = "../cleandata.csv"
df_clean = (
    pd.read_csv(file_path)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], format='%b-%Y'))
    .set_index('Date')
    .sort_index()
)

# Columns that are run through the stationarity tests.
forecast_data = [
    'LNG 174K CBM (2-stroke dual fuel) Spot Rate (avg., $/day)',
    'LNG Shipping Capacity (CBM)',
    'LNG Carrier Newbuilding Prices (end month, $m)',
    'LNG Orderbook (start month, CBM)',
    'Global price of Natural Gas, Asia (start month, MMBTU)',
    'Global price of Natural Gas, EU (start month, MMBTU)',
    'Global price of Natural Gas, US Henry Hub (start month, MMBTU)',
    'Spread\nEU-US',
    'Spread\nASIA-US',
    'Spread\nASIA-EU',
]

# Everything strictly before the forecast start date forms the training sample.
forecast_start_date = pd.to_datetime("2024-01-01")
is_training_row = df_clean.index < forecast_start_date
df_train = df_clean.loc[is_training_row]

# Significance level below which a series is flagged as stationary.
ADF_ALPHA = 0.05


def run_adf_tests(frame, columns, alpha=ADF_ALPHA):
    """Run the Augmented Dickey-Fuller test on each column of ``frame``.

    Missing values are dropped from each column before testing.

    Parameters
    ----------
    frame : pandas.DataFrame
        Data containing the columns to test.
    columns : iterable of str
        Names of the columns to test.
    alpha : float
        p-value threshold for the ``'Stationary'`` flag.

    Returns
    -------
    list of dict
        One record per column with the test statistic, p-value, the 1%/5%/10%
        critical values and the ``'Stationary'`` flag.
    """
    records = []
    for col in columns:
        result = adfuller(frame[col].dropna())
        critical_values = result[4]
        records.append({
            'Variable': col,
            'ADF Statistic': result[0],
            'p-value': result[1],
            'Critical Value 1%': critical_values['1%'],
            'Critical Value 5%': critical_values['5%'],
            'Critical Value 10%': critical_values['10%'],
            'Stationary': result[1] < alpha,
        })
    return records


# ADF test on the levels of the training sample.
adf_results = run_adf_tests(df_train, forecast_data)
adf_df = pd.DataFrame(adf_results).round(4)
display(adf_df)

# First-difference the training sample. dropna() removes every row that has
# a missing value in any column, which includes the first row created by
# diff(). NOTE: df_train refers to the differenced data from here on.
df_train = df_train.diff().dropna()
print(len(df_train))

# ADF test on the first differences.
adf_resultsdiff = run_adf_tests(df_train, forecast_data)
adfdiff_df = pd.DataFrame(adf_resultsdiff).round(4)
display(adfdiff_df)

Unnamed: 0,Variable,ADF Statistic,p-value,Critical Value 1%,Critical Value 5%,Critical Value 10%,Stationary
0,LNG 174K CBM (2-stroke dual fuel) Spot Rate (a...,-3.2793,0.0158,-3.5507,-2.9138,-2.5946,True
1,LNG Shipping Capacity (CBM),-1.1407,0.6986,-3.5464,-2.9119,-2.5937,False
2,"LNG Carrier Newbuilding Prices (end month, $m)",-1.2584,0.648,-3.5577,-2.9168,-2.5962,False
3,"LNG Orderbook (start month, CBM)",0.6049,0.9877,-3.5507,-2.9138,-2.5946,False
4,"Global price of Natural Gas, Asia (start month...",-1.5273,0.52,-3.5507,-2.9138,-2.5946,False
5,"Global price of Natural Gas, EU (start month, ...",-1.3566,0.6029,-3.5507,-2.9138,-2.5946,False
6,"Global price of Natural Gas, US Henry Hub (sta...",-2.4634,0.1246,-3.5529,-2.9147,-2.5951,False
7,Spread\nEU-US,-1.4512,0.5575,-3.5507,-2.9138,-2.5946,False
8,Spread\nASIA-US,-1.9563,0.3061,-3.5464,-2.9119,-2.5937,False
9,Spread\nASIA-EU,-4.4291,0.0003,-3.5464,-2.9119,-2.5937,True


59


Unnamed: 0,Variable,ADF Statistic,p-value,Critical Value 1%,Critical Value 5%,Critical Value 10%,Stationary
0,LNG 174K CBM (2-stroke dual fuel) Spot Rate (a...,-5.7216,0.0,-3.5685,-2.9214,-2.5987,True
1,LNG Shipping Capacity (CBM),-8.1677,0.0,-3.5485,-2.9128,-2.5941,True
2,"LNG Carrier Newbuilding Prices (end month, $m)",-1.2697,0.6429,-3.5577,-2.9168,-2.5962,False
3,"LNG Orderbook (start month, CBM)",-3.0779,0.0282,-3.5507,-2.9138,-2.5946,True
4,"Global price of Natural Gas, Asia (start month...",-6.6414,0.0,-3.5507,-2.9138,-2.5946,True
5,"Global price of Natural Gas, EU (start month, ...",-8.32,0.0,-3.5507,-2.9138,-2.5946,True
6,"Global price of Natural Gas, US Henry Hub (sta...",-3.8119,0.0028,-3.5577,-2.9168,-2.5962,True
7,Spread\nEU-US,-8.5117,0.0,-3.5507,-2.9138,-2.5946,True
8,Spread\nASIA-US,-6.9227,0.0,-3.5507,-2.9138,-2.5946,True
9,Spread\nASIA-EU,-9.0803,0.0,-3.5507,-2.9138,-2.5946,True
