In [12]:
import pandas as pd
from statsmodels.tsa.stattools import adfuller, kpss

In [13]:
# Load the yield dataset from CSV
file_path_new = '../datasets/CropSDEData/YIELD_NUTS0_NL.csv'
new_df = pd.read_csv(file_path_new)

# Keep only the rows for the crop / region pair under study
crop_name_new = 'potato'
region_code_new = 'NL'
filtered_data_new = new_df[(new_df['CROP'] == crop_name_new) & (new_df['IDREGION'] == region_code_new)]

# Fail here, with a clear message, rather than later inside the statistical
# tests if the filter matched nothing (e.g. a typo in the crop or region code).
if filtered_data_new.empty:
    raise ValueError(
        f"No rows found for CROP={crop_name_new!r} and IDREGION={region_code_new!r} "
        f"in {file_path_new}"
    )

# Extract the yield values as a NumPy array for the stationarity tests.
# NOTE(review): the tests below treat this as a time series, so the rows are
# assumed to already be in chronological order in the CSV — confirm.
yield_values_new = filtered_data_new['YIELD'].to_numpy()

In [14]:
# Spell out the null (H0) and alternative (H1) hypotheses of both tests up front.
# Note that the two tests have opposite null hypotheses.
print(
    "=== Hypotheses for ADF and KPSS Tests ===\n",
    "Augmented Dickey-Fuller (ADF) Test:",
    "H0: The series has a unit root (non-stationary)",
    "H1: The series is stationary\n",
    "KPSS Test:",
    "H0: The series is stationary",
    "H1: The series has a unit root (non-stationary)\n",
    sep="\n",
)

=== Hypotheses for ADF and KPSS Tests ===

Augmented Dickey-Fuller (ADF) Test:
H0: The series has a unit root (non-stationary)
H1: The series is stationary

KPSS Test:
H0: The series is stationary
H1: The series has a unit root (non-stationary)



In [15]:
# Augmented Dickey-Fuller (ADF) test on the raw (undifferenced) yield values.
# The result is a tuple; this cell reports element 0 (test statistic),
# element 1 (p-value) and element 4 (critical values).
adf_result_new = adfuller(yield_values_new)

print(
    "=== ADF Test (Before Differencing) ===",
    f"Test Statistic: {adf_result_new[0]}",
    f"p-value: {adf_result_new[1]}",
    f"Critical Values: {adf_result_new[4]}",
    sep="\n",
)

# Decision at the 5% significance level: a small p-value rejects the unit-root null.
print(
    "Result: Reject H0 (The series is stationary according to the ADF test)"
    if adf_result_new[1] < 0.05
    else "Result: Fail to reject H0 (The series is non-stationary according to the ADF test)"
)

=== ADF Test (Before Differencing) ===
Test Statistic: -3.0242267879728044
p-value: 0.03269303202291595
Critical Values: {'1%': -3.60098336718852, '5%': -2.9351348158036012, '10%': -2.6059629803688282}
Result: Reject H0 (The series is stationary according to the ADF test)


In [16]:
# KPSS test on the raw (undifferenced) yield values, using the constant-only
# ('c') regression option.
(
    kpss_result_new,
    kpss_p_value_new,
    kpss_lags_new,
    kpss_crit_new,
) = kpss(yield_values_new, regression='c')

# kpss() may warn that the statistic lies outside its look-up table; in that
# case the printed p-value is only a bound on the actual p-value.
print(
    "\n=== KPSS Test (Before Differencing) ===",
    f"Test Statistic: {kpss_result_new}",
    f"p-value: {kpss_p_value_new}",
    f"Critical Values: {kpss_crit_new}",
    sep="\n",
)

# Decision at the 5% significance level: here a small p-value rejects the
# stationarity null (the opposite reading from the ADF test).
print(
    "Result: Reject H0 (The series is non-stationary according to the KPSS test)"
    if kpss_p_value_new < 0.05
    else "Result: Fail to reject H0 (The series is stationary according to the KPSS test)"
)


=== KPSS Test (Before Differencing) ===
Test Statistic: 0.7768700528306137
p-value: 0.01
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Result: Reject H0 (The series is non-stationary according to the KPSS test)


look-up table. The actual p-value is smaller than the p-value returned.

  kpss_result_new, kpss_p_value_new, kpss_lags_new, kpss_crit_new = kpss(yield_values_new, regression='c')


In [17]:
# First-order differencing to remove potential trends: wrap the raw values in a
# Series, take the change between consecutive observations, then drop the
# missing values (the first observation has no predecessor to subtract).
differenced_yield_new = (
    pd.Series(yield_values_new)
    .diff()
    .dropna()
)

In [18]:
# Augmented Dickey-Fuller (ADF) test on the first-differenced series.
# The result is a tuple; this cell reports element 0 (test statistic),
# element 1 (p-value) and element 4 (critical values).
adf_diff_result_new = adfuller(differenced_yield_new)

print(
    "\n\n=== ADF Test (After Differencing) ===",
    f"Test Statistic: {adf_diff_result_new[0]}",
    f"p-value: {adf_diff_result_new[1]}",
    f"Critical Values: {adf_diff_result_new[4]}",
    sep="\n",
)

# Decision at the 5% significance level: a small p-value rejects the unit-root null.
print(
    "Result: Reject H0 (The differenced series is stationary according to the ADF test)"
    if adf_diff_result_new[1] < 0.05
    else "Result: Fail to reject H0 (The differenced series is non-stationary according to the ADF test)"
)



=== ADF Test (After Differencing) ===
Test Statistic: -8.364508509960169
p-value: 2.766150297548819e-13
Critical Values: {'1%': -3.584828853223594, '5%': -2.9282991495198907, '10%': -2.6023438271604937}
Result: Reject H0 (The differenced series is stationary according to the ADF test)


In [19]:
# KPSS test on the first-differenced series, using the constant-only ('c')
# regression option.
(
    kpss_diff_result_new,
    kpss_diff_p_value_new,
    kpss_diff_lags_new,
    kpss_diff_crit_new,
) = kpss(differenced_yield_new, regression='c')

# kpss() may warn that the statistic lies outside its look-up table; in that
# case the printed p-value is only a bound on the actual p-value.
print(
    "\n=== KPSS Test (After Differencing) ===",
    f"Test Statistic: {kpss_diff_result_new}",
    f"p-value: {kpss_diff_p_value_new}",
    f"Critical Values: {kpss_diff_crit_new}",
    sep="\n",
)

# Decision at the 5% significance level: here a small p-value rejects the
# stationarity null (the opposite reading from the ADF test).
print(
    "Result: Reject H0 (The differenced series is non-stationary according to the KPSS test)"
    if kpss_diff_p_value_new < 0.05
    else "Result: Fail to reject H0 (The differenced series is stationary according to the KPSS test)"
)


=== KPSS Test (After Differencing) ===
Test Statistic: 0.2889938963652828
p-value: 0.1
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Result: Fail to reject H0 (The differenced series is stationary according to the KPSS test)


look-up table. The actual p-value is greater than the p-value returned.

  kpss_diff_result_new, kpss_diff_p_value_new, kpss_diff_lags_new, kpss_diff_crit_new = kpss(differenced_yield_new, regression='c')


- Both the ADF and KPSS tests now indicate that the differenced series is stationary.
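- On the original series the two tests disagreed: the ADF test rejected a unit root (p ≈ 0.033), while the KPSS test rejected stationarity (reported p-value 0.01, the lower bound of its look-up table, so the actual p-value is smaller). This conflict has been resolved through differencing, which is consistent with differencing having removed any trend or structural non-stationarity present in the original series.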