### Time Series Forecasting

> [From Data Preparation to Stationarity and Predictive Modeling](https://medium.com/data-science-collective/hands-on-time-series-forecasting-43ccbd418c9a)

In [None]:
# 1. Imports

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as m
import seaborn as sns
import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose

import warnings
warnings.filterwarnings("ignore")

In [None]:
# 2. Formatting Graph Outputs

# Start from Matplotlib's stock style so the overrides below apply to a clean baseline
plt.style.use('default')

# pandas display: show up to 500 columns, and leave the width unset (None)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', None)

# Matplotlib defaults, applied in a single update.
# `m.rcParams` and `plt.rcParams` are the same object, so this also affects pyplot.
m.rcParams.update({
    # Default figure size
    'figure.figsize': (12, 6),

    # Font sizes for axis labels and tick labels
    'axes.labelsize': 12,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,

    # Bold axes titles, black text
    'axes.titleweight': 'bold',
    'text.color': 'black',

    # Thicker plot lines
    'lines.linewidth': 2,

    # Axes frame: thin gray spines, artists drawn above the grid, grid off by default
    'axes.axisbelow': True,
    'axes.grid': False,
    'axes.linewidth': 0.8,
    'axes.edgecolor': 'gray',

    # Grid appearance (used only when a plot turns the grid on): gray, dotted, thin, 50% opacity
    'grid.color': 'gray',
    'grid.linestyle': ':',
    'grid.linewidth': 0.8,
    'grid.alpha': 0.5,
})

In [None]:
# 3. Loading the Data
# header=None: every row of the file is read as data, so the columns start out numbered 0, 1, ...
df = pd.read_csv(
    '../../data/energy_consumption_dataset.csv',
    header=None,
)

# 4. Visualizing the Data
df.head()

In [None]:
# 5. Renaming Columns
# Replace the numeric column labels with descriptive names (order: first column, second column)
df = df.set_axis(['Month', 'Energy_Consumption'], axis=1)

# 6. Visualizing the Data
df.head()

In [None]:
# 7. Statistical summary of `energy_consumption`
# (count, mean, std, min, quartiles and max; per step 14 `Month` is still an object column here)
df.describe()

In [None]:
# 8. Month with the highest `energy_consumption`
# df.max() would return each column's maximum independently (the largest `Month` label and
# the largest reading), and those two values need not come from the same row.
# Look up the row that holds the peak reading instead.
df.loc[df['Energy_Consumption'].idxmax()]

In [None]:
# 9. Collecting the full period of the time series
# `Month` is still text at this point (it is converted in step 14); min()/max() therefore
# compare strings, which matches chronological order only for zero-padded YYYY-MM labels.
# The separator is a real newline ('\n'); the doubled backslash used before printed
# the two characters "\n" literally.
print('Start of Time Period: {}\nEnd of Time Period: {}'.format(
    df.Month.min(), df.Month.max()
))

In [None]:
# 10. Shape of the dataset as (rows, columns)
df.shape

In [None]:
# 11. Object type of `df` (the value returned by pd.read_csv in step 3)
type(df)

In [None]:
# 12. Viewing the first rows again before inspecting the column types
df.head()

In [None]:
# 13. Data type of each column
df.dtypes

In [None]:
# 14. Converting `Month` column from object to datetime
# The labels are parsed with the explicit pattern year-month ('%Y-%m')
month_as_datetime = pd.to_datetime(df['Month'], format='%Y-%m')
df = df.assign(Month=month_as_datetime)

# 15. Viewing the data
df.head()

In [None]:
# 16. Setting `Month` column as index
# A date index lets the plotting, rolling-window and decomposition steps below
# work directly on the series. `df` itself is left unchanged.
df_series = df.set_index(keys='Month')

# 17. Viewing the data
df_series.head()

In [None]:
# 18. Viewing the index (the `Month` values moved there in step 16)
df_series.index

In [None]:
# 19. Checking for missing values
# Count of missing entries per column (isna is the same check as isnull)
df_series.isna().sum()

#### Time Series Trend Analysis

In [None]:
# 20. Plot the time series to observe trends
# DataFrame.plot draws every column against the date index and returns the Axes it used
trend_ax = df_series.plot(figsize=(15, 6))
plt.show()

#### Time Series Density Plot

In [None]:
# 21.a Plot density analysis for Energy_Consumption: two stacked panels on one figure
fig, (hist_ax, kde_ax) = plt.subplots(2, 1, figsize=(10, 8))

# 21.b Histogram to visualize frequency distribution
df_series['Energy_Consumption'].hist(bins=30, edgecolor='black', ax=hist_ax)
hist_ax.set_title('Energy Consumption - Histogram')

# 21.c KDE plot to analyze distribution shape
df_series['Energy_Consumption'].plot(
    kind='kde', linewidth=2, color='darkblue', ax=kde_ax
)
kde_ax.set_title('Energy Consumption - Density Plot')

# Keep the two panels' titles and tick labels from overlapping
fig.tight_layout()
plt.show()

#### Box Plot Series Trend Analysis

In [None]:
# 22.a Define the plotting area for the boxplots
fig, ax = plt.subplots(figsize=(15, 6))

# 22.b Grouping key (calendar year of each observation) and the values to summarise
energy_values = df_series['Energy_Consumption']
year_index = df_series.index.year

# 22.c One box per year, drawn with Seaborn on the axes created above
sns.boxplot(x=year_index, y=energy_values, ax=ax)

# 22.d Axis labels (the leading newline is part of each label text)
ax.set(xlabel="\nYear", ylabel="\nEnergy Consumption")

plt.show()

#### Visualizing Time Series Decomposition

In [None]:
# 23.a Perform multiplicative decomposition of the time series
# (`seasonal_decompose` was imported directly in the imports cell; it is the same
# function that `sm.tsa.seasonal_decompose` exposes.)
decomposition_multiplicative = seasonal_decompose(
    df_series,
    model='multiplicative',
    extrapolate_trend='freq',
)

# 23.b Check the object type of the decomposition result
type(decomposition_multiplicative)

In [None]:
# 24.a Configure plot size
# NOTE: this changes the global default, so later figures that do not pass their own
# figsize are affected as well.
plt.rcParams['figure.figsize'] = (16, 10)

# 24.b Plot the multiplicative decomposition and title the resulting figure
multiplicative_fig = decomposition_multiplicative.plot()
multiplicative_fig.suptitle('Multiplicative Decomposition', fontsize=22)

plt.show()

In [None]:
# 25.a Perform additive decomposition of the time series
# (`seasonal_decompose` was imported directly in the imports cell; it is the same
# function that `sm.tsa.seasonal_decompose` exposes.)
decomposition_additive = seasonal_decompose(
    df_series,
    model='additive',
    extrapolate_trend='freq',
)

# 25.b Check the object type of the additive decomposition result
type(decomposition_additive)

In [None]:
# 25.c Plot the additive decomposition
# DecomposeResult.plot() builds its own figure, so calling plt.figure() beforehand only
# leaves an empty extra canvas in the output. Size the figure that plot() returns instead.
fig = decomposition_additive.plot()
fig.set_size_inches(16, 10)

# 25.d Add a title to the decomposition figure
fig.suptitle('Additive Decomposition', fontsize=22)
plt.show()

In [None]:
# 26.a Concatenate the decomposed components of the time series
# Order matters: the next cell names the columns positionally
# (seasonal, trend, residual, observed).
multiplicative_components = [
    getattr(decomposition_multiplicative, component)
    for component in ('seasonal', 'trend', 'resid', 'observed')
]
df_series_reconstructed = pd.concat(multiplicative_components, axis=1)

In [None]:
# 26.b Rename columns to reflect the decomposed components
# (same order in which the components were concatenated)
df_series_reconstructed = df_series_reconstructed.set_axis(
    ['Seasonality', 'Trend', 'Residuals', 'Observed_Values'], axis=1
)

# 26.c Display the first rows of the reconstructed time series DataFrame
df_series_reconstructed.head()

In [None]:
# 27.a Rebuild the first observation from its multiplicative components
# In a multiplicative decomposition: observed = seasonality * trend * residual.
# The factors are read from the first row of the table instead of being typed in by hand,
# so the check stays valid (and uses full precision) if the data changes.
first_row = df_series_reconstructed.iloc[0]
observed_value = (
    first_row['Seasonality'] * first_row['Trend'] * first_row['Residuals']
)

# 27.b Print the computed value (compare with `Observed_Values` in the first row)
print(observed_value)

#### Time Series Statistical Modeling

Before fitting any predictive model, check that the series satisfies the following assumptions:

- **Stationarity** — The series must be stationary, meaning its mean and variance do not change over time.
- **No autocorrelation** — Observations should not be strongly correlated with previous values.
- **Normal distribution** — Many models assume that time series values follow a normal distribution.

#### Rolling Statistics Plot (Moving Averages)

A quick way to check stationarity is by plotting rolling statistics.

If the series fails this first visual check, it is clearly non-stationary, and we can move straight to data transformation.

In [None]:
# 28. Check the type of the dataset (the date-indexed frame built in step 16)
type(df_series)

In [None]:
# 29. Compute rolling statistics for trend analysis
# Both statistics share the same 12-observation window. The first 11 positions are NaN
# because a full window is not yet available there.
energy_window = df_series['Energy_Consumption'].rolling(window=12)

# 29.a 12-month moving average
rolling_mean = energy_window.mean()

# 29.b 12-month rolling standard deviation
rolling_std = energy_window.std()

In [None]:
# 31. Plot rolling statistics for trend and variance analysis
fig, ax = plt.subplots(figsize=(12, 6))

# 31.a Original time series values
ax.plot(df_series['Energy_Consumption'], color='blue', label='Original')

# 31.b Moving average (rolling mean)
ax.plot(rolling_mean, color='red', label='Moving Average')

# 31.c Rolling standard deviation
ax.plot(rolling_std, color='black', label='Standard Deviation')

# 31.d Legend and title
ax.legend(loc='best')
ax.set_title('Rolling Statistics - Moving Average & Standard Deviation')
plt.show()

#### Autocorrelation and Partial Autocorrelation Plots

Autocorrelation happens when a value in the series is correlated with its past values.

To validate this, we’ll use:

- **ACF (Autocorrelation Function)** plot
- **PACF (Partial Autocorrelation Function)** plot

In [None]:
# 32. Import functions for ACF and PACF plots
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# 33. Set global figure size for visualization
# NOTE: this changes the default for every later figure that does not pass its own figsize.
plt.rcParams['figure.figsize'] = (16, 10)

# 34./35. One figure with two stacked panels: ACF on top, PACF below
fig, (acf_ax, pacf_ax) = plt.subplots(2, 1)

# 34. ACF (Autocorrelation Function) with 30 lags
plot_acf(df_series['Energy_Consumption'], ax=acf_ax, lags=30)

# 35. PACF (Partial Autocorrelation Function) with 30 lags, method 'ywm'
plot_pacf(df_series['Energy_Consumption'], ax=pacf_ax, lags=30, method='ywm')

plt.show()

#### Augmented Dickey-Fuller Test

The ADF test is a hypothesis test used to check whether a time series is stationary.

If a series is integrated (non-stationary), the lagged values (`y(t-1)`) will not provide information for predicting changes in `y` at time `t`.

The test evaluates two hypotheses:

- **Null hypothesis (H₀)**: The time series is non-stationary.
- **Alternative hypothesis (H₁)**: The time series is stationary.

##### Decision Rule
- If the `p-value < 0.05`, we reject `H₀`, meaning the series is stationary.
- If the `p-value ≥ 0.05`, we fail to reject `H₀`, meaning the series is not stationary.

In [None]:
# 36. Import Dickey-Fuller test function
from statsmodels.tsa.stattools import adfuller

# 37. Perform the Augmented Dickey-Fuller (ADF) test
# 37.a Print test header
# ('\n' is a real line break; the doubled backslash used before printed "\n" literally)
print('\nDickey-Fuller Test Results:\n')
# 37.b Apply ADF test on the time series (lag length chosen by AIC)
adf_test = adfuller(df_series['Energy_Consumption'],
                    autolag='AIC')

# 38. Format the test output
# 38.a The first four items of the result are the statistic, p-value, lags and observations
df_output = pd.Series(adf_test[0:4], index=[
    'Test Statistic', 'p-value',
    'Lags Used', 'Observations Used'])

# 38.b The fifth item maps each significance level to its critical value
for key, value in adf_test[4].items():
    df_output[f'Critical Value ({key})'] = value

# 39. Display test results
print(df_output)

In [None]:
# 40. Function to test stationarity in time series
def test_stationarity(series):
    """Show 12-period rolling statistics for ``series`` and print an ADF test report.

    Draws the series together with its rolling mean and rolling standard deviation,
    prints the Augmented Dickey-Fuller statistics and critical values, then prints a
    verdict using the 0.05 p-value threshold. Returns nothing.

    NOTE: a later cell defines another function with this same name; once that cell
    has run, it replaces this version.
    """
    # 40.a Rolling statistics over one shared 12-observation window
    window = series.rolling(12)
    moving_avg, moving_std = window.mean(), window.std()

    # 40.b Plot the series and both rolling statistics on one figure
    plt.figure(figsize=(12, 6))
    for curve, colour, name in (
        (series, 'blue', 'Original'),
        (moving_avg, 'red', 'Moving Avg'),
        (moving_std, 'black', 'Std Dev'),
    ):
        plt.plot(curve, color=colour, label=name)
    plt.legend(loc='best')
    plt.title('Rolling Statistics')
    plt.show()

    # 40.c Dickey-Fuller test (lag length chosen by AIC)
    print("\nDickey-Fuller Test Results:\n")
    result = adfuller(series, autolag='AIC')

    # 40.d Headline numbers: the first four items of the result
    report = dict(zip(
        ['Test Statistic', 'p-value', 'Lags Used', 'Observations Used'],
        result[0:4],
    ))

    # 40.e Critical values: the fifth item maps significance level -> critical value
    report.update({
        f'Critical Value ({level})': cutoff
        for level, cutoff in result[4].items()
    })

    # 40.f Display results
    print(pd.Series(report))

    # 40.g Verdict based on the p-value (second item of the result)
    verdict = (
        "\nThe series is stationary (p < 0.05)."
        if result[1] < 0.05
        else "\nThe series is non-stationary (p >= 0.05)."
    )
    print(verdict)

# 41. Execute the stationarity test function
# Pass the column explicitly. Handing over the whole frame only works while it holds a
# single column; later cells add more columns to `df_series`, and re-running this cell
# would then feed several columns to the test and the plot.
test_stationarity(df_series['Energy_Consumption'])

#### Transforming Non-Stationary Time Series

##### Log Transformation

The main goal is to forecast energy consumption using time series methods.

Log transformation is widely used in data science when adjusting variable scales. The process is straightforward: call the log function, pass the target variable, and execute the transformation.

In [None]:
# 42. Apply log transformation to stabilize variance
# Natural logarithm of every reading, stored as a new column next to the original
df_series['Energy_Consumption_Log'] = (
    df_series['Energy_Consumption'].pipe(np.log)
)

# 43. Display the first rows of the transformed dataset
df_series.head()

In [None]:
# 44. Plot the transformed time series (log of energy consumption against the date index)
plt.plot(df_series['Energy_Consumption_Log'], 
         color="blue")

In [None]:
# 45. Plot histogram of the transformed series
# A single histogram gets its own full-size axes. The previous plt.subplot(2, 1, 1) call
# reserved a two-row grid and drew into the top half only, leaving the rest blank.
fig, ax = plt.subplots()
ax.hist(df_series['Energy_Consumption_Log'], color="blue")
plt.show()

In [None]:
# 46. Function to test stationarity in time series
def test_stationarity(series, window=12, significance=0.05):
    """Plot rolling statistics for ``series`` and report an Augmented Dickey-Fuller test.

    Parameters
    ----------
    series : pandas.Series
        Series to analyse (the notebook also passes single-column DataFrames).
    window : int, default 12
        Number of observations in the rolling mean / standard deviation window.
    significance : float, default 0.05
        p-value threshold for the verdict. Following the decision rule stated in the
        ADF section, ``p >= significance`` fails to reject H₀ (non-stationary) and
        ``p < significance`` rejects it (stationary).

    Returns
    -------
    None
        The figure is shown and the report is printed.
    """
    # 46.a Import ADF test function (kept local so the function is self-contained)
    from statsmodels.tsa.stattools import adfuller

    # 46.b Compute rolling statistics over one shared window
    rolling = series.rolling(window)
    rolling_mean = rolling.mean()
    rolling_std = rolling.std()

    # 46.c Plot rolling statistics
    plt.figure(figsize=(12, 6))
    plt.plot(series, color='blue', label='Original')
    plt.plot(rolling_mean, color='red', label='Moving Avg')
    plt.plot(rolling_std, color='black', label='Std Dev')
    plt.legend(loc='best')
    plt.title('Rolling Statistics')
    plt.show()

    # 46.d Perform the Dickey-Fuller test (lag length chosen by AIC)
    print("\nDickey-Fuller Test Results:\n")
    adf_test = adfuller(series, autolag='AIC')

    # 46.e Format the test output: first four items are the headline numbers
    df_output = pd.Series(adf_test[0:4], index=[
        'Test Statistic', 'p-value',
        'Lags Used', 'Observations Used'])

    # 46.f Append critical values (fifth item: significance level -> critical value)
    for key, value in adf_test[4].items():
        df_output[f'Critical Value ({key})'] = value

    # 46.g Display results
    print(df_output)

    # 46.h Interpret stationarity based on p-value.
    # The boundary case p == significance counts as "fail to reject", matching the
    # stated decision rule; previously it fell into the "rejecting H₀" branch.
    p_value = adf_test[1]
    print("\nConclusion:")
    if p_value >= significance:
        print(f"\nThe p-value is >= {significance}, failing to reject H₀.")
        print("The series is likely non-stationary.")
    else:
        print(f"\nThe p-value is < {significance}, rejecting H₀.")
        print("The series is likely stationary.")

# 47. Execute the stationarity test on log-transformed data
# (uses the `test_stationarity` defined just above, which replaces the earlier definition)
test_stationarity(df_series['Energy_Consumption_Log'])

##### Square Root Transformation

In [None]:
# 48.a Apply square root transformation and store it as a new column
df_series['Energy_Consumption_Sqrt'] = (
    df_series['Energy_Consumption'].pipe(np.sqrt)
)

# 48.b Plot transformed time series
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df_series.index, df_series['Energy_Consumption_Sqrt'], color="green")

# 48.c Format x-axis for better visualization
plt.xticks(rotation=45)
ax.set_xlabel("Year")
ax.set_ylabel("Square Root of Energy Consumption")
ax.set_title("Transformed Time Series - Square Root")
plt.show()

# 48.d Plot histogram of transformed series (drawn on a fresh figure after the show above)
plt.hist(df_series['Energy_Consumption_Sqrt'], color="green")
plt.show()

# 49. Execute stationarity test on sqrt-transformed data
test_stationarity(df_series['Energy_Consumption_Sqrt'])

##### Box-Cox Transformation

Unlike log and square root transformations, which are fixed, Box-Cox allows customization through the lambda parameter, controlling how the transformation is applied. This flexibility helps adjust the transformation without needing more advanced techniques.

In [None]:
# 50. Import Box-Cox transformation function
from scipy.stats import boxcox

# 50.a Apply Box-Cox transformation (λ = 0)
# λ must be passed explicitly. Without `lmbda`, boxcox() fits the maximum-likelihood λ
# (which is what step 51 does) rather than using λ = 0. With `lmbda` given, boxcox()
# returns only the transformed array, so there is no second value to unpack.
df_series['Energy_Consumption_Box'] = boxcox(
    df_series['Energy_Consumption'], lmbda=0)

# 50.b Display first rows of transformed data
df_series.head()

In [None]:
# 50.c Plot transformed time series (Box-Cox column from step 50.a against the date index)
plt.plot(df_series['Energy_Consumption_Box'], 
         color="blue")

In [None]:
# 50.d Plot histogram of transformed series (matplotlib's default binning)
plt.hist(df_series['Energy_Consumption_Box'], 
         color="blue")
plt.show()

In [None]:
# 51. Import Box-Cox transformation function
from scipy.stats import boxcox

# 51.a Apply Box-Cox transformation and find optimal λ
# Called without `lmbda`, boxcox() returns the transformed values together with the fitted λ
boxcox_values, lambda_boxcox = boxcox(df_series['Energy_Consumption'])
df_series['Energy_Consumption_Box'] = boxcox_values

# 51.b Display the optimal lambda value
print(f"Optimal Box-Cox lambda: {lambda_boxcox:.4f}")

# 51.c Display first rows of transformed data
df_series.head()

In [None]:
# 51.d Plot transformed time series
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df_series.index, df_series['Energy_Consumption_Box'], color="blue")

# 51.e Format x-axis for better readability; the title reports the fitted λ
plt.xticks(rotation=45)
ax.set_xlabel("Year")
ax.set_ylabel("Box-Cox Transformed Energy Consumption")
ax.set_title(f"Box-Cox Transformation (λ={lambda_boxcox:.4f})")
plt.show()

In [None]:
# 51.f Plot histogram of transformed series (30 bins, outlined bars)
fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(
    df_series['Energy_Consumption_Box'],
    bins=30, color="blue", edgecolor="black",
)

# 51.g Format histogram for better visualization
ax.set_xlabel("Transformed Values")
ax.set_ylabel("Frequency")
ax.set_title("Histogram of Box-Cox Transformed Data")
plt.show()

In [None]:
# 52.a Execute stationarity test on Box-Cox transformed data
# (the `Energy_Consumption_Box` column as last written in step 51.a)
test_stationarity(df_series['Energy_Consumption_Box'])

In [None]:
# 52.b Execute stationarity test on alternative Box-Cox transformed data
# No earlier cell creates `Energy_Consumption_Box_2`, so reading it raised a KeyError.
# Build the alternative here with λ fixed at 0 (with `lmbda` given, boxcox() returns
# only the transformed array).
# NOTE(review): λ = 0 is assumed to be the intended "alternative" to the fitted λ — confirm.
df_series['Energy_Consumption_Box_2'] = boxcox(
    df_series['Energy_Consumption'], lmbda=0
)
test_stationarity(df_series['Energy_Consumption_Box_2'])

##### Smoothing with Simple Moving Average

Two smoothing techniques:

- **Simple Moving Average (SMA)**
- **Exponential Weighted Moving Average (EWMA)**

The simple moving average takes the mean of the last K values, where K is chosen according to the series frequency (here K = 12 for monthly data). A limitation of this method is that the window size must be fixed in advance.

In [None]:
# 53.a Compute the mean of every column currently in the time series frame
series_mean = df_series.mean()
print(series_mean)

In [None]:
# 53.b Create a copy of the time series frame as it stands before the smoothed column is added
# (the copy is not referenced again in the cells that follow)
df_series_copy = df_series.copy()

In [None]:
# 53.c Apply a 12-month moving average
# The first 11 rows have no complete window and are therefore NaN
energy_window_12 = df_series['Energy_Consumption'].rolling(window=12)
df_series['Energy_Consumption_Smoothed'] = energy_window_12.mean()

# 53.d Display the dataset with the smoothed series
df_series

In [None]:
# 54. Display the first 20 records of the time series
# (enough rows to see where the 12-month average stops being NaN)
df_series.head(20)

In [None]:
# 55.a Plot original time series
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    df_series.index, df_series['Energy_Consumption'],
    color='green', linewidth=2, label='Original',
)

# 55.b Customize plot aesthetics
plt.xticks(rotation=45)
ax.set_xlabel("Year")
ax.set_ylabel("Energy Consumption")
ax.set_title("Original Time Series", fontsize=14, fontweight='bold')
ax.grid(False)
ax.legend()
plt.show()

In [None]:
# 55.c Plot smoothed time series
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    df_series.index, df_series['Energy_Consumption_Smoothed'],
    color='blue', linewidth=2, label='Smoothed',
)

# 55.d Customize plot aesthetics
plt.xticks(rotation=45)
ax.set_xlabel("Year")
ax.set_ylabel("Smoothed Energy Consumption")
ax.set_title("Smoothed Time Series", fontsize=14, fontweight='bold')
ax.grid(False)
ax.legend()
plt.show()

In [None]:
# 55.e Plot original and smoothed series on the same axes
fig, ax = plt.subplots(figsize=(12, 6))

# 55.f Original series, slightly transparent so the smoothed line stands out
ax.plot(
    df_series.index, df_series['Energy_Consumption'],
    color='green', linewidth=2, alpha=0.6, label='Original',
)

# 55.g Smoothed series
ax.plot(
    df_series.index, df_series['Energy_Consumption_Smoothed'],
    color='blue', linewidth=2, label='Smoothed',
)

# 55.h Customize plot aesthetics
plt.xticks(rotation=45)
ax.set_xlabel("Year")
ax.set_ylabel("Energy Consumption")
ax.set_title("Original vs Smoothed", fontsize=14, fontweight='bold')
ax.grid(False)
ax.legend()
plt.show()

In [None]:
# 56.a Compute the difference between original
# and smoothed series to remove trend
# (rows where the moving average is NaN stay NaN in the difference)
diff_original_smoothed = df_series['Energy_Consumption'].sub(
    df_series['Energy_Consumption_Smoothed']
)

# 56.b Display first 20 records
diff_original_smoothed.head(20)

In [None]:
# 56.c Remove NA values for stationarity test
# (rebinding the name instead of mutating in place; the resulting series is the same)
diff_original_smoothed = diff_original_smoothed.dropna()

In [None]:
# 58.a Remove NaN and infinite values
# np.isfinite is False for NaN as well as for +/-inf, so one mask covers both
diff_original_smoothed = diff_original_smoothed[
    np.isfinite(diff_original_smoothed)
]

# 58.b Execute stationarity test
test_stationarity(diff_original_smoothed)

# 58.c Refined plot for rolling statistics
fig, ax = plt.subplots(figsize=(12, 6))

# 58.d Plot original (detrended) series
ax.plot(diff_original_smoothed, color='royalblue',
        linewidth=1.5, alpha=0.75, label='Original')

# 58.e Compute rolling mean and standard deviation over a shared 12-observation window
detrended_window = diff_original_smoothed.rolling(window=12)
rolling_mean = detrended_window.mean()
rolling_std = detrended_window.std()

# 58.f Plot rolling mean and std deviation
ax.plot(rolling_mean, color='red', linewidth=2, label='Moving Average')
ax.plot(rolling_std, color='black', linewidth=2,
        linestyle='dashed', label='Std Deviation')

# 58.g Customize visualization aesthetics
plt.xticks(rotation=45)
ax.set_xlabel("Year")
ax.set_ylabel("Differenced Values")
ax.set_title("Refined Rolling Statistics", fontsize=14, fontweight='bold')
ax.legend()
ax.grid(False)
plt.show()

##### Exponential Weighted Moving Average (EWMA)

The simple moving average works but causes data loss due to the fixed 12-month window.

An alternative is the exponential weighted moving average (EWMA), which gives more weight to recent values.

Unlike the simple moving average, EWMA doesn’t require a fixed window size, and recent values influence the trend more strongly.

In [None]:
# 59.a Apply exponential smoothing to the original series
# alpha=0.2 is the smoothing factor; adjust=True is passed explicitly
exp_weighted = df_series['Energy_Consumption'].ewm(alpha=0.2, adjust=True)
df_series['Energy_Consumption_Exp'] = exp_weighted.mean()

# 59.b Display the smoothed series
df_series['Energy_Consumption_Exp'].head()

In [None]:
# 59.c Plot original and exponentially smoothed series
fig, ax = plt.subplots(figsize=(12, 6))

# 59.d Original series, muted so the smoothed line stands out
ax.plot(
    df_series.index, df_series['Energy_Consumption'],
    color='gray', linewidth=1.5, alpha=0.6, label='Original',
)

# 59.e Exponentially smoothed series
ax.plot(
    df_series.index, df_series['Energy_Consumption_Exp'],
    color='magenta', linewidth=2, label='Exp Smoothing',
)

# 59.f Customize visualization
plt.xticks(rotation=45)
ax.set_xlabel("Year")
ax.set_ylabel("Energy Consumption")
ax.set_title("Exponential Smoothing", fontsize=14, fontweight='bold')
ax.legend()
ax.grid(False)
plt.show()

In [None]:
# 60.a Compute the difference between original
# and exponentially smoothed series,
# 60.b dropping any NaN values before testing stationarity
diff_original_exp_smooth = (
    df_series['Energy_Consumption']
    .sub(df_series['Energy_Consumption_Exp'])
    .dropna()
)

# 60.c Execute stationarity test
test_stationarity(diff_original_exp_smooth)

#### Differencing to Remove Trend

In [None]:
# 61.a Apply log transformation to normalize scale
# (recomputes the same column created in step 42)
df_series['Energy_Consumption_Log'] = (
    df_series['Energy_Consumption'].pipe(np.log)
)

# 61.b Plot log-transformed series
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    df_series.index, df_series['Energy_Consumption_Log'],
    color='blue', linewidth=1.5, label='Log Transformed',
)
ax.set_xlabel("Year")
ax.set_ylabel("Log Energy Consumption")
ax.set_title("Log Transformation", fontsize=14, fontweight='bold')
ax.legend()
ax.grid(False)
plt.show()

In [None]:
# 62.a Compute first-order differencing
# Series.diff() is each value minus the previous one; the first row has no predecessor (NaN)
df_series['Energy_Consumption_Diff1'] = (
    df_series['Energy_Consumption_Log'].diff()
)

# 62.b Plot first-order differenced series
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    df_series.index, df_series['Energy_Consumption_Diff1'],
    color='red', linewidth=1.5, label='First-Order Differencing',
)
ax.set_xlabel("Year")
ax.set_ylabel("Differenced Values")
ax.set_title("First-Order Differencing", fontsize=14, fontweight='bold')
ax.legend()
ax.grid(False)
plt.show()

In [None]:
# 63.a Define function for custom differencing
def diffFunc(dataset, interval=1):
    """Return the lag-``interval`` differences of ``dataset`` as a list.

    Element ``k`` of the result is the value at position ``k + interval`` minus the
    value at position ``k``. The result is ``interval`` items shorter than the input,
    and empty when the input has ``interval`` items or fewer.

    Parameters
    ----------
    dataset : iterable of numbers
        Values in order (list, tuple, NumPy array, pandas Series, ...).
    interval : int, default 1
        Lag between the two values being subtracted; must be at least 1.

    Returns
    -------
    list
        The differenced values.

    Raises
    ------
    ValueError
        If ``interval`` is smaller than 1.
    """
    if interval < 1:
        raise ValueError("interval must be a positive integer")

    # Materialise the values first so that the subtraction is purely positional.
    # `series[i]` on a pandas Series looks up *labels*; with a date index it only worked
    # through a deprecated positional fallback.
    values = list(dataset)

    # Pair every value with the one `interval` positions after it
    return [later - earlier for earlier, later in zip(values, values[interval:])]

# 63.b Apply differencing to the log-transformed series
# Fixes in this step:
#   * the helper defined above is `diffFunc` (there is no `apply_differencing`);
#   * diffFunc returns a plain list that is one item shorter than its input, so it
#     cannot be assigned to a full-length column directly;
#   * the column is named `Energy_Consumption_Diff2`, the name the following cells read.
# The raw array is passed so the helper indexes by position, not by date label.
log_diff = diffFunc(df_series['Energy_Consumption_Log'].to_numpy())

# 63.c Align the differences on the trailing dates; the first row is left as NaN
df_series['Energy_Consumption_Diff2'] = pd.Series(
    log_diff,
    index=df_series.index[len(df_series) - len(log_diff):],
    dtype=float,
)

# 63.d Plot the differenced series
# NOTE(review): with the default interval of 1 this is a lag-1 difference of the log
# series, i.e. the same quantity as `Energy_Consumption_Diff1`; the labels below are
# kept as written.
plt.figure(figsize=(12, 6))
plt.plot(df_series.index, df_series['Energy_Consumption_Diff2'],
         color='purple', lw=1.5, label='Diff Order 2')
plt.xlabel("Year")
plt.ylabel("Differenced Values")
plt.title("Second-Order Differencing", fontsize=14,
          fontweight='bold')
plt.legend()
plt.grid(False)
plt.show()

In [None]:
# 64.a Convert the differenced column to a single-column DataFrame
# (rows without a value are dropped; the date index is kept)
df_series_diff2 = (
    df_series['Energy_Consumption_Diff2']
    .dropna()
    .to_frame(name='Differenced_Series')
)

# 64.b Test stationarity after differencing
test_stationarity(df_series_diff2)

#### Differencing to Remove Seasonality

In [None]:
# 65.a Apply second differencing to remove seasonality
# The helper returns a list that is `interval` items shorter than its input, so the
# index is trimmed by the same amount.
# NOTE(review): the lag used here is 2; for monthly data a seasonal difference would
# normally use a lag of 12 — confirm the intended interval.
diff2_clean = df_series['Energy_Consumption_Diff2'].dropna()
df_series_diff3 = pd.DataFrame(
    {'Differenced_Series': diffFunc(diff2_clean.values, interval=2)},
    index=diff2_clean.index[2:],
)

# 65.b Plot the second differenced series
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    df_series_diff3.index, df_series_diff3,
    color='green', lw=1.5, label='Second Differencing - Seasonality Removed',
)
ax.set_xlabel("Year")
ax.set_ylabel("Differenced Values")
ax.set_title("Second Differencing for Seasonality", fontsize=14, fontweight='bold')
ax.legend()
ax.grid(False)
plt.show()

In [None]:
# 66.a Build the final frame: same values and index as the previous step,
# with the column renamed
df_series_final = df_series_diff3.rename(
    columns={'Differenced_Series': 'Final_Differenced_Series'}
)

# 66.b Test stationarity on the final differenced series
test_stationarity(df_series_final)