In [1]:
import pandas as pd
import numpy as np
from pmdarima import auto_arima
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error, mean_squared_error
import os


# Using Energy Source Datasets

# Source of Energy: Natural Gas

In [2]:
# Read the 'Natural Gas' sheet of the energy-source Excel workbook into `df`.
# As the df.head() / df.info() outputs below show, the sheet loads as 52 rows
# with a 'State' column (two-letter codes such as "AK", "CA") plus one int64
# column per year from 1960 to 2020.
# NOTE(review): earlier comments here described Data_Status and MSN columns;
# neither appears in this sheet - presumably they belong to a different
# dataset. Units of the yearly values are not stated in this notebook; confirm
# against the data source.
df = pd.read_excel('Datasets/use_energy_source.xlsx' , sheet_name='Natural Gas')


In [6]:
# Preview the first five rows of the freshly loaded sheet.
df.head(n=5)

Unnamed: 0,State,1960,1961,1962,1963,1964,1965,1966,1967,1968,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,AK,2034,2300,4048,5567,6805,7670,12282,11889,17636,...,339819,347228,332631,329274,333936,330883,343899,346255,343384,373567
1,AL,190706,191533,201250,211348,230316,236949,240755,262055,296690,...,609288,677380,625869,650570,701582,715033,681208,771194,748731,712874
2,AR,222202,215311,235477,252846,278852,277661,289632,295756,321569,...,288906,300572,288045,273009,296826,315617,317530,366939,371388,335437
3,AZ,140275,159093,163461,169533,174623,166063,174448,169085,189965,...,293134,339043,340375,315916,365313,373916,334629,400531,484249,513537
4,CA,1301826,1400174,1483423,1623370,1817241,1813213,1982305,2048734,2187260,...,2196252,2456371,2480792,2409574,2384061,2248939,2190994,2209801,2218732,2143958


# Data statistics

In [7]:
# Show the first five rows again as a reference point for the statistics below.
df.head(n=5)

Unnamed: 0,State,1960,1961,1962,1963,1964,1965,1966,1967,1968,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,AK,2034,2300,4048,5567,6805,7670,12282,11889,17636,...,339819,347228,332631,329274,333936,330883,343899,346255,343384,373567
1,AL,190706,191533,201250,211348,230316,236949,240755,262055,296690,...,609288,677380,625869,650570,701582,715033,681208,771194,748731,712874
2,AR,222202,215311,235477,252846,278852,277661,289632,295756,321569,...,288906,300572,288045,273009,296826,315617,317530,366939,371388,335437
3,AZ,140275,159093,163461,169533,174623,166063,174448,169085,189965,...,293134,339043,340375,315916,365313,373916,334629,400531,484249,513537
4,CA,1301826,1400174,1483423,1623370,1817241,1813213,1982305,2048734,2187260,...,2196252,2456371,2480792,2409574,2384061,2248939,2190994,2209801,2218732,2143958


In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for every numeric
# year column; the non-numeric 'State' column is left out of the result.
df.describe()

Unnamed: 0,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
count,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,...,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0
mean,476360.2,497168.9,528109.3,554581.1,588370.8,606902.2,654068.5,690961.1,738637.3,794537.1,...,959760.4,1002911.0,1030337.0,1052955.0,1083834.0,1092030.0,1079111.0,1198226.0,1239406.0,1212443.0
std,1740756.0,1814250.0,1925089.0,2021051.0,2140440.0,2207766.0,2379287.0,2514807.0,2686385.0,2886087.0,...,3450566.0,3607145.0,3705665.0,3783847.0,3894937.0,3921062.0,3872421.0,4300637.0,4449025.0,4353371.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,158.0,187.0,197.0,167.0,196.0,162.0,163.0,192.0,228.0,199.0
25%,31318.0,34114.25,37731.25,43942.0,51234.5,51533.75,59354.25,61387.5,60639.5,73255.0,...,170686.8,160964.0,173740.2,180092.8,185880.8,184600.5,193376.8,215162.8,229583.2,247024.5
50%,141665.0,152611.0,161025.0,166729.0,174284.5,176422.5,187529.5,192495.0,202037.5,225191.0,...,281447.0,281219.5,297346.5,312619.5,317535.0,321022.0,333121.0,361806.0,374802.0,364268.0
75%,220369.0,226279.2,255762.8,264088.2,292144.8,295237.5,306573.0,313615.8,334390.8,364160.2,...,615745.2,658602.8,645989.0,665446.0,711689.8,729543.0,696526.5,778284.2,787849.2,756134.2
max,12385370.0,12926390.0,13730840.0,14419110.0,15297640.0,15779460.0,17005780.0,17964990.0,19204570.0,20657960.0,...,24953770.0,26075690.0,26788770.0,27376840.0,28179690.0,28392790.0,28056890.0,31153870.0,32224550.0,31523520.0


In [9]:
# Print the column names, non-null counts and dtypes to check for missing
# values and confirm the year columns were read as integers.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 62 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   State   52 non-null     object
 1   1960    52 non-null     int64 
 2   1961    52 non-null     int64 
 3   1962    52 non-null     int64 
 4   1963    52 non-null     int64 
 5   1964    52 non-null     int64 
 6   1965    52 non-null     int64 
 7   1966    52 non-null     int64 
 8   1967    52 non-null     int64 
 9   1968    52 non-null     int64 
 10  1969    52 non-null     int64 
 11  1970    52 non-null     int64 
 12  1971    52 non-null     int64 
 13  1972    52 non-null     int64 
 14  1973    52 non-null     int64 
 15  1974    52 non-null     int64 
 16  1975    52 non-null     int64 
 17  1976    52 non-null     int64 
 18  1977    52 non-null     int64 
 19  1978    52 non-null     int64 
 20  1979    52 non-null     int64 
 21  1980    52 non-null     int64 
 22  1981    52 non-null     int6

In [10]:
# Wide-format preview (one column per year) before reshaping to long format.
df.head(n=5)

Unnamed: 0,State,1960,1961,1962,1963,1964,1965,1966,1967,1968,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,AK,2034,2300,4048,5567,6805,7670,12282,11889,17636,...,339819,347228,332631,329274,333936,330883,343899,346255,343384,373567
1,AL,190706,191533,201250,211348,230316,236949,240755,262055,296690,...,609288,677380,625869,650570,701582,715033,681208,771194,748731,712874
2,AR,222202,215311,235477,252846,278852,277661,289632,295756,321569,...,288906,300572,288045,273009,296826,315617,317530,366939,371388,335437
3,AZ,140275,159093,163461,169533,174623,166063,174448,169085,189965,...,293134,339043,340375,315916,365313,373916,334629,400531,484249,513537
4,CA,1301826,1400174,1483423,1623370,1817241,1813213,1982305,2048734,2187260,...,2196252,2456371,2480792,2409574,2384061,2248939,2190994,2209801,2218732,2143958


# Transformation of Data

In [11]:
# Reshape from wide (one column per year) to long (one row per state/year),
# then parse the year labels into timestamps (1 January of each year).
df_trans = (
    df.melt(id_vars=['State'], var_name='Year', value_name='Yearly Data')
    .assign(Year=lambda long_df: pd.to_datetime(long_df['Year'], format='%Y'))
)

df_trans


Unnamed: 0,State,Year,Yearly Data
0,AK,1960-01-01,2034
1,AL,1960-01-01,190706
2,AR,1960-01-01,222202
3,AZ,1960-01-01,140275
4,CA,1960-01-01,1301826
...,...,...,...
3167,WA,2020-01-01,354969
3168,WI,2020-01-01,571817
3169,WV,2020-01-01,265123
3170,WY,2020-01-01,169076


# Modeling the Data
## Using ARIMA Model

In [12]:
# For every state: fit a non-seasonal auto-ARIMA model on all but the last
# five yearly observations, score it on those five years, plot the result and
# save the figure as a PNG.

# One variable for the output folder so the directory that is created is the
# same one the images are written to.
plot_dir = 'Plots/use_energy_source_natural_gass/Arima_results_plots'
os.makedirs(plot_dir, exist_ok=True)

# Negative offset: the last 5 yearly observations are held out for testing.
Horizan = -5
# Years to forecast beyond the last observation. The data ends in 2020, so
# 10 extra years carries the forecast through 2030, as the trace label says.
future_years = 10

for State in df_trans['State'].unique():
    # Best effort per state: one failing state must not abort the whole run.
    try:
        fig = go.Figure()

        # Yearly series for the current state, indexed by year
        df_filter = df_trans[df_trans['State'] == State][['Year', 'Yearly Data']]
        df_filter_index = df_filter.set_index('Year')

        # Prepare the data for modeling
        years = df_filter_index.index
        energy_consumption = df_filter_index.values.flatten()

        # Split the data: everything except the last 5 years for training,
        # the last 5 years for testing
        train_data = energy_consumption[:Horizan]
        test_data = energy_consumption[Horizan:]

        # auto_arima selects the order and returns an already fitted model,
        # so no separate fit() call is needed.
        model = auto_arima(train_data, seasonal=False)

        # Forecast once over the test window plus the future years; the first
        # len(test_data) values are the predictions for the held-out years.
        predictions_ahead_in_future = np.asarray(
            model.predict(n_periods=len(test_data) + future_years))
        predictions = predictions_ahead_in_future[:len(test_data)]

        # Calculate evaluation metrics on the held-out years
        mae = mean_absolute_error(test_data, predictions)
        mse = mean_squared_error(test_data, predictions)
        mape = np.mean(np.abs((test_data - predictions) / test_data)) * 100

        print('Mean Absolute Error (MAE):', np.round(mae,2))
        print('Mean Squared Error (MSE):', np.round(mse,2))
        print('Mean Absolute Percentage Error (MAPE):', np.round(mape,2))

        # Plot the training data
        fig.add_trace(go.Scatter(
            x=years[:Horizan], y=train_data, mode='lines+markers', name='Training Data'))

        # Plot the held-out actuals against the predictions
        fig.add_trace(go.Scatter(
            x=years[Horizan:], y=test_data, mode='lines+markers', name='Actual'))
        fig.add_trace(go.Scatter(
            x=years[Horizan:], y=predictions, mode='lines+markers', name='Predicted'))

        # One date per forecast value, on year starts so the dates line up
        # with the 1-January timestamps of the observed series.
        forecast_years = pd.date_range(
            start=years[Horizan], periods=len(predictions_ahead_in_future), freq='YS')
        fig.add_trace(go.Scatter(
            x=forecast_years, y=predictions_ahead_in_future,
            mode='lines+markers', name='Prediction till 2030'))

        # Update the layout
        fig.update_layout(title=f'Natural Gas : Energy Consumption Forecast State : {State} using ARIMA Model',
                          xaxis_title='Year', yaxis_title='Energy Consumption')

        # Show the plot and save it next to the other ARIMA results
        fig.show()
        fig.write_image(f'{plot_dir}/{State}.png')
    except Exception as exc:
        # Report the real cause; the handler must not reference values (such
        # as mse) that may not exist yet when the failure happened.
        print('Error occurred for State : {} : {!r}'.format(State, exc))
        

Mean Absolute Error (MAE): 8225.38
Mean Squared Error (MSE): 85148909.51
Mean Absolute Percentage Error (MAPE): 2.37
Mean Absolute Error (MAE): 64528.85
Mean Squared Error (MSE): 6138125041.62
Mean Absolute Percentage Error (MAPE): 9.02
Mean Absolute Error (MAE): 54858.33
Mean Squared Error (MSE): 3848239086.01
Mean Absolute Percentage Error (MAPE): 15.57
Mean Absolute Error (MAE): 68333.0
Mean Squared Error (MSE): 7674390652.2
Mean Absolute Percentage Error (MAPE): 14.74
Mean Absolute Error (MAE): 181576.2
Mean Squared Error (MSE): 34176499564.6
Mean Absolute Percentage Error (MAPE): 8.27
Mean Absolute Error (MAE): 31658.23
Mean Squared Error (MSE): 1118635121.67
Mean Absolute Percentage Error (MAPE): 6.25
Mean Absolute Error (MAE): 15877.11
Mean Squared Error (MSE): 282599197.03
Mean Absolute Percentage Error (MAPE): 5.73
Mean Absolute Error (MAE): 4724.36
Mean Squared Error (MSE): 25644655.44
Mean Absolute Percentage Error (MAPE): 15.78
Mean Absolute Error (MAE): 14059.05
Mean Squar

## Using SARIMA Model

In [13]:
# For every state: fit an auto-ARIMA model on all but the last five yearly
# observations, score it on those five years and save the plot as a PNG.
# NOTE(review): this section is labelled SARIMA, but the model is fitted with
# seasonal=False, so it is the same non-seasonal fit as the ARIMA section and
# yields the same metrics. A seasonal fit needs seasonal=True together with a
# seasonal period m > 1; yearly data has no obvious period, so the setting is
# left unchanged here - confirm the intended model.

plot_dir = 'Plots/use_energy_source_natural_gass/Sarima_results_plots'
os.makedirs(plot_dir, exist_ok=True)

# Negative offset: the last 5 yearly observations are held out for testing.
Horizan = -5
# Years to forecast beyond the last observation. The data ends in 2020, so
# 10 extra years carries the forecast through 2030, as the trace label says.
future_years = 10

for State in df_trans['State'].unique():
    # Best effort per state: one failing state must not abort the whole run.
    try:
        fig = go.Figure()

        # Yearly series for the current state, indexed by year
        df_filter = df_trans[df_trans['State'] == State][['Year', 'Yearly Data']]
        df_filter_index = df_filter.set_index('Year')

        # Prepare the data for modeling
        years = df_filter_index.index
        energy_consumption = df_filter_index.values.flatten()

        # Split the data: everything except the last 5 years for training,
        # the last 5 years for testing
        train_data = energy_consumption[:Horizan]
        test_data = energy_consumption[Horizan:]

        # auto_arima selects the order and returns an already fitted model,
        # so no separate fit() call is needed.
        model = auto_arima(train_data, seasonal=False)

        # Forecast once over the test window plus the future years; the first
        # len(test_data) values are the predictions for the held-out years.
        predictions_ahead_in_future = np.asarray(
            model.predict(n_periods=len(test_data) + future_years))
        predictions = predictions_ahead_in_future[:len(test_data)]

        # Calculate evaluation metrics on the held-out years
        mae = mean_absolute_error(test_data, predictions)
        mse = mean_squared_error(test_data, predictions)
        mape = np.mean(np.abs((test_data - predictions) / test_data)) * 100

        print('Mean Absolute Error (MAE):', np.round(mae,2))
        print('Mean Squared Error (MSE):', np.round(mse,2))
        print('Mean Absolute Percentage Error (MAPE):', np.round(mape,2))

        # Plot the training data
        fig.add_trace(go.Scatter(
            x=years[:Horizan], y=train_data, mode='lines+markers', name='Training Data'))

        # Plot the held-out actuals against the predictions
        fig.add_trace(go.Scatter(
            x=years[Horizan:], y=test_data, mode='lines+markers', name='Actual'))
        fig.add_trace(go.Scatter(
            x=years[Horizan:], y=predictions, mode='lines+markers', name='Predicted'))

        # One date per forecast value, on year starts so the dates line up
        # with the 1-January timestamps of the observed series.
        forecast_years = pd.date_range(
            start=years[Horizan], periods=len(predictions_ahead_in_future), freq='YS')
        fig.add_trace(go.Scatter(
            x=forecast_years, y=predictions_ahead_in_future,
            mode='lines+markers', name='Prediction till 2030'))

        # Update the layout
        fig.update_layout(title=f'Natural Gas : Energy Consumption Forecast State : {State} using SARIMA Model',
                          xaxis_title='Year', yaxis_title='Energy Consumption')

        # Figures are only written to disk in this section, not displayed.
        fig.write_image(f'{plot_dir}/{State}.png')
    except Exception as exc:
        # Report the real cause; the handler must not reference values (such
        # as mse) that may not exist yet when the failure happened.
        print('Error occurred for State : {} : {!r}'.format(State, exc))

Mean Absolute Error (MAE): 8225.38
Mean Squared Error (MSE): 85148909.51
Mean Absolute Percentage Error (MAPE): 2.37


Mean Absolute Error (MAE): 64528.85
Mean Squared Error (MSE): 6138125041.62
Mean Absolute Percentage Error (MAPE): 9.02


Mean Absolute Error (MAE): 54858.33
Mean Squared Error (MSE): 3848239086.01
Mean Absolute Percentage Error (MAPE): 15.57


Mean Absolute Error (MAE): 68333.0
Mean Squared Error (MSE): 7674390652.2
Mean Absolute Percentage Error (MAPE): 14.74


Mean Absolute Error (MAE): 181576.2
Mean Squared Error (MSE): 34176499564.6
Mean Absolute Percentage Error (MAPE): 8.27


Mean Absolute Error (MAE): 31658.23
Mean Squared Error (MSE): 1118635121.67
Mean Absolute Percentage Error (MAPE): 6.25


Mean Absolute Error (MAE): 15877.11
Mean Squared Error (MSE): 282599197.03
Mean Absolute Percentage Error (MAPE): 5.73


Mean Absolute Error (MAE): 4724.36
Mean Squared Error (MSE): 25644655.44
Mean Absolute Percentage Error (MAPE): 15.78


Mean Absolute Error (MAE): 14059.05
Mean Squared Error (MSE): 251158842.58
Mean Absolute Percentage Error (MAPE): 14.52


Mean Absolute Error (MAE): 17687.41
Mean Squared Error (MSE): 585260762.95
Mean Absolute Percentage Error (MAPE): 1.21


Mean Absolute Error (MAE): 20122.0
Mean Squared Error (MSE): 502741426.29
Mean Absolute Percentage Error (MAPE): 2.65


Mean Absolute Error (MAE): 21.11
Mean Squared Error (MSE): 607.07
Mean Absolute Percentage Error (MAPE): 11.29


Mean Absolute Error (MAE): 99986.81
Mean Squared Error (MSE): 12178334923.15
Mean Absolute Percentage Error (MAPE): 24.47


Mean Absolute Error (MAE): 10874.32
Mean Squared Error (MSE): 164161635.12
Mean Absolute Percentage Error (MAPE): 8.64


Mean Absolute Error (MAE): 104942.14
Mean Squared Error (MSE): 14933366160.5
Mean Absolute Percentage Error (MAPE): 9.19


Mean Absolute Error (MAE): 83992.52
Mean Squared Error (MSE): 10331406472.67
Mean Absolute Percentage Error (MAPE): 9.44


Mean Absolute Error (MAE): 21447.8
Mean Squared Error (MSE): 746720002.2
Mean Absolute Percentage Error (MAPE): 6.82


Mean Absolute Error (MAE): 50781.4
Mean Squared Error (MSE): 3671328909.0
Mean Absolute Percentage Error (MAPE): 14.59


Mean Absolute Error (MAE): 255157.91
Mean Squared Error (MSE): 76638923317.81
Mean Absolute Percentage Error (MAPE): 13.6


Mean Absolute Error (MAE): 35794.0
Mean Squared Error (MSE): 2103716906.47
Mean Absolute Percentage Error (MAPE): 8.48


Mean Absolute Error (MAE): 53754.5
Mean Squared Error (MSE): 4255133357.62
Mean Absolute Percentage Error (MAPE): 17.87


Mean Absolute Error (MAE): 6030.4
Mean Squared Error (MSE): 45686460.4
Mean Absolute Percentage Error (MAPE): 12.94


Mean Absolute Error (MAE): 109038.0
Mean Squared Error (MSE): 14910681470.0
Mean Absolute Percentage Error (MAPE): 10.83


Mean Absolute Error (MAE): 35715.11
Mean Squared Error (MSE): 2026942676.57
Mean Absolute Percentage Error (MAPE): 6.86


Mean Absolute Error (MAE): 31722.6
Mean Squared Error (MSE): 1587814639.8
Mean Absolute Percentage Error (MAPE): 9.93


Mean Absolute Error (MAE): 41217.2
Mean Squared Error (MSE): 2243489779.6
Mean Absolute Percentage Error (MAPE): 6.98


Mean Absolute Error (MAE): 10993.85
Mean Squared Error (MSE): 158615642.76
Mean Absolute Percentage Error (MAPE): 12.32


Mean Absolute Error (MAE): 21839.66
Mean Squared Error (MSE): 712185492.84
Mean Absolute Percentage Error (MAPE): 3.81


Mean Absolute Error (MAE): 14126.61
Mean Squared Error (MSE): 371087996.68
Mean Absolute Percentage Error (MAPE): 9.51


Mean Absolute Error (MAE): 21942.25
Mean Squared Error (MSE): 645609231.51
Mean Absolute Percentage Error (MAPE): 11.35


Mean Absolute Error (MAE): 14506.64
Mean Squared Error (MSE): 225887638.82
Mean Absolute Percentage Error (MAPE): 26.66


Mean Absolute Error (MAE): 55447.21
Mean Squared Error (MSE): 6166527688.22
Mean Absolute Percentage Error (MAPE): 7.8


Mean Absolute Error (MAE): 28375.61
Mean Squared Error (MSE): 1296941640.31
Mean Absolute Percentage Error (MAPE): 9.69


Mean Absolute Error (MAE): 17122.51
Mean Squared Error (MSE): 372372574.0
Mean Absolute Percentage Error (MAPE): 5.51


Mean Absolute Error (MAE): 119186.8
Mean Squared Error (MSE): 16345385266.16
Mean Absolute Percentage Error (MAPE): 9.05


Mean Absolute Error (MAE): 172407.47
Mean Squared Error (MSE): 45642395825.79
Mean Absolute Percentage Error (MAPE): 14.0


Mean Absolute Error (MAE): 88679.16
Mean Squared Error (MSE): 11024021092.05
Mean Absolute Percentage Error (MAPE): 10.68


Mean Absolute Error (MAE): 15993.07
Mean Squared Error (MSE): 440726942.93
Mean Absolute Percentage Error (MAPE): 5.56


Mean Absolute Error (MAE): 191868.6
Mean Squared Error (MSE): 57182445025.64
Mean Absolute Percentage Error (MAPE): 11.58


Mean Absolute Error (MAE): 4960.77
Mean Squared Error (MSE): 33751480.0
Mean Absolute Percentage Error (MAPE): 5.06


Mean Absolute Error (MAE): 27599.28
Mean Squared Error (MSE): 1131480611.76
Mean Absolute Percentage Error (MAPE): 8.13


Mean Absolute Error (MAE): 3505.34
Mean Squared Error (MSE): 14797686.66
Mean Absolute Percentage Error (MAPE): 3.84


Mean Absolute Error (MAE): 46445.56
Mean Squared Error (MSE): 3269995230.87
Mean Absolute Percentage Error (MAPE): 11.51


Mean Absolute Error (MAE): 450582.11
Mean Squared Error (MSE): 272148486992.42
Mean Absolute Percentage Error (MAPE): 9.77


Mean Absolute Error (MAE): 10659.32
Mean Squared Error (MSE): 171892926.28
Mean Absolute Percentage Error (MAPE): 4.17


Mean Absolute Error (MAE): 109082.36
Mean Squared Error (MSE): 15046515347.34
Mean Absolute Percentage Error (MAPE): 15.81


Mean Absolute Error (MAE): 784.39
Mean Squared Error (MSE): 986909.38
Mean Absolute Percentage Error (MAPE): 5.61


Mean Absolute Error (MAE): 12690.5
Mean Squared Error (MSE): 260336460.68
Mean Absolute Percentage Error (MAPE): 3.54


Mean Absolute Error (MAE): 49227.84
Mean Squared Error (MSE): 3264652755.2
Mean Absolute Percentage Error (MAPE): 8.68


Mean Absolute Error (MAE): 32801.8
Mean Squared Error (MSE): 1772320303.0
Mean Absolute Percentage Error (MAPE): 13.5


Mean Absolute Error (MAE): 35138.6
Mean Squared Error (MSE): 1479641799.0
Mean Absolute Percentage Error (MAPE): 20.92


Mean Absolute Error (MAE): 1537609.18
Mean Squared Error (MSE): 3396634387385.38
Mean Absolute Percentage Error (MAPE): 4.91
