In [1]:
# Importing necessary libraries for data manipulation, plotting, and time series forecasting.
# - pandas: Used for data manipulation and analysis.
# - matplotlib.pyplot: Used for creating static, interactive, and animated visualizations in Python.
# - prophet: Facebook's tool for time series forecasting.
# - plotly.express: A terse, consistent, high-level API for creating figures.
import pandas as pd
import matplotlib.pyplot as plt
from prophet import Prophet
import plotly.express as px
from prophet.plot import plot_plotly, plot_components_plotly


In [2]:
# Loading the dataset 'CrudeOil.csv' into a pandas DataFrame.
# The 'parse_dates' argument specifies that the 'Date' column should be parsed as dates.
# Displaying the first few rows of the DataFrame to get an idea of the data structure.
crude_oil_df = pd.read_csv('CrudeOil.csv', parse_dates=['Date'])
crude_oil_df.head()


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2011-12-01,99.699997,109.949997,95.440002,107.07,107.07,11536174
1,2012-03-01,106.82,110.550003,85.860001,86.529999,86.529999,16713943
2,2012-06-01,86.440002,98.290001,77.279999,96.470001,96.470001,17085835
3,2012-09-01,96.379997,100.419998,84.050003,88.910004,88.910004,15437793
4,2012-12-01,88.849998,98.239998,85.209999,92.050003,92.050003,12791085


In [3]:
# Creating a line plot of crude oil prices over time using Plotly Express.
# This plot compares 'Open' and 'Close' prices against the 'Date'.
# The plot is interactive, enhancing data exploration.
fig = px.line(crude_oil_df, x='Date', y=['Open', 'Close'], labels={'value': 'Price', 'variable': 'Price Type'}, title='Crude Oil Prices: Open vs Close')
fig.show()


In [4]:
# Creating a box plot to show the distribution of 'Open' and 'Close' prices of crude oil.
# The dataset is reshaped for plotting purposes using the .melt() method.
# The plot is interactive, facilitated by Plotly Express, showcasing the variability and central tendency of the prices.
fig = px.box(crude_oil_df.melt(id_vars='Date', value_vars=['Open', 'Close'], var_name='Price Type', value_name='Price'), y='Price', color='Price Type', title='Price Distribution')
fig.show()


In [5]:
# Calculating the 30-day moving average of the 'Close' price and adding it as a new column to the DataFrame.
# Plotting the 30-day moving average over time using Plotly Express to visualize the trend of closing prices.
# This allows for an analysis of the general direction of the price movement over a specified period.
rolling_window = 30  # days
crude_oil_df['30d MA Close'] = crude_oil_df['Close'].rolling(window=rolling_window).mean()

fig = px.line(crude_oil_df, x='Date', y='30d MA Close', title=f'{rolling_window}-Day Moving Average of Close Price')
fig.show()


In [6]:
# Preparing the crude oil dataset for time series forecasting with the Prophet library.
# The 'Date' column is renamed to 'ds' and the 'Close' price to 'y', following the convention required by Prophet.
df_prophet = crude_oil_df[['Date', 'Close']].rename(columns={'Date': 'ds', 'Close': 'y'})


In [7]:
# Building a Prophet model with specified seasonality configurations.
# Daily seasonality is turned off, while yearly seasonality is enabled.
# The model is fitted with the prepared data, and future predictions are made for the next 90 days.
model = Prophet(daily_seasonality=False, yearly_seasonality=True)
model.fit(df_prophet)

future = model.make_future_dataframe(periods=90)
forecast = model.predict(future)


01:43:57 - cmdstanpy - INFO - Chain [1] start processing
01:43:57 - cmdstanpy - INFO - Chain [1] done processing


In [8]:
# Visualizing the forecasted crude oil prices using Plotly.
# The forecast is plotted to show predictions along with confidence intervals.
# Additionally, forecast components are plotted to examine trends, yearly seasonality, etc.
fig = plot_plotly(model, forecast)
fig.update_layout(title='Crude Oil Price Forecast', xaxis_title='Date', yaxis_title='Price in USD')
fig.show()

# Optional: To see the components of the forecast, such as trends and seasonality, use plot_components_plotly.
fig_components = plot_components_plotly(model, forecast)
fig_components.show()


In [9]:
# Loading the 'NatGas.csv' dataset into a pandas DataFrame for analysis.
# Similar to the crude oil dataset, the 'Date' column is parsed as dates.
natgas_df = pd.read_csv('NatGas.csv', parse_dates=['Date'])
natgas_df.head()


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2011-01-01,4.492,4.879,3.731,4.389,4.389,8089560
1,2011-04-01,4.405,4.983,3.99,4.374,4.374,7985928
2,2011-07-01,4.372,4.612,3.662,3.666,3.666,7358101
3,2011-10-01,3.643,3.978,2.957,2.989,2.989,7749305
4,2012-10-01,3.325,3.933,3.261,3.351,3.351,8648446


In [10]:
# Creating a line plot of natural gas prices over time.
# Both 'Open' and 'Close' prices are plotted against the date to observe trends and fluctuations.
fig = px.line(natgas_df, x='Date', y=['Open', 'Close'], labels={'value': 'Price', 'variable': 'Price Type'}, title='Natural Gas Prices: Open vs Close')
fig.update_layout(xaxis_title="Date", yaxis_title="Price")
fig.show()


In [11]:
# Visualizing the distribution of 'Open' and 'Close' prices for natural gas using a box plot.
# This plot provides insights into the central tendency and variability of the prices.
fig = px.box(natgas_df.melt(id_vars='Date', value_vars=['Open', 'Close'], var_name='Price Type', value_name='Price'), y='Price', color='Price Type', title='Price Distribution for Natural Gas')
fig.update_layout(yaxis_title="Price", xaxis_title="Price Type")
fig.show()


In [12]:
# Calculating the 30-day moving average of the 'Close' prices for natural gas and plotting it.
# This provides a smoothed view of the price trend over time.
rolling_window = 30  # days
natgas_df['MA Close'] = natgas_df['Close'].rolling(window=rolling_window).mean()

fig = px.line(natgas_df, x='Date', y='MA Close', title=f'{rolling_window}-Day Moving Average of Natural Gas Close Prices')
fig.update_layout(xaxis_title="Date", yaxis_title="Moving Average Price")
fig.show()


In [13]:
# Similar to the crude oil data preparation, the natural gas dataset is prepared for forecasting with Prophet.
# The 'Date' column is renamed to 'ds' and 'Close' to 'y' for compatibility with Prophet.
df_prophet = natgas_df[['Date', 'Close']].rename(columns={'Date': 'ds', 'Close': 'y'})


In [14]:
# Building a Prophet model with specific seasonality and change point configurations for natural gas.
# The model is fitted with the prepared natural gas data, and future predictions are made for the next 90 days.
model = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    seasonality_prior_scale=10,
    changepoint_prior_scale=0.05,
    changepoint_range=0.9
)
model.fit(df_prophet)
# Make future predictions
future = model.make_future_dataframe(periods=90)
forecast = model.predict(future)

# Plot the forecast
fig1 = plot_plotly(model, forecast)
fig1.show()

# Plot forecast components
fig2 = plot_components_plotly(model, forecast)
fig2.show()


01:43:58 - cmdstanpy - INFO - Chain [1] start processing
01:43:58 - cmdstanpy - INFO - Chain [1] done processing


In [15]:
# The final set of cells visualize the forecasted natural gas prices and its components (trends, yearly and weekly seasonality).
# This is done using Plotly through the plot_plotly and plot_components_plotly functions from Prophet.
fig = plot_plotly(model, forecast)
fig.update_layout(title='Natural Gas Price Forecast', xaxis_title='Date', yaxis_title='Price')
fig.show()

# Plotting forecast components for detailed analysis.
fig_components = plot_components_plotly(model, forecast)
fig_components.update_layout(title='Forecast Components')
fig_components.show()
