In [None]:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas_datareader.data as web
from datetime import datetime

In [157]:
# Read the raw consumption and retail-price workbooks.
# skiprows=2 drops the first two rows of each sheet, so the next row becomes the header.

consumption = pd.read_excel(io="Data/ConsumptionData.xlsx", skiprows=2)
prices = pd.read_excel(io="Data/MonthlyRetailPrices.xlsx", skiprows=2)

# Cleaning and Transforming Consumption Data

In [158]:
# Preview the first ten rows of the raw consumption table to check its column layout

consumption.head(10)

Unnamed: 0,Date,U.S. Product Supplied of Finished Motor Gasoline (Thousand Barrels),East Coast (PADD 1) Product Supplied of Finished Motor Gasoline (Thousand Barrels),Midwest (PADD 2) Product Supplied of Finished Motor Gasoline (Thousand Barrels),Gulf Coast (PADD 3) Product Supplied of Finished Motor Gasoline (Thousand Barrels),Rocky Mountain (PADD 4) Product Supplied of Finished Motor Gasoline (Thousand Barrels),West Coast (PADD 5) Product Supplied of Finished Motor Gasoline (Thousand Barrels)
0,1945-01-15,40310,,,,,
1,1945-02-15,38690,,,,,
2,1945-03-15,42511,,,,,
3,1945-04-15,45351,,,,,
4,1945-05-15,47515,,,,,
5,1945-06-15,47091,,,,,
6,1945-07-15,51409,,,,,
7,1945-08-15,56974,,,,,
8,1945-09-15,54943,,,,,
9,1945-10-15,53309,,,,,


In [159]:
# Print the raw consumption table's schema: column names, dtypes and non-null counts

consumption.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 967 entries, 0 to 966
Data columns (total 7 columns):
 #   Column                                                                                  Non-Null Count  Dtype         
---  ------                                                                                  --------------  -----         
 0   Date                                                                                    967 non-null    datetime64[ns]
 1   U.S. Product Supplied of Finished Motor Gasoline (Thousand Barrels)                     967 non-null    int64         
 2   East Coast (PADD 1) Product Supplied of Finished Motor Gasoline (Thousand Barrels)      535 non-null    float64       
 3   Midwest (PADD 2) Product Supplied of Finished Motor Gasoline (Thousand Barrels)         535 non-null    float64       
 4   Gulf Coast (PADD 3) Product Supplied of Finished Motor Gasoline (Thousand Barrels)      535 non-null    float64       
 5   Rocky Mountain (PADD 4)

In [160]:
# Data Cleaning for Consumption Data
#
# Produces `consumption1`: one row per month from April 1993 onward with the
# U.S. total expressed in millions of gallons. Explicit .copy() calls make
# every column assignment act on an independent frame instead of a slice of
# `consumption` (avoids SettingWithCopyWarning and ambiguous write-through).

# Keep only the date and the U.S. total (first two columns) and give them short names
consumption1 = consumption.iloc[:, :2].copy()
consumption1.columns = ["Date", "Gasoline_Consumption (Thousands of Barrels)"]

# Snap every date to the first day of its month
consumption1["Date"] = consumption1["Date"].dt.to_period("M").dt.to_timestamp()

# Keep April 1993 onward
consumption1 = consumption1[consumption1["Date"] >= pd.to_datetime("1993-04-01")].copy()

# Thousand barrels -> gallons (x 1000 x 42 gallons per barrel) -> millions of gallons
consumption1["Gasoline_Consumption(Millions of Gallons)"] = (
    consumption1["Gasoline_Consumption (Thousands of Barrels)"] * 1000 * 42 / 1_000_000
)

# The barrel-denominated column is no longer needed
consumption1 = consumption1.drop(columns=["Gasoline_Consumption (Thousands of Barrels)"])
consumption1.head(10)

Unnamed: 0,Date,Gasoline_Consumption(Millions of Gallons)
579,1993-04-01,9367.638
580,1993-05-01,9876.09
581,1993-06-01,9702.0
582,1993-07-01,10135.65
583,1993-08-01,10238.676
584,1993-09-01,9585.198
585,1993-10-01,9611.616
586,1993-11-01,9491.244
587,1993-12-01,9975.168
588,1994-01-01,9088.044


# Statistics of Consumption Data

In [161]:
# Summary statistics (count, mean, quartiles, min/max, std) of the cleaned consumption data

consumption1.describe()

Unnamed: 0,Date,Gasoline_Consumption(Millions of Gallons)
count,388,388.0
mean,2009-05-16 21:24:07.422680320,11180.369582
min,1993-04-01 00:00:00,7390.866
25%,2001-04-23 12:00:00,10652.5335
50%,2009-05-16 12:00:00,11312.406
75%,2017-06-08 12:00:00,11792.7705
max,2025-07-01 00:00:00,12803.448
std,,836.445638


In [162]:
# Report the first and last month covered and the number of monthly rows

for label, value in (
    ("Earliest date:", consumption1["Date"].min()),
    ("Latest date:", consumption1["Date"].max()),
    ("Total months of data:", len(consumption1)),
):
    print(label, value)

Earliest date: 1993-04-01 00:00:00
Latest date: 2025-07-01 00:00:00
Total months of data: 388


In [163]:
# Mean, standard deviation and coefficient of variation of monthly consumption

monthly_gallons = consumption1["Gasoline_Consumption(Millions of Gallons)"]
mean, std = monthly_gallons.mean(), monthly_gallons.std()
cv = std / mean * 100  # coefficient of variation, in percent

print(
    f"Average monthly consumption: {mean:,.2f} million gallons",
    f"Standard deviation: {std:,.2f} million gallons",
    f"Coefficient of variation: {cv:.2f}%",
    sep="\n",
)

Average monthly consumption: 11,180.37 million gallons
Standard deviation: 836.45 million gallons
Coefficient of variation: 7.48%


# Visualizing Trends in Consumption Data

In [164]:
# Line chart of the raw monthly consumption series

axis_grid = {"showgrid": True, "gridcolor": "lightgray"}

fig = px.line(
    consumption1,
    x="Date",
    y="Gasoline_Consumption(Millions of Gallons)",
    title="U.S. Monthly Gasoline Consumption (Millions of Gallons)",
    labels={
        "Date": "Date",
        "Gasoline_Consumption(Millions of Gallons)": "Gasoline Consumption (Millions of Gallons)",
    },
)

# Styling: steel-blue line on a white template with light-gray grid lines
fig.update_traces(line={"color": "steelblue", "width": 2})
fig.update_layout(
    template="plotly_white",
    title_x=0.5,
    xaxis=dict(axis_grid),
    yaxis=dict(axis_grid),
    hovermode="x unified",
    height=600,
)

fig.show()

In [165]:
# Overlay a 12-month rolling mean on the monthly series to show the underlying trend

# The rolling mean is NaN until 12 months of data are available
consumption1["Rolling_12mo"] = (
    consumption1["Gasoline_Consumption(Millions of Gallons)"].rolling(window=12).mean()
)

# Raw monthly values, drawn thin and light so the smoothed line stands out
monthly_trace = go.Scatter(
    x=consumption1["Date"],
    y=consumption1["Gasoline_Consumption(Millions of Gallons)"],
    mode="lines",
    name="Monthly Data",
    line={"color": "lightgray", "width": 1},
)

# Smoothed series, drawn thicker in steel blue
rolling_trace = go.Scatter(
    x=consumption1["Date"],
    y=consumption1["Rolling_12mo"],
    mode="lines",
    name="12-Month Rolling Average",
    line={"color": "steelblue", "width": 3},
)

fig = go.Figure(data=[monthly_trace, rolling_trace])

# Titles, grid and a semi-transparent legend box in the top-left corner
fig.update_layout(
    title="Smoothed U.S. Gasoline Consumption (12-Month Rolling Average)",
    xaxis_title="Date",
    yaxis_title="Gasoline Consumption (Millions of Gallons)",
    template="plotly_white",
    legend={
        "x": 0.02,
        "y": 0.98,
        "bgcolor": "rgba(255,255,255,0.7)",
        "bordercolor": "lightgray",
        "borderwidth": 1,
    },
    xaxis={"showgrid": True, "gridcolor": "lightgray"},
    yaxis={"showgrid": True, "gridcolor": "lightgray"},
    title_x=0.5,
    hovermode="x unified",
    height=600,
)

fig.show()

In [166]:
# Histogram of monthly consumption values (up to 25 bins)

fig = px.histogram(
    consumption1,
    x="Gasoline_Consumption(Millions of Gallons)",
    nbins=25,
    title="Distribution of Monthly Gasoline Consumption",
    labels={"Gasoline_Consumption(Millions of Gallons)": "Millions of Gallons"},
    color_discrete_sequence=["teal"],
)

# Outline each bar in black and make the fill slightly transparent
fig.update_traces(opacity=0.7, marker_line_width=1, marker_line_color="black")

fig.update_layout(
    template="plotly_white",
    title_x=0.5,
    xaxis_title="Millions of Gallons",
    yaxis_title="Frequency",
    xaxis={"showgrid": True, "gridcolor": "lightgray"},
    yaxis={"showgrid": True, "gridcolor": "lightgray"},
    hovermode="x unified",
    height=500,
)

fig.show()

In [167]:
# Average monthly consumption within each calendar year

# Derive the calendar year; later cells reuse this column
consumption1["Year"] = consumption1["Date"].dt.year
yearly_avg = consumption1.groupby("Year", as_index=False)[
    "Gasoline_Consumption(Millions of Gallons)"
].mean()

fig = px.line(
    yearly_avg,
    x="Year",
    y="Gasoline_Consumption(Millions of Gallons)",
    title="Average Yearly Gasoline Consumption",
    labels={
        "Year": "Year",
        "Gasoline_Consumption(Millions of Gallons)": "Average Monthly Consumption (Millions of Gallons)",
    },
)

# Navy line with white-outlined markers on every yearly point
fig.update_traces(
    mode="lines+markers",
    line={"color": "navy", "width": 2},
    marker={"size": 6, "color": "navy", "line": {"width": 1, "color": "white"}},
)

fig.update_layout(
    template="plotly_white",
    hovermode="x unified",
    title_x=0.5,
    xaxis={"showgrid": True, "gridcolor": "lightgray"},
    yaxis={"showgrid": True, "gridcolor": "lightgray"},
)

fig.show()

In [168]:
# Create a line plot to visualize total yearly gasoline consumption
#
# Only calendar years with all 12 monthly observations are totalled. The
# cleaned data starts in April 1993 and its last year is incomplete, so summing
# those partial years would draw artificial dips at both ends of the line.

MONTHS_PER_YEAR = 12

# Per-year sum plus the number of non-null months that went into it
yearly_stats = (
    consumption1.groupby("Year")["Gasoline_Consumption(Millions of Gallons)"]
    .agg(["sum", "count"])
)

# Keep complete years only and restore the original column name for plotting
yearly_total = (
    yearly_stats.loc[yearly_stats["count"] == MONTHS_PER_YEAR, "sum"]
    .rename("Gasoline_Consumption(Millions of Gallons)")
    .reset_index()
)

fig = px.line(
    yearly_total,
    x="Year",
    y="Gasoline_Consumption(Millions of Gallons)",
    title="Total U.S. Gasoline Consumption per Year",
    labels={
        "Year": "Year",
        "Gasoline_Consumption(Millions of Gallons)": "Total Annual Consumption (Millions of Gallons)"
    },
)

# Customize appearance
fig.update_traces(
    mode="lines+markers",
    line=dict(color="darkgreen", width=2),
    marker=dict(size=6, color="darkgreen", line=dict(width=1, color="white"))
)

fig.update_layout(
    template="plotly_white",
    hovermode="x unified",
    title_x=0.5,
    xaxis=dict(showgrid=True, gridcolor="lightgray"),
    yaxis=dict(showgrid=True, gridcolor="lightgray"),
)

fig.show()

# Can we use imputation to fill in the missing first three months of 1993?

In [169]:
# Linearly interpolate NaN values in the consumption column.
# NOTE(review): interpolate() only fills NaNs in rows that already exist; it
# cannot create rows for January-March 1993, which were filtered out during
# cleaning. As a side effect the frame ends up with a fresh 0..n-1 index and
# "Date" as its first column.
consumption1 = consumption1.set_index("Date")
consumption1["Gasoline_Consumption(Millions of Gallons)"] = consumption1[
    "Gasoline_Consumption(Millions of Gallons)"
].interpolate(method="linear")
consumption1 = consumption1.reset_index()
# TODO: need to rearrange the order of these steps

In [170]:
# 12-month rolling mean of consumption (NaN until a full 12-month window is available)
monthly_gallons = consumption1["Gasoline_Consumption(Millions of Gallons)"]
consumption1["Rolling_Avg"] = monthly_gallons.rolling(12).mean()

In [171]:
# Monthly consumption and its rolling average, with recession/pandemic windows shaded

# Date windows to shade, each with the label drawn inside the band
highlight_periods = [
    dict(start="2001-03-01", end="2001-11-01", label="2001 Recession"),
    dict(start="2007-12-01", end="2009-06-01", label="Great Recession"),
    dict(start="2020-03-01", end="2021-03-01", label="COVID-19 Pandemic"),
]

# Build the figure from the two series in one go
fig = go.Figure(
    data=[
        go.Scatter(
            x=consumption1["Date"],
            y=consumption1["Gasoline_Consumption(Millions of Gallons)"],
            mode="lines",
            name="Monthly Consumption",
            line={"color": "steelblue", "width": 2},
        ),
        go.Scatter(
            x=consumption1["Date"],
            y=consumption1["Rolling_Avg"],
            mode="lines",
            name="12-Month Rolling Average",
            line={"color": "firebrick", "width": 2},
        ),
    ]
)

# One translucent gray band per event window
for window in highlight_periods:
    fig.add_vrect(
        x0=window["start"],
        x1=window["end"],
        fillcolor="gray",
        opacity=0.2,
        line_width=0,
        annotation_text=window["label"],
        annotation_position="top left",
        annotation_font_size=10,
    )

# Titles and general styling
fig.update_layout(
    title="U.S. Monthly Gasoline Consumption with Key Economic Events",
    xaxis_title="Date",
    yaxis_title="Millions of Gallons",
    template="plotly_white",
    hovermode="x unified",
    title_x=0.5,
    legend_title_text="",
)

fig.show()

In [172]:
# Bar chart of the month-to-month change in consumption

monthly_gallons = consumption1["Gasoline_Consumption(Millions of Gallons)"]

# Relative change versus the previous row, in percent
consumption1["Monthly_Change_%"] = monthly_gallons.pct_change() * 100

# Absolute change versus the previous row, in millions of gallons
consumption1["Monthly_Change_(Millions)"] = monthly_gallons.diff()

# Bars are coloured by the size of the change on a red-blue scale
fig = px.bar(
    consumption1,
    x="Date",
    y="Monthly_Change_(Millions)",
    color="Monthly_Change_(Millions)",
    color_continuous_scale="RdBu",
    title="Month-to-Month Change in U.S. Gasoline Consumption",
    labels={"Monthly_Change_(Millions)": "Change (Millions of Gallons)"},
)

fig.update_layout(
    template="plotly_white",
    title_x=0.5,
    coloraxis_showscale=False,
    xaxis_title="Date",
    yaxis_title="Change in Millions of Gallons",
)
# Zero reference line separating increases from decreases
fig.add_hline(y=0, line_color="black", opacity=0.5)
fig.show()

In [173]:
# Line chart of the month-to-month percentage change computed in the previous cell

fig = px.line(
    consumption1,
    x="Date",
    y="Monthly_Change_%",
    labels={"Monthly_Change_%": "Percent Change (%)"},
    title="Month-to-Month Percentage Change in U.S. Gasoline Consumption",
)

fig.update_traces(line={"color": "teal", "width": 2})
fig.update_layout(
    hovermode="x unified",
    title_x=0.5,
    template="plotly_white",
)
# Zero reference line separating growth from decline
fig.add_hline(y=0, line_color="black", opacity=0.5)
fig.show()

In [None]:
# Create a line plot to visualize U.S. gasoline consumption by month with yearly average
#
# One line per calendar month (x = year), plus a dashed black line for the
# average across each year's months. The title already promised the yearly
# average, so it is computed here and added as its own trace.

# Ensure months are in proper calendar order
month_order = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December"
]

# Create Month column from the date
consumption1["Month"] = consumption1["Date"].dt.month_name()

# Convert to ordered categorical for proper plotting
consumption1["Month"] = pd.Categorical(
    consumption1["Month"],
    categories=month_order,
    ordered=True
)

# One coloured line per month
fig = px.line(
    consumption1,
    x="Year",
    y="Gasoline_Consumption(Millions of Gallons)",
    color="Month",
    title="U.S. Gasoline Consumption by Month (with Yearly Average)",
    labels={
        "Gasoline_Consumption(Millions of Gallons)": "Millions of Gallons"
    },
    category_orders={"Month": month_order},
    color_discrete_sequence=px.colors.qualitative.Bold
)

# Yearly average, computed locally so this cell does not depend on other cells' aggregates
yearly_mean = consumption1.groupby("Year")["Gasoline_Consumption(Millions of Gallons)"].mean()
fig.add_trace(go.Scatter(
    x=yearly_mean.index,
    y=yearly_mean.values,
    mode="lines",
    name="Yearly Average",
    line=dict(color="black", width=3, dash="dash")
))
fig.show()

In [None]:
# Create a heatmap to visualize gasoline consumption by month and year,
# with a side bar chart of each month's average across all years

# Calculate average monthly consumption across all years.
# "Month" is categorical, so `observed` is passed explicitly: pandas 2.1+ warns
# when a categorical grouper relies on the implicit default.
monthly_avg = (
    consumption1.groupby("Month", observed=False)["Gasoline_Consumption(Millions of Gallons)"]
    .mean()
    .reindex(month_order)
    .reset_index()
)

# Create two subplots (1 row, 2 columns) that share the month axis
fig = make_subplots(
    rows=1, cols=2,
    column_widths=[0.8, 0.2],
    shared_yaxes=True,
    horizontal_spacing=0.02,
    subplot_titles=("Gasoline Consumption Heatmap", "Average by Month")
)

# --- Heatmap: year on x, month on y, consumption as colour ---
heatmap = go.Heatmap(
    x=consumption1["Year"],
    y=consumption1["Month"],
    z=consumption1["Gasoline_Consumption(Millions of Gallons)"],
    colorscale="YlOrRd",
    colorbar=dict(title="Millions of Gallons"),
)
fig.add_trace(heatmap, row=1, col=1)

# --- Average Monthly Bar Chart (horizontal, aligned with the heatmap rows) ---
bars = go.Bar(
    x=monthly_avg["Gasoline_Consumption(Millions of Gallons)"],
    y=monthly_avg["Month"],
    orientation="h",
    marker_color="firebrick",
    name="Monthly Avg"
)
fig.add_trace(bars, row=1, col=2)

# --- Layout ---
fig.update_layout(
    template="plotly_white",
    title="U.S. Gasoline Consumption Heatmap with Average Monthly Consumption",
    title_x=0.5,
    height=600,
)
# Reversed category array puts January at the top of the y axis
fig.update_yaxes(categoryorder="array", categoryarray=month_order[::-1])
fig.show()





In [None]:
# Create a heatmap to visualize gasoline consumption by month and year,
# annotated with each month's average across all years.
# Relies on `month_order` and `monthly_avg` from the earlier cells.

# Create heatmap.
# - histfunc="avg": with `z` supplied the default aggregation is a sum, so if
#   several years land in one x bin the cell would show their total.
# - nbinsx: allow one bin per distinct year so years are not grouped together.
# - category_orders: without it months appear in order of first occurrence
#   in the data rather than calendar order.
fig = px.density_heatmap(
    consumption1,
    x="Year",
    y="Month",
    z="Gasoline_Consumption(Millions of Gallons)",
    histfunc="avg",
    nbinsx=consumption1["Year"].nunique(),
    category_orders={"Month": month_order},
    color_continuous_scale="YlOrRd",
    title="U.S. Gasoline Consumption by Month and Year (with Monthly Averages)",
    labels={"Gasoline_Consumption(Millions of Gallons)": "Millions of Gallons"},
)

# Add average text annotations just right of the last year, one per month row
for i, row in monthly_avg.iterrows():
    fig.add_annotation(
        x=consumption1["Year"].max() + 0.5,
        y=row["Month"],
        text=f"{row['Gasoline_Consumption(Millions of Gallons)']:.0f}",
        showarrow=False,
        font=dict(size=10, color="black")
    )

fig.update_layout(
    template="plotly_white",
    title_x=0.5,
    xaxis_title="Year",
    yaxis_title="Month",
    coloraxis_colorbar=dict(title="Millions of Gallons"),
)
fig.show()

# Prices Data

In [177]:
# Preview the first ten rows of the raw retail-price table to check its column layout

prices.head(10)

Unnamed: 0,Date,U.S. All Grades All Formulations Retail Gasoline Prices (Dollars per Gallon),U.S. All Grades Conventional Retail Gasoline Prices (Dollars per Gallon),U.S. All Grades Reformulated Retail Gasoline Prices (Dollars per Gallon),U.S. Regular All Formulations Retail Gasoline Prices (Dollars per Gallon),U.S. Regular Conventional Retail Gasoline Prices (Dollars per Gallon),U.S. Regular Reformulated Retail Gasoline Prices (Dollars per Gallon),U.S. Midgrade All Formulations Retail Gasoline Prices (Dollars per Gallon),U.S. Midgrade Conventional Retail Gasoline Prices (Dollars per Gallon),U.S. Midgrade Reformulated Retail Gasoline Prices (Dollars per Gallon),U.S. Premium All Formulations Retail Gasoline Prices (Dollars per Gallon),U.S. Premium Conventional Retail Gasoline Prices (Dollars per Gallon),U.S. Premium Reformulated Retail Gasoline Prices (Dollars per Gallon),U.S. No 2 Diesel Retail Prices (Dollars per Gallon),U.S. No 2 Diesel Ultra Low Sulfur (0-15 ppm) Retail Prices (Dollars per Gallon),U.S. No 2 Diesel Low Sulfur (15-500 ppm) Retail Prices (Dollars per Gallon)
0,1990-08-15,,,,,,,,,,,,,,,
1,1990-09-15,,,,1.258,1.258,,,,,,,,,,
2,1990-10-15,,,,1.335,1.335,,,,,,,,,,
3,1990-11-15,,,,1.324,1.324,,,,,,,,,,
4,1990-12-15,,,,,,,,,,,,,,,
5,1991-01-15,,,,,,,,,,,,,,,
6,1991-02-15,,,,1.094,1.094,,,,,,,,,,
7,1991-03-15,,,,1.04,1.04,,,,,,,,,,
8,1991-04-15,,,,1.076,1.076,,,,,,,,,,
9,1991-05-15,,,,1.126,1.126,,,,,,,,,,


In [178]:
# Print the raw price table's schema: column names, dtypes and non-null counts

prices.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 422 entries, 0 to 421
Data columns (total 16 columns):
 #   Column                                                                           Non-Null Count  Dtype         
---  ------                                                                           --------------  -----         
 0   Date                                                                             422 non-null    datetime64[ns]
 1   U.S. All Grades All Formulations Retail Gasoline Prices (Dollars per Gallon)     390 non-null    float64       
 2   U.S. All Grades Conventional Retail Gasoline Prices (Dollars per Gallon)         370 non-null    float64       
 3   U.S. All Grades Reformulated Retail Gasoline Prices (Dollars per Gallon)         370 non-null    float64       
 4   U.S. Regular All Formulations Retail Gasoline Prices (Dollars per Gallon)        419 non-null    float64       
 5   U.S. Regular Conventional Retail Gasoline Prices (Dollars per Gallon)  

In [179]:
# Data Cleaning for Prices Data
#
# Produces `prices1`: one row per month from April 1993 through December 2024
# with the first price series of the raw table. Explicit .copy() calls make
# every column assignment act on an independent frame instead of a slice of
# `prices` (avoids SettingWithCopyWarning and ambiguous write-through).

# Keep only the date and the first price column, and give them short names
prices1 = prices.iloc[:, :2].copy()
prices1.columns = ["Date", "Dollars_Per_Gallon"]

# Snap every date to the first day of its month
prices1["Date"] = prices1["Date"].dt.to_period("M").dt.to_timestamp()

# Keep April 1993 through December 2024 (both bounds inclusive)
in_window = prices1["Date"].between(pd.Timestamp("1993-04-01"), pd.Timestamp("2024-12-31"))
prices1 = prices1.loc[in_window].copy()
prices1.head(10)

Unnamed: 0,Date,Dollars_Per_Gallon
32,1993-04-01,1.078
33,1993-05-01,1.1
34,1993-06-01,1.097
35,1993-07-01,1.078
36,1993-08-01,1.062
37,1993-09-01,1.05
38,1993-10-01,1.092
39,1993-11-01,1.066
40,1993-12-01,1.014
41,1994-01-01,0.998


# Exploring Prices Data

In [180]:
# Summary statistics (count, mean, quartiles, min/max, std) of the cleaned price data
prices1.describe()

Unnamed: 0,Date,Dollars_Per_Gallon
count,381,381.0
mean,2009-01-30 08:37:47.716535552,2.378123
min,1993-04-01 00:00:00,0.962
25%,2001-03-01 00:00:00,1.441
50%,2009-02-01 00:00:00,2.387
75%,2017-01-01 00:00:00,3.148
max,2024-12-01 00:00:00,5.032
std,,0.953819


In [None]:
# Line chart of the average gasoline price per calendar year, annotated with key events

# Derive the calendar year; later cells reuse this column
prices1["Year"] = prices1["Date"].dt.year
yearly_price_avg = prices1.groupby("Year", as_index=False)["Dollars_Per_Gallon"].mean()

fig = px.line(
    yearly_price_avg,
    x="Year",
    y="Dollars_Per_Gallon",
    title="Average U.S. Gasoline Price per Year (Annotated)",
    labels={
        "Year": "Year",
        "Dollars_Per_Gallon": "Average Price (USD per Gallon)",
    },
)

# Crimson line with white-outlined markers on every yearly point
fig.update_traces(
    mode="lines+markers",
    line={"color": "crimson", "width": 2},
    marker={"size": 6, "color": "crimson", "line": {"width": 1, "color": "white"}},
)

fig.update_layout(
    template="plotly_white",
    hovermode="x unified",
    title_x=0.5,
)

# --- Vertical event lines and text annotations ---

# Year -> label; every year listed must be present in yearly_price_avg
events = {
    2008: "2008 Financial Crisis",
    2011: "Arab Spring & Oil Tension",
    2015: "U.S. Shale Boom",
    2020: "COVID-19 Lockdowns",
    2022: "Ukraine War & Inflation Spike",
}

for year, label in events.items():
    # Anchor the annotation arrow on that year's average price
    is_event_year = yearly_price_avg["Year"] == year
    event_price = yearly_price_avg.loc[is_event_year, "Dollars_Per_Gallon"].values[0]

    fig.add_vline(x=year, line_color="gray", line_dash="dash", opacity=0.6)
    fig.add_annotation(
        x=year,
        y=event_price,
        text=label,
        showarrow=True,
        arrowhead=2,
        ax=30,
        ay=-40,
        bgcolor="white",
        font={"size": 10, "color": "black"},
    )

fig.show()

In [None]:
# Heatmap of price by month and year, with a side bar chart of each month's average.
# Relies on `month_order` from the earlier consumption cell.

# Month name derived from the Date column
prices1["Month"] = prices1["Date"].dt.month_name()

# Average price per calendar month across all years, in calendar order
monthly_price_avg = (
    prices1.groupby("Month")["Dollars_Per_Gallon"]
    .mean()
    .reindex(month_order)
    .reset_index()
)

# Two panels sharing the month axis: heatmap (left), average bar chart (right)
fig = make_subplots(
    rows=1,
    cols=2,
    column_widths=[0.8, 0.2],
    shared_yaxes=True,
    horizontal_spacing=0.02,
    subplot_titles=("Gasoline Price Heatmap", "Average by Month"),
)

# --- Heatmap: year on x, month on y, price as colour ---
heatmap = go.Heatmap(
    x=prices1["Year"],
    y=prices1["Month"],
    z=prices1["Dollars_Per_Gallon"],
    colorscale="RdYlBu_r",
    colorbar={"title": "Dollars per Gallon"},
)
fig.add_trace(heatmap, row=1, col=1)

# --- Horizontal bars aligned with the heatmap rows ---
bars = go.Bar(
    x=monthly_price_avg["Dollars_Per_Gallon"],
    y=monthly_price_avg["Month"],
    orientation="h",
    marker_color="crimson",
    name="Monthly Avg",
)
fig.add_trace(bars, row=1, col=2)

# --- Layout settings ---
fig.update_layout(
    template="plotly_white",
    title="U.S. Gasoline Price Heatmap with Average Monthly Price",
    title_x=0.5,
    height=600,
)

# Hover text per trace type; the heatmap also keeps its colour scale visible
fig.update_traces(
    selector={"type": "heatmap"},
    showscale=True,
    hovertemplate="Year: %{x}<br>Month: %{y}<br>Price: %{z:.2f} USD<extra></extra>",
)
fig.update_traces(
    selector={"type": "bar"},
    hovertemplate="Month: %{y}<br>Avg Price: %{x:.2f} USD<extra></extra>",
)

# Reversed category array puts January at the top of the y axis
fig.update_yaxes(categoryorder="array", categoryarray=month_order[::-1])
fig.show()

In [None]:
# Year-over-year percentage change in price: each row versus the row 12 positions earlier

monthly_price = prices1["Dollars_Per_Gallon"]
prices1["YoY_Change_%"] = monthly_price.pct_change(12) * 100

fig = px.line(
    prices1,
    x="Date",
    y="YoY_Change_%",
    labels={"YoY_Change_%": "% Change from Prior Year"},
    title="Year-over-Year Change in U.S. Gasoline Prices (%)",
)
fig.update_traces(line={"color": "firebrick", "width": 2})
fig.update_layout(title_x=0.5, template="plotly_white")
fig.show()

In [None]:
# Histogram of monthly prices (up to 40 bins); the figure is the cell's displayed value

px.histogram(
    data_frame=prices1,
    x="Dollars_Per_Gallon",
    nbins=40,
    labels=dict(Dollars_Per_Gallon="Price (USD per Gallon)"),
    title="Distribution of Monthly U.S. Gasoline Prices (1993-2024)",
).update_layout(title_x=0.5, template="plotly_white")

In [None]:
# 12-month rolling mean of price (NaN until a full 12-month window is available), then plot it

monthly_price = prices1["Dollars_Per_Gallon"]
prices1["Rolling_12mo_Avg"] = monthly_price.rolling(12).mean()

fig = px.line(
    prices1,
    x="Date",
    y="Rolling_12mo_Avg",
    labels={"Rolling_12mo_Avg": "Rolling 12-Month Average Price (USD/gal)"},
    title="12-Month Rolling Average of U.S. Gasoline Prices",
)
fig.update_traces(line={"color": "darkred", "width": 2})
fig.update_layout(title_x=0.5, template="plotly_white")
fig.show()

In [186]:
# Box plot of price by calendar month, in calendar order (uses `month_order` from earlier);
# the figure is the cell's displayed value
px.box(
    data_frame=prices1,
    x="Month",
    y="Dollars_Per_Gallon",
    category_orders=dict(Month=month_order),
    labels=dict(Dollars_Per_Gallon="Price (USD/gal)"),
    title="Monthly Gasoline Price Variation (1993-2024)",
).update_layout(title_x=0.5, template="plotly_white")

In [193]:
# Download CPI from FRED and express every price in the purchasing power of the latest month

# Retrieval window for the CPI series
start, end = datetime(1993, 1, 1), datetime(2025, 1, 1)

# CPI for All Urban Consumers (base period 1982-84=100); move the date index into a column
cpi = web.DataReader('CPIAUCNS', 'fred', start, end).reset_index()
cpi.columns = ['Date', 'CPI']

# Attach each month's CPI to its price row; months missing from either table are dropped
prices_adj = prices1.merge(cpi, on='Date', how='inner')

# CPI of the last merged row, used as the reference price level
latest_cpi = prices_adj['CPI'].iloc[-1]

# Real price = nominal price scaled by (reference CPI / that month's CPI)
cpi_ratio = latest_cpi / prices_adj['CPI']
prices_adj['Real_Price_2024USD'] = prices_adj['Dollars_Per_Gallon'] * cpi_ratio

In [None]:
# Compare the nominal price (dotted) with the inflation-adjusted price (solid)

nominal_trace = go.Scatter(
    x=prices_adj["Date"],
    y=prices_adj["Dollars_Per_Gallon"],
    name="Nominal Price (USD)",
    line={"color": "firebrick", "width": 2, "dash": "dot"},
)

real_trace = go.Scatter(
    x=prices_adj["Date"],
    y=prices_adj["Real_Price_2024USD"],
    name="Inflation-Adjusted Price (2024 USD)",
    line={"color": "darkblue", "width": 3},
)

fig = go.Figure(data=[nominal_trace, real_trace])

fig.update_layout(
    title="U.S. Gasoline Prices: Nominal vs Inflation-Adjusted (2024 Dollars)",
    xaxis_title="Year",
    yaxis_title="Price (USD per Gallon)",
    template="plotly_white",
    title_x=0.5,
    legend={"x": 0.02, "y": 0.98},
)

fig.show()

# Merging Datasets

In [None]:
# Join consumption and price on month; only months present in both tables are kept

# Restrict each side to its key and value column before joining, then order the columns
consumption_cols = consumption1[["Date", "Gasoline_Consumption(Millions of Gallons)"]]
price_cols = prices1[["Date", "Dollars_Per_Gallon"]]

merged_df = consumption_cols.merge(price_cols, on="Date", how="inner")[[
    "Date",
    "Dollars_Per_Gallon",
    "Gasoline_Consumption(Millions of Gallons)",
]]

# Calendar helpers derived from the merged dates
merged_df["Year"] = merged_df["Date"].dt.year
merged_df["Month"] = merged_df["Date"].dt.month_name()

merged_df.head(10)

Unnamed: 0,Date,Dollars_Per_Gallon,Gasoline_Consumption(Millions of Gallons),Year,Month
0,1993-04-01,1.078,9367.638,1993,April
1,1993-05-01,1.1,9876.09,1993,May
2,1993-06-01,1.097,9702.0,1993,June
3,1993-07-01,1.078,10135.65,1993,July
4,1993-08-01,1.062,10238.676,1993,August
5,1993-09-01,1.05,9585.198,1993,September
6,1993-10-01,1.092,9611.616,1993,October
7,1993-11-01,1.066,9491.244,1993,November
8,1993-12-01,1.014,9975.168,1993,December
9,1994-01-01,0.998,9088.044,1994,January


# Visualizing Merged Datasets

In [None]:
# Create dual-axis line plot for consumption and price
#
# Builds a fresh figure from `merged_df`: consumption on the left axis (y) and
# price on the right axis (y2). Previously this cell only restyled whatever
# `fig` was left over from an earlier cell and never added these series or
# displayed the result.

fig = go.Figure()

# Consumption on the primary (left) y axis
fig.add_trace(go.Scatter(
    x=merged_df["Date"],
    y=merged_df["Gasoline_Consumption(Millions of Gallons)"],
    mode="lines",
    name="Gasoline Consumption",
    line=dict(color="steelblue", width=2),
    yaxis="y"
))

# Price on the secondary (right) y axis
fig.add_trace(go.Scatter(
    x=merged_df["Date"],
    y=merged_df["Dollars_Per_Gallon"],
    mode="lines",
    name="Gasoline Price",
    line=dict(color="firebrick", width=2),
    yaxis="y2"
))

# Axis titles and tick labels are coloured to match their series
fig.update_layout(
    title="U.S. Gasoline Consumption vs Price (Nominal, 1993-2024)",
    xaxis=dict(title="Year"),
    yaxis=dict(
        title=dict(
            text="Gasoline Consumption (Millions of Gallons)",
            font=dict(color="steelblue")
        ),
        tickfont=dict(color="steelblue")
    ),
    yaxis2=dict(
        title=dict(
            text="Gasoline Price (USD per Gallon)",
            font=dict(color="firebrick")
        ),
        tickfont=dict(color="firebrick"),
        overlaying="y",  # draw on top of the primary axis, sharing the x axis
        side="right"
    ),
    template="plotly_white",
    legend=dict(x=0.01, y=0.99, bgcolor="rgba(255,255,255,0.8)"),
    title_x=0.5,
    height=600
)

fig.show()

TODO: Check the kurtosis and skewness of the data.

If we use the extra data: fuel efficiency goes up over time, which could mean more driving (more destinations or distance traveled) but reduced or constant consumption.