In [10]:
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px

plt.style.use('Solarize_Light2')

- **Load WPSFD4131 data**

In [11]:
WPSFD4131_df = pd.read_csv('../data/WPSFD4131.csv')

WPSFD4131_df['DATE'] = pd.to_datetime(WPSFD4131_df['DATE'])
WPSFD4131_df.set_index('DATE', inplace=True)

WPSFD4131_df['^ %'] = WPSFD4131_df['WPSFD4131'].pct_change()

WPSFD4131_df.tail()

Unnamed: 0_level_0,WPSFD4131,^ %
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-07-01,260.3,0.002731
2025-08-01,261.299,0.003838
2025-09-01,261.806,0.00194
2025-10-01,262.953,0.004381
2025-11-01,263.565,0.002327


- **Average change**

In [12]:
print(f"Average change is {WPSFD4131_df['^ %'].mean() * 100}%")

Average change is 0.2690902360052101%


- **Overview on change**

In [13]:
positive_filter = WPSFD4131_df['^ %'] > 0
negative_filter = WPSFD4131_df['^ %'] < 0
zero_filter = WPSFD4131_df['^ %'] == 0

# count
total_items = len(WPSFD4131_df)

# average change
avg_change = [
    WPSFD4131_df[positive_filter]['^ %'].mean(),
    WPSFD4131_df[negative_filter]['^ %'].mean(),
    0,
    ""
]

# frequency
frequency = [
    WPSFD4131_df[positive_filter]['^ %'].count(),
    WPSFD4131_df[negative_filter]['^ %'].count(),
    WPSFD4131_df[zero_filter]['^ %'].count()
]
frequency.append(frequency[0]/frequency[1])

# frequency pct
frequency_pct = [100 * frequency[i]/total_items for i in range(3)]
frequency_pct.append("")

# Prob adj
prob_adj = [frequency_pct[i] * avg_change[i] for i in range(3)]
prob_adj.append("")

pd.DataFrame({
    "%": [x * 100 for x in avg_change],
    "Frequency": frequency,
    "Frequency %": frequency_pct,
    "Prob Adjust % Change": prob_adj,
}, index=["Av Pos", "Av Neg", "Zero", "Ratio P/N"])

Unnamed: 0,%,Frequency,Frequency %,Prob Adjust % Change
Av Pos,0.339887,522.0,83.788122,0.284785
Av Neg,-0.200939,50.0,8.025682,-0.016127
Zero,0.0,50.0,8.025682,0.0
Ratio P/N,,10.44,,


- **Stats**

In [14]:
from scipy.stats import describe

# Display the results

stats = describe(WPSFD4131_df['^ %'].dropna().tolist())
pd.DataFrame(
    {
        'value': [
            str(stats.nobs),
            stats.minmax[0] * 100,
            stats.minmax[1] * 100,
            stats.mean * 100,
            WPSFD4131_df['^ %'].median() * 100,
            WPSFD4131_df['^ %'].mode(dropna=True)[0] * 100,
            stats.variance,
            stats.skewness,
            stats.kurtosis
        ]
    },
    index=['nobs', 'Min %', 'Max %', 'Mean %', "Median %", "Mode %", 'Variance', 'Skewness', 'Kurtosis'],
)

Unnamed: 0,value
nobs,622.0
Min %,-1.24451
Max %,2.152642
Mean %,0.26909
Median %,0.209941
Mode %,0.0
Variance,1e-05
Skewness,1.438198
Kurtosis,6.28234


- **Data preview**

In [15]:
# Define bins
bins = [0.005 * i for i in range(-2, 5)]
bins.append(stats.minmax[1])
bins.insert(0, stats.minmax[0])
bin_labels = [f"{round(bins[i] * 100, 2)}% to {round(bins[i+1] * 100, 2)}%" for i in range(len(bins) - 1)]
bin_labels[0] = f"Less than {bins[1] * 100}%"
bin_labels[-1] = f"Greater than {bins[-2] * 100}%"

# Assign data to bins
binned = pd.cut(WPSFD4131_df['^ %'], bins=bins, labels=bin_labels, include_lowest=True)

# Calculate frequency, probability, and cumulative probability
frequency = binned.value_counts().sort_index()
probability = 100 * frequency / frequency.sum()
cumulative_probability = probability.cumsum()

occurrence_frequencies = pd.DataFrame({
    'Frequency': frequency.values,
    'Probability %': probability.values,
    'Cumulative Probability %': cumulative_probability.values
}, index=bin_labels)

occurrence_frequencies

Unnamed: 0,Frequency,Probability %,Cumulative Probability %
Less than -1.0%,1,0.160772,0.160772
-1.0% to -0.5%,5,0.803859,0.96463
-0.5% to 0.0%,94,15.11254,16.07717
0.0% to 0.5%,416,66.881029,82.958199
0.5% to 1.0%,88,14.14791,97.106109
1.0% to 1.5%,12,1.92926,99.03537
1.5% to 2.0%,5,0.803859,99.839228
Greater than 2.0%,1,0.160772,100.0


In [16]:
fig = px.bar(occurrence_frequencies, x=occurrence_frequencies.index, y='Probability %')

fig = px.bar(
    occurrence_frequencies,
    x=occurrence_frequencies.index,
    y='Probability %',
)

fig.update_layout(
    title=dict(text='WPSFD4131 % change'),
    width=800,
    height=600,
    yaxis=dict(title="Frequency"),
    hovermode="x unified",
    template="plotly_dark"
)

fig.show()

- **WPSFD4131 % change**

In [17]:
fig = go.Figure()

# Reference line
fig.add_trace(go.Scatter(
    x=WPSFD4131_df.index,
    y=[100 * stats.mean] * (stats.nobs + 1),
    name='Average % change',
    line=dict(
        width=2,
        color="#F2C14E",   # muted gold
        dash="dash"
    ),
    hoverinfo="skip",
    # showlegend=False,
))

# Bars
fig.add_trace(go.Bar(
    x=WPSFD4131_df.index,
    y=WPSFD4131_df['^ %'] * 100,
    name='WPSFD4131',
    marker=dict(
        color="#4C78A8",  # professional blue
        line=dict(color="#4C78A8", width=0)
    ),
    width=0.5,
))

fig.update_layout(
    title=dict(
        text='WPSFD4131 % change',
        x=0.02,
        font=dict(size=20)
    ),
    plot_bgcolor="#0E1117",
    paper_bgcolor="#0E1117",
    font=dict(color="#E6E6E6", size=14),
    width=1400,
    height=700,
    yaxis=dict(
        title="%",
        exponentformat="none",
        showgrid=False,
        zeroline=False
    ),
    xaxis=dict(
        showgrid=False,
        zeroline=False
    ),
    hovermode="x unified",
)

fig.show()


- **WPSFD49207 data plot**

In [18]:
fig = px.line(
    WPSFD4131_df,
    x=WPSFD4131_df.index,
    y="WPSFD4131",
    markers=True
)

fig.update_layout(
    title=dict(text='Producer Price Index by Commodity: Final Demand: Finished Goods Less Food and Energy'),
    width=1000,
    height=600,
    yaxis=dict(title="WPSFD4131"),
    hovermode="x unified",
    template="plotly_dark"
)

fig.show()