In [24]:
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import sys
sys.path.append('../utils')
from helpers import real_gdp_df

plt.style.use('Solarize_Light2')

# **WPSFD4131**

- **Load WPSFD4131 data**

In [25]:
WPSFD4131_df = pd.read_csv('data/WPSFD4131.csv')

WPSFD4131_df['DATE'] = pd.to_datetime(WPSFD4131_df['DATE'])
WPSFD4131_df.set_index('DATE', inplace=True)

WPSFD4131_df['^ %'] = WPSFD4131_df['WPSFD4131'].pct_change()

WPSFD4131_df.tail()

Unnamed: 0_level_0,WPSFD4131,^ %
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-07-01,253.075,0.001262
2024-08-01,253.838,0.003015
2024-09-01,254.396,0.002198
2024-10-01,254.638,0.000951
2024-11-01,255.289,0.002557


- **Load GDP data**

In [26]:
GDP_df = real_gdp_df()
filtered_gdp_df = GDP_df[GDP_df.index >= WPSFD4131_df.index[0]]

- **Average change**

In [27]:
print(f"Average change is {WPSFD4131_df['^ %'].mean() * 100}%")

Average change is 0.269145766561457%


- **Overview on change**

In [28]:
positive_filter = WPSFD4131_df['^ %'] > 0
negative_filter = WPSFD4131_df['^ %'] < 0
zero_filter = WPSFD4131_df['^ %'] == 0

# count
total_items = len(WPSFD4131_df)

# average change
avg_change = [
    WPSFD4131_df[positive_filter]['^ %'].mean(),
    WPSFD4131_df[negative_filter]['^ %'].mean(),
    0,
    ""
]

# frequency
frequency = [
    WPSFD4131_df[positive_filter]['^ %'].count(),
    WPSFD4131_df[negative_filter]['^ %'].count(),
    WPSFD4131_df[zero_filter]['^ %'].count()
]
frequency.append(frequency[0]/frequency[1])

# frequency pct
frequency_pct = [100 * frequency[i]/total_items for i in range(3)]
frequency_pct.append("")

# Prob adj
prob_adj = [frequency_pct[i] * avg_change[i] for i in range(3)]
prob_adj.append("")

pd.DataFrame({
    "%": [x * 100 for x in avg_change],
    "Frequency": frequency,
    "Frequency %": frequency_pct,
    "Prob Adjust % Change": prob_adj,
}, index=["Av Pos", "Av Neg", "Zero", "Ratio P/N"])

Unnamed: 0,%,Frequency,Frequency %,Prob Adjust % Change
Av Pos,0.341619,510.0,83.469722,0.285149
Av Neg,-0.200939,50.0,8.183306,-0.016443
Zero,0.0,50.0,8.183306,0.0
Ratio P/N,,10.2,,


- **Stats**

In [29]:
from scipy.stats import describe

# Display the results

stats = describe(WPSFD4131_df['^ %'].dropna().tolist())
pd.DataFrame(
    {
        'value': [
            str(stats.nobs),
            stats.minmax[0] * 100,
            stats.minmax[1] * 100,
            stats.mean * 100,
            WPSFD4131_df['^ %'].median() * 100,
            WPSFD4131_df['^ %'].mode(dropna=True)[0] * 100,
            stats.variance,
            stats.skewness,
            stats.kurtosis
        ]
    },
    index=['nobs', 'Min %', 'Max %', 'Mean %', "Median %", "Mode %", 'Variance', 'Skewness', 'Kurtosis'],
)

Unnamed: 0,value
nobs,610.0
Min %,-1.24451
Max %,2.152642
Mean %,0.269146
Median %,0.206414
Mode %,0.0
Variance,1.1e-05
Skewness,1.428422
Kurtosis,6.136716


- **Data preview**

In [30]:
# Define bins
bins = [0.005 * i for i in range(-2, 5)]
bins.append(stats.minmax[1])
bins.insert(0, stats.minmax[0])
bin_labels = [f"{round(bins[i] * 100, 2)}% to {round(bins[i+1] * 100, 2)}%" for i in range(len(bins) - 1)]
bin_labels[0] = f"Less than {bins[1] * 100}%"
bin_labels[-1] = f"Greater than {bins[-2] * 100}%"

# Assign data to bins
binned = pd.cut(WPSFD4131_df['^ %'], bins=bins, labels=bin_labels, include_lowest=True)

# Calculate frequency, probability, and cumulative probability
frequency = binned.value_counts().sort_index()
probability = 100 * frequency / frequency.sum()
cumulative_probability = probability.cumsum()

occurrence_frequencies = pd.DataFrame({
    'Frequency': frequency.values,
    'Probability %': probability.values,
    'Cumulative Probability %': cumulative_probability.values
}, index=bin_labels)

occurrence_frequencies

Unnamed: 0,Frequency,Probability %,Cumulative Probability %
Less than -1.0%,1,0.163934,0.163934
-1.0% to -0.5%,5,0.819672,0.983607
-0.5% to 0.0%,94,15.409836,16.393443
0.0% to 0.5%,404,66.229508,82.622951
0.5% to 1.0%,88,14.42623,97.04918
1.0% to 1.5%,12,1.967213,99.016393
1.5% to 2.0%,5,0.819672,99.836066
Greater than 2.0%,1,0.163934,100.0


In [31]:
fig = go.Figure()

fig.add_trace(go.Bar(
    x=occurrence_frequencies.index,
    y=occurrence_frequencies['Frequency'],
    name='M2 Annualised Rate',
    marker=dict(color='blue'),
))

fig.update_layout(
    title=dict(text='M2 Annualised Rate pct change %'),
    plot_bgcolor='rgb(230, 230,230)',
    width=700,
    height=700,
    yaxis=dict(title="Frequency", exponentformat="none", showgrid=False),
    barmode="group",
    xaxis=dict(showgrid=False),
    shapes=[
        dict(
            type="line",
            x0="0.0% to 0.5%",  
            x1="0.0% to 0.5%", 
            y0=0,  # Start of the line on the y-axis
            y1=330,  # End of the line on the y-axis
            line=dict(
                color="Red",
                width=2,
                dash="solid",
            ),
        )
    ]
)

fig.show()

- **WPSFD4131 data plot**

In [32]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=WPSFD4131_df.index,
    y=WPSFD4131_df['WPSFD4131'],
    name='WPSFD4131',
    fill='tonexty',
    fillcolor='rgba(0, 0, 255, 0.9)',
    line=dict(color='blue', width=1),
))

# ---------------------------------------------------------------------

Y = np.array(WPSFD4131_df['WPSFD4131'])
X = np.arange(1, len(Y) + 1)
m, c = np.polyfit(X, Y, 1)

# ---------------------------------------------------------------------

fig.add_trace(go.Scatter(
    x=WPSFD4131_df.index, 
    y=m * X + c, 
    name='Trend (LR)',
    line=dict(color='red', width=1),
    showlegend=True
))

fig.update_layout(
    title=dict(text='Producer Price Index by Commodity: Final Demand: Finished Goods Less Foods and Energy (WPSFD4131)'),
    width=1400,
    height=900,
    plot_bgcolor="black",
    paper_bgcolor="#212121",
    font=dict(color="white"),
    hovermode="x unified",
    yaxis=dict(title="Index 1982=100", exponentformat="none", showgrid=False, zeroline=False),
    barmode="group",
    legend=dict(x=0.5, y=-0.2, orientation="h"),
    xaxis=dict(showgrid=False),
)

fig.show()

- **WPSFD4131 vs GDP Growth**

In [33]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=WPSFD4131_df.index, 
    y=[100 * stats.mean] * (stats.nobs + 1), 
    name='PMI 50 Points',
    line=dict(width=2),
    hoverinfo="skip",
    showlegend=False,
    marker=dict(color='yellow'),
))

fig.add_trace(go.Bar(
    x=WPSFD4131_df.index,
    y=WPSFD4131_df['^ %'] * 100,
    name='WPSFD4131',
    marker=dict(line=dict(color='rgba(109,215,253, 0.9)', width=2)),
    width=0.5,
))

fig.add_trace(go.Bar(
    x=filtered_gdp_df.index,
    y=filtered_gdp_df['GDP'],
    name='GDP',
    yaxis="y2",
    marker=dict(line=dict(color='rgba(255, 0, 0, .9)', width=2)),
    width=0.5,
))

# ------------------------------------------------------------------------
level_a = 0  # Level on left y-axis
level_b = 0  # Matching level on right y-axis

# Define the ranges for each axis
range_left = [(WPSFD4131_df['^ %'] * 100).min() - .25, (WPSFD4131_df['^ %'] * 100).max() + .25]  # Min and max for right y-axis (to be adjusted)
range_right = [filtered_gdp_df['GDP'].min() - 5, filtered_gdp_df['GDP'].max() + 5]  # Min and max for left y-axis

# Calculate scaling factor to align levels
proportion = (level_a - range_left[0]) / (range_left[1] - range_left[0])  # Position of level_a
range_right[1] = range_right[0] + (level_b - range_right[0]) / proportion  # Adjust right range
# ------------------------------------------------------------------------

fig.update_layout(
    title=dict(text='WPSFD4131 pct change vs GDP Growth'),
    plot_bgcolor="black",
    paper_bgcolor="#212121",
    font=dict(color="white"),
    width=1400,
    height=700,
    hovermode="x unified",
    yaxis=dict(title="WPSFD4131 pct change %", exponentformat="none", range=range_left, showgrid=False),
    yaxis2=dict(title="Real GDP change in %", overlaying="y", side="right", range=range_right, showgrid=False),
    barmode="group",
    legend=dict(x=0.5, y=-0.2, orientation="h"),
    xaxis=dict(showgrid=False),
)

fig.show()