In [261]:
import numpy as np
import pandas as pd
import plotly.express as px

In [262]:
# Generate 3 years of sinusoidal data with trend and noise
np.random.seed(42)

# Parameters for the sinusoidal function and trend
periods = 48  # 3 years of monthly data
time = np.arange(1, periods + 1)
amplitude = 4  # Amplitude of the sine wave
frequency = 1 / 12  # Monthly seasonality (12-month period)
noise_std = 0.5  # Standard deviation of the noise
trend_slope = 0.5  # Slope for the trend
offset = 5  # Offset to ensure all values are positive

# Generate sinusoidal signal
signal = amplitude * np.sin(2 * np.pi * frequency * time)

# Add linear trend
trend = trend_slope * time

# Add random noise
noise = np.random.normal(0, noise_std, periods)

# Combine signal, trend, and noise, and add offset to avoid negative values
data_with_trend_noise = signal + trend + noise + offset

# Create a DataFrame for 3 years of data
dates = pd.date_range(start="2020-01-01", periods=periods, freq='MS')
df = pd.DataFrame({'date': dates, 'value': data_with_trend_noise})

#df['date'] = df['date'].dt.strftime('%Y-%m')

df['date'] = df['date'].dt.to_period('M')
df['month'] = df['date'].dt.month

df.head(13)

Unnamed: 0,date,value,month
0,2020-01,7.748357,1
1,2020-02,9.394969,2
2,2020-03,10.823844,3
3,2020-04,11.225617,4
4,2020-05,9.382923,5
5,2020-06,7.882932,6
6,2020-07,7.289606,7
7,2020-08,5.919616,8
8,2020-09,5.265263,9
9,2020-10,6.807178,10


In [263]:
x=df['date'].dt.strftime('%Y-%m')

# Plot using Plotly Express
fig = px.line(df, x=x, y='value', 
              title='Passengers', 
              labels={'Date': 'Date', 'Value': 'Value'},
              markers=True,
              width=1000,
              height=500
              )

# Show the plot
fig.show()

In [264]:
df['ma'] = df['value'].rolling(window=12).mean()
df = df.dropna().reset_index(drop=True)
df.head(5)

Unnamed: 0,date,value,month,ma
0,2020-12,10.767135,12,8.397978
1,2021-01,13.620981,1,8.887363
2,2021-02,14.507461,2,9.313404
3,2021-03,15.637541,3,9.714545
4,2021-04,16.182958,4,10.127657


In [265]:
df['detrend'] = df['value']-df['ma']
df.head(5)

Unnamed: 0,date,value,month,ma,detrend
0,2020-12,10.767135,12,8.397978,2.369157
1,2021-01,13.620981,1,8.887363,4.733618
2,2021-02,14.507461,2,9.313404,5.194057
3,2021-03,15.637541,3,9.714545,5.922996
4,2021-04,16.182958,4,10.127657,6.055301


In [273]:
df2 = (df.groupby('month')
                    .agg({'detrend':'mean'})
                    .reset_index(drop=True)
                    .rename(columns={'detrend':'seasonal_mean'})
)

df2

Unnamed: 0,seasonal_mean
0,4.81984
1,5.697828
2,6.18749
3,6.372406
4,4.74725
5,2.919772
6,0.640095
7,-0.515124
8,-1.064548
9,-0.84061


<img src="pics/z-score.png" style="width: 30%;"/>

In [274]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

pokus = scaler.fit_transform(df2['seasonal_mean'].to_numpy().reshape(-1,1))
pokus

array([[ 0.77938617],
       [ 1.10460795],
       [ 1.28598762],
       [ 1.35448355],
       [ 0.75249732],
       [ 0.07556756],
       [-0.7688647 ],
       [-1.19677809],
       [-1.40029437],
       [-1.31734371],
       [-0.62483933],
       [-0.04440998]])

In [275]:
df2['SF'] = (df2['seasonal_mean'] - df2['seasonal_mean'].mean()) / df2['seasonal_mean'].std()
df2.index = df2.index + 1
df2

Unnamed: 0,seasonal_mean,SF
1,4.81984,0.746205
2,5.697828,1.057582
3,6.18749,1.231239
4,6.372406,1.296819
5,4.74725,0.720461
6,2.919772,0.07235
7,0.640095,-0.736132
8,-0.515124,-1.145828
9,-1.064548,-1.34068
10,-0.84061,-1.261261


In [276]:
df['SF'] = df['month'].map(df2['SF'])
df.head(12)

Unnamed: 0,date,value,month,ma,detrend,SF
0,2020-12,10.767135,12,8.397978,2.369157,-0.042519
1,2021-01,13.620981,1,8.887363,4.733618,0.746205
2,2021-02,14.507461,2,9.313404,5.194057,1.057582
3,2021-03,15.637541,3,9.714545,5.922996,1.231239
4,2021-04,16.182958,4,10.127657,6.055301,1.296819
5,2021-05,14.993584,5,10.595212,4.398372,0.720461
6,2021-06,14.157124,6,11.118062,3.039062,0.07235
7,2021-07,12.045988,7,11.514427,0.531561,-0.736132
8,2021-08,10.829747,8,11.923604,-1.093858,-1.145828
9,2021-09,12.232824,9,12.504234,-0.27141,-1.34068


In [277]:
df['des'] = df['ma']-df['SF']
df

Unnamed: 0,date,value,month,ma,detrend,SF,des
0,2020-12,10.767135,12,8.397978,2.369157,-0.042519,8.440497
1,2021-01,13.620981,1,8.887363,4.733618,0.746205,8.141158
2,2021-02,14.507461,2,9.313404,5.194057,1.057582,8.255822
3,2021-03,15.637541,3,9.714545,5.922996,1.231239,8.483306
4,2021-04,16.182958,4,10.127657,6.055301,1.296819,8.830838
5,2021-05,14.993584,5,10.595212,4.398372,0.720461,9.874751
6,2021-06,14.157124,6,11.118062,3.039062,0.07235,11.045711
7,2021-07,12.045988,7,11.514427,0.531561,-0.736132,12.250559
8,2021-08,10.829747,8,11.923604,-1.093858,-1.145828,13.069432
9,2021-09,12.232824,9,12.504234,-0.27141,-1.34068,13.844914


In [248]:
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import acf
import plotly.express as px

from plotly.subplots import make_subplots
from statsmodels.tsa.seasonal import DecomposeResult, seasonal_decompose
import plotly.graph_objects as go

from statsmodels.graphics.tsaplots import plot_acf

from scipy.optimize import minimize

import statsmodels.api as sm

In [249]:
# Perform seasonal decomposition
decomposition = seasonal_decompose(df['value'], model='additive', period=12)

# Extract seasonal component
seasonal = decomposition.seasonal

# Detrended data (without seasonal component)
detrended = df['value'] - seasonal

detrended

0     11.319811
1     11.602904
2     11.423419
3     12.156115
4     12.438680
5     12.811443
6     13.958035
7     14.256931
8     14.016537
9     15.706119
10    16.056356
11    16.185769
12    16.840301
13    17.209732
14    18.435520
15    18.443077
16    18.907673
17    19.017539
18    19.655064
19    20.410090
20    21.648828
21    20.966546
22    21.640389
23    22.563277
24    22.942254
25    23.586355
26    23.400224
27    24.354480
28    24.818254
29    25.687092
30    25.886595
31    26.653119
32    26.572137
33    26.234033
34    27.809322
35    27.921685
36    30.081237
dtype: float64

In [None]:
# Create a DataFrame to store the results
result_df = pd.DataFrame({
    'date': df.index,
    'original': df['value'],
    'seasonal': seasonal,
    'detrended': detrended
})

# Plotting using Plotly Express
fig = px.line(result_df, x='date', y=['original', 'seasonal', 'detrended'],
              labels={'value': 'Value', 'date': 'Date'},
              title='Seasonal Decomposition of Time Series')

fig.show()

In [251]:
import numpy as np

# Assuming you have monthly data and a 12-month cycle (period=12)
window = 12

# Step 1: Calculate the moving average (trend)
trend = df['value'].rolling(window=window, center=True).mean()

# Step 2: Detrend the data (original data - trend)
detrended = df['value'] - trend

# Step 3: Calculate the seasonal component (average per month)
seasonal = detrended.groupby(df.index.month).mean()

# Step 4: Remove the seasonal component (for each month, subtract the seasonal average)
# Adjust for mean
adjusted_seasonal = detrended - seasonal[df.index.month]

# Plot to compare
plt.figure(figsize=(10, 6))
plt.subplot(3, 1, 1)
plt.plot(df['value'], label='Original Data')
plt.title('Original Data')
plt.subplot(3, 1, 2)
plt.plot(trend, label='Trend', color='green')
plt.title('Trend')
plt.subplot(3, 1, 3)
plt.plot(seasonal, label='Seasonal', color='orange')
plt.title('Seasonal Component')
plt.tight_layout()
plt.show()


AttributeError: 'RangeIndex' object has no attribute 'month'