In [12]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from pandas.plotting import autocorrelation_plot
import statsmodels.api as sm

In [22]:
# Read the monthly export from the notebook's working directory and preview
# its first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer="./Month_Value_1.csv")
df.head(n=5)

Unnamed: 0,Period,Revenue,Sales_quantity,Average_cost,The_average_annual_payroll_of_the_region
0,01.01.2015,16010070.0,12729.0,1257.763541,30024676.0
1,01.02.2015,15807590.0,11636.0,1358.507,30024676.0
2,01.03.2015,22047150.0,15922.0,1384.697024,30024676.0
3,01.04.2015,18814580.0,15227.0,1235.606705,30024676.0
4,01.05.2015,14021480.0,8620.0,1626.621765,30024676.0


# Time Series Analysis

In this notebook, we'll perform a comprehensive time series analysis to identify:
- Trend
- Seasonality
- Cyclical patterns
- Stationarity (checked with the Augmented Dickey-Fuller test)

In [None]:
# Data preparation - parse 'Period' (day.month.year strings) and use it as the index.
# The guard keeps this cell idempotent: after the first run 'Period' is already the
# index, so looking it up as a column again would raise a KeyError on re-run.
if df.index.name != 'Period':
    df = df.assign(
        Period=pd.to_datetime(df['Period'], format='%d.%m.%Y')
    ).set_index('Period')

# Drop rows that contain a NaN in any column so the analysis only sees complete records
df_clean = df.dropna()

# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line to the output.
df_clean.info()

# Display the processed dataframe
df_clean.head()

In [None]:
# Overview figure: one panel per series on a 2x2 grid (the fourth slot stays empty).
plt.figure(figsize=(14, 8))

# (column in df_clean, panel title), in the order the panels are drawn
panels = [
    ('Revenue', 'Revenue Over Time'),
    ('Sales_quantity', 'Sales Quantity Over Time'),
    ('Average_cost', 'Average Cost Over Time'),
]

for slot, (column, heading) in enumerate(panels, start=1):
    plt.subplot(2, 2, slot)
    plt.plot(df_clean.index, df_clean[column])
    plt.title(heading)
    plt.xticks(rotation=45)  # tilt the date labels

plt.tight_layout()
plt.show()

## Time Series Decomposition

We'll decompose the time series to identify:
1. Trend component
2. Seasonal component
3. Residual component

In [None]:
# Define function to perform decomposition and plot results
def decompose_series(series, title, model='additive', period=12, cycle_window=6):
    """Decompose a time series and plot its components in a five-panel figure.

    Panels, top to bottom: the original series, the trend, seasonal and residual
    components returned by ``seasonal_decompose``, and a rolling mean of the
    residuals shown as a rough "cyclical" component.

    Parameters
    ----------
    series : pandas.Series
        Series to decompose; passed unchanged to ``seasonal_decompose``.
    title : str
        Name of the series, used in the title of the first panel.
    model : str, default 'additive'
        Forwarded to ``seasonal_decompose`` as ``model``.
    period : int, default 12
        Forwarded to ``seasonal_decompose`` as ``period``.
    cycle_window : int, default 6
        Number of observations in the rolling mean applied to the residuals.
        The default of 6 is the window this function previously hard-coded.

    Returns
    -------
    The result object returned by ``seasonal_decompose``.
    """
    # Perform decomposition
    decomposition = seasonal_decompose(series, model=model, period=period)

    # Cycle proxy: trailing rolling mean of the residuals over `cycle_window`
    # observations (the first cycle_window - 1 values are NaN by construction).
    residual_cycle = decomposition.resid.rolling(window=cycle_window).mean()

    # (data, panel title) pairs in top-to-bottom plotting order
    panels = [
        (series, f'Original {title} Time Series'),
        (decomposition.trend, 'Trend Component'),
        (decomposition.seasonal, 'Seasonal Component'),
        (decomposition.resid, 'Residual Component'),
        (residual_cycle, 'Cyclical Component (Rolling Mean of Residuals)'),
    ]

    # Plot decomposition, one stacked panel per component
    fig, axes = plt.subplots(len(panels), 1, figsize=(14, 12))
    for ax, (values, heading) in zip(axes, panels):
        ax.plot(values)
        ax.set_title(heading)

    fig.tight_layout()
    plt.show()

    return decomposition

In [None]:
# Decompose Revenue first, then Sales Quantity, keeping each result under its own name
revenue_decomposition, sales_decomposition = [
    decompose_series(df_clean[column], label)
    for column, label in (
        ('Revenue', 'Revenue'),
        ('Sales_quantity', 'Sales Quantity'),
    )
]

## Stationarity Testing

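A stationary time series has constant statistical properties over time (mean, variance, autocorrelation).
We'll use the Augmented Dickey-Fuller (ADF) test to check for stationarity. Its null hypothesis is that the series has a unit root (i.e., is non-stationary), so a p-value at or below 0.05 lets us reject the null and treat the series as stationary.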

In [None]:
# Function to perform ADF test
def adf_test(series, title, alpha=0.05):
    """Run the Augmented Dickey-Fuller test on ``series`` and print a report.

    The report lists the test statistic, p-value, number of lags used, number
    of observations and the critical values returned by ``adfuller``, followed
    by a plain-language conclusion.

    Parameters
    ----------
    series : pandas.Series
        Series to test; missing values are dropped before testing.
    title : str
        Label printed in the report header.
    alpha : float, default 0.05
        Significance level. The null hypothesis is rejected (series reported
        as stationary) when the p-value is less than or equal to ``alpha``.

    Returns
    -------
    None
        The function only prints.
    """
    print(f'Augmented Dickey-Fuller Test: {title}')
    result = adfuller(series.dropna())

    # The first four entries of the result line up with these labels;
    # the fifth entry is a dict of critical values.
    labels = ['ADF Test Statistic', 'p-value', '# Lags Used', '# Observations']
    report = pd.Series(result[0:4], index=labels)
    for level, critical_value in result[4].items():
        report[f'Critical Value ({level})'] = critical_value

    print(report.to_string())

    # Print conclusion based on the p-value and the chosen significance level
    p_value = result[1]
    if p_value <= alpha:
        print("\nStrong evidence against the null hypothesis")
        print("Reject the null hypothesis")
        print("Data is stationary")
    else:
        print("\nWeak evidence against the null hypothesis")
        print("Fail to reject the null hypothesis")
        print("Data is non-stationary")
    print('\n' + '-'*50)

In [None]:
# Run the ADF test on each series in turn: Revenue, Sales Quantity, Average Cost
adf_targets = [
    ('Revenue', 'Revenue'),
    ('Sales_quantity', 'Sales Quantity'),
    ('Average_cost', 'Average Cost'),
]

for column, label in adf_targets:
    adf_test(df_clean[column], label)

## Autocorrelation Analysis

Autocorrelation shows the correlation between a time series and its lagged values.
This helps identify seasonality and patterns in the data.

In [None]:
# Create ACF and PACF plots
def plot_acf_pacf(series, title, lags=40):
    """Plot the ACF (top) and PACF (bottom) of ``series`` in one figure.

    Parameters
    ----------
    series : pandas.Series
        Series to analyse; missing values are dropped before plotting.
    title : str
        Name of the series, used in both panel titles.
    lags : int, default 40
        Number of lags to show. An integer larger than the series supports is
        reduced to ``len(series.dropna()) // 2 - 1`` and a note is printed.
        Non-integer values are forwarded to statsmodels unchanged.

    Raises
    ------
    ValueError
        If an integer ``lags`` is given and fewer than 4 non-missing
        observations are available.
    """
    observed = series.dropna()

    # statsmodels' PACF only accepts lags below half the sample size and raises
    # ValueError otherwise, which the default of 40 triggers on short monthly
    # series. Cap integer requests so both panels use the same valid lag count.
    if isinstance(lags, int):
        max_lags = len(observed) // 2 - 1
        if max_lags < 1:
            raise ValueError(
                'Need at least 4 non-missing observations to plot ACF/PACF, '
                f'got {len(observed)}'
            )
        if lags > max_lags:
            print(
                f'Requested {lags} lags but only {len(observed)} observations '
                f'are available; using {max_lags} lags instead.'
            )
            lags = max_lags

    fig, (acf_ax, pacf_ax) = plt.subplots(2, 1, figsize=(14, 8))

    # ACF plot
    sm.graphics.tsa.plot_acf(observed, lags=lags, ax=acf_ax)
    acf_ax.set_title(f'Autocorrelation Function (ACF) for {title}')

    # PACF plot
    sm.graphics.tsa.plot_pacf(observed, lags=lags, ax=pacf_ax)
    pacf_ax.set_title(f'Partial Autocorrelation Function (PACF) for {title}')

    fig.tight_layout()
    plt.show()

In [None]:
# Draw ACF/PACF figures for Revenue, then for Sales Quantity
acf_targets = [
    ('Revenue', 'Revenue'),
    ('Sales_quantity', 'Sales Quantity'),
]

for column, label in acf_targets:
    plot_acf_pacf(df_clean[column], label)

## Summary of Findings

From our time series analysis:

1. **Trend Analysis**:
   - We've extracted the long-term movement in the time series data
   - The trend component shows the general direction of the data over time

2. **Seasonality**:
   - The seasonal component reveals regular patterns that repeat over fixed intervals
   - This helps identify predictable periodic fluctuations

3. **Cyclical Patterns**:
   - We approximated longer-term cycles (fluctuations that, unlike seasonality, have no fixed period) with a rolling mean of the decomposition residuals
   - This helps identify business cycles and other non-seasonal fluctuations

4. **Stationarity Test**:
   - The ADF test results indicate whether the time series is stationary
   - Stationarity is important for many time series models

5. **Autocorrelation Analysis**:
   - ACF and PACF plots help identify significant lags in the data
   - These aid in determining appropriate parameters for time series models like ARIMA