# Trend Analysis and Forecasting

This notebook focuses on:
- Time series trend analysis
- Seasonal decomposition
- Anomaly detection
- Financial forecasting
- Predictive modeling
- Risk analysis

## Setup and Data Loading

In [None]:
import sys
from pathlib import Path

# Add project root to path
project_root = Path().absolute().parent
sys.path.insert(0, str(project_root))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
from scipy import stats
from scipy.signal import find_peaks
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
warnings.filterwarnings('ignore')

# Configure plotting
plt.style.use('default')
sns.set_palette("husl")
%matplotlib inline

# Import project modules
from src.data_processing.database_manager import DatabaseManager
from src.analysis.trend_analysis import TrendAnalyzer
from src.analysis.financial_metrics import FinancialMetricsCalculator

print("Setup complete!")

In [None]:
# Instantiate the project services used by the rest of the notebook
db_manager = DatabaseManager()
trend_analyzer = TrendAnalyzer()
metrics_calculator = FinancialMetricsCalculator()

# Fetch every transaction, oldest first
query = """
SELECT date, amount, category, subcategory, description, 
       store_location, payment_method
FROM financial_data
ORDER BY date
"""

df = db_manager.query_data(query)
df['date'] = pd.to_datetime(df['date'])

# Collapse transactions into one row per distinct 'date' value:
# sum, number of rows and mean of 'amount', rounded to 2 decimals.
# NOTE(review): grouping is on the parsed timestamp as-is; this is one row
# per calendar day only if 'date' carries no time-of-day part - confirm.
daily_data = (
    df.groupby('date')['amount']
    .agg(total_amount='sum', transaction_count='count', avg_amount='mean')
    .round(2)
    .reset_index()
)

# Headline figures about what was loaded
first_day = df['date'].min().date()
last_day = df['date'].max().date()
smallest_amount = df['amount'].min()
largest_amount = df['amount'].max()

print(f"Loaded {len(df):,} records")
print(f"Date range: {first_day} to {last_day}")
print(f"Daily data points: {len(daily_data)}")
print(f"Amount range: ${smallest_amount:.2f} to ${largest_amount:.2f}")

# Preview the first rows of the aggregated frame
print("\nDaily Data Sample:")
print(daily_data.head(10))

## Time Series Visualization

In [None]:
# One stacked panel per daily metric, each overlaid with its 7-day rolling mean.
# Tuple layout: (source column, MA column to create, line color, title, y-label)
panels = [
    ('total_amount', 'ma_7', 'blue',
     'Daily Total Amount Over Time', 'Total Amount ($)'),
    ('transaction_count', 'count_ma_7', 'green',
     'Daily Transaction Count Over Time', 'Number of Transactions'),
    ('avg_amount', 'avg_ma_7', 'orange',
     'Daily Average Transaction Amount Over Time', 'Average Amount ($)'),
]

fig, axes = plt.subplots(3, 1, figsize=(15, 12))

for ax, (column, ma_column, color, title, y_label) in zip(axes, panels):
    # Raw daily series
    ax.plot(daily_data['date'], daily_data[column],
            linewidth=1, alpha=0.7, color=color)
    ax.set_title(title, fontsize=14)
    ax.set_ylabel(y_label)
    ax.grid(True, alpha=0.3)

    # The rolling mean is stored on daily_data (not just plotted) because
    # the anomaly cell draws the 'ma_7' column again.
    daily_data[ma_column] = daily_data[column].rolling(window=7).mean()
    ax.plot(daily_data['date'], daily_data[ma_column],
            linewidth=2, color='red', label='7-day MA')
    ax.legend()

# Only the bottom panel carries the shared x-axis label
axes[2].set_xlabel('Date')

plt.tight_layout()
plt.show()

# Summary statistics of the daily series
amounts = daily_data['total_amount']
mean_amount = amounts.mean()
std_amount = amounts.std()

print("TIME SERIES STATISTICS")
print("=" * 40)
print(f"Data points: {len(daily_data)}")
print(f"Average daily amount: ${mean_amount:,.2f}")
print(f"Standard deviation: ${std_amount:,.2f}")
print(f"Coefficient of variation: {std_amount / mean_amount:.2f}")
print(f"Average daily transactions: {daily_data['transaction_count'].mean():.1f}")
print(f"Max daily amount: ${amounts.max():,.2f}")
print(f"Min daily amount: ${amounts.min():,.2f}")

## Trend Detection and Analysis

In [None]:
# Fit a trend to the daily totals and to the daily transaction counts.
# The result objects come from the project's TrendAnalyzer; this cell reads
# their trend_direction, slope, r_squared, p_value, trend_strength and
# fitted_values attributes.
amount_trend = trend_analyzer.detect_trend(daily_data, 'total_amount')
count_trend = trend_analyzer.detect_trend(daily_data, 'transaction_count')

print("TREND ANALYSIS RESULTS")
print("=" * 40)
print(f"Amount Trend: {amount_trend.trend_direction}")
print(f"Amount Slope: {amount_trend.slope:.4f}")
print(f"Amount R²: {amount_trend.r_squared:.4f}")
print(f"Amount P-value: {amount_trend.p_value:.6f}")
print(f"Amount Trend Strength: {amount_trend.trend_strength}")

print(f"\nTransaction Count Trend: {count_trend.trend_direction}")
print(f"Count Slope: {count_trend.slope:.4f}")
print(f"Count R²: {count_trend.r_squared:.4f}")
print(f"Count P-value: {count_trend.p_value:.6f}")
print(f"Count Trend Strength: {count_trend.trend_strength}")

# Scatter of each series with its fitted trend line, side by side.
# Tuple layout: (column, trend result, title prefix, y-label, scatter kwargs)
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
trend_panels = [
    ('total_amount', amount_trend, 'Amount Trend',
     'Total Amount ($)', {}),
    ('transaction_count', count_trend, 'Transaction Count Trend',
     'Transaction Count', {'color': 'green'}),
]

for ax, (column, trend, title, y_label, scatter_kwargs) in zip(axes, trend_panels):
    ax.scatter(daily_data['date'], daily_data[column],
               alpha=0.6, s=20, **scatter_kwargs)
    ax.plot(daily_data['date'], trend.fitted_values,
            color='red', linewidth=2, label=f'Trend (R²={trend.r_squared:.3f})')
    ax.set_title(f'{title}: {trend.trend_direction}')
    ax.set_ylabel(y_label)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Single source of truth for the windows, so the printed analysis and the
# plot below can never disagree about which moving averages are shown.
ma_windows = [7, 14, 30]
ma_colors = ['red', 'blue', 'green']

# Pass a copy so the shared list cannot be altered by the callee
ma_analysis = trend_analyzer.analyze_moving_averages(
    daily_data, 'total_amount', list(ma_windows)
)

print("\nMOVING AVERAGES ANALYSIS")
print("=" * 40)
for window, metrics in ma_analysis.items():
    print(f"\n{window}-day Moving Average:")
    print(f"  Current MA: ${metrics['current_ma']:.2f}")
    print(f"  Previous MA: ${metrics['previous_ma']:.2f}")
    print(f"  Change: ${metrics['ma_change']:.2f}")
    print(f"  Trend: {metrics['ma_trend']}")
    print(f"  Volatility: {metrics['volatility']:.2f}")

# Overlay every moving average on the faint raw series
plt.figure(figsize=(15, 8))
plt.plot(daily_data['date'], daily_data['total_amount'],
         alpha=0.3, label='Daily Amount', linewidth=1)

for window, color in zip(ma_windows, ma_colors):
    ma = daily_data['total_amount'].rolling(window=window).mean()
    plt.plot(daily_data['date'], ma,
             color=color, linewidth=2, label=f'{window}-day MA')

plt.title('Moving Averages Comparison', fontsize=14)
plt.ylabel('Amount ($)')
plt.xlabel('Date')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

## Seasonal Decomposition

In [None]:
# Perform seasonal decomposition if we have enough data
if len(daily_data) >= 30:  # Need at least 30 days for meaningful decomposition
    # Rebuild the full daily calendar so that `period` consecutive rows always
    # span `period` calendar days; dates with no record are counted as 0.
    date_range = pd.date_range(start=daily_data['date'].min(),
                               end=daily_data['date'].max(),
                               freq='D')

    complete_data = pd.DataFrame({'date': date_range})
    complete_data = complete_data.merge(daily_data, on='date', how='left')
    complete_data['total_amount'] = complete_data['total_amount'].fillna(0)

    # Set date as index for decomposition
    ts_data = complete_data.set_index('date')['total_amount']

    # Decomposition is best-effort: a failure is reported, not raised
    try:
        # Weekly seasonality, or the longest period that still fits 3 cycles
        period = min(7, len(ts_data) // 3)
        if period >= 3:
            decomposition = seasonal_decompose(ts_data, model='additive', period=period)

            # Plot the observed series and its three components
            fig, axes = plt.subplots(4, 1, figsize=(15, 12))
            components = [
                (decomposition.observed, 'Original Time Series'),
                (decomposition.trend, 'Trend Component'),
                (decomposition.seasonal, 'Seasonal Component'),
                (decomposition.resid, 'Residual Component'),
            ]
            for ax, (component, title) in zip(axes, components):
                component.plot(ax=ax, title=title)
                ax.grid(True, alpha=0.3)

            plt.tight_layout()
            plt.show()

            # Strength of a component C relative to the remainder R:
            #     F = max(0, 1 - Var(R) / Var(C + R))
            # Trend strength therefore compares R against the deseasonalised
            # series (trend + resid), not against the raw observations, and
            # seasonal strength compares R against the detrended series
            # (seasonal + resid). Rows where trend/resid are NaN are dropped.
            resid_var = np.var(decomposition.resid.dropna())
            deseasonalized_var = np.var((decomposition.trend + decomposition.resid).dropna())
            detrended_var = np.var((decomposition.seasonal + decomposition.resid).dropna())

            # A zero (or NaN) denominator means there is nothing to explain
            trend_strength = (
                max(0.0, 1 - resid_var / deseasonalized_var)
                if deseasonalized_var > 0 else 0.0
            )
            seasonal_strength = (
                max(0.0, 1 - resid_var / detrended_var)
                if detrended_var > 0 else 0.0
            )

            # Analyze decomposition components
            print("SEASONAL DECOMPOSITION ANALYSIS")
            print("=" * 40)
            print(f"Decomposition Period: {period} days")
            print(f"Trend Strength: {trend_strength:.3f}")
            print(f"Seasonal Strength: {seasonal_strength:.3f}")

            # Average seasonal effect per weekday (0 = Monday ... 6 = Sunday)
            seasonal_stats = decomposition.seasonal.groupby(decomposition.seasonal.index.dayofweek).mean()
            day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

            print("\nSeasonal Pattern by Day of Week:")
            for i, day in enumerate(day_names):
                # Membership is tested by label, so the lookup must be by
                # label too (.loc); .iloc would read the wrong row whenever a
                # weekday is missing from the index.
                if i in seasonal_stats.index:
                    print(f"  {day}: {seasonal_stats.loc[i]:.2f}")

        else:
            print("Insufficient data for seasonal decomposition")

    except Exception as e:
        print(f"Seasonal decomposition failed: {e}")
        print("This is normal for datasets with insufficient seasonality")
else:
    print("Insufficient data for seasonal decomposition (need at least 30 days)")

## Anomaly Detection

In [None]:
# Ask the project's TrendAnalyzer for anomalous days in the daily totals.
# This cell reads the 'date', 'amount' and 'anomaly_score' columns of the result.
anomalies = trend_analyzer.detect_anomalies(daily_data, 'total_amount')

n_points = len(daily_data)
n_anomalies = len(anomalies)

print("ANOMALY DETECTION RESULTS")
print("=" * 40)
print(f"Total data points: {n_points}")
print(f"Anomalies detected: {n_anomalies}")
print(f"Anomaly rate: {n_anomalies / n_points * 100:.2f}%")

if n_anomalies == 0:
    print("\nNo anomalies detected in the dataset")

    # Nothing to highlight: show the series and its distribution instead
    plt.figure(figsize=(15, 6))

    # Left panel: daily amounts with the 7-day moving average
    plt.subplot(1, 2, 1)
    plt.scatter(daily_data['date'], daily_data['total_amount'], alpha=0.6, s=30)
    plt.plot(daily_data['date'], daily_data['ma_7'], color='red', linewidth=2, label='7-day MA')
    plt.title('Daily Amounts (No Anomalies Detected)')
    plt.ylabel('Total Amount ($)')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Right panel: histogram of daily amounts
    plt.subplot(1, 2, 2)
    plt.hist(daily_data['total_amount'], bins=20, alpha=0.7, edgecolor='black')
    plt.title('Distribution of Daily Amounts')
    plt.xlabel('Total Amount ($)')
    plt.ylabel('Frequency')
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

else:
    # The five anomalies with the largest amounts
    print("\nTop 5 Anomalies:")
    top_anomalies = anomalies.nlargest(5, 'amount')
    top_rows = zip(top_anomalies['date'],
                   top_anomalies['amount'],
                   top_anomalies['anomaly_score'])
    for when, amount, score in top_rows:
        print(f"  {when.date()}: ${amount:,.2f} (Score: {score:.3f})")

    # Everything whose index label is not flagged counts as normal.
    # NOTE(review): presumes `anomalies` keeps daily_data's index labels - confirm.
    is_anomaly = daily_data.index.isin(anomalies.index)
    normal_data = daily_data[~is_anomaly]

    # Scatter of normal points, anomalies as red crosses, plus the 7-day MA
    plt.figure(figsize=(15, 8))
    plt.scatter(normal_data['date'], normal_data['total_amount'],
                alpha=0.6, s=30, color='blue', label='Normal')
    plt.scatter(anomalies['date'], anomalies['amount'],
                s=100, color='red', marker='x', label='Anomalies', linewidths=2)
    plt.plot(daily_data['date'], daily_data['ma_7'],
             color='green', linewidth=2, alpha=0.7, label='7-day MA')

    plt.title('Anomaly Detection in Daily Amounts', fontsize=14)
    plt.ylabel('Total Amount ($)')
    plt.xlabel('Date')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.show()

    # Compare anomalous amounts with the normal ones
    anomaly_amounts = anomalies['amount']
    print("\nANOMALY STATISTICS")
    print("=" * 40)
    print(f"Average anomaly amount: ${anomaly_amounts.mean():,.2f}")
    print(f"Average normal amount: ${normal_data['total_amount'].mean():,.2f}")
    print(f"Highest anomaly: ${anomaly_amounts.max():,.2f}")
    print(f"Lowest anomaly: ${anomaly_amounts.min():,.2f}")

    # Count anomalies per weekday name to reveal clustering
    anomaly_days = anomalies['date'].dt.day_name().value_counts()
    print("\nAnomalies by Day of Week:")
    for day, count in anomaly_days.items():
        print(f"  {day}: {count}")

## Forecasting

In [None]:
# Forecast the daily totals with the project's TrendAnalyzer and plot the
# result after the historical series.
# The horizon is capped at 30 days and at a quarter of the available history.
# It is computed before the size check because the summary cell prints
# `forecast_days` as well.
forecast_days = min(30, len(daily_data) // 4)  # Forecast up to 30 days or 25% of data

if len(daily_data) >= 10:  # Need at least 10 days for forecasting
    # This cell reads .method, .accuracy_metrics['mae'/'rmse'],
    # .confidence_level, .forecast_values and (optionally)
    # .confidence_intervals from the returned object.
    forecast_result = trend_analyzer.forecast_trend(daily_data, 'total_amount', forecast_days)
    
    print(f"FORECASTING RESULTS ({forecast_days} days ahead)")
    print("=" * 40)
    print(f"Forecast method: {forecast_result.method}")
    print(f"Model accuracy (MAE): ${forecast_result.accuracy_metrics['mae']:.2f}")
    print(f"Model accuracy (RMSE): ${forecast_result.accuracy_metrics['rmse']:.2f}")
    print(f"Confidence level: {forecast_result.confidence_level}%")
    
    print("\nForecast Summary:")
    print(f"Average forecasted amount: ${forecast_result.forecast_values.mean():.2f}")
    print(f"Total forecasted amount: ${forecast_result.forecast_values.sum():,.2f}")
    # Direction compares the last forecast value with the first; equal values
    # are reported as 'Decreasing'.
    # NOTE(review): [-1] / [0] assume forecast_values supports positional
    # indexing (e.g. a NumPy array) - confirm against TrendAnalyzer.
    print(f"Forecasted trend: {'Increasing' if forecast_result.forecast_values[-1] > forecast_result.forecast_values[0] else 'Decreasing'}")
    
    # Visualize forecast
    plt.figure(figsize=(15, 8))
    
    # Historical data
    plt.plot(daily_data['date'], daily_data['total_amount'], 
             color='blue', linewidth=2, label='Historical Data')
    
    # Forecast dates start the day after the last observation, one per day.
    # NOTE(review): assumes forecast_values holds exactly `forecast_days` entries - confirm.
    forecast_dates = pd.date_range(start=daily_data['date'].max() + pd.Timedelta(days=1), 
                                  periods=forecast_days, freq='D')
    
    plt.plot(forecast_dates, forecast_result.forecast_values, 
             color='red', linewidth=2, linestyle='--', label='Forecast')
    
    # Shade the confidence band only when the result object provides one
    if hasattr(forecast_result, 'confidence_intervals') and forecast_result.confidence_intervals is not None:
        lower_bound = forecast_result.confidence_intervals['lower']
        upper_bound = forecast_result.confidence_intervals['upper']
        
        plt.fill_between(forecast_dates, lower_bound, upper_bound, 
                        alpha=0.3, color='red', label='Confidence Interval')
    
    # Dotted vertical line at the last observed date, where the forecast begins
    plt.axvline(x=daily_data['date'].max(), color='gray', linestyle=':', alpha=0.7)
    
    plt.title(f'Financial Forecast - Next {forecast_days} Days', fontsize=14)
    plt.ylabel('Total Amount ($)')
    plt.xlabel('Date')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.show()
    
    # Tabulate the forecast by date
    forecast_df = pd.DataFrame({
        'Date': forecast_dates,
        'Forecasted_Amount': forecast_result.forecast_values
    })
    
    # Print the first 10 rows and, for longer forecasts, the last 5 as well
    print("\nDetailed Forecast:")
    print(forecast_df.head(10).to_string(index=False))
    
    if len(forecast_df) > 10:
        print("...")
        print(forecast_df.tail(5).to_string(index=False))
        
else:
    # `forecast_result` stays undefined in this branch
    print("Insufficient data for forecasting (need at least 10 days)")

## Risk Analysis

In [None]:
# Risk metrics derived from day-over-day relative changes of the daily totals.
# Defines volatility, var_95, max_drawdown and risk_level, which the summary
# cell prints again.
print("RISK ANALYSIS")
print("=" * 40)

# Day-over-day relative change. A zero total on the previous row makes
# pct_change produce +/-inf (or NaN for 0 -> 0); such values would turn the
# standard deviation, percentiles and cumulative product into inf/NaN, so
# they are removed together with the leading NaN.
daily_returns = (
    daily_data['total_amount']
    .pct_change()
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)

# Dates of the surviving returns, selected by index label so the plots stay
# aligned no matter how many rows were dropped above.
return_dates = daily_data.loc[daily_returns.index, 'date']

# Volatility analysis
volatility = daily_returns.std()
annualized_volatility = volatility * np.sqrt(365)  # 365: one observation per calendar day

print(f"Daily volatility: {volatility:.4f}")
print(f"Annualized volatility: {annualized_volatility:.4f}")

# Historical Value at Risk: the 5th / 1st percentile of the daily returns
var_95 = np.percentile(daily_returns, 5)
var_99 = np.percentile(daily_returns, 1)

print(f"\nValue at Risk (95%): {var_95:.4f} ({var_95*100:.2f}%)")
print(f"Value at Risk (99%): {var_99:.4f} ({var_99*100:.2f}%)")

# Maximum drawdown: largest relative fall of the compounded returns below
# their running peak
cumulative_returns = (1 + daily_returns).cumprod()
peak = cumulative_returns.expanding().max()
drawdown = (cumulative_returns - peak) / peak
max_drawdown = drawdown.min()

print(f"\nMaximum drawdown: {max_drawdown:.4f} ({max_drawdown*100:.2f}%)")

# Downside deviation, computed here as the standard deviation of the
# negative returns only
negative_returns = daily_returns[daily_returns < 0]
downside_deviation = negative_returns.std()

print(f"Downside deviation: {downside_deviation:.4f}")

# Risk visualization
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Daily returns distribution with both VaR levels marked
axes[0, 0].hist(daily_returns, bins=20, alpha=0.7, edgecolor='black')
axes[0, 0].axvline(var_95, color='red', linestyle='--', label=f'VaR 95%: {var_95:.3f}')
axes[0, 0].axvline(var_99, color='darkred', linestyle='--', label=f'VaR 99%: {var_99:.3f}')
axes[0, 0].set_title('Distribution of Daily Returns')
axes[0, 0].set_xlabel('Daily Return')
axes[0, 0].set_ylabel('Frequency')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

# Cumulative returns
axes[0, 1].plot(return_dates, cumulative_returns, linewidth=2)
axes[0, 1].set_title('Cumulative Returns')
axes[0, 1].set_ylabel('Cumulative Return')
axes[0, 1].grid(True, alpha=0.3)

# Drawdown chart
axes[1, 0].fill_between(return_dates, drawdown, 0, alpha=0.7, color='red')
axes[1, 0].set_title('Drawdown Over Time')
axes[1, 0].set_ylabel('Drawdown')
axes[1, 0].grid(True, alpha=0.3)

# Rolling volatility
rolling_vol = daily_returns.rolling(window=7).std()
axes[1, 1].plot(return_dates, rolling_vol, linewidth=2, color='orange')
axes[1, 1].set_title('7-Day Rolling Volatility')
axes[1, 1].set_ylabel('Volatility')
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Risk assessment
print("\nRISK ASSESSMENT")
print("=" * 40)

# Bucket the annualized volatility: < 0.1 Low, < 0.2 Medium, otherwise High
if annualized_volatility < 0.1:
    risk_level = "Low"
elif annualized_volatility < 0.2:
    risk_level = "Medium"
else:
    risk_level = "High"

print(f"Overall risk level: {risk_level}")
print(f"Volatility assessment: {'Stable' if volatility < 0.05 else 'Volatile' if volatility < 0.1 else 'Highly Volatile'}")
print(f"Downside risk: {'Low' if abs(max_drawdown) < 0.05 else 'Medium' if abs(max_drawdown) < 0.15 else 'High'}")

# Share of days with a negative return; guarded so an empty returns series
# does not raise ZeroDivisionError
negative_share = len(negative_returns) / len(daily_returns) if len(daily_returns) else 0.0

# Risk recommendations
print("\nRISK MANAGEMENT RECOMMENDATIONS:")
if volatility > 0.1:
    print("• High volatility detected - consider diversification strategies")
if abs(max_drawdown) > 0.1:
    print("• Significant drawdowns observed - implement stop-loss mechanisms")
if negative_share > 0.4:
    print("• High frequency of negative returns - review operational efficiency")
if abs(var_95) > 0.05:
    print("• High Value at Risk - maintain adequate cash reserves")

print("• Monitor risk metrics regularly")
print("• Maintain emergency fund for unexpected downturns")
print("• Consider hedging strategies for extreme scenarios")

## Advanced Analytics

In [None]:
# Advanced statistical analysis of the daily totals: autocorrelation,
# stationarity, dominant cycles and calendar effects.
# Defines autocorr, adf_result, weekend_avg, weekday_avg and monthly_avg,
# which the summary cell reads.
print("ADVANCED ANALYTICS")
print("=" * 40)

# Autocorrelation analysis
from statsmodels.tsa.stattools import acf

# Up to 20 lags, but never more than a quarter of the series length
autocorr = acf(daily_data['total_amount'], nlags=min(20, len(daily_data)//4))
print(f"First-order autocorrelation: {autocorr[1]:.4f}")

if abs(autocorr[1]) > 0.3:
    print("Strong autocorrelation detected - data has memory")
elif abs(autocorr[1]) > 0.1:
    print("Moderate autocorrelation detected")
else:
    print("Low autocorrelation - data appears random")

# Stationarity test (augmented Dickey-Fuller); index 0 of the result is the
# statistic, index 1 the p-value
from statsmodels.tsa.stattools import adfuller

adf_result = adfuller(daily_data['total_amount'].dropna())
print(f"\nStationarity Test (ADF):")
print(f"ADF Statistic: {adf_result[0]:.4f}")
print(f"P-value: {adf_result[1]:.4f}")
print(f"Data is {'stationary' if adf_result[1] < 0.05 else 'non-stationary'}")

# Periodicity detection
from scipy.fft import rfft, rfftfreq

if len(daily_data) > 14:  # Need enough data for FFT
    # Remove trend for better periodicity detection
    detrended = daily_data['total_amount'] - daily_data['total_amount'].rolling(window=7).mean()
    detrended = detrended.dropna()

    # The input is real-valued, so its full spectrum is mirror-symmetric and
    # every cycle would be found twice (at +f and -f). The one-sided
    # transform keeps each frequency exactly once.
    spectrum = rfft(detrended.to_numpy())
    frequencies = rfftfreq(len(detrended))  # cycles per sample (one sample per row)
    power_spectrum = np.abs(spectrum) ** 2

    # Rank the non-zero frequencies by power, strongest first, and keep 5.
    # Index 0 is the constant (zero-frequency) term and has no period.
    ranked = np.argsort(power_spectrum[1:])[::-1] + 1
    dominant_freq_idx = ranked[:5]

    print("\nPeriodicity Analysis:")
    for idx in dominant_freq_idx:
        cycle_days = 1 / frequencies[idx]
        if 1 < cycle_days < len(detrended) / 2:  # Reasonable periods only
            print(f"  Potential {cycle_days:.1f}-day cycle detected")

# Calendar features (dayofweek: 0 = Monday ... 6 = Sunday)
daily_data['day_of_week'] = daily_data['date'].dt.dayofweek
daily_data['month'] = daily_data['date'].dt.month
daily_data['is_weekend'] = daily_data['day_of_week'].isin([5, 6])

print("\nCorrelation Analysis:")
weekend_avg = daily_data[daily_data['is_weekend']]['total_amount'].mean()
weekday_avg = daily_data[~daily_data['is_weekend']]['total_amount'].mean()

print(f"Weekend average: ${weekend_avg:.2f}")
print(f"Weekday average: ${weekday_avg:.2f}")
print(f"Weekend vs Weekday ratio: {weekend_avg/weekday_avg:.2f}")

# Average daily total per calendar month number (the same month of
# different years is pooled)
monthly_avg = daily_data.groupby('month')['total_amount'].mean()
print(f"\nBest performing month: {monthly_avg.idxmax()} (${monthly_avg.max():.2f})")
print(f"Worst performing month: {monthly_avg.idxmin()} (${monthly_avg.min():.2f})")

# Visualization of advanced analytics
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Autocorrelation plot
lags = range(len(autocorr))
axes[0, 0].bar(lags, autocorr)
axes[0, 0].axhline(y=0, color='black', linestyle='-', alpha=0.5)
axes[0, 0].set_title('Autocorrelation Function')
axes[0, 0].set_xlabel('Lag')
axes[0, 0].set_ylabel('Autocorrelation')
axes[0, 0].grid(True, alpha=0.3)

# Day of week analysis; weekdays absent from the data are drawn as 0
dow_avg = daily_data.groupby('day_of_week')['total_amount'].mean()
day_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
axes[0, 1].bar(range(7), [dow_avg.get(i, 0) for i in range(7)])
axes[0, 1].set_title('Average Amount by Day of Week')
axes[0, 1].set_xlabel('Day of Week')
axes[0, 1].set_ylabel('Average Amount ($)')
axes[0, 1].set_xticks(range(7))
axes[0, 1].set_xticklabels(day_names)
axes[0, 1].grid(True, alpha=0.3)

# Monthly analysis
axes[1, 0].bar(monthly_avg.index, monthly_avg.values)
axes[1, 0].set_title('Average Amount by Month')
axes[1, 0].set_xlabel('Month')
axes[1, 0].set_ylabel('Average Amount ($)')
axes[1, 0].grid(True, alpha=0.3)

# Rolling correlation between the amounts and a linear time ramp
if len(daily_data) > 10:
    # The ramp shares daily_data's index so that rolling.corr pairs the two
    # series row by row instead of relying on both having a default index.
    time_ramp = pd.Series(np.arange(len(daily_data)), index=daily_data.index)
    rolling_corr = daily_data['total_amount'].rolling(window=7).corr(time_ramp)
    axes[1, 1].plot(daily_data['date'], rolling_corr, linewidth=2)
    axes[1, 1].set_title('7-Day Rolling Correlation with Trend')
    axes[1, 1].set_ylabel('Correlation')
    axes[1, 1].axhline(y=0, color='red', linestyle='--', alpha=0.5)
    axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Summary and Insights

In [None]:
# Final report. Only reads results produced by the earlier cells
# (amount_trend, volatility, risk_level, max_drawdown, var_95, anomalies,
# forecast_days, forecast_result, adf_result, autocorr, monthly_avg,
# weekend_avg, weekday_avg); nothing is recomputed here.
print("TREND ANALYSIS SUMMARY")
print("=" * 50)

print(f"\n📈 TREND ANALYSIS")
print(f"   Overall Trend: {amount_trend.trend_direction} (Strength: {amount_trend.trend_strength})")
print(f"   Trend Slope: {amount_trend.slope:.4f} per day")
print(f"   R-squared: {amount_trend.r_squared:.4f}")
print(f"   Statistical Significance: {'Yes' if amount_trend.p_value < 0.05 else 'No'} (p={amount_trend.p_value:.4f})")

print(f"\n📊 VOLATILITY & RISK")
print(f"   Daily Volatility: {volatility:.4f}")
print(f"   Risk Level: {risk_level}")
# max_drawdown and var_95 are fractions, so they are scaled by 100 before
# the '%' suffix, matching how the risk analysis cell prints them.
print(f"   Maximum Drawdown: {max_drawdown * 100:.2f}%")
print(f"   Value at Risk (95%): {var_95 * 100:.2f}%")

print(f"\n🔍 ANOMALIES")
print(f"   Anomalies Detected: {len(anomalies)}")
print(f"   Anomaly Rate: {len(anomalies) / len(daily_data) * 100:.2f}%")
if len(anomalies) > 0:
    print(f"   Largest Anomaly: ${anomalies['amount'].max():,.2f} on {anomalies.loc[anomalies['amount'].idxmax(), 'date'].date()}")

print(f"\n🔮 FORECASTING")
# Same threshold as the forecasting cell: forecast_result only exists when
# there were at least 10 daily data points.
if len(daily_data) >= 10:
    print(f"   Forecast Period: {forecast_days} days")
    print(f"   Forecast Method: {forecast_result.method}")
    print(f"   Predicted Average: ${forecast_result.forecast_values.mean():.2f}")
    print(f"   Forecast Accuracy (MAE): ${forecast_result.accuracy_metrics['mae']:.2f}")
else:
    print(f"   Insufficient data for forecasting")

print(f"\n📋 DATA CHARACTERISTICS")
print(f"   Data Points: {len(daily_data)}")
print(f"   Time Span: {(daily_data['date'].max() - daily_data['date'].min()).days} days")
print(f"   Stationarity: {'Stationary' if adf_result[1] < 0.05 else 'Non-stationary'}")
print(f"   Autocorrelation: {'Strong' if abs(autocorr[1]) > 0.3 else 'Moderate' if abs(autocorr[1]) > 0.1 else 'Weak'}")

print(f"\n🌟 KEY INSIGHTS")

# Trend insights
if amount_trend.trend_direction == 'increasing':
    print(f"   ✅ Positive growth trend detected")
elif amount_trend.trend_direction == 'decreasing':
    print(f"   ⚠️  Declining trend requires attention")
else:
    print(f"   ➡️  Stable trend with no significant change")

# Risk insights
if risk_level == 'High':
    print(f"   🔴 High volatility - implement risk management")
elif risk_level == 'Medium':
    print(f"   🟡 Moderate risk - monitor closely")
else:
    print(f"   🟢 Low risk environment")

# Seasonal insights: spread of the monthly averages relative to their mean
if not monthly_avg.empty:
    seasonal_var = monthly_avg.std() / monthly_avg.mean()
    if seasonal_var > 0.2:
        print(f"   📅 Strong seasonal patterns detected")
    else:
        print(f"   📅 Weak seasonal effects")

# Weekend effect: relative gap between weekend and weekday averages,
# reported only when it exceeds 10%
weekend_effect = (weekend_avg - weekday_avg) / weekday_avg
if abs(weekend_effect) > 0.1:
    effect_dir = "higher" if weekend_effect > 0 else "lower"
    print(f"   📆 Weekend amounts are {abs(weekend_effect)*100:.1f}% {effect_dir} than weekdays")

# Recommendations are collected first and numbered afterwards, so the list
# stays consecutive when the optional anomaly item is absent.
recommendations = [
    f"{'Maintain' if amount_trend.trend_direction == 'increasing' else 'Improve'} current trajectory",
    f"{'Implement' if risk_level == 'High' else 'Monitor'} risk management strategies",
    f"Use forecasting for {'short-term' if len(daily_data) < 30 else 'medium-term'} planning",
]
if len(anomalies) > 0:
    recommendations.append("Investigate anomalies for operational improvements")
recommendations.append("Leverage seasonal patterns for strategic planning")
recommendations.append("Regular monitoring of key trend indicators")

print(f"\n🎯 RECOMMENDATIONS")
for number, recommendation in enumerate(recommendations, start=1):
    print(f"   {number}. {recommendation}")

print("\n" + "=" * 50)