# Wavelet Analysis and Pattern Discovery Demo

This notebook demonstrates the wavelet analysis capabilities for financial time series:
- Continuous Wavelet Transform (CWT)
- Motif Discovery
- Shapelet Extraction
- Pattern Visualization

In [None]:
# Import required libraries
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath('.'))))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Import our modules
from src.wavelet_analysis import (
    WaveletAnalyzer,
    MotifDiscovery,
    ShapeletExtractor,
    PatternVisualizer
)
from src.data_collection import StorageManager

# Set up plotting
plt.style.use('seaborn-v0_8-darkgrid')
%matplotlib inline

## 1. Load Financial Data

In [None]:
# Initialize storage manager
storage = StorageManager(base_path="../data", storage_format="hdf5")

# Ticker / timeframe analysed in sections 1-7
ticker = 'TSLA'
timeframe = '1d'

data = storage.load_raw_data(tickers=[ticker], timeframes=[timeframe])

# Fail fast: every later cell needs `df`, so continuing without it would only
# surface as a confusing NameError (or silently reuse a stale frame) further down.
if ticker not in data or timeframe not in data[ticker]:
    raise ValueError(f"No {timeframe} data found for {ticker}")

# Work on a copy so the derived columns are not written onto the object owned by
# the storage layer, and so re-running this cell always starts from the raw frame.
df = data[ticker][timeframe].copy()
print(f"Loaded {len(df)} days of {ticker} data")
print(f"Date range: {df.index[0]} to {df.index[-1]}")

# Simple and log returns of the closing price (first row is NaN by construction)
df['Returns'] = df['Close'].pct_change()
df['LogReturns'] = np.log(df['Close'] / df['Close'].shift(1))

# Display sample -- must be the last top-level expression of the cell to render
df.tail()

## 2. Wavelet Analysis

In [None]:
# Analyzer configured with the 'morl' wavelet
analyzer = WaveletAnalyzer(wavelet='morl')

# Log returns with undefined entries removed, as a plain array
log_return_series = df['LogReturns'].dropna()
returns = log_return_series.values

# Continuous wavelet transform of the return series
coeffs, freqs = analyzer.transform(returns)

# Report the size of the transform and the frequency band it spans
cwt_summary = [
    f"CWT coefficients shape: {coeffs.shape}",
    f"Number of scales: {len(analyzer.scales)}",
    f"Frequency range: {freqs.min():.4f} - {freqs.max():.4f}",
]
print("\n".join(cwt_summary))

In [None]:
# Summarise the CWT coefficients into a feature dictionary
features = analyzer.extract_features(coeffs)

# Pull out the headline numbers before formatting them
dominant_scale = features['dominant_scale']
ridge_count = len(features['ridges'])
mean_scale_energy = np.mean(features['scale_energy'])
peak_time_energy = np.max(features['time_energy'])

print("Wavelet Features:")
print(f"- Dominant scale: {dominant_scale:.2f}")
print(f"- Number of ridges detected: {ridge_count}")
print(f"- Mean scale energy: {mean_scale_energy:.4f}")
print(f"- Max time energy: {peak_time_energy:.4f}")

In [None]:
# Visualize wavelet transform
viz = PatternVisualizer(figsize=(14, 8))

# Time axis for the scalogram: the timestamps of exactly the rows that survive
# `df['LogReturns'].dropna()`, i.e. the rows `returns` (and therefore `coeffs`)
# were computed from. Slicing `df.index[1:]` is only equivalent when the first
# row is the sole NaN; any gap in 'Close' would make the axis longer than the data.
returns_index = df['LogReturns'].dropna().index

fig = viz.plot_wavelet_transform(
    coeffs,
    analyzer.scales,
    time=returns_index,
    title=f"{ticker} Returns - Continuous Wavelet Transform"
)
plt.show()

In [None]:
# Search the wavelet coefficients for sustained high-power regions
patterns = analyzer.detect_patterns(coeffs, min_duration=5, power_threshold=0.4)

print(f"Found {len(patterns)} significant patterns")
print("\nTop 5 patterns by power:")
# NOTE(review): the listing shows the first five entries in the order returned;
# it presumes detect_patterns already orders by power -- confirm.
for rank, pattern in enumerate(patterns[:5], start=1):
    pattern_report = (
        f"Pattern {rank}:",
        f"  - Scale: {pattern['scale']:.2f}",
        f"  - Time range: {pattern['start']} - {pattern['end']}",
        f"  - Duration: {pattern['duration']} days",
        f"  - Max power: {pattern['max_power']:.3f}",
    )
    print("\n".join(pattern_report))

## 3. Motif Discovery

In [None]:
# Motif search over 20-sample windows, with matches kept at least 10 samples apart
md = MotifDiscovery(window_size=20, min_distance=10)

# Recurring subsequences of the return series (at most ten requested)
motifs = md.find_motifs(returns, top_k=10)

print(f"Found {len(motifs)} motifs")
print("\nTop 5 motifs:")
for motif in motifs[:5]:
    motif_report = (
        f"\nMotif {motif['id']}:",
        f"  - Occurrences: {motif['num_occurrences']}",
        f"  - Primary index: {motif['primary_index']}",
        f"  - Mean distance: {motif['mean_distance']:.3f}",
        f"  - Pattern length: {len(motif['pattern'])} days",
    )
    print("\n".join(motif_report))

In [None]:
# Draw the discovered motifs on top of the return series
motif_plot_title = f"{ticker} - Discovered Return Motifs"
fig = viz.plot_motifs(returns, motifs, title=motif_plot_title)
plt.show()

In [None]:
# Find discords (anomalies)
discords = md.find_discords(returns, top_k=5)

# Timestamps of the rows behind `returns` (built from df['LogReturns'].dropna()).
# Looking positions up here, rather than with `df.index[i + 1]`, stays correct even
# when more than the first row was dropped because of gaps in the price series.
returns_index = df['LogReturns'].dropna().index

print("Top 5 anomalous patterns (discords):")
for discord in discords:
    date_idx = returns_index[discord['index']]
    print(f"\nDiscord at {date_idx}:")
    print(f"  - Anomaly score: {discord['anomaly_score']:.3f}")
    print(f"  - Distance to nearest neighbor: {discord['distance']:.3f}")
    print(f"  - Pattern length: {len(discord['pattern'])} days")

In [None]:
# Tabulate per-motif features, then show their summary statistics
motif_features = md.get_motif_features(motifs)
print("Motif feature statistics:")
motif_feature_summary = motif_features.describe()
motif_feature_summary

## 4. Shapelet Extraction

In [None]:
# Binary labels from the mean return over the next `future_window` samples:
#   1 -> mean future return is positive (bullish)
#   0 -> otherwise (bearish)
# The final `future_window` positions have no future data; their mean is filled
# with 0 and they therefore receive label 0.
future_window = 5
return_series = pd.Series(returns)
trailing_mean = return_series.rolling(future_window).mean()
future_returns = trailing_mean.shift(-future_window).fillna(0)
labels = (future_returns > 0).astype(int).values

print("Label distribution:")
for label_name, label_value in (("Bearish", 0), ("Bullish", 1)):
    is_label = labels == label_value
    print(f"  - {label_name} ({label_value}): {np.sum(is_label)} ({np.mean(is_label)*100:.1f}%)")

In [None]:
# Initialize shapelet extractor
extractor = ShapeletExtractor(
    min_length=10,
    max_length=30,
    num_shapelets=20,
    quality_threshold=0.1
)

# The last `future_window` labels are not real observations: their future return
# is undefined and was filled with 0, which labels them bearish. Train only on the
# prefix whose labels come from actual future returns.
n_labeled = max(len(returns) - future_window, 0)
train_returns = returns[:n_labeled]
train_labels = labels[:n_labeled]

# Extract shapelets (this may take a while)
print("Extracting shapelets...")
shapelets = extractor.extract_shapelets(train_returns, train_labels, n_jobs=1)

print(f"\nExtracted {len(shapelets)} shapelets")
print("\nTop 5 shapelets by quality:")
for i, shapelet in enumerate(shapelets[:5]):
    print(f"\nShapelet {i+1}:")
    print(f"  - Quality: {shapelet['quality']:.3f}")
    print(f"  - Length: {len(shapelet['pattern'])} days")
    print(f"  - Threshold: {shapelet['threshold']:.3f}")
    print(f"  - Mean distance: {shapelet['mean_distance']:.3f}")

In [None]:
# Plot the first twelve extracted shapelets
shapelets_to_plot = shapelets[:12]
shapelet_plot_title = f"{ticker} - Discriminative Shapelets"
fig = viz.plot_shapelets(shapelets_to_plot, title=shapelet_plot_title)
plt.show()

In [None]:
# Tabulate per-shapelet features and summarise the numeric columns of interest
shapelet_features = extractor.get_shapelet_features()
summary_columns = ['length', 'quality', 'threshold', 'trend']
print("Shapelet feature statistics:")
shapelet_features[summary_columns].describe()

## 5. Pattern Similarity Analysis

In [None]:
# Compare patterns from motifs and shapelets
all_patterns = []
pattern_labels = []

# Add top motif patterns
for i, motif in enumerate(motifs[:5]):
    all_patterns.append(motif['pattern'])
    pattern_labels.append(f'Motif {i+1}')

# Common length every pattern is brought to. Taken from the motif patterns
# themselves so it follows the motif window size instead of duplicating it as a
# literal; 20 (the window size used for motif discovery) is the fallback when no
# motif was found.
target_length = len(all_patterns[0]) if all_patterns else 20

# Add top shapelet patterns
for i, shapelet in enumerate(shapelets[:5]):
    # Zero-pad short shapelets on the right, truncate long ones
    pattern = np.asarray(shapelet['pattern'])
    if len(pattern) < target_length:
        pattern = np.pad(pattern, (0, target_length - len(pattern)), mode='constant')
    else:
        pattern = pattern[:target_length]
    all_patterns.append(pattern)
    pattern_labels.append(f'Shapelet {i+1}')

# Create similarity heatmap
fig = viz.plot_pattern_heatmap(
    all_patterns,
    labels=pattern_labels,
    title="Pattern Similarity Matrix"
)
plt.show()

## 6. Interactive Visualizations

In [None]:
# Interactive scalogram, indexed by sample position rather than by date
sample_positions = np.arange(len(returns))
scalogram_title = f"{ticker} - Interactive Wavelet Scalogram"

interactive_fig = viz.create_interactive_scalogram(
    coeffs,
    analyzer.scales,
    time=sample_positions,
    original_data=returns,
    title=scalogram_title,
)
interactive_fig.show()

In [None]:
# Interactive view of the motifs over the return series
interactive_motif_title = f"{ticker} - Interactive Motif Discovery"
interactive_motif_fig = viz.create_interactive_motif_plot(
    returns, motifs, title=interactive_motif_title
)
interactive_motif_fig.show()

## 7. Save Analysis Results

In [None]:
# Persist the raw shapelet patterns, keyed by timeframe, for this ticker
shapelet_patterns = [s['pattern'] for s in shapelets]
shapelet_data = {timeframe: shapelet_patterns}
storage.save_shapelets(shapelet_data, ticker)

# Single-row frame holding the wavelet feature dictionary
wavelet_features_df = pd.DataFrame([features])

# Persist the three feature tables, in order, under the same category
feature_tables = (
    (wavelet_features_df, f'{ticker}_wavelet_features'),
    (motif_features, f'{ticker}_motif_features'),
    (shapelet_features, f'{ticker}_shapelet_features'),
)
for table, table_name in feature_tables:
    storage.save_processed_data(
        table,
        table_name,
        category='wavelet_analysis'
    )

print(f"Analysis results saved for {ticker}")

## 8. Multi-Ticker Analysis

In [None]:
# Analyze multiple tickers
#
# Loop-local names (`symbol`, `symbol_*`) are used on purpose: the earlier sections
# rely on the notebook-level `ticker`, `data`, `df`, `returns`, `coeffs` and
# `features`, and overwriting them here would make any re-run of those cells
# (for example the save cell) mix this loop's last ticker with TSLA's results.
tickers_to_analyze = ['TSLA', 'GME', 'AMC', 'NVDA', 'COIN']
analysis_results = {}

for symbol in tickers_to_analyze:
    print(f"\nAnalyzing {symbol}...")

    # Load data
    symbol_data = storage.load_raw_data(tickers=[symbol], timeframes=['1d'])

    if symbol not in symbol_data or '1d' not in symbol_data[symbol]:
        print(f"  No data found for {symbol}")
        continue

    # Log returns with undefined entries dropped, consistent with the
    # single-ticker path (NaN prices would otherwise propagate into the CWT).
    symbol_close = symbol_data[symbol]['1d']['Close']
    symbol_returns = np.log(symbol_close).diff().dropna().to_numpy()

    if symbol_returns.size == 0:
        print(f"  Not enough price history for {symbol}")
        continue

    # Wavelet analysis
    symbol_coeffs, _ = analyzer.transform(symbol_returns)
    symbol_features = analyzer.extract_features(symbol_coeffs)
    symbol_volatility = np.std(symbol_returns)

    # Store results
    analysis_results[symbol] = {
        'dominant_scale': symbol_features['dominant_scale'],
        'num_ridges': len(symbol_features['ridges']),
        'mean_energy': np.mean(symbol_features['scale_energy']),
        'volatility': symbol_volatility
    }

    print(f"  Dominant scale: {symbol_features['dominant_scale']:.2f}")
    print(f"  Volatility: {symbol_volatility:.4f}")

In [None]:
# Compare analysis results
if not analysis_results:
    # Nothing was loaded for any ticker: sorting an empty frame by 'volatility'
    # would raise KeyError, so report the situation instead.
    print("No analysis results to compare - no ticker data was loaded.")
else:
    # One row per ticker, most volatile first. `orient='index'` keeps each metric's
    # own dtype (ridge counts stay integers instead of being upcast by a transpose).
    results_df = (
        pd.DataFrame.from_dict(analysis_results, orient='index')
        .sort_values('volatility', ascending=False)
    )

    # (column, panel title, y-axis label) for each of the four bar charts
    panels = [
        ('dominant_scale', 'Dominant Wavelet Scale', 'Scale'),
        ('num_ridges', 'Number of Wavelet Ridges', 'Count'),
        ('mean_energy', 'Mean Wavelet Energy', 'Energy'),
        ('volatility', 'Return Volatility', 'Std Dev'),
    ]

    # Plot comparison: 2x2 grid filled row by row in the order listed above
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    for ax, (column, panel_title, y_label) in zip(axes.flat, panels):
        ax.bar(results_df.index, results_df[column])
        ax.set_title(panel_title)
        ax.set_ylabel(y_label)

    plt.tight_layout()
    plt.show()

    print("\nAnalysis Summary:")
    print(results_df)

## Summary

This notebook demonstrated:

1. **Wavelet Analysis**: Decomposed financial returns into time-frequency components
2. **Motif Discovery**: Found recurring patterns in the time series
3. **Shapelet Extraction**: Identified discriminative subsequences for classification
4. **Pattern Visualization**: Created both static and interactive visualizations
5. **Multi-ticker Comparison**: Analyzed patterns across different stocks

The discovered patterns and shapelets can be used as features for:
- Price movement prediction
- Anomaly detection
- Market regime identification
- Trading strategy development