# Advanced Usage Examples

This notebook demonstrates advanced features and practical use cases of the China Stock Data library.

In [None]:
# Advanced imports and setup
from china_stock_data import StockData, StockMarket
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime, timedelta
import warnings
# Silence library warnings so the example output stays readable
warnings.filterwarnings('ignore')

# Plot styling: use the seaborn-flavoured sheet when this matplotlib build
# ships it, otherwise fall back to matplotlib's default style
if 'seaborn-v0_8' in plt.style.available:
    plt.style.use('seaborn-v0_8')
else:
    plt.style.use('default')

# Figure defaults applied to every chart in the notebook.
# The sans-serif list is a preference order of font families
# (presumably so Chinese labels render when SimHei is installed).
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.sans-serif': ['SimHei', 'Arial Unicode MS', 'DejaVu Sans'],
    'axes.unicode_minus': False,
})

print("Advanced analysis environment ready! 🚀")

## 1. Portfolio Analysis

In [None]:
# Collect the closing-price series of several stocks into one table.
# The two lists below are parallel: position i of each describes one stock.
portfolio_symbols = ['000001', '000002', '600519', '600036', '000858']
portfolio_names = ['平安银行', '万科A', '贵州茅台', '招商银行', '五粮液']
portfolio_data = {}

print("Building portfolio data...")
for symbol, name in zip(portfolio_symbols, portfolio_names):
    try:
        stock = StockData(symbol, days=252)  # ~1 year of trading days
        data = stock.get_data('kline')
        if data.empty:
            print(f"❌ {name} ({symbol}): No data")
            continue
        # Keep only the '收盘' (close) column, keyed by the display name
        portfolio_data[name] = data['收盘']
        print(f"✅ {name} ({symbol}): {len(data)} days")
    except Exception as e:
        # Best effort: report the failure and carry on with the next stock
        print(f"❌ {name} ({symbol}): Error - {e}")

# `portfolio_df` is only created when at least one stock loaded successfully
if not portfolio_data:
    print("No portfolio data available")
else:
    portfolio_df = pd.DataFrame(portfolio_data)
    print(f"\nPortfolio DataFrame shape: {portfolio_df.shape}")
    print(f"Date range: {portfolio_df.index.min()} to {portfolio_df.index.max()}")

In [None]:
# Calculate portfolio returns, correlations and per-stock risk statistics.
# Relies on `portfolio_df` (one closing-price column per stock), which the
# data-loading cell only defines when at least one stock was fetched.
if 'portfolio_df' in locals() and not portfolio_df.empty:
    # Daily simple returns; rows where any stock lacks a return are dropped
    returns = portfolio_df.pct_change().dropna()

    # Growth of one unit invested in each stock (daily returns compounded)
    cumulative_returns = (1 + returns).cumprod()

    plt.figure(figsize=(14, 10))

    # Panel 1: prices rebased to 100.
    # Each stock is rebased on its first available price, so a stock whose
    # history starts after the first row is not turned into an all-NaN line.
    base_prices = portfolio_df.bfill().iloc[0]
    normalized_prices = portfolio_df.div(base_prices) * 100

    plt.subplot(2, 2, 1)
    for col in normalized_prices.columns:
        plt.plot(normalized_prices.index, normalized_prices[col], label=col, linewidth=2)
    plt.title('Portfolio Performance (Normalized to 100)', fontsize=14, fontweight='bold')
    plt.xlabel('Date')
    plt.ylabel('Normalized Price')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(True, alpha=0.3)

    # Panel 2: correlation heatmap of daily returns
    plt.subplot(2, 2, 2)
    correlation_matrix = returns.corr()
    n_assets = len(correlation_matrix.columns)
    im = plt.imshow(correlation_matrix, cmap='RdYlBu', aspect='auto', vmin=-1, vmax=1)
    plt.colorbar(im)
    plt.title('Correlation Matrix', fontsize=14, fontweight='bold')
    plt.xticks(range(n_assets), correlation_matrix.columns, rotation=45)
    plt.yticks(range(n_assets), correlation_matrix.columns)

    # Write each coefficient into its cell (x = column j, y = row i)
    for i in range(n_assets):
        for j in range(n_assets):
            plt.text(j, i, f'{correlation_matrix.iloc[i, j]:.2f}',
                     ha='center', va='center', fontsize=8)

    # Panel 3: risk-return scatter, annualised with 252 trading days per year
    plt.subplot(2, 2, 3)
    annual_returns = returns.mean() * 252
    annual_volatility = returns.std() * np.sqrt(252)

    colors = plt.cm.Set3(np.linspace(0, 1, len(annual_returns)))
    plt.scatter(annual_volatility, annual_returns, c=colors, s=100, alpha=0.8)

    for label, vol, ret in zip(annual_returns.index, annual_volatility, annual_returns):
        plt.annotate(label, (vol, ret),
                     xytext=(5, 5), textcoords='offset points', fontsize=9)

    plt.title('Risk-Return Profile', fontsize=14, fontweight='bold')
    plt.xlabel('Annualized Volatility')
    plt.ylabel('Annualized Return')
    plt.grid(True, alpha=0.3)

    # Panel 4: 30-day rolling volatility, annualised
    plt.subplot(2, 2, 4)
    rolling_volatility = returns.rolling(window=30).std() * np.sqrt(252)

    for col in rolling_volatility.columns:
        plt.plot(rolling_volatility.index, rolling_volatility[col], label=col, alpha=0.8)

    plt.title('30-Day Rolling Volatility', fontsize=14, fontweight='bold')
    plt.xlabel('Date')
    plt.ylabel('Annualized Volatility')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Summary statistics
    print("\n" + "="*60)
    print("PORTFOLIO SUMMARY STATISTICS")
    print("="*60)

    # Max drawdown per stock: the worst peak-to-trough decline of the
    # compounded return curve, expressed as a negative fraction.
    # The running peak is floored at 1.0 (the starting capital) so that a
    # loss on the very first day is counted as a drawdown too.
    running_peak = cumulative_returns.cummax().clip(lower=1.0)
    max_drawdown = (cumulative_returns / running_peak - 1).min()

    summary_stats = pd.DataFrame({
        'Annual Return': annual_returns,
        'Annual Volatility': annual_volatility,
        # Return per unit of risk; no risk-free rate is subtracted here
        'Sharpe Ratio': annual_returns / annual_volatility,
        'Max Drawdown': max_drawdown,
    })

    print(summary_stats.round(4))

else:
    print("No portfolio data available for analysis")

## 2. Market Sentiment Analysis

In [None]:
# Market sentiment and macro analysis
market = StockMarket()

print("Fetching market sentiment data...")
try:
    sentiment_data = market.get_data('sentiment')
    print(f"Sentiment data shape: {sentiment_data.shape}")
    print(f"Columns: {list(sentiment_data.columns)}")

    if not sentiment_data.empty:
        # Plot at most six numeric indicators on a grid three panels wide
        numeric_cols = sentiment_data.select_dtypes(include=[np.number]).columns
        n_plots = min(6, len(numeric_cols))

        if n_plots > 0:
            rows = (n_plots + 2) // 3  # ceiling of n_plots / 3

            # The figure is created only once we know there is something to
            # draw, so no empty figure is left open when the data has no
            # numeric columns.
            plt.figure(figsize=(15, 10))

            # select_dtypes already guarantees these columns are numeric, so
            # every selected column is plotted (including e.g. float32/int32).
            for i, col in enumerate(numeric_cols[:n_plots]):
                plt.subplot(rows, 3, i + 1)
                plt.plot(sentiment_data.index, sentiment_data[col], linewidth=2, color='steelblue')
                plt.title(f'{col}', fontsize=12, fontweight='bold')
                plt.xlabel('Date')
                plt.ylabel('Value')
                plt.grid(True, alpha=0.3)
                plt.xticks(rotation=45)

            plt.tight_layout()
            plt.show()

        print("\nMarket Sentiment Summary:")
        print(sentiment_data.describe())
    else:
        print("No sentiment data available")

except Exception as e:
    # Best effort: report the failure instead of aborting the notebook
    print(f"Error fetching sentiment data: {e}")

## 3. Index Analysis and Comparison

In [None]:
# Index analysis and comparison: fetch the constituent list of each index,
# then chart the constituent counts and tabulate a sample of components.
# NOTE(review): '000001' is also used in this notebook as Ping An Bank's stock
# code; confirm that StockMarket('000001') resolves to the Shanghai Composite.
indices = {
    '000300': 'CSI 300',
    '000905': 'CSI 500',
    '399006': 'ChiNext',
    '000001': 'Shanghai Composite'
}

print("Analyzing major indices...")
index_data = {}

for code, name in indices.items():
    try:
        market = StockMarket(code)

        # Get index components
        components = market.get_data('index_components')

        if not components.empty:
            index_data[name] = {
                'code': code,
                'components_count': len(components),
                'components': components
            }
            print(f"✅ {name} ({code}): {len(components)} components")
        else:
            print(f"❌ {name} ({code}): No components data")

    except Exception as e:
        # Best effort: report the failure and carry on with the next index
        print(f"❌ {name} ({code}): Error - {e}")

# Index composition analysis
if index_data:
    plt.figure(figsize=(15, 8))

    # Top row, panel 1: number of components per index
    plt.subplot(2, 3, 1)
    names = list(index_data.keys())
    counts = [index_data[name]['components_count'] for name in names]
    colors = plt.cm.Set3(np.linspace(0, 1, len(names)))

    bars = plt.bar(names, counts, color=colors, alpha=0.8)
    plt.title('Index Components Count', fontsize=12, fontweight='bold')
    plt.ylabel('Number of Stocks')
    plt.xticks(rotation=45)

    # Add value labels just above each bar
    for bar, count in zip(bars, counts):
        plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 5,
                 str(count), ha='center', va='bottom', fontweight='bold')

    plt.grid(True, alpha=0.3)

    # Top row, panel 2: placeholder (would require sector classification data)
    plt.subplot(2, 3, 2)
    plt.text(0.5, 0.5, 'Sector Analysis\n(Requires sector data)',
             ha='center', va='center', transform=plt.gca().transAxes,
             fontsize=12, bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.8))
    plt.title('Sector Distribution', fontsize=12, fontweight='bold')

    # Top row, panel 3: placeholder (would require market cap data)
    plt.subplot(2, 3, 3)
    plt.text(0.5, 0.5, 'Market Cap Analysis\n(Requires market cap data)',
             ha='center', va='center', transform=plt.gca().transAxes,
             fontsize=12, bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.8))
    plt.title('Market Cap Distribution', fontsize=12, fontweight='bold')

    # Bottom row: first ten components of the index with the most constituents
    largest_index = max(index_data, key=lambda x: index_data[x]['components_count'])
    sample_components = index_data[largest_index]['components'].head(10)

    plt.subplot(2, 1, 2)
    plt.axis('off')

    # Create table of the sampled components
    if not sample_components.empty:
        # Preferred columns: stock code, stock name and, when present, weight
        display_cols = ['股票代码', '股票名称', '权重']
        available_cols = [col for col in display_cols if col in sample_components.columns]

        if not available_cols:
            # None of the expected column names exist in this data set: show
            # the leading columns rather than an empty table under the title.
            available_cols = list(sample_components.columns[:3])

        table_data = []
        for _, row in sample_components.iterrows():
            row_data = []
            for col in available_cols:
                value = row[col]
                # np.number is included because NumPy integer scalars are not
                # instances of the built-in int and would skip the formatting.
                if col == '权重' and isinstance(value, (int, float, np.number)):
                    row_data.append(f"{value:.2f}%")
                else:
                    row_data.append(str(value))
            table_data.append(row_data)

        table = plt.table(cellText=table_data, colLabels=available_cols,
                          cellLoc='center', loc='center',
                          colWidths=[0.15] * len(available_cols))
        table.auto_set_font_size(False)
        table.set_fontsize(10)
        table.scale(1, 2)

        # Style the header row (row 0 holds the column labels)
        for i in range(len(available_cols)):
            table[(0, i)].set_facecolor('#4CAF50')
            table[(0, i)].set_text_props(weight='bold', color='white')

        plt.title(f'Top 10 Components - {largest_index}',
                  fontsize=14, fontweight='bold', pad=20)

    plt.tight_layout()
    plt.show()

    print("\n" + "="*60)
    print("INDEX COMPOSITION SUMMARY")
    print("="*60)

    for name, data in index_data.items():
        print(f"{name} ({data['code']}): {data['components_count']} stocks")

else:
    print("No index data available for analysis")

## 4. Technical Analysis Dashboard

In [None]:
# Technical analysis dashboard
def calculate_technical_indicators(data):
    """Return a copy of ``data`` with technical-indicator columns appended.

    Parameters
    ----------
    data : pandas.DataFrame
        Price history with a '收盘' (close) column and a '成交量' (volume)
        column. All windows run over row order, so rows are assumed to be
        sorted chronologically.

    Returns
    -------
    pandas.DataFrame
        A copy of ``data`` (the input is not modified) with these columns
        added: MA5, MA10, MA20, MA60, RSI, MACD, MACD_Signal, MACD_Histogram,
        BB_Middle, BB_Upper, BB_Lower, Volume_MA, Volume_Ratio. Rolling
        indicators are NaN until their window is full.

    Raises
    ------
    KeyError
        If a required column is missing.
    """
    missing = [col for col in ('收盘', '成交量') if col not in data.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    df = data.copy()
    close = df['收盘']
    volume = df['成交量']

    # Simple moving averages of the close
    window_20 = close.rolling(20)  # shared with the Bollinger Bands below
    df['MA5'] = close.rolling(5).mean()
    df['MA10'] = close.rolling(10).mean()
    df['MA20'] = window_20.mean()
    df['MA60'] = close.rolling(60).mean()

    # RSI (14-period, simple-average variant).
    # clip() keeps the leading NaN produced by diff(), so the first RSI value
    # appears only once 14 real price changes are available; replacing that
    # NaN with 0 would emit a value one bar early, averaged over 13 changes.
    delta = close.diff()
    gain = delta.clip(lower=0).rolling(window=14).mean()
    loss = delta.clip(upper=0).abs().rolling(window=14).mean()
    rs = gain / loss  # inf when the window has no losses -> RSI of 100
    df['RSI'] = 100 - (100 / (1 + rs))

    # MACD (12/26 EMAs with a 9-period signal line).
    # NOTE: ewm() is used with pandas' default `adjust=True` weighting.
    exp1 = close.ewm(span=12).mean()
    exp2 = close.ewm(span=26).mean()
    df['MACD'] = exp1 - exp2
    df['MACD_Signal'] = df['MACD'].ewm(span=9).mean()
    df['MACD_Histogram'] = df['MACD'] - df['MACD_Signal']

    # Bollinger Bands: 20-period mean +/- 2 rolling standard deviations
    df['BB_Middle'] = df['MA20']
    bb_std = window_20.std()
    df['BB_Upper'] = df['BB_Middle'] + (bb_std * 2)
    df['BB_Lower'] = df['BB_Middle'] - (bb_std * 2)

    # Volume relative to its own 20-period average
    df['Volume_MA'] = volume.rolling(20).mean()
    df['Volume_Ratio'] = volume / df['Volume_MA']

    return df

# Select a stock for technical analysis
analysis_symbol = '000001'  # Ping An Bank

# Fetch defensively, in the same way as the portfolio, sentiment and index
# cells: a failed download prints a message instead of aborting the notebook.
try:
    stock = StockData(analysis_symbol, days=200)
    data = stock.get_data('kline')
except Exception as e:
    print(f"Error fetching data for {analysis_symbol}: {e}")
    data = pd.DataFrame()

if not data.empty:
    # Calculate technical indicators
    tech_data = calculate_technical_indicators(data)

    # Four stacked panels; the price panel is three times as tall as the rest
    fig, axes = plt.subplots(4, 1, figsize=(15, 16),
                             gridspec_kw={'height_ratios': [3, 1, 1, 1]})

    # Main price chart with moving averages and Bollinger Bands
    ax1 = axes[0]
    ax1.plot(tech_data.index, tech_data['收盘'], label='Close Price', linewidth=2, color='black')
    ax1.plot(tech_data.index, tech_data['MA5'], label='MA5', alpha=0.8, color='red')
    ax1.plot(tech_data.index, tech_data['MA20'], label='MA20', alpha=0.8, color='blue')
    ax1.plot(tech_data.index, tech_data['MA60'], label='MA60', alpha=0.8, color='green')

    # Bollinger Bands: shaded channel plus dashed outlines
    ax1.fill_between(tech_data.index, tech_data['BB_Upper'], tech_data['BB_Lower'],
                     alpha=0.2, color='gray', label='Bollinger Bands')
    ax1.plot(tech_data.index, tech_data['BB_Upper'], alpha=0.5, color='gray', linestyle='--')
    ax1.plot(tech_data.index, tech_data['BB_Lower'], alpha=0.5, color='gray', linestyle='--')

    ax1.set_title(f'Technical Analysis Dashboard - {analysis_symbol}',
                  fontsize=16, fontweight='bold')
    ax1.set_ylabel('Price (¥)', fontsize=12)
    ax1.legend(loc='upper left')
    ax1.grid(True, alpha=0.3)

    # Volume chart
    ax2 = axes[1]
    ax2.bar(tech_data.index, tech_data['成交量'], alpha=0.6, color='steelblue')
    ax2.plot(tech_data.index, tech_data['Volume_MA'], color='red', linewidth=2, label='Volume MA')
    ax2.set_ylabel('Volume', fontsize=12)
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    # RSI chart with the 30/70 reference levels
    ax3 = axes[2]
    ax3.plot(tech_data.index, tech_data['RSI'], color='purple', linewidth=2)
    ax3.axhline(y=70, color='red', linestyle='--', alpha=0.7, label='Overbought (70)')
    ax3.axhline(y=30, color='green', linestyle='--', alpha=0.7, label='Oversold (30)')
    ax3.fill_between(tech_data.index, 30, 70, alpha=0.1, color='yellow')
    ax3.set_ylabel('RSI', fontsize=12)
    ax3.set_ylim(0, 100)
    ax3.legend()
    ax3.grid(True, alpha=0.3)

    # MACD chart
    ax4 = axes[3]
    ax4.plot(tech_data.index, tech_data['MACD'], label='MACD', linewidth=2, color='blue')
    ax4.plot(tech_data.index, tech_data['MACD_Signal'], label='Signal', linewidth=2, color='red')
    ax4.bar(tech_data.index, tech_data['MACD_Histogram'], alpha=0.6, color='green', label='Histogram')
    ax4.axhline(y=0, color='black', linestyle='-', alpha=0.5)
    ax4.set_ylabel('MACD', fontsize=12)
    ax4.set_xlabel('Date', fontsize=12)
    ax4.legend()
    ax4.grid(True, alpha=0.3)

    # Format x-axis for all subplots
    for ax in axes:
        ax.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

    # Current technical signals, read from the most recent row.
    # Rolling indicators are NaN when the history is shorter than their
    # window; every comparison with NaN is False, so each signal below checks
    # for NaN explicitly instead of silently reporting it as a real reading.
    latest_data = tech_data.iloc[-1]
    close_price = latest_data['收盘']
    no_data_label = "N/A (insufficient history)"

    print("\n" + "="*60)
    print(f"TECHNICAL ANALYSIS SUMMARY - {analysis_symbol}")
    print("="*60)

    print(f"Current Price: ¥{close_price:.2f}")

    rsi_value = latest_data['RSI']
    if pd.isna(rsi_value):
        rsi_label = no_data_label
    elif rsi_value > 70:
        rsi_label = '(Overbought)'
    elif rsi_value < 30:
        rsi_label = '(Oversold)'
    else:
        rsi_label = '(Neutral)'
    print(f"RSI: {rsi_value:.2f} {rsi_label}")

    # Moving average signals (only averages that have a value are evaluated)
    ma_checks = [
        ('MA5', "Above MA5 (Bullish)"),
        ('MA20', "Above MA20 (Bullish)"),
        ('MA60', "Above MA60 (Long-term Bullish)"),
    ]
    ma_known = [(col, label) for col, label in ma_checks if pd.notna(latest_data[col])]
    ma_signals = [label for col, label in ma_known if close_price > latest_data[col]]

    if not ma_known:
        ma_summary = no_data_label
    elif ma_signals:
        ma_summary = ', '.join(ma_signals)
    else:
        ma_summary = 'Below major MAs (Bearish)'
    print(f"Moving Average Signals: {ma_summary}")

    # Bollinger Bands position
    if pd.isna(latest_data['BB_Upper']) or pd.isna(latest_data['BB_Lower']):
        bb_position = no_data_label
    elif close_price > latest_data['BB_Upper']:
        bb_position = "Above Upper Band (Overbought)"
    elif close_price < latest_data['BB_Lower']:
        bb_position = "Below Lower Band (Oversold)"
    else:
        bb_position = "Middle"
    print(f"Bollinger Bands: {bb_position}")

    # Volume analysis: latest volume relative to its 20-period average
    volume_ratio = latest_data['Volume_Ratio']
    if pd.isna(volume_ratio):
        print(f"Volume: {no_data_label}")
    else:
        if volume_ratio > 1.5:
            volume_signal = "High"
        elif volume_ratio < 0.5:
            volume_signal = "Low"
        else:
            volume_signal = "Normal"
        print(f"Volume: {volume_signal} ({volume_ratio:.2f}x average)")

    print("\n📊 Technical analysis complete!")

else:
    print(f"No data available for technical analysis of {analysis_symbol}")

## 5. Summary and Next Steps

In [None]:
# Closing recap: one print call, one argument per output line
print(
    "🎉 Advanced Usage Examples Completed!",
    "\n" + "=" * 60,
    "WHAT YOU'VE LEARNED:",
    "=" * 60,
    "1. ✅ Portfolio Analysis & Risk Management",
    "   - Multi-stock comparison",
    "   - Correlation analysis",
    "   - Risk-return profiling",
    "\n2. ✅ Market Sentiment Analysis",
    "   - Market indicators visualization",
    "   - Sentiment data interpretation",
    "\n3. ✅ Index Analysis",
    "   - Index composition analysis",
    "   - Cross-index comparison",
    "   - Component stock analysis",
    "\n4. ✅ Technical Analysis Dashboard",
    "   - Multiple technical indicators",
    "   - Signal interpretation",
    "   - Comprehensive charting",
    # Suggested follow-up work
    "\n" + "=" * 60,
    "NEXT STEPS:",
    "=" * 60,
    "1. 🔍 Explore more stocks and time periods",
    "2. 📈 Build custom trading strategies",
    "3. 🤖 Implement automated analysis workflows",
    "4. 📊 Create custom visualization dashboards",
    "5. 📝 Export analysis results for reporting",
    # Sign-off
    "\n💡 Happy analyzing! The china_stock_data library provides",
    "   powerful tools for Chinese stock market analysis.",
    "\n📚 Check out other example notebooks for more use cases!",
    sep="\n",
)