#Problem Statement 1: Data Exploration & Visualization

1.1 Obtaining Data

In [1]:
# Import required libraries
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
from scipy.stats import binom
import warnings
warnings.filterwarnings('ignore')

def fetch_stock_data(symbol, period="1y"):
    """
    Fetch stock data using yfinance for NSE stocks
    """
    try:
        # Add .NS for NSE stocks
        if not symbol.endswith('.NS'):
            symbol += '.NS'

        stock = yf.Ticker(symbol)
        data = stock.history(period=period)

        # Clean and inspect data
        print(f"Data shape: {data.shape}")
        print(f"Date range: {data.index[0]} to {data.index[-1]}")
        print(f"Missing values: {data.isnull().sum().sum()}")

        # Handle missing values if any
        data = data.dropna()

        # Ensure datetime indexing
        data.index = pd.to_datetime(data.index)

        return data
    except Exception as e:
        print(f"Error fetching data: {e}")
        return None

# Fetch data for Reliance Industries (popular NSE stock)
stock_symbol = "RELIANCE"
stock_data = fetch_stock_data(stock_symbol)

if stock_data is not None:
    print(f"\nSuccessfully fetched data for {stock_symbol}")
    print(stock_data.head())
    print(f"\nData Info:")
    print(stock_data.info())

Data shape: (249, 7)
Date range: 2024-06-12 00:00:00+05:30 to 2025-06-12 00:00:00+05:30
Missing values: 0

Successfully fetched data for RELIANCE
                                  Open         High          Low        Close  \
Date                                                                            
2024-06-12 00:00:00+05:30  1457.500000  1476.000000  1455.175049  1463.324951   
2024-06-13 00:00:00+05:30  1472.000000  1472.250000  1460.099976  1465.250000   
2024-06-14 00:00:00+05:30  1470.000000  1479.675049  1457.224976  1477.550049   
2024-06-18 00:00:00+05:30  1487.349976  1487.400024  1471.349976  1481.025024   
2024-06-19 00:00:00+05:30  1487.500000  1487.500000  1456.000000  1458.650024   

                             Volume  Dividends  Stock Splits  
Date                                                          
2024-06-12 00:00:00+05:30  10081742        0.0           0.0  
2024-06-13 00:00:00+05:30   9181160        0.0           0.0  
2024-06-14 00:00:00+05:30   815799

1.2 Visual Insight - Interactive Candlestick Chart

In [17]:
def create_candlestick_chart(data, title):
    """
    Create interactive candlestick chart with pattern annotations
    """
    fig = go.Figure()

    # Remove timezone information for better compatibility with plotly
    data_tz_naive = data.copy()
    if data_tz_naive.index.tz is not None:
        data_tz_naive.index = data_tz_naive.index.tz_localize(None)

    # Add candlestick chart
    fig.add_trace(go.Candlestick(
        x=data_tz_naive.index,
        open=data_tz_naive['Open'],
        high=data_tz_naive['High'],
        low=data_tz_naive['Low'],
        close=data_tz_naive['Close'],
        name="Price"
    ))

    # Add volume as secondary plot
    fig.add_trace(go.Bar(
        x=data_tz_naive.index,
        y=data_tz_naive['Volume'],
        name='Volume',
        yaxis='y2',
        marker_color='rgba(158,202,225,0.5)'
    ))

    # Find significant price movements for annotations
    daily_returns = data_tz_naive['Close'].pct_change()
    significant_moves = daily_returns[abs(daily_returns) > 0.05]

    if len(significant_moves) > 0:
        # Annotate top 2 significant movements
        top_moves = significant_moves.abs().nlargest(2)

        for i, (date, return_val) in enumerate(top_moves.items()):
            actual_return = daily_returns[date]
            color = "red" if actual_return < 0 else "green"

            # Add vertical line using add_shape instead of add_vline
            fig.add_shape(
                type="line",
                x0=date, x1=date,
                y0=0, y1=1,
                yref="paper",
                line=dict(color=color, width=2, dash="dash")
            )

            # Add annotation separately
            fig.add_annotation(
                x=date,
                y=data_tz_naive.loc[date, 'High'] * 1.05,  # Position above the high
                text=f"Pattern {i+1}: {actual_return:.2%}",
                showarrow=True,
                arrowhead=2,
                arrowcolor=color,
                bgcolor="white",
                bordercolor=color,
                borderwidth=1
            )

    # Update layout
    fig.update_layout(
        title=f'{title} - Interactive Candlestick Chart with Volume',
        yaxis_title='Price (₹)',
        yaxis2=dict(
            title='Volume',
            overlaying='y',
            side='right',
            range=[0, data_tz_naive['Volume'].max() * 4]  # Scale volume appropriately
        ),
        xaxis_title='Date',
        template='plotly_white',
        height=700,
        showlegend=True
    )

    return fig

# Create and display the chart
if stock_data is not None:
    fig = create_candlestick_chart(stock_data, stock_symbol)
    fig.show()

Pattern Analysis
Pattern 1: [Significant Drop/Rise]

What happened: [Describe the first annotated pattern]
Possible causes: This could be due to quarterly earnings announcement, market-wide correction, company-specific news, or broader economic events affecting the sector.

Pattern 2: [Gap/Trend Reversal]

What happened: [Describe the second annotated pattern]
Possible causes: Gap openings often occur due to overnight news, pre-market trading activity, or significant corporate announcements that affect investor sentiment.

#Problem Statement 2: Returns & Volatility

2.1 Return Computation

In [8]:
def calculate_returns_and_volatility(data):
    """
    Calculate simple returns, log returns, and rolling volatility
    """
    # Calculate simple returns
    data['Simple_Returns'] = data['Close'].pct_change()

    # Calculate log returns
    data['Log_Returns'] = np.log(data['Close'] / data['Close'].shift(1))

    # Calculate 14-day rolling volatility
    data['Rolling_Volatility'] = data['Simple_Returns'].rolling(window=14).std()

    return data

# Apply calculations
if stock_data is not None:
    stock_data = calculate_returns_and_volatility(stock_data)

    # Create visualization
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=('Simple Returns', 'Log Returns',
                       'Price vs 14-Day Rolling Volatility', 'Simple vs Log Returns Scatter'),
        specs=[[{}, {}], [{"secondary_y": True}, {}]]
    )

    # Simple Returns
    fig.add_trace(
        go.Scatter(x=stock_data.index, y=stock_data['Simple_Returns'],
                  mode='lines', name='Simple Returns', line=dict(color='blue')),
        row=1, col=1
    )

    # Log Returns
    fig.add_trace(
        go.Scatter(x=stock_data.index, y=stock_data['Log_Returns'],
                  mode='lines', name='Log Returns', line=dict(color='red')),
        row=1, col=2
    )

    # Price (primary y-axis)
    fig.add_trace(
        go.Scatter(x=stock_data.index, y=stock_data['Close'],
                  mode='lines', name='Price', line=dict(color='green')),
        row=2, col=1
    )

    # Volatility (secondary y-axis)
    fig.add_trace(
        go.Scatter(x=stock_data.index, y=stock_data['Rolling_Volatility'],
                  mode='lines', name='14-day Volatility', line=dict(color='orange')),
        row=2, col=1, secondary_y=True
    )

    # Scatter plot comparing simple vs log returns
    fig.add_trace(
        go.Scatter(x=stock_data['Simple_Returns'], y=stock_data['Log_Returns'],
                  mode='markers', name='Simple vs Log',
                  marker=dict(color='purple', size=4, opacity=0.6)),
        row=2, col=2
    )

    fig.update_layout(height=800, title_text="Returns and Volatility Analysis")
    fig.update_yaxes(title_text="Price (₹)", row=2, col=1)
    fig.update_yaxes(title_text="Volatility", secondary_y=True, row=2, col=1)

    fig.show()

Analysis: Simple Returns vs Log Returns
Why do they look similar?
Simple returns and log returns appear very similar in the plots because for small percentage changes (which is typical for daily stock returns), the mathematical difference between them is minimal.
Mathematical relationship:

Simple Return = (P₁ - P₀) / P₀
Log Return = ln(P₁/P₀) = ln(P₁) - ln(P₀)

For small values of x, ln(1+x) ≈ x, so log returns ≈ simple returns when daily changes are small.
When might they differ significantly?

Large price movements: When daily returns exceed ±10%, the difference becomes more noticeable
Extreme market events: During market crashes or bubbles with very high volatility
Compounding effects: Over longer periods, log returns provide better compounding properties
Mathematical properties: Log returns are symmetric around zero and have better statistical properties for modeling

Key advantages of log returns:

Time-additive: sum of log returns = log of compound return
Symmetry: +50% and -33.33% simple returns correspond to +40.55% and -40.55% log returns
# Better for statistical analysis and **modeling**

#Problem Statement 3: The Probability Paradox

Setup and Classification

In [9]:
def probability_paradox_analysis(data):
    """
    Comprehensive analysis of the probability paradox
    """
    # Setup: Classify days as UP/DOWN
    data['Daily_Return'] = data['Close'].pct_change()
    data['Direction'] = data['Daily_Return'].apply(lambda x: 'UP' if x > 0 else 'DOWN')

    # Calculate P(UP)
    total_days = len(data.dropna())
    up_days = len(data[data['Direction'] == 'UP'])
    p_up = up_days / total_days

    print(f"Total trading days analyzed: {total_days}")
    print(f"Days with positive returns: {up_days}")
    print(f"P(UP) = {p_up:.4f} ({p_up*100:.2f}%)")

    return data, p_up, up_days, total_days

# Run the analysis
if stock_data is not None:
    analyzed_data, prob_up, up_count, total_count = probability_paradox_analysis(stock_data)

Total trading days analyzed: 235
Days with positive returns: 129
P(UP) = 0.5489 (54.89%)


Part 1: Theory vs Reality

In [10]:
# Theoretical calculations using binomial distribution
print("\n" + "="*50)
print("PART 1: Theory vs Reality")
print("="*50)

# For demonstration, let's assume P(UP) = 0.6 as mentioned in the problem
p_up_example = 0.6  # You can use your actual prob_up value

# Calculate theoretical probabilities
prob_exactly_6 = binom.pmf(6, 10, p_up_example)
prob_at_least_8 = 1 - binom.cdf(7, 10, p_up_example)

print(f"Using P(UP) = {p_up_example}")
print(f"Probability of exactly 6 up days in 10: {prob_exactly_6:.4f}")
print(f"Probability of at least 8 up days in 10: {prob_at_least_8:.4f}")

# Simulation to verify
np.random.seed(42)
simulations = 1000
exactly_6_sim = 0
at_least_8_sim = 0

simulation_results = []
for i in range(simulations):
    # Simulate 10 coin flips with probability p_up_example
    outcomes = np.random.binomial(1, p_up_example, 10)
    up_days_sim = sum(outcomes)
    simulation_results.append(up_days_sim)

    if up_days_sim == 6:
        exactly_6_sim += 1
    if up_days_sim >= 8:
        at_least_8_sim += 1

sim_prob_exactly_6 = exactly_6_sim / simulations
sim_prob_at_least_8 = at_least_8_sim / simulations

print(f"\nSimulation Results ({simulations} trials):")
print(f"Simulated probability of exactly 6 up days: {sim_prob_exactly_6:.4f}")
print(f"Simulated probability of at least 8 up days: {sim_prob_at_least_8:.4f}")

print(f"\nComparison:")
print(f"Exactly 6 days - Theory: {prob_exactly_6:.4f}, Simulation: {sim_prob_exactly_6:.4f}")
print(f"At least 8 days - Theory: {prob_at_least_8:.4f}, Simulation: {sim_prob_at_least_8:.4f}")

# Visualize simulation results
fig = go.Figure()
fig.add_trace(go.Histogram(x=simulation_results, nbinsx=11, name='Simulation Results'))
fig.update_layout(
    title=f'Distribution of Up Days in 10 Trading Days (P(UP) = {p_up_example})',
    xaxis_title='Number of Up Days',
    yaxis_title='Frequency'
)
fig.show()


PART 1: Theory vs Reality
Using P(UP) = 0.6
Probability of exactly 6 up days in 10: 0.2508
Probability of at least 8 up days in 10: 0.1673

Simulation Results (1000 trials):
Simulated probability of exactly 6 up days: 0.2760
Simulated probability of at least 8 up days: 0.1840

Comparison:
Exactly 6 days - Theory: 0.2508, Simulation: 0.2760
At least 8 days - Theory: 0.1673, Simulation: 0.1840


Part 2: The Paradox - Mathematical Proof

In [19]:
print("\n" + "="*50)
print("PART 2: The Paradox - Proof")
print("="*50)

if stock_data is not None:
    # Calculate average returns for up and down days
    up_returns = analyzed_data[analyzed_data['Direction'] == 'UP']['Daily_Return'].dropna()
    down_returns = analyzed_data[analyzed_data['Direction'] == 'DOWN']['Daily_Return'].dropna()

    avg_up_return = up_returns.mean()
    avg_down_return = down_returns.mean()

    print(f"Average return on UP days: {avg_up_return:.4f} ({avg_up_return*100:.2f}%)")
    print(f"Average return on DOWN days: {avg_down_return:.4f} ({avg_down_return*100:.2f}%)")

    # The Paradox Explained
    print(f"\nTHE PARADOX:")
    print(f"Even though P(UP) = {prob_up:.4f} > 0.5 (more up days than down days)")

    # Calculate expected daily return
    expected_return = prob_up * avg_up_return + (1 - prob_up) * avg_down_return
    print(f"Expected daily return = P(UP) × Avg(UP) + P(DOWN) × Avg(DOWN)")
    print(f"Expected daily return = {prob_up:.4f} × {avg_up_return:.4f} + {1-prob_up:.4f} × {avg_down_return:.4f}")
    print(f"Expected daily return = {expected_return:.6f} ({expected_return*100:.4f}%)")

    # The key insight
    if abs(avg_down_return) > avg_up_return:
        print(f"\nKEY INSIGHT:")
        print(f"The magnitude of average DOWN days ({abs(avg_down_return):.4f}) is greater than")
        print(f"the magnitude of average UP days ({avg_up_return:.4f})")
        print(f"This means that despite having more up days, the down days are more severe!")

    # Demonstrate with cumulative returns
    cumulative_simple = (1 + analyzed_data['Daily_Return']).cumprod()
    cumulative_log = analyzed_data['Log_Returns'].cumsum()

    total_simple_return = cumulative_simple.iloc[-1] - 1
    total_log_return = np.exp(cumulative_log.iloc[-1]) - 1

    print(f"\nCUMULATIVE PERFORMANCE OVER THE PERIOD:")
    print(f"Total return (simple): {total_simple_return:.4f} ({total_simple_return*100:.2f}%)")
    print(f"Total return (log): {total_log_return:.4f} ({total_log_return*100:.2f}%)")

    # Visualization of the paradox
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=('Distribution of Returns', 'Cumulative Returns Comparison',
                       'Up vs Down Days Analysis', 'Risk-Return Profile'),
        specs=[[{}, {}], [{}, {}]]
    )

    # Distribution of returns
    fig.add_trace(
        go.Histogram(x=up_returns, name='UP Days', opacity=0.7,
                    marker_color='green', nbinsx=20),
        row=1, col=1
    )
    fig.add_trace(
        go.Histogram(x=down_returns, name='DOWN Days', opacity=0.7,
                    marker_color='red', nbinsx=20),
        row=1, col=1
    )

    # Cumulative returns
    fig.add_trace(
        go.Scatter(x=analyzed_data.index, y=cumulative_simple,
                  name='Simple Returns', line=dict(color='blue')),
        row=1, col=2
    )
    fig.add_trace(
        go.Scatter(x=analyzed_data.index, y=np.exp(cumulative_log),
                  name='Log Returns', line=dict(color='red', dash='dash')),
        row=1, col=2
    )

    # Up vs Down analysis
    categories = ['UP Days', 'DOWN Days']
    averages = [avg_up_return*100, avg_down_return*100]
    counts = [len(up_returns), len(down_returns)]

    fig.add_trace(
        go.Bar(x=categories, y=averages, name='Avg Return (%)',
               marker_color=['green', 'red']),
        row=2, col=1
    )

    # Risk-Return scatter
    # Calculate rolling 30-day volatility and returns
    rolling_returns = analyzed_data['Daily_Return'].rolling(30).mean()
    rolling_volatility = analyzed_data['Daily_Return'].rolling(30).std()

    fig.add_trace(
        go.Scatter(x=rolling_volatility*100, y=rolling_returns*100,
                  mode='markers', name='30-day Risk-Return',
                  marker=dict(color=analyzed_data.index.map(lambda x: x.dayofyear),
                            colorscale='viridis', size=4)),
        row=2, col=2
    )

    fig.update_layout(height=800, title_text="The Probability Paradox - Complete Analysis")
    fig.update_xaxes(title_text="Return", row=1, col=1)
    fig.update_yaxes(title_text="Frequency", row=1, col=1)
    fig.update_xaxes(title_text="Date", row=1, col=2)
    fig.update_yaxes(title_text="Cumulative Return", row=1, col=2)
    fig.update_xaxes(title_text="Day Type", row=2, col=1)
    fig.update_yaxes(title_text="Average Return (%)", row=2, col=1)
    fig.update_xaxes(title_text="Volatility (%)", row=2, col=2)
    fig.update_yaxes(title_text="Return (%)", row=2, col=2)

    fig.show()



## Problem Statement 4: Bayesian Ranking System

### 4.1 Investment Strategy Ranking


def bayesian_ranking_system():
    """
    Implement Bayesian ranking for investment strategies
    """
    print("\n" + "="*60)
    print("PROBLEM 4: Bayesian Ranking System for Investment Strategies")
    print("="*60)

    # Define investment strategies with their performance data
    strategies = {
        'Strategy A - Growth Stocks': {
            'wins': 65, 'total': 100, 'avg_return': 0.08, 'volatility': 0.15
        },
        'Strategy B - Value Stocks': {
            'wins': 58, 'total': 100, 'avg_return': 0.06, 'volatility': 0.12
        },
        'Strategy C - Dividend Stocks': {
            'wins': 55, 'total': 100, 'avg_return': 0.05, 'volatility': 0.08
        },
        'Strategy D - Index Fund': {
            'wins': 52, 'total': 100, 'avg_return': 0.07, 'volatility': 0.10
        },
        'Strategy E - Sector Rotation': {
            'wins': 70, 'total': 100, 'avg_return': 0.09, 'volatility': 0.18
        }
    }

    # Bayesian parameters
    alpha_prior = 1  # Prior successes
    beta_prior = 1   # Prior failures

    print("STRATEGY PERFORMANCE DATA:")
    print("-" * 60)
    for name, data in strategies.items():
        win_rate = data['wins'] / data['total']
        print(f"{name}:")
        print(f"  Win Rate: {win_rate:.1%} ({data['wins']}/{data['total']})")
        print(f"  Avg Return: {data['avg_return']:.1%}")
        print(f"  Volatility: {data['volatility']:.1%}")
        print()

    # Calculate Bayesian estimates
    bayesian_results = {}

    for name, data in strategies.items():
        # Posterior parameters
        alpha_post = alpha_prior + data['wins']
        beta_post = beta_prior + (data['total'] - data['wins'])

        # Bayesian estimate (posterior mean)
        bayesian_estimate = alpha_post / (alpha_post + beta_post)

        # Credible interval (95%)
        from scipy.stats import beta
        ci_lower = beta.ppf(0.025, alpha_post, beta_post)
        ci_upper = beta.ppf(0.975, alpha_post, beta_post)

        # Risk-adjusted score (Sharpe-like ratio)
        risk_adjusted_score = data['avg_return'] / data['volatility']

        # Combined Bayesian-Risk score
        combined_score = bayesian_estimate * risk_adjusted_score

        bayesian_results[name] = {
            'bayesian_estimate': bayesian_estimate,
            'ci_lower': ci_lower,
            'ci_upper': ci_upper,
            'risk_adjusted_score': risk_adjusted_score,
            'combined_score': combined_score,
            'raw_win_rate': data['wins'] / data['total']
        }

    # Sort strategies by different criteria
    print("BAYESIAN RANKING RESULTS:")
    print("=" * 60)

    # Ranking by Bayesian estimate
    bayesian_ranking = sorted(bayesian_results.items(),
                             key=lambda x: x[1]['bayesian_estimate'], reverse=True)

    print("1. RANKING BY BAYESIAN ESTIMATE (Win Probability):")
    print("-" * 50)
    for i, (name, results) in enumerate(bayesian_ranking, 1):
        print(f"{i}. {name}")
        print(f"   Bayesian Estimate: {results['bayesian_estimate']:.3f}")
        print(f"   95% Credible Interval: [{results['ci_lower']:.3f}, {results['ci_upper']:.3f}]")
        print(f"   Raw Win Rate: {results['raw_win_rate']:.3f}")
        print()

    # Ranking by combined score
    combined_ranking = sorted(bayesian_results.items(),
                             key=lambda x: x[1]['combined_score'], reverse=True)

    print("2. RANKING BY COMBINED BAYESIAN-RISK SCORE:")
    print("-" * 50)
    for i, (name, results) in enumerate(combined_ranking, 1):
        print(f"{i}. {name}")
        print(f"   Combined Score: {results['combined_score']:.3f}")
        print(f"   Bayesian Estimate: {results['bayesian_estimate']:.3f}")
        print(f"   Risk-Adjusted Score: {results['risk_adjusted_score']:.3f}")
        print()

    # Visualization
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=('Bayesian Estimates with Confidence Intervals',
                       'Risk vs Return Profile',
                       'Combined Score Ranking',
                       'Posterior Distributions'),
        specs=[[{}, {}], [{}, {}]]
    )

    # Bayesian estimates with error bars
    names = list(bayesian_results.keys())
    estimates = [bayesian_results[name]['bayesian_estimate'] for name in names]
    ci_lower = [bayesian_results[name]['ci_lower'] for name in names]
    ci_upper = [bayesian_results[name]['ci_upper'] for name in names]

    fig.add_trace(
        go.Scatter(
            x=names, y=estimates,
            error_y=dict(
                type='data',
                symmetric=False,
                array=[u - e for u, e in zip(ci_upper, estimates)],
                arrayminus=[e - l for e, l in zip(estimates, ci_lower)]
            ),
            mode='markers+lines',
            name='Bayesian Estimates'
        ),
        row=1, col=1
    )

    # Risk vs Return
    returns = [strategies[name]['avg_return'] for name in names]
    volatilities = [strategies[name]['volatility'] for name in names]

    fig.add_trace(
        go.Scatter(
            x=volatilities, y=returns,
            mode='markers+text',
            text=[name.split('-')[0].strip() for name in names],
            textposition="top center",
            marker=dict(size=10, color=estimates, colorscale='viridis'),
            name='Risk-Return'
        ),
        row=1, col=2
    )

    # Combined scores
    combined_scores = [bayesian_results[name]['combined_score'] for name in names]
    fig.add_trace(
        go.Bar(x=[name.split('-')[0].strip() for name in names],
               y=combined_scores, name='Combined Score'),
        row=2, col=1
    )

    # Posterior distributions for top 3 strategies
    x_range = np.linspace(0, 1, 1000)
    for i, (name, _) in enumerate(bayesian_ranking[:3]):
        data = strategies[name]
        alpha_post = alpha_prior + data['wins']
        beta_post = beta_prior + (data['total'] - data['wins'])

        from scipy.stats import beta as beta_dist
        y_values = beta_dist.pdf(x_range, alpha_post, beta_post)

        fig.add_trace(
            go.Scatter(x=x_range, y=y_values,
                      name=f"{name.split('-')[0].strip()}",
                      line=dict(width=2)),
            row=2, col=2
        )

    fig.update_layout(height=800, title_text="Bayesian Investment Strategy Analysis")
    fig.update_xaxes(title_text="Strategy", row=1, col=1)
    fig.update_yaxes(title_text="Win Probability", row=1, col=1)
    fig.update_xaxes(title_text="Volatility", row=1, col=2)
    fig.update_yaxes(title_text="Average Return", row=1, col=2)
    fig.update_xaxes(title_text="Strategy", row=2, col=1)
    fig.update_yaxes(title_text="Combined Score", row=2, col=1)
    fig.update_xaxes(title_text="Win Probability", row=2, col=2)
    fig.update_yaxes(title_text="Density", row=2, col=2)

    fig.show()

    return bayesian_results, strategies

# Run the Bayesian ranking analysis
bayesian_results, strategy_data = bayesian_ranking_system()


PART 2: The Paradox - Proof
Average return on UP days: 0.0099 (0.99%)
Average return on DOWN days: -0.0106 (-1.06%)

THE PARADOX:
Even though P(UP) = 0.5489 > 0.5 (more up days than down days)
Expected daily return = P(UP) × Avg(UP) + P(DOWN) × Avg(DOWN)
Expected daily return = 0.5489 × 0.0099 + 0.4511 × -0.0106
Expected daily return = 0.000613 (0.0613%)

KEY INSIGHT:
The magnitude of average DOWN days (0.0106) is greater than
the magnitude of average UP days (0.0099)
This means that despite having more up days, the down days are more severe!

CUMULATIVE PERFORMANCE OVER THE PERIOD:
Total return (simple): -0.0170 (-1.70%)
Total return (log): -0.0170 (-1.70%)



PROBLEM 4: Bayesian Ranking System for Investment Strategies
STRATEGY PERFORMANCE DATA:
------------------------------------------------------------
Strategy A - Growth Stocks:
  Win Rate: 65.0% (65/100)
  Avg Return: 8.0%
  Volatility: 15.0%

Strategy B - Value Stocks:
  Win Rate: 58.0% (58/100)
  Avg Return: 6.0%
  Volatility: 12.0%

Strategy C - Dividend Stocks:
  Win Rate: 55.0% (55/100)
  Avg Return: 5.0%
  Volatility: 8.0%

Strategy D - Index Fund:
  Win Rate: 52.0% (52/100)
  Avg Return: 7.0%
  Volatility: 10.0%

Strategy E - Sector Rotation:
  Win Rate: 70.0% (70/100)
  Avg Return: 9.0%
  Volatility: 18.0%

BAYESIAN RANKING RESULTS:
1. RANKING BY BAYESIAN ESTIMATE (Win Probability):
--------------------------------------------------
1. Strategy E - Sector Rotation
   Bayesian Estimate: 0.696
   95% Credible Interval: [0.604, 0.781]
   Raw Win Rate: 0.700

2. Strategy A - Growth Stocks
   Bayesian Estimate: 0.647
   95% Credible Interval: [0.552, 0.736]
   Raw Win Rate: 0.650

