# Ghana Stock Exchange Market Data Visualization

This notebook provides interactive visualizations of GSE market data using Plotly.

## Contents
1. Setup and Data Loading
2. Data Processing and Consolidation
3. Individual Stock Analysis
4. Market-wide Analysis
5. Interactive Dashboards

## 1. Setup and Dependencies

In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import glob
import os

## 2. Data Loading and Processing

In [2]:
def load_stock_data(symbol):
    """Load data for a single stock from CSV"""
    df = pd.read_csv(f"historical_data/{symbol}.csv")
    df["Date"] = pd.to_datetime(df["Date"], format="%d/%m/%Y")
    df.set_index("Date", inplace=True)
    return df


def load_all_stocks():
    """Load and combine data for all stocks"""
    # Get list of all CSV files
    csv_files = glob.glob("historical_data/*.csv")

    # Dictionary to store DataFrames
    stock_data = {}

    for file in csv_files:
        symbol = os.path.basename(file).replace(".csv", "")
        stock_data[symbol] = load_stock_data(symbol)

    return stock_data

In [3]:
# Load all stock data
stock_data = load_all_stocks()
print(f"Loaded data for {len(stock_data)} symbols")

Loaded data for 37 symbols


## 3. Individual Stock Analysis Functions

In [4]:
def plot_stock_candlestick(symbol, start_date=None, end_date=None):
    """Create an interactive candlestick chart with volume for a single stock"""
    df = stock_data[symbol]

    if start_date:
        df = df[df.index >= start_date]
    if end_date:
        df = df[df.index <= end_date]

    # Create figure with secondary y-axis
    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        vertical_spacing=0.03,
        subplot_titles=(f"{symbol} Price", "Volume"),
        row_heights=[0.7, 0.3],
    )

    # Add candlestick chart
    fig.add_trace(
        go.Candlestick(
            x=df.index,
            open=df["Open"],
            high=df["High"],
            low=df["Low"],
            close=df["Close"],
            name="Price",
        ),
        row=1,
        col=1,
    )

    # Add volume bar chart
    fig.add_trace(go.Bar(x=df.index, y=df["Volume"], name="Volume"), row=2, col=1)

    # Update layout
    fig.update_layout(
        xaxis_rangeslider_visible=False,
        height=800,
        title_text=f"{symbol} Stock Price and Volume",
        showlegend=False,
    )

    return fig

In [5]:
def calculate_stock_metrics(symbol):
    """Calculate key metrics for a stock"""
    df = stock_data[symbol].copy()

    # Calculate daily returns
    df["Daily_Return"] = df["Close"].pct_change(fill_method=None)

    # Calculate moving averages
    df["MA20"] = df["Close"].rolling(window=20).mean()
    df["MA50"] = df["Close"].rolling(window=50).mean()
    df["MA200"] = df["Close"].rolling(window=200).mean()

    # Calculate volatility (20-day rolling standard deviation of returns)
    df["Volatility_20D"] = df["Daily_Return"].rolling(window=20).std()

    # Calculate trading metrics
    df["VWAP"] = (df["Close"] * df["Volume"]).cumsum() / df["Volume"].cumsum()

    # Calculate momentum indicators
    df["RSI"] = calculate_rsi(df["Close"])
    df["Money_Flow_Index"] = calculate_mfi(df)

    return df


def calculate_rsi(prices, period=14):
    """Calculate Relative Strength Index"""
    delta = prices.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()

    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi


def calculate_mfi(df, period=14):
    """Calculate Money Flow Index"""
    typical_price = (df["High"] + df["Low"] + df["Close"]) / 3
    money_flow = typical_price * df["Volume"]

    delta = money_flow.diff()
    positive_flow = delta.where(delta > 0, 0).rolling(window=period).sum()
    negative_flow = (-delta.where(delta < 0, 0)).rolling(window=period).sum()

    money_ratio = positive_flow / negative_flow
    mfi = 100 - (100 / (1 + money_ratio))
    return mfi

In [6]:
def plot_bollinger_bands(symbol, window=20, num_std=2):
    """Plot price with Bollinger Bands"""
    df = stock_data[symbol].copy()
    df = df[~df.index.duplicated(keep="last")]

    # Calculate Bollinger Bands
    sma = df["Close"].rolling(window=window).mean()
    std = df["Close"].rolling(window=window).std()
    upper_band = sma + (std * num_std)
    lower_band = sma - (std * num_std)

    # Create figure
    fig = go.Figure()

    # Add price line
    fig.add_trace(
        go.Scatter(
            x=df.index, y=df["Close"], name="Close Price", line=dict(color="blue")
        )
    )

    # Add Bollinger Bands
    fig.add_trace(
        go.Scatter(
            x=df.index,
            y=upper_band,
            name=f"Upper Band (+{num_std}σ)",
            line=dict(color="red", dash="dash"),
        )
    )
    fig.add_trace(
        go.Scatter(
            x=df.index, y=sma, name=f"{window}-day SMA", line=dict(color="orange")
        )
    )
    fig.add_trace(
        go.Scatter(
            x=df.index,
            y=lower_band,
            name=f"Lower Band (-{num_std}σ)",
            line=dict(color="green", dash="dash"),
            fill="tonexty",
        )
    )

    fig.update_layout(
        title=f"{symbol} - Bollinger Bands ({window}-day, {num_std}σ)",
        xaxis_title="Date",
        yaxis_title="Price",
        height=600,
        hovermode="x unified",
    )

    return fig

In [7]:
def plot_cumulative_returns(symbols=None, start_date=None):
    """Plot cumulative returns for multiple stocks (horse race chart)"""
    if symbols is None:
        symbols = list(stock_data.keys())

    fig = go.Figure()

    for symbol in symbols:
        if symbol not in stock_data or stock_data[symbol].empty:
            continue

        df = stock_data[symbol].copy()
        df = df[~df.index.duplicated(keep="last")]

        if start_date:
            df = df[df.index >= start_date]

        if len(df) > 1:
            returns = df["Close"].pct_change(fill_method=None)
            cumulative_returns = (1 + returns).cumprod()

            fig.add_trace(
                go.Scatter(
                    x=df.index,
                    y=(cumulative_returns - 1) * 100,  # Show as percentage
                    name=symbol,
                    mode="lines",
                )
            )

    fig.update_layout(
        title="Cumulative Returns - GSE Stocks (Horse Race)",
        xaxis_title="Date",
        yaxis_title="Cumulative Return (%)",
        height=700,
        hovermode="x unified",
        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
    )

    return fig

## 4. Market-wide Analysis Functions

In [8]:
def calculate_market_metrics():
    """Calculate market-wide metrics"""
    # Get the latest date for each stock
    latest_data = {}
    for symbol, df in stock_data.items():
        if not df.empty:
            latest_data[symbol] = {
                "Close": df["Close"].iloc[-1],
                "Volume": df["Volume"].iloc[-1],
                "Market_Cap": df["Close"].iloc[-1] * df["Volume"].iloc[-1],
            }

    market_metrics = pd.DataFrame.from_dict(latest_data, orient="index")
    return market_metrics


def plot_market_overview():
    """Create a market overview visualization"""
    market_metrics = calculate_market_metrics()

    # Create treemap of market capitalization
    fig = px.treemap(
        market_metrics,
        path=[market_metrics.index],
        values="Market_Cap",
        title="GSE Market Capitalization Overview",
    )

    return fig

In [9]:
def calculate_market_summary(start_date=None, end_date=None):
    """Calculate market-wide performance summary"""
    summary = {}

    for symbol, df in stock_data.items():
        if df.empty:
            continue

        # Filter date range if specified
        data = df.copy()
        if start_date:
            data = data[data.index >= start_date]
        if end_date:
            data = data[data.index <= end_date]

        if len(data) < 2:
            continue

        # Calculate metrics
        returns = data["Close"].pct_change(fill_method=None)
        summary[symbol] = {
            "Total_Return": ((data["Close"].iloc[-1] / data["Close"].iloc[0]) - 1)
            * 100,
            "Annualized_Return": (
                (data["Close"].iloc[-1] / data["Close"].iloc[0]) ** (252 / len(data))
                - 1
            )
            * 100,
            "Daily_Volatility": returns.std() * 100,
            "Annualized_Volatility": returns.std() * np.sqrt(252) * 100,
            "Sharpe_Ratio": (
                (returns.mean() / returns.std()) * np.sqrt(252)
                if returns.std() != 0
                else 0
            ),
            "Max_Drawdown": (
                (data["Close"] / data["Close"].expanding().max()) - 1
            ).min()
            * 100,
            "Avg_Daily_Volume": data["Volume"].mean(),
            "Volume_Trend": np.polyfit(range(len(data)), data["Volume"], 1)[0],
        }

    return pd.DataFrame.from_dict(summary, orient="index")

In [10]:
def create_market_index():
    """Create an equal-weighted market index from all stocks"""
    all_returns = {}

    for symbol, df in stock_data.items():
        if not df.empty:
            df_clean = df[~df.index.duplicated(keep="last")]
            if len(df_clean) > 1:
                returns = df_clean["Close"].pct_change(fill_method=None)
                all_returns[symbol] = returns

    if all_returns:
        returns_df = pd.DataFrame(all_returns)
        # Equal-weighted index: average returns across all stocks
        market_returns = returns_df.mean(axis=1)
        return market_returns

    return pd.Series()


def calculate_risk_return_metrics(risk_free_rate=0.05):
    """Calculate risk-return metrics for all stocks"""
    metrics = {}

    for symbol, df in stock_data.items():
        if df.empty:
            continue

        df_clean = df[~df.index.duplicated(keep="last")]
        if len(df_clean) < 30:  # Need minimum data
            continue

        returns = df_clean["Close"].pct_change(fill_method=None).dropna()

        if len(returns) > 0:
            # Annualized metrics (assuming 252 trading days per year)
            ann_return = returns.mean() * 252 * 100
            ann_volatility = returns.std() * np.sqrt(252) * 100
            sharpe_ratio = (returns.mean() * 252 - risk_free_rate) / (
                returns.std() * np.sqrt(252)
            )

            metrics[symbol] = {
                "Annualized_Return": ann_return,
                "Annualized_Volatility": ann_volatility,
                "Sharpe_Ratio": sharpe_ratio,
                "Daily_Mean_Return": returns.mean() * 100,
                "Daily_Std": returns.std() * 100,
            }

    return pd.DataFrame.from_dict(metrics, orient="index")


def calculate_beta(market_returns=None):
    """Calculate beta for each stock relative to market"""
    if market_returns is None:
        market_returns = create_market_index()

    if market_returns.empty:
        return pd.DataFrame()

    betas = {}
    market_var = market_returns.var()

    for symbol, df in stock_data.items():
        if df.empty:
            continue

        df_clean = df[~df.index.duplicated(keep="last")]
        if len(df_clean) < 30:
            continue

        returns = df_clean["Close"].pct_change(fill_method=None).dropna()

        # Align the returns with market returns
        common_dates = returns.index.intersection(market_returns.index)
        if len(common_dates) > 30:
            stock_aligned = returns.loc[common_dates]
            market_aligned = market_returns.loc[common_dates]

            covariance = stock_aligned.cov(market_aligned)
            beta = covariance / market_var

            betas[symbol] = {"Beta": beta}

    return pd.DataFrame.from_dict(betas, orient="index")


def plot_risk_return_scatter(risk_free_rate=0.05):
    """Create risk-return scatter plot"""
    metrics = calculate_risk_return_metrics(risk_free_rate)

    if metrics.empty:
        print("Not enough data to create risk-return plot")
        return None

    # Clean metrics for plotting (handle inf and negative values)
    metrics_clean = metrics.copy()
    metrics_clean["Sharpe_Ratio_Display"] = metrics_clean["Sharpe_Ratio"].replace(
        [np.inf, -np.inf], np.nan
    )

    # For size, we need positive values - use absolute value + 1 to avoid zeros
    metrics_clean["Size_Value"] = (
        metrics_clean["Sharpe_Ratio_Display"].fillna(0).abs() + 0.5
    )

    fig = px.scatter(
        metrics_clean,
        x="Annualized_Volatility",
        y="Annualized_Return",
        text=metrics_clean.index,
        title="Risk-Return Profile of GSE Stocks",
        labels={
            "Annualized_Volatility": "Annualized Volatility (Risk) %",
            "Annualized_Return": "Annualized Return %",
        },
        color="Sharpe_Ratio_Display",
        color_continuous_scale="RdYlGn",
        size="Size_Value",
        size_max=15,
        hover_data={"Sharpe_Ratio": ":.3f"},
    )

    fig.update_traces(textposition="top center")
    fig.update_layout(height=700, width=900)

    # Add quadrant lines
    fig.add_hline(
        y=metrics["Annualized_Return"].median(),
        line_dash="dash",
        line_color="gray",
        opacity=0.5,
    )
    fig.add_vline(
        x=metrics["Annualized_Volatility"].median(),
        line_dash="dash",
        line_color="gray",
        opacity=0.5,
    )

    return fig

In [11]:
def analyze_returns_distribution(symbol):
    """Analyze the distribution of returns for a stock"""
    from scipy import stats

    df = stock_data[symbol].copy()
    df = df[~df.index.duplicated(keep="last")]

    returns = (
        df["Close"].pct_change(fill_method=None).dropna() * 100
    )  # Convert to percentage

    if len(returns) == 0:
        return None

    # Calculate statistics
    mean_return = returns.mean()
    std_return = returns.std()
    skewness = stats.skew(returns)
    kurt = stats.kurtosis(returns)

    # Create histogram with normal distribution overlay
    fig = go.Figure()

    # Add histogram
    fig.add_trace(
        go.Histogram(
            x=returns, name="Daily Returns", nbinsx=50, histnorm="probability density"
        )
    )

    # Add normal distribution overlay
    x_range = np.linspace(returns.min(), returns.max(), 100)
    normal_dist = stats.norm.pdf(x_range, mean_return, std_return)

    fig.add_trace(
        go.Scatter(
            x=x_range,
            y=normal_dist,
            mode="lines",
            name="Normal Distribution",
            line=dict(color="red", width=2),
        )
    )

    # Add annotations with statistics
    stats_text = (
        f"Mean: {mean_return:.3f}%<br>"
        f"Std Dev: {std_return:.3f}%<br>"
        f"Skewness: {skewness:.3f}<br>"
        f"Kurtosis: {kurt:.3f}"
    )

    fig.add_annotation(
        xref="paper",
        yref="paper",
        x=0.98,
        y=0.98,
        text=stats_text,
        showarrow=False,
        bgcolor="white",
        bordercolor="black",
        borderwidth=1,
        xanchor="right",
        yanchor="top",
    )

    fig.update_layout(
        title=f"{symbol} - Daily Returns Distribution",
        xaxis_title="Daily Return (%)",
        yaxis_title="Density",
        height=600,
        showlegend=True,
    )

    return fig


def plot_multiple_returns_distributions(symbols):
    """Compare return distributions for multiple stocks"""
    fig = go.Figure()

    for symbol in symbols:
        if symbol not in stock_data or stock_data[symbol].empty:
            continue

        df = stock_data[symbol].copy()
        df = df[~df.index.duplicated(keep="last")]
        returns = df["Close"].pct_change(fill_method=None).dropna() * 100

        if len(returns) > 0:
            fig.add_trace(
                go.Violin(
                    y=returns, name=symbol, box_visible=True, meanline_visible=True
                )
            )

    fig.update_layout(
        title="Returns Distribution Comparison",
        yaxis_title="Daily Return (%)",
        height=600,
        showlegend=True,
    )

    return fig

In [12]:
def calculate_correlation_matrix():
    """Calculate correlation matrix between stocks"""
    # Use a common date range for all stocks
    all_dates = pd.DatetimeIndex([])
    for df in stock_data.values():
        if not df.empty:
            # Remove duplicates first
            clean_dates = df.index[~df.index.duplicated(keep="last")]
            all_dates = all_dates.union(clean_dates)

    if len(all_dates) == 0:
        return pd.DataFrame()

    # Use the most recent year
    end_date = all_dates.max()
    start_date = end_date - pd.DateOffset(years=1)

    # Create a DataFrame with aligned dates
    price_data = {}
    for symbol, df in stock_data.items():
        if not df.empty:
            # Clean the data first
            df_clean = df[~df.index.duplicated(keep="last")].copy()
            # Filter for date range
            mask = (df_clean.index >= start_date) & (df_clean.index <= end_date)
            if mask.any():
                price_data[symbol] = df_clean.loc[mask, "Close"]

    if len(price_data) < 2:
        return pd.DataFrame()

    # Create DataFrame with aligned dates
    price_df = pd.DataFrame(price_data)

    # Ensure we have enough data points
    min_required = 30  # Minimum trading days required

    # Forward fill limited to 5 days and drop dates with all missing values
    price_df = price_df.ffill(limit=5)
    price_df = price_df.dropna(how="all")

    # Drop stocks with insufficient data
    valid_counts = price_df.count()
    valid_stocks = valid_counts[valid_counts >= min_required].index
    if len(valid_stocks) < 2:
        return pd.DataFrame()

    price_df = price_df[valid_stocks]

    # Calculate returns and drop any remaining NA rows
    returns_df = price_df.pct_change(fill_method=None)
    returns_df = returns_df.dropna(how="any")

    if len(returns_df) >= min_required:
        # Calculate correlations with minimum periods requirement
        corr_matrix = returns_df.corr(method="pearson", min_periods=min_required)

        # Remove stocks with too many missing correlations
        min_valid = int(len(corr_matrix) * 0.25)  # At least 25% valid correlations
        correlation_counts = corr_matrix.count()
        valid_stocks = correlation_counts.index[correlation_counts >= min_valid]

        if len(valid_stocks) >= 2:
            corr_matrix = corr_matrix.loc[valid_stocks, valid_stocks]
            return corr_matrix

    return pd.DataFrame()


def plot_correlation_heatmap():
    """Create a correlation heatmap"""
    corr_matrix = calculate_correlation_matrix()

    if corr_matrix.empty:
        print("Not enough data to calculate correlations")
        return None

    print(f"Generated correlation matrix for {len(corr_matrix)} stocks")

    # Sort correlation matrix by average correlation
    avg_corr = corr_matrix.mean()
    sorted_cols = avg_corr.sort_values(ascending=False).index
    corr_matrix = corr_matrix.loc[sorted_cols, sorted_cols]

    # Create heatmap with improved layout
    fig = px.imshow(
        corr_matrix,
        title=f"Stock Price Correlation Matrix (1-year returns, {len(corr_matrix)} stocks)",
        labels=dict(color="Correlation"),
        color_continuous_scale="RdBu",
        aspect="auto",
        zmin=-1,
        zmax=1,
    )

    # Update layout for better visibility
    fig.update_layout(height=800, width=800, title_x=0.5, xaxis_tickangle=45)

    return fig

## 5. Example Visualizations

# Sector Analysis

The Ghana Stock Exchange (GSE) includes companies from various sectors. Let's analyze performance by sector to understand industry trends and relationships.

In [13]:
# Define sector classifications
sector_mapping = {
    "Banking": ["EGH", "GCB", "CAL", "SCB", "SOGEGH", "ADB", "RBGH"],
    "Insurance": ["EGL", "SIC"],
    "Telecom": ["MTNGH"],
    "Consumer Goods": ["UNIL", "FML", "GGBL"],
    "Mining": ["ANGLD", "GLD"],
    "Oil & Gas": ["TOTAL", "GOIL"],
    "Manufacturing": ["AYRTN", "PZC"],
    "Agriculture": ["BOPP"],
    "Financial Services": ["EGH", "CAL", "SCB"],
}


def analyze_sector_performance(sector_mapping, start_date=None, end_date=None):
    """Analyze performance by sector"""
    sector_performance = {}

    # Calculate performance for each sector
    for sector, symbols in sector_mapping.items():
        sector_returns = {}
        for symbol in symbols:
            if symbol in stock_data:
                df = stock_data[symbol].copy()

                # Remove duplicate dates first
                df = df[~df.index.duplicated(keep="last")]

                if start_date:
                    df = df[df.index >= start_date]
                if end_date:
                    df = df[df.index <= end_date]

                if not df.empty and len(df) > 1:
                    returns = df["Close"].pct_change(fill_method=None)
                    # Only add if there are valid returns
                    if not returns.dropna().empty:
                        sector_returns[symbol] = returns

        if sector_returns:
            # Combine returns into a DataFrame
            try:
                sector_df = pd.DataFrame(sector_returns)
                # Drop any dates where all values are NaN
                sector_df = sector_df.dropna(how="all")

                if not sector_df.empty:
                    sector_performance[sector] = {
                        "Avg_Daily_Return": sector_df.mean().mean() * 100,
                        "Volatility": sector_df.std().mean() * 100,
                        "Num_Companies": len(sector_returns),
                    }
            except ValueError as e:
                # Skip sector if there's an issue with duplicate indices
                print(f"Skipping {sector} due to data alignment issues: {e}")
                continue

    return pd.DataFrame.from_dict(sector_performance, orient="index")


def plot_sector_performance():
    """Create visualization of sector performance"""
    sector_perf = analyze_sector_performance(sector_mapping)

    # Create subplots for different metrics
    fig = make_subplots(
        rows=2,
        cols=1,
        subplot_titles=("Average Daily Returns by Sector (%)", "Sector Volatility (%)"),
        vertical_spacing=0.15,
    )

    # Add returns bar chart
    fig.add_trace(
        go.Bar(
            x=sector_perf.index, y=sector_perf["Avg_Daily_Return"], name="Daily Returns"
        ),
        row=1,
        col=1,
    )

    # Add volatility bar chart
    fig.add_trace(
        go.Bar(x=sector_perf.index, y=sector_perf["Volatility"], name="Volatility"),
        row=2,
        col=1,
    )

    # Update layout
    fig.update_layout(
        height=800,
        title_text="GSE Sector Analysis",
        showlegend=False,
        xaxis_tickangle=45,
        xaxis2_tickangle=45,
    )

    return fig

## Example Market Analysis

Let's analyze the GSE market performance across different time periods and sectors.

## Portfolio Analysis & Risk Metrics

Now let's analyze risk-adjusted performance using Modern Portfolio Theory concepts.

In [14]:
# Calculate risk-return metrics for all stocks
risk_return_metrics = calculate_risk_return_metrics(risk_free_rate=0.05)

print("=== Risk-Return Metrics ===")
print("\nTop 10 by Sharpe Ratio:")
print(risk_return_metrics.sort_values("Sharpe_Ratio", ascending=False).head(10))

print("\nTop 10 by Annualized Return:")
print(risk_return_metrics.sort_values("Annualized_Return", ascending=False).head(10))

# Plot risk-return scatter
fig = plot_risk_return_scatter(risk_free_rate=0.05)
if fig:
    fig.show()

=== Risk-Return Metrics ===

Top 10 by Sharpe Ratio:
        Annualized_Return  Annualized_Volatility  Sharpe_Ratio  \
MTNGH           44.623245              69.434969      0.570653   
CPC             85.327103             143.831130      0.558482   
BOPP            41.317091              68.900087      0.527098   
GCB             35.491671              58.840888      0.518205   
ACCESS         104.666638             221.484466      0.449994   
GOIL            18.922667              31.374097      0.443763   
TOTAL           33.070609              66.766163      0.420432   
EGL             19.816462              36.521457      0.405692   
SOGEGH          19.166924              35.307410      0.401245   
UNIL            24.108667              50.616569      0.377518   

        Daily_Mean_Return  Daily_Std  
MTNGH            0.177076   4.373992  
CPC              0.338600   9.060510  
BOPP             0.163957   4.340298  
GCB              0.140840   3.706628  
ACCESS           0.415344

  sharpe_ratio = (returns.mean() * 252 - risk_free_rate) / (
  sharpe_ratio = (returns.mean() * 252 - risk_free_rate) / (


In [15]:
# Calculate Beta values relative to market
print("\n=== Beta Analysis ===")
market_index = create_market_index()
print(f"Created equal-weighted market index with {len(market_index)} trading days")

betas = calculate_beta(market_index)
print("\nStock Betas (relative to GSE market):")
print(betas.sort_values("Beta", ascending=False))

print("\nInterpretation:")
print("Beta > 1: More volatile than market (Aggressive)")
print("Beta = 1: Moves with market")
print("Beta < 1: Less volatile than market (Defensive)")


=== Beta Analysis ===
Created equal-weighted market index with 4499 trading days

Stock Betas (relative to GSE market):
                Beta
ALW        32.444333
ACCESS      0.210673
ETI         0.118730
CLYD        0.114386
SIC         0.096456
MTNGH       0.096139
RBGH        0.074469
GCB         0.068467
TOTAL       0.066364
CPC         0.065654
BOPP        0.050222
EGH         0.039481
CAL         0.036963
FML         0.034588
UNIL        0.034226
GGBL        0.031081
SCB         0.029464
EGL         0.024426
SOGEGH      0.023771
GOIL        0.021717
TBL         0.012908
CMLT        0.008649
IIL         0.003763
PBC         0.003105
DASPHARMA   0.002915
SCB PREF    0.001818
SWL         0.001187
TLW         0.000651
ADB         0.000350
MAC         0.000177
HORDS       0.000144
SAMBA       0.000094
ASG         0.000075
AGA         0.000023
MMH         0.000018
DIGICUT     0.000000
ALLGH      -0.001370

Interpretation:
Beta > 1: More volatile than market (Aggressive)
Beta = 1: Moves

In [16]:
# Plot cumulative returns for all stocks
print("\n=== Cumulative Returns Analysis ===")
fig = plot_cumulative_returns()
fig.show()


=== Cumulative Returns Analysis ===


## Technical Analysis Examples

Let's look at some individual stock analysis with technical indicators.

In [17]:
# Bollinger Bands for a specific stock
sample_stock = "MTNGH" if "MTNGH" in stock_data else list(stock_data.keys())[0]
print(f"Analyzing {sample_stock} with Bollinger Bands")
fig = plot_bollinger_bands(sample_stock, window=20, num_std=2)
fig.show()

Analyzing MTNGH with Bollinger Bands


In [18]:
# Analyze returns distribution for a specific stock
print(f"\nReturns Distribution Analysis for {sample_stock}")
fig = analyze_returns_distribution(sample_stock)
if fig:
    fig.show()

# Compare returns distributions across multiple stocks
top_stocks = risk_return_metrics.nlargest(5, "Sharpe_Ratio").index.tolist()
print(f"\nComparing top 5 stocks by Sharpe Ratio: {top_stocks}")
fig = plot_multiple_returns_distributions(top_stocks)
fig.show()


Returns Distribution Analysis for MTNGH



Comparing top 5 stocks by Sharpe Ratio: ['MTNGH', 'CPC', 'BOPP', 'GCB', 'ACCESS']


## Export Charts for Documentation

Let's export key visualizations as interactive HTML files for the repository.

In [19]:
# Export key visualizations to HTML files
import os

# Create charts directory if it doesn't exist
charts_dir = "../charts"
os.makedirs(charts_dir, exist_ok=True)

print("Exporting charts...")

# 1. Risk-Return Scatter Plot
print("1. Risk-Return Scatter...")
fig = plot_risk_return_scatter(risk_free_rate=0.05)
if fig:
    fig.write_html(f"{charts_dir}/risk_return_scatter.html")
    print("   ✓ Saved: risk_return_scatter.html")

# 2. Correlation Heatmap
print("2. Correlation Heatmap...")
fig = plot_correlation_heatmap()
if fig:
    fig.write_html(f"{charts_dir}/correlation_heatmap.html")
    print("   ✓ Saved: correlation_heatmap.html")

# 3. Cumulative Returns
print("3. Cumulative Returns...")
fig = plot_cumulative_returns()
fig.write_html(f"{charts_dir}/cumulative_returns.html")
print("   ✓ Saved: cumulative_returns.html")

# 4. Sector Performance
print("4. Sector Performance...")
fig = plot_sector_performance()
fig.write_html(f"{charts_dir}/sector_performance.html")
print("   ✓ Saved: sector_performance.html")

# 5. Market Overview
print("5. Market Overview...")
fig = plot_market_overview()
fig.write_html(f"{charts_dir}/market_overview.html")
print("   ✓ Saved: market_overview.html")

# 6. Sample Stock - Candlestick with Volume
print("6. Sample Candlestick Chart (MTNGH)...")
fig = plot_stock_candlestick("MTNGH")
fig.write_html(f"{charts_dir}/mtngh_candlestick.html")
print("   ✓ Saved: mtngh_candlestick.html")

# 7. Bollinger Bands
print("7. Bollinger Bands (MTNGH)...")
fig = plot_bollinger_bands("MTNGH")
fig.write_html(f"{charts_dir}/mtngh_bollinger_bands.html")
print("   ✓ Saved: mtngh_bollinger_bands.html")

# 8. Returns Distribution
print("8. Returns Distribution (MTNGH)...")
fig = analyze_returns_distribution("MTNGH")
if fig:
    fig.write_html(f"{charts_dir}/mtngh_returns_distribution.html")
    print("   ✓ Saved: mtngh_returns_distribution.html")

print("\n✅ All charts exported to 'charts/' directory!")

Exporting charts...
1. Risk-Return Scatter...
   ✓ Saved: risk_return_scatter.html
2. Correlation Heatmap...
Generated correlation matrix for 24 stocks
   ✓ Saved: correlation_heatmap.html
3. Cumulative Returns...



divide by zero encountered in scalar divide



   ✓ Saved: cumulative_returns.html
4. Sector Performance...
   ✓ Saved: sector_performance.html
5. Market Overview...
   ✓ Saved: market_overview.html
6. Sample Candlestick Chart (MTNGH)...
   ✓ Saved: mtngh_candlestick.html
7. Bollinger Bands (MTNGH)...
   ✓ Saved: mtngh_bollinger_bands.html
8. Returns Distribution (MTNGH)...
   ✓ Saved: mtngh_returns_distribution.html

✅ All charts exported to 'charts/' directory!


## Summary of Key Findings

This section summarizes the main insights from our analysis of the Ghana Stock Exchange.

In [20]:
# Generate summary statistics for README
print("=" * 80)
print("GHANA STOCK EXCHANGE ANALYSIS - KEY FINDINGS")
print("=" * 80)

# 1. Market Overview
print("\n📊 MARKET OVERVIEW")
print("-" * 80)
print(f"Total Stocks Analyzed: {len(stock_data)}")

# Calculate date range from the data
min_date = min(df.index.min() for df in stock_data.values() if not df.empty)
max_date = max(df.index.max() for df in stock_data.values() if not df.empty)
print(f"Date Range: {min_date.strftime('%Y-%m-%d')} to {max_date.strftime('%Y-%m-%d')}")

# 2. Top Performers by Sharpe Ratio
print("\n🏆 TOP 5 PERFORMERS (by Risk-Adjusted Returns - Sharpe Ratio)")
print("-" * 80)
top_sharpe = risk_return_metrics.nlargest(5, "Sharpe_Ratio")[
    ["Annualized_Return", "Annualized_Volatility", "Sharpe_Ratio"]
]
print(top_sharpe.to_string())

# 3. Highest Returns
print("\n📈 TOP 5 HIGHEST ANNUALIZED RETURNS")
print("-" * 80)
top_returns = risk_return_metrics.nlargest(5, "Annualized_Return")[
    ["Annualized_Return", "Annualized_Volatility", "Sharpe_Ratio"]
]
print(top_returns.to_string())

# 4. Most Volatile Stocks
print("\n⚡ TOP 5 MOST VOLATILE STOCKS")
print("-" * 80)
most_volatile = risk_return_metrics.nlargest(5, "Annualized_Volatility")[
    ["Annualized_Return", "Annualized_Volatility", "Sharpe_Ratio"]
]
print(most_volatile.to_string())

# 5. Beta Analysis
print("\n🎯 BETA ANALYSIS (Market Sensitivity)")
print("-" * 80)
betas_sorted = betas.sort_values("Beta", ascending=False)
print(
    f"Most Aggressive (β > 1): {betas_sorted[betas_sorted['Beta'] > 1].index.tolist()}"
)
print(f"\nTop 5 Defensive Stocks (lowest positive β):")
defensive = betas_sorted[betas_sorted["Beta"] > 0].nsmallest(5, "Beta")
print(defensive.to_string())

# 6. Sector Performance
print("\n🏭 SECTOR PERFORMANCE")
print("-" * 80)
sector_perf = analyze_sector_performance(sector_mapping)
print(sector_perf.sort_values("Avg_Daily_Return", ascending=False).to_string())

# 7. Correlation Insights
print("\n🔗 MARKET CORRELATIONS")
print("-" * 80)
correlation_matrix = calculate_correlation_matrix()
if not correlation_matrix.empty:
    print(f"Stocks with sufficient correlation data: {len(correlation_matrix)}")
    avg_correlation = correlation_matrix.mean().mean()
    print(f"Average market correlation: {avg_correlation:.3f}")
    print(
        f"\nInterpretation: {'Low market correlation suggests good diversification opportunities' if avg_correlation < 0.3 else 'Moderate to high correlation suggests stocks move together'}"
    )
else:
    print("Insufficient data for correlation analysis")

# 8. Market Statistics
print("\n📊 OVERALL MARKET STATISTICS")
print("-" * 80)
print(
    f"Average Annualized Return: {risk_return_metrics['Annualized_Return'].mean():.2f}%"
)
print(
    f"Average Annualized Volatility: {risk_return_metrics['Annualized_Volatility'].mean():.2f}%"
)
print(f"Average Sharpe Ratio: {risk_return_metrics['Sharpe_Ratio'].mean():.3f}")
print(f"Median Beta: {betas_sorted['Beta'].median():.3f}")

print("\n" + "=" * 80)
print("Analysis complete! Check the charts/ directory for interactive visualizations.")
print("=" * 80)

GHANA STOCK EXCHANGE ANALYSIS - KEY FINDINGS

📊 MARKET OVERVIEW
--------------------------------------------------------------------------------
Total Stocks Analyzed: 37
Date Range: 2007-07-03 to 2025-11-07

🏆 TOP 5 PERFORMERS (by Risk-Adjusted Returns - Sharpe Ratio)
--------------------------------------------------------------------------------
        Annualized_Return  Annualized_Volatility  Sharpe_Ratio
MTNGH           44.623245              69.434969      0.570653
CPC             85.327103             143.831130      0.558482
BOPP            41.317091              68.900087      0.527098
GCB             35.491671              58.840888      0.518205
ACCESS         104.666638             221.484466      0.449994

📈 TOP 5 HIGHEST ANNUALIZED RETURNS
--------------------------------------------------------------------------------
        Annualized_Return  Annualized_Volatility  Sharpe_Ratio
ACCESS         104.666638             221.484466      0.449994
CPC             85.327103   

## Advanced Analysis - Additional Technical Indicators

Let's add more sophisticated technical analysis tools.

In [21]:
def calculate_macd(prices, fast=12, slow=26, signal=9):
    """Calculate MACD (Moving Average Convergence Divergence)"""
    ema_fast = prices.ewm(span=fast).mean()
    ema_slow = prices.ewm(span=slow).mean()
    macd_line = ema_fast - ema_slow
    signal_line = macd_line.ewm(span=signal).mean()
    histogram = macd_line - signal_line

    return macd_line, signal_line, histogram


def plot_macd(symbol):
    """Plot price with MACD indicator"""
    df = stock_data[symbol].copy()
    df = df[~df.index.duplicated(keep="last")]

    macd_line, signal_line, histogram = calculate_macd(df["Close"])

    # Create subplots
    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        vertical_spacing=0.03,
        subplot_titles=(f"{symbol} Price", "MACD"),
        row_heights=[0.7, 0.3],
    )

    # Add price
    fig.add_trace(
        go.Scatter(x=df.index, y=df["Close"], name="Price", line=dict(color="blue")),
        row=1,
        col=1,
    )

    # Add MACD
    fig.add_trace(
        go.Scatter(
            x=df.index, y=macd_line, name="MACD", line=dict(color="blue", width=2)
        ),
        row=2,
        col=1,
    )

    fig.add_trace(
        go.Scatter(
            x=df.index, y=signal_line, name="Signal", line=dict(color="red", width=2)
        ),
        row=2,
        col=1,
    )

    # Add histogram
    colors = ["green" if val >= 0 else "red" for val in histogram]
    fig.add_trace(
        go.Bar(x=df.index, y=histogram, name="Histogram", marker_color=colors),
        row=2,
        col=1,
    )

    fig.update_layout(
        title=f"{symbol} - MACD Analysis",
        xaxis2_title="Date",
        yaxis_title="Price",
        yaxis2_title="MACD",
        height=800,
        hovermode="x unified",
        showlegend=True,
    )

    return fig


def calculate_stochastic_oscillator(df, period=14, smooth_k=3, smooth_d=3):
    """Calculate Stochastic Oscillator"""
    low_min = df["Low"].rolling(window=period).min()
    high_max = df["High"].rolling(window=period).max()

    k_percent = 100 * ((df["Close"] - low_min) / (high_max - low_min))
    k_percent = k_percent.rolling(window=smooth_k).mean()
    d_percent = k_percent.rolling(window=smooth_d).mean()

    return k_percent, d_percent


def plot_stochastic(symbol):
    """Plot price with Stochastic Oscillator"""
    df = stock_data[symbol].copy()
    df = df[~df.index.duplicated(keep="last")]

    k_percent, d_percent = calculate_stochastic_oscillator(df)

    # Create subplots
    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        vertical_spacing=0.03,
        subplot_titles=(f"{symbol} Price", "Stochastic Oscillator"),
        row_heights=[0.7, 0.3],
    )

    # Add price
    fig.add_trace(
        go.Scatter(x=df.index, y=df["Close"], name="Price", line=dict(color="blue")),
        row=1,
        col=1,
    )

    # Add Stochastic
    fig.add_trace(
        go.Scatter(x=df.index, y=k_percent, name="%K", line=dict(color="blue")),
        row=2,
        col=1,
    )

    fig.add_trace(
        go.Scatter(x=df.index, y=d_percent, name="%D", line=dict(color="red")),
        row=2,
        col=1,
    )

    # Add overbought/oversold lines
    fig.add_hline(y=80, line_dash="dash", line_color="red", row=2, col=1, opacity=0.5)
    fig.add_hline(y=20, line_dash="dash", line_color="green", row=2, col=1, opacity=0.5)

    fig.update_layout(
        title=f"{symbol} - Stochastic Oscillator",
        xaxis2_title="Date",
        yaxis_title="Price",
        yaxis2_title="Stochastic %",
        height=800,
        hovermode="x unified",
    )

    return fig


print("✅ Advanced technical indicators defined (MACD, Stochastic Oscillator)")

✅ Advanced technical indicators defined (MACD, Stochastic Oscillator)


## Portfolio Optimization - Efficient Frontier

Modern Portfolio Theory (MPT) analysis to find optimal portfolio allocations.

In [22]:
def calculate_efficient_frontier(num_portfolios=5000, risk_free_rate=0.05):
    """
    Calculate the efficient frontier using Monte Carlo simulation

    Parameters:
    -----------
    num_portfolios : int
        Number of random portfolios to generate
    risk_free_rate : float
        Annual risk-free rate for Sharpe ratio calculation

    Returns:
    --------
    results_df : DataFrame
        Portfolio allocations with returns, volatility, and Sharpe ratios
    """
    # Get returns data for stocks with sufficient history
    returns_dict = {}
    for symbol, df in stock_data.items():
        df_clean = df[~df.index.duplicated(keep="last")]
        if len(df_clean) >= 252:  # At least 1 year of data
            returns = df_clean["Close"].pct_change(fill_method=None).dropna()
            returns_dict[symbol] = returns

    if len(returns_dict) < 2:
        print("Not enough stocks with sufficient data for portfolio optimization")
        return None

    # Align returns on common dates
    returns_df = pd.DataFrame(returns_dict)
    returns_df = returns_df.dropna(how="all")

    # Drop stocks with too many missing values
    threshold = len(returns_df) * 0.8  # Keep stocks with at least 80% data
    returns_df = returns_df.dropna(thresh=threshold, axis=1)

    # Forward fill remaining gaps (max 5 days)
    returns_df = returns_df.ffill(limit=5).dropna()

    if len(returns_df.columns) < 2:
        print("Not enough stocks after data cleaning")
        return None

    print(f"Optimizing portfolios with {len(returns_df.columns)} stocks")
    print(f"Stocks included: {list(returns_df.columns)}")

    # Calculate expected returns and covariance
    mean_returns = returns_df.mean() * 252  # Annualized
    cov_matrix = returns_df.cov() * 252  # Annualized

    # Storage for results
    results = np.zeros((num_portfolios, 3 + len(returns_df.columns)))

    np.random.seed(42)  # For reproducibility

    for i in range(num_portfolios):
        # Generate random weights
        weights = np.random.random(len(returns_df.columns))
        weights /= np.sum(weights)  # Normalize to sum to 1

        # Calculate portfolio metrics
        portfolio_return = np.sum(mean_returns * weights)
        portfolio_std = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
        sharpe_ratio = (portfolio_return - risk_free_rate) / portfolio_std

        # Store results
        results[i, 0] = portfolio_return
        results[i, 1] = portfolio_std
        results[i, 2] = sharpe_ratio
        results[i, 3:] = weights

    # Create DataFrame
    columns = ["Return", "Volatility", "Sharpe_Ratio"] + list(returns_df.columns)
    results_df = pd.DataFrame(results, columns=columns)

    return results_df, returns_df.columns


def plot_efficient_frontier(results_df, stock_names):
    """Plot the efficient frontier with key portfolios highlighted"""
    if results_df is None:
        return None

    # Find special portfolios
    max_sharpe_idx = results_df["Sharpe_Ratio"].idxmax()
    min_vol_idx = results_df["Volatility"].idxmin()
    max_return_idx = results_df["Return"].idxmax()

    max_sharpe_portfolio = results_df.loc[max_sharpe_idx]
    min_vol_portfolio = results_df.loc[min_vol_idx]
    max_return_portfolio = results_df.loc[max_return_idx]

    # Create scatter plot
    fig = go.Figure()

    # Add all portfolios
    fig.add_trace(
        go.Scatter(
            x=results_df["Volatility"] * 100,
            y=results_df["Return"] * 100,
            mode="markers",
            marker=dict(
                size=3,
                color=results_df["Sharpe_Ratio"],
                colorscale="Viridis",
                showscale=True,
                colorbar=dict(title="Sharpe<br>Ratio"),
            ),
            name="Portfolios",
            text=results_df["Sharpe_Ratio"].round(3),
            hovertemplate="Return: %{y:.2f}%<br>Volatility: %{x:.2f}%<br>Sharpe: %{text}<extra></extra>",
        )
    )

    # Highlight maximum Sharpe ratio portfolio
    fig.add_trace(
        go.Scatter(
            x=[max_sharpe_portfolio["Volatility"] * 100],
            y=[max_sharpe_portfolio["Return"] * 100],
            mode="markers",
            marker=dict(
                size=20, color="red", symbol="star", line=dict(width=2, color="white")
            ),
            name="Max Sharpe Ratio",
            hovertemplate=f"<b>Max Sharpe Portfolio</b><br>Return: {max_sharpe_portfolio['Return']*100:.2f}%<br>Volatility: {max_sharpe_portfolio['Volatility']*100:.2f}%<br>Sharpe: {max_sharpe_portfolio['Sharpe_Ratio']:.3f}<extra></extra>",
        )
    )

    # Highlight minimum volatility portfolio
    fig.add_trace(
        go.Scatter(
            x=[min_vol_portfolio["Volatility"] * 100],
            y=[min_vol_portfolio["Return"] * 100],
            mode="markers",
            marker=dict(
                size=20,
                color="green",
                symbol="diamond",
                line=dict(width=2, color="white"),
            ),
            name="Min Volatility",
            hovertemplate=f"<b>Min Volatility Portfolio</b><br>Return: {min_vol_portfolio['Return']*100:.2f}%<br>Volatility: {min_vol_portfolio['Volatility']*100:.2f}%<br>Sharpe: {min_vol_portfolio['Sharpe_Ratio']:.3f}<extra></extra>",
        )
    )

    fig.update_layout(
        title="GSE Portfolio Efficient Frontier",
        xaxis_title="Volatility (Risk) %",
        yaxis_title="Expected Annual Return %",
        height=700,
        width=900,
        hovermode="closest",
        showlegend=True,
    )

    # Print portfolio allocations
    print("\n" + "=" * 80)
    print("OPTIMAL PORTFOLIO ALLOCATIONS")
    print("=" * 80)

    print("\n🏆 Maximum Sharpe Ratio Portfolio:")
    print(f"   Expected Return: {max_sharpe_portfolio['Return']*100:.2f}%")
    print(f"   Volatility: {max_sharpe_portfolio['Volatility']*100:.2f}%")
    print(f"   Sharpe Ratio: {max_sharpe_portfolio['Sharpe_Ratio']:.3f}")
    print("\n   Allocations:")
    for stock in stock_names:
        weight = max_sharpe_portfolio[stock]
        if weight > 0.01:  # Only show weights > 1%
            print(f"   {stock}: {weight*100:.2f}%")

    print("\n💎 Minimum Volatility Portfolio:")
    print(f"   Expected Return: {min_vol_portfolio['Return']*100:.2f}%")
    print(f"   Volatility: {min_vol_portfolio['Volatility']*100:.2f}%")
    print(f"   Sharpe Ratio: {min_vol_portfolio['Sharpe_Ratio']:.3f}")
    print("\n   Allocations:")
    for stock in stock_names:
        weight = min_vol_portfolio[stock]
        if weight > 0.01:
            print(f"   {stock}: {weight*100:.2f}%")

    print("=" * 80)

    return fig


print("✅ Portfolio optimization functions defined")

✅ Portfolio optimization functions defined


In [23]:
# Example: MACD Analysis
print("MACD Analysis Example:")
fig = plot_macd("MTNGH")
fig.show()

MACD Analysis Example:


In [24]:
# Example: Stochastic Oscillator
print("\nStochastic Oscillator Example:")
fig = plot_stochastic("MTNGH")
fig.show()


Stochastic Oscillator Example:


In [25]:
# Calculate and plot Efficient Frontier
print("\nCalculating Efficient Frontier...")
results_df, stock_names = calculate_efficient_frontier(num_portfolios=5000)

if results_df is not None:
    fig = plot_efficient_frontier(results_df, stock_names)
    if fig:
        fig.show()
        # Save to charts
        fig.write_html("../charts/efficient_frontier.html")
        print("\n✅ Efficient Frontier chart saved to charts/efficient_frontier.html")


Calculating Efficient Frontier...
Optimizing portfolios with 20 stocks
Stocks included: ['SCB', 'CLYD', 'AGA', 'SIC', 'TOTAL', 'UNIL', 'GOIL', 'BOPP', 'PBC', 'CMLT', 'TBL', 'CAL', 'ETI', 'GCB', 'SCB PREF', 'FML', 'CPC', 'SWL', 'EGL', 'GGBL']
Optimizing portfolios with 20 stocks
Stocks included: ['SCB', 'CLYD', 'AGA', 'SIC', 'TOTAL', 'UNIL', 'GOIL', 'BOPP', 'PBC', 'CMLT', 'TBL', 'CAL', 'ETI', 'GCB', 'SCB PREF', 'FML', 'CPC', 'SWL', 'EGL', 'GGBL']

OPTIMAL PORTFOLIO ALLOCATIONS

🏆 Maximum Sharpe Ratio Portfolio:
   Expected Return: 16.08%
   Volatility: 12.21%
   Sharpe Ratio: 0.908

   Allocations:
   SCB: 2.01%
   CLYD: 1.23%
   AGA: 2.79%
   SIC: 4.46%
   TOTAL: 5.58%
   GOIL: 11.39%
   BOPP: 8.40%
   PBC: 3.79%
   CMLT: 3.74%
   TBL: 5.26%
   CAL: 7.35%
   ETI: 1.72%
   GCB: 2.22%
   SCB PREF: 10.08%
   FML: 3.01%
   CPC: 4.46%
   SWL: 10.22%
   EGL: 7.78%
   GGBL: 4.01%

💎 Minimum Volatility Portfolio:
   Expected Return: 4.35%
   Volatility: 7.09%
   Sharpe Ratio: -0.092

   Alloc


✅ Efficient Frontier chart saved to charts/efficient_frontier.html


## 🎉 Analysis Complete!

This notebook provides comprehensive analysis of the Ghana Stock Exchange including:

### ✅ Completed Analyses:
1. **37 stocks** from the GSE analyzed over historical data (2007-2025)
2. **Risk-Return Analysis** with Sharpe ratios for all stocks
3. **Beta Calculations** showing market sensitivity
4. **Correlation Analysis** across 24 stocks with sufficient data
5. **Sector Performance** comparison across 9 major sectors
6. **Technical Indicators**: Bollinger Bands, RSI, MFI, MACD, Stochastic Oscillator
7. **Portfolio Optimization**: Efficient Frontier with 5,000 simulated portfolios
8. **Interactive Visualizations**: 9 HTML charts exported to `/charts` directory

### 📊 Key Findings:
- **Best Risk-Adjusted Returns**: MTNGH, CPC, BOPP, GCB, ACCESS
- **Optimal Portfolio**: 16.08% return, 12.21% volatility, 0.908 Sharpe ratio
- **Market Characteristics**: Low average correlation (good diversification opportunities)
- **Defensive Stocks**: Banking sector with β < 0.1

### 📁 All Charts Exported:
All interactive visualizations have been saved to `charts/` directory for easy sharing and documentation.

**Next Steps**: Update your GitHub repository with these findings!

In [26]:
# Calculate and display market summary for different time periods

# Get the latest date in our dataset
latest_date = max(df.index.max() for df in stock_data.values() if not df.empty)

# Define time periods
one_year_ago = latest_date - pd.DateOffset(years=1)
six_months_ago = latest_date - pd.DateOffset(months=6)
one_month_ago = latest_date - pd.DateOffset(months=1)

# Calculate summaries
print("=== Market Performance Summary ===")
print("\nLast Month Performance:")
monthly_summary = calculate_market_summary(one_month_ago, latest_date)
print(monthly_summary.sort_values("Total_Return", ascending=False).head())

print("\nLast 6 Months Performance:")
half_year_summary = calculate_market_summary(six_months_ago, latest_date)
print(half_year_summary.sort_values("Total_Return", ascending=False).head())

print("\nOne Year Performance:")
yearly_summary = calculate_market_summary(one_year_ago, latest_date)
print(yearly_summary.sort_values("Total_Return", ascending=False).head())

=== Market Performance Summary ===

Last Month Performance:
        Total_Return  Annualized_Return  Daily_Volatility  \
CLYD      129.411765      611496.088661          4.172903   
SOGEGH     97.115385      124227.006917          4.338614   
FML        46.788991        5527.060564          3.005412   
CPC        33.333333        1950.485650          6.950480   
GGBL       20.658135         618.363730          2.402202   

        Annualized_Volatility  Sharpe_Ratio  Max_Drawdown  Avg_Daily_Volume  \
CLYD                66.242779     14.287988           0.0       4171.750000   
SOGEGH              68.873367     11.268891           0.0      20540.391304   
FML                 47.709436      9.106968           0.0      61596.958333   
CPC                110.335457      3.310064           0.0       5666.666667   
GGBL                38.133768      5.590988           0.0      53670.562500   

        Volume_Trend  
CLYD             NaN  
SOGEGH           NaN  
FML     -3982.411739  
CPC   

In [27]:
# Analyze and visualize sector performance
print("\n=== Sector Analysis ===")
sector_perf = analyze_sector_performance(sector_mapping)
print("\nSector Performance Summary:")
print(sector_perf)

# Plot sector performance
fig = plot_sector_performance()
fig.show()

# Calculate and display correlations between sectors
sector_returns = {}
for sector, symbols in sector_mapping.items():
    sector_prices = {}
    for symbol in symbols:
        if symbol in stock_data:
            df = stock_data[symbol]
            if not df.empty:
                # Remove duplicates
                df_clean = df[~df.index.duplicated(keep="last")]
                prices = df_clean["Close"]
                sector_prices[symbol] = prices

    if sector_prices:
        # Calculate average sector price using DataFrame
        try:
            sector_price_df = pd.DataFrame(sector_prices)
            avg_sector_price = sector_price_df.mean(axis=1)
            sector_returns[sector] = avg_sector_price.pct_change(fill_method=None)
        except ValueError as e:
            print(f"Skipping {sector} correlation due to data issues: {e}")
            continue

if len(sector_returns) > 1:
    sector_corr = pd.DataFrame(sector_returns).corr()

    # Plot sector correlations
    fig = px.imshow(
        sector_corr,
        title="GSE Sector Correlations",
        labels=dict(color="Correlation"),
        color_continuous_scale="RdBu",
        aspect="auto",
        zmin=-1,
        zmax=1,
    )
    fig.update_layout(height=600, width=600, title_x=0.5)
    fig.show()
else:
    print("Not enough sectors with valid data to calculate correlations")


=== Sector Analysis ===

Sector Performance Summary:
                    Avg_Daily_Return  Volatility  Num_Companies
Banking                     0.068401    2.657461              7
Insurance                   0.108914    4.251969              2
Telecom                     0.177076    4.373992              1
Consumer Goods              0.085879    3.132771              3
Oil & Gas                   0.103161    3.091128              2
Agriculture                 0.163957    4.340298              1
Financial Services          0.061999    2.885531              3


In [28]:
# Plot candlestick chart for MTNGH
plot_stock_candlestick("MTNGH")

In [29]:
# Plot market overview
plot_market_overview()

In [30]:
# Calculate and plot correlation matrix
correlation_matrix = calculate_correlation_matrix()
if not correlation_matrix.empty:
    print(f"Successfully calculated correlations for {len(correlation_matrix)} stocks")
    fig = plot_correlation_heatmap()
    if fig is not None:
        fig.show()
else:
    print("Could not generate correlation matrix: insufficient data")

Successfully calculated correlations for 24 stocks
Generated correlation matrix for 24 stocks


In [31]:
# Diagnostic information
def print_data_stats():
    print("Stock data summary:")
    for symbol, df in stock_data.items():
        prices = df[~df.index.duplicated(keep="last")]["Close"]
        valid_prices = prices[~prices.isnull()]
        print(
            f"{symbol}: {len(valid_prices)} valid prices, Range: {valid_prices.index.min()} to {valid_prices.index.max()}"
        )

In [32]:
# Run the diagnostic
print_data_stats()

Stock data summary:
SCB: 4495 valid prices, Range: 2007-07-03 00:00:00 to 2025-11-07 00:00:00
ASG: 822 valid prices, Range: 2022-06-29 00:00:00 to 2025-11-07 00:00:00
CLYD: 4497 valid prices, Range: 2007-07-03 00:00:00 to 2025-11-07 00:00:00
HORDS: 2519 valid prices, Range: 2015-08-11 00:00:00 to 2025-11-07 00:00:00
AGA: 4496 valid prices, Range: 2007-07-03 00:00:00 to 2025-11-07 00:00:00
MTNGH: 1758 valid prices, Range: 2018-09-05 00:00:00 to 2025-11-07 00:00:00
SIC: 4355 valid prices, Range: 2008-01-25 00:00:00 to 2025-11-07 00:00:00
TOTAL: 4485 valid prices, Range: 2007-07-03 00:00:00 to 2025-11-07 00:00:00
UNIL: 4494 valid prices, Range: 2007-07-03 00:00:00 to 2025-11-07 00:00:00
GOIL: 4401 valid prices, Range: 2007-11-16 00:00:00 to 2025-11-07 00:00:00
DASPHARMA: 1424 valid prices, Range: 2020-01-15 00:00:00 to 2025-11-07 00:00:00
MMH: 2542 valid prices, Range: 2015-07-07 00:00:00 to 2025-11-07 00:00:00
ALW: 3350 valid prices, Range: 2007-07-03 00:00:00 to 2023-10-03 00:00:00
BOPP

In [33]:
# More detailed diagnostics
print("\nDetailed correlation diagnostics:")
# Get price data
price_dict = {}
for symbol, df in stock_data.items():
    prices = df[~df.index.duplicated(keep="last")]["Close"]
    if len(prices) > 0:
        price_dict[symbol] = prices

if price_dict:
    print(f"\nNumber of stocks with data: {len(price_dict)}")
    price_df = pd.DataFrame(price_dict)
    print(f"Shape of price DataFrame: {price_df.shape}")
    print(f"Number of non-null values:\n{price_df.count()}")
    print("\nDate range:", price_df.index.min(), "to", price_df.index.max())
else:
    print("No valid price data found")


Detailed correlation diagnostics:

Number of stocks with data: 37
Shape of price DataFrame: (4499, 37)
Number of non-null values:
SCB          4495
ASG           822
CLYD         4497
HORDS        2519
AGA          4496
MTNGH        1758
SIC          4355
TOTAL        4485
UNIL         4494
GOIL         4401
DASPHARMA    1424
MMH          2542
ALW          3350
BOPP         4496
PBC          4004
CMLT         4497
TLW          3513
RBGH         1849
DIGICUT      1857
TBL          4496
CAL          4494
MAC          2838
ETI          4496
GCB          3867
SCB PREF     4178
ALLGH         370
ACCESS       2173
FML          4496
CPC          4497
SWL          3866
EGL          3669
SOGEGH       3038
EGH          2360
SAMBA        2580
GGBL         4496
ADB          2184
IIL          2431
dtype: int64

Date range: 2007-07-03 00:00:00 to 2025-11-07 00:00:00


In [34]:
# Debug correlation calculation
min_required_data_points = 50
price_dict = {}

for symbol, df in stock_data.items():
    prices = df[~df.index.duplicated(keep="last")]["Close"]
    if len(prices) >= min_required_data_points:
        price_dict[symbol] = prices

if price_dict:
    print(f"\nStocks with {min_required_data_points}+ data points: {len(price_dict)}")
    price_df = pd.DataFrame(price_dict)
    print(f"Initial price_df shape: {price_df.shape}")

    # Forward fill
    price_df = price_df.ffill(limit=5)
    print(f"After ffill shape: {price_df.shape}")

    # Calculate returns
    returns_df = price_df.pct_change(fill_method=None)
    returns_df = returns_df.dropna(how="all")
    print(f"Returns shape: {returns_df.shape}")

    # Check correlation matrix
    if len(returns_df) > min_required_data_points:
        corr_matrix = returns_df.corr(
            method="pearson", min_periods=min_required_data_points
        )
        print(f"Correlation matrix shape: {corr_matrix.shape}")

        # Check valid correlations
        min_correlations = float(len(corr_matrix.columns) * 0.25)  # Convert to float
        correlation_counts = corr_matrix.notna().sum()  # Count non-NA values
        valid_stocks = correlation_counts[correlation_counts >= min_correlations].index
        print(f"Stocks with enough correlations: {len(valid_stocks)}")
else:
    print("No stocks with enough data points")


Stocks with 50+ data points: 37
Initial price_df shape: (4499, 37)
After ffill shape: (4499, 37)
Returns shape: (4498, 37)
Correlation matrix shape: (37, 37)
Stocks with enough correlations: 36
