# Capstone 2: ASEAN Investment Vehicle Analysis (Improved)

## 1. Data Collection
We will download historical data for selected ASEAN ETFs, REITs, and Stocks using `yfinance`.

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
# Apply seaborn's 'whitegrid' style to the plots drawn after this call.
sns.set_style('whitegrid')

In [None]:
# Display name -> Yahoo Finance symbol for every instrument in the study.
tickers = {
    'Indonesia ETF': 'EIDA.JK',
    'Singapore ETF': 'EWS',
    'Malaysia ETF': 'EWM',
    'Philippines ETF': 'EPHE',
    'Vietnam ETF': 'VNM',
    'CapitaLand Integ. Comm. Trust': 'C38U.SI',
    'Ascendas REIT': 'A17U.SI',
    'DBS Group': 'D05.SI',
    'Maybank': '1155.KL',
    'BCA': 'BBCA.JK',
}

# Date range passed to yfinance as `start` / `end`.
start_date = '2015-01-01'
end_date = '2024-01-01'

# Downloaded frames keyed by display name; instruments that come back empty
# or raise during download are reported and left out.
data = {}
for name, ticker in tickers.items():
    print(f"Downloading {name} ({ticker})...")
    try:
        df = yf.download(ticker, start=start_date, end=end_date, progress=False)
        if df.empty:
            print(f"Failed to download {ticker}")
            continue
        data[name] = df
    except Exception as e:
        # Best effort: one failing symbol must not abort the remaining downloads.
        print(f"Error downloading {ticker}: {e}")

## 2. Data Cleaning & Preparation
Align dates and handle missing values.

In [None]:
# Combine Close prices
def _extract_close(df):
    """Return the 'Close' prices of one downloaded frame as a Series."""
    close = df['Close']
    # With MultiIndex columns, df['Close'] is a DataFrame rather than a Series;
    # take its first column. An explicit type check replaces the former bare
    # `except:`, which would also have hidden unrelated errors.
    if isinstance(close, pd.DataFrame):
        close = close.iloc[:, 0]
    return close


close_series = {name: _extract_close(df) for name, df in data.items()}

# Outer-join on the union of all trading dates. Assigning columns one by one
# into an empty DataFrame would instead reindex every later asset onto the
# first asset's calendar, making the result depend on dictionary order and
# silently dropping trading days of the other markets.
if close_series:
    close_prices = pd.concat(close_series, axis=1).sort_index()
else:
    # Nothing was downloaded: keep an empty frame so the cell still runs.
    close_prices = pd.DataFrame()

# Forward fill missing data (e.g. a market holiday in one country), then drop
# the leading rows where at least one asset has no price yet.
close_prices = close_prices.ffill().dropna()

print(f"Shape: {close_prices.shape}")
close_prices.head()

## 3. Feature Engineering & Analysis

### Why these features?

We select features that capture both **Trend** and **Volatility** (Risk).

1.  **Daily Returns**: The daily percentage change in price. Fundamental for calculating volatility and comparing performance.
2.  **Volatility (Rolling Standard Deviation)**: Measures risk. Higher volatility implies higher risk. We use a 30-day window.
3.  **Sharpe Ratio (Rolling)**: Measures risk-adjusted return. A higher Sharpe ratio indicates better returns for the same level of risk.
4.  **RSI (Relative Strength Index)**: A momentum oscillator that measures the speed and change of price movements. RSI > 70 is considered overbought, RSI < 30 is oversold.
5.  **MACD (Moving Average Convergence Divergence)**: A trend-following momentum indicator that shows the relationship between two moving averages of a security's price.
    - **Signal Line crossovers** indicate buy/sell signals.

In [None]:
def calculate_technical_indicators(
    prices_df,
    *,
    vol_window=30,
    rsi_window=14,
    macd_fast=12,
    macd_slow=26,
    macd_signal=9,
    periods_per_year=252,
):
    """Compute return, risk and momentum indicators for one price series.

    Parameters
    ----------
    prices_df : pandas.Series
        Prices of a single asset in chronological order. Despite the name, a
        Series is expected: each indicator is stored as a single column.
    vol_window : int, default 30
        Rolling window (in observations) for both volatility and the Sharpe
        ratio's mean return.
    rsi_window : int, default 14
        Rolling window (in price changes) for the RSI averages.
    macd_fast, macd_slow, macd_signal : int, default 12, 26, 9
        EMA spans of the fast line, slow line and signal line of the MACD.
    periods_per_year : int, default 252
        Annualisation factor applied to volatility and mean return.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``prices_df`` with columns ``Returns``, ``Volatility``,
        ``Sharpe``, ``RSI``, ``MACD`` and ``Signal_Line``. Rows are NaN until
        the corresponding rolling window is full.
    """
    indicators = pd.DataFrame(index=prices_df.index)

    # 1. Daily Returns
    indicators['Returns'] = prices_df.pct_change()

    # 2. Volatility: rolling std dev of returns, annualized
    rolling_returns = indicators['Returns'].rolling(window=vol_window)
    indicators['Volatility'] = rolling_returns.std() * np.sqrt(periods_per_year)

    # 3. Sharpe Ratio (annualized, risk-free rate assumed to be 0).
    # The ratio is undefined when volatility is zero, so emit NaN instead of
    # +/-inf, which would survive dropna() and poison later statistics.
    annual_return = rolling_returns.mean() * periods_per_year
    indicators['Sharpe'] = annual_return / indicators['Volatility'].replace(0, np.nan)

    # 4. RSI
    # clip() keeps the leading NaN produced by diff(); replacing it with 0
    # would let the first RSI value average in a fabricated "no change" day.
    delta = prices_df.diff()
    gain = delta.clip(lower=0).rolling(window=rsi_window).mean()
    loss = (-delta).clip(lower=0).rolling(window=rsi_window).mean()
    # Same as 100 - 100 / (1 + gain / loss) without the inf intermediate:
    # 100 when there were no losses, 0 when there were no gains, and NaN
    # when the price did not move at all in the window.
    indicators['RSI'] = 100 * gain / (gain + loss)

    # 5. MACD: fast EMA minus slow EMA, plus an EMA of that difference
    fast_ema = prices_df.ewm(span=macd_fast, adjust=False).mean()
    slow_ema = prices_df.ewm(span=macd_slow, adjust=False).mean()
    indicators['MACD'] = fast_ema - slow_ema
    indicators['Signal_Line'] = indicators['MACD'].ewm(span=macd_signal, adjust=False).mean()

    return indicators

# Demonstrate the indicators on one instrument (e.g., Singapore ETF);
# `sample_asset` must be a column of `close_prices`.
sample_asset = 'Singapore ETF'
sample_data = close_prices.loc[:, sample_asset]

# Keep only rows where every indicator is available (no NaN in any column).
all_indicators = calculate_technical_indicators(sample_data)
features_df = all_indicators.dropna()

features_df.head()

## 4. Improved EDA & Visual Insights

### 4.1 Correlation Heatmap
Understanding the relationship between technical indicators helps in feature selection. Highly correlated features might be redundant.

In [None]:
# Pairwise correlation of the engineered features for the sample asset.
heatmap_columns = ['Returns', 'Volatility', 'RSI', 'MACD', 'Sharpe']
correlation_matrix = features_df[heatmap_columns].corr()

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
ax.set_title(f'Feature Correlation Heatmap: {sample_asset}')
plt.show()

### 4.2 Distribution of Returns and Volatility
Let's define a simple target variable to analyze distributions: **"Attractive"** vs **"Unattractive"**.
- **Attractive**: Future 1-month (21 trading days) return > 0
- **Unattractive**: Future 1-month (21 trading days) return <= 0

In [None]:
# Label each day by the sign of the return over the following 21 trading days
# (about one month): 1 / 'Attractive' if positive, 0 / 'Unattractive' otherwise.
future_return = sample_data.pct_change(21).shift(-21)
features_df['Future_Return'] = future_return

# The last 21 rows have no future return yet; drop them before labelling so a
# missing value is not mistaken for a non-positive return.
features_df = features_df.dropna()
is_positive = features_df['Future_Return'] > 0
features_df['Target'] = is_positive.astype(int)
features_df['Label'] = features_df['Target'].map({1: 'Attractive', 0: 'Unattractive'})

# One density plot per feature, split by class: (column, palette, title).
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
panels = [
    ('RSI', 'viridis', 'RSI Distribution by Class'),
    ('Volatility', 'magma', 'Volatility Distribution by Class'),
]
for ax, (column, palette, title) in zip(axes, panels):
    sns.kdeplot(data=features_df, x=column, hue='Label', fill=True, ax=ax, palette=palette)
    ax.set_title(title)

plt.tight_layout()
plt.show()

**Insight**: 
- We look for separation between the two class distributions. For example, the "Attractive" class may tend to have a lower RSI (buying the dip) or distinctive volatility characteristics.
- Overlaps indicate that a single feature might not be enough for classification, justifying the need for machine learning models.

### 4.3 Technical Indicators Overlay
Visualizing the price alongside RSI (with its overbought/oversold thresholds) and MACD (with its signal line) in stacked panels that share a date axis.

In [None]:
# Price, RSI and MACD for the most recent 252 rows (about one trading year),
# drawn as three stacked panels sharing the date axis.
plot_data = features_df.iloc[-252:].copy()
price_data = sample_data.loc[plot_data.index]
dates = plot_data.index

fig, (ax1, ax2, ax3) = plt.subplots(
    nrows=3,
    ncols=1,
    figsize=(14, 12),
    sharex=True,
    gridspec_kw={'height_ratios': [2, 1, 1]},
)

# Top panel: price
ax1.plot(price_data.index, price_data, label='Price', color='black')
ax1.set_title(f'{sample_asset} Price Trend')
ax1.legend()

# Middle panel: RSI with the conventional 70 / 30 threshold lines
ax2.plot(dates, plot_data['RSI'], label='RSI', color='purple')
rsi_thresholds = (
    (70, 'red', 'Overbought (70)'),
    (30, 'green', 'Oversold (30)'),
)
for level, colour, text in rsi_thresholds:
    ax2.axhline(level, linestyle='--', color=colour, alpha=0.5, label=text)
ax2.set_title('Relative Strength Index (RSI)')
ax2.legend()

# Bottom panel: MACD, its signal line, and their difference as bars
macd_line = plot_data['MACD']
signal_line = plot_data['Signal_Line']
ax3.plot(dates, macd_line, label='MACD', color='blue')
ax3.plot(dates, signal_line, label='Signal Line', color='orange')
ax3.bar(dates, macd_line - signal_line, color='gray', alpha=0.3, label='Histogram')
ax3.set_title('MACD')
ax3.legend()

plt.show()

## 5. Conclusion on Features
The visualizations suggest that features like RSI and Volatility capture different aspects of market dynamics. The correlation matrix lets us prune highly collinear features if necessary, and the distribution plots help check whether the chosen features have discriminative power for our target labels.