# Random Variable

### This code for run in Google Colab

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# --- 1. Load Historical Data ---
# List of assets to analyze
tickers = ["AAPL", "JNJ", "JPM", "XOM"]
base_url = "https://raw.githubusercontent.com/tranmaithang/tuhocquantfinance/master/assets/files/market_data/"

all_data = []

print("Loading data from public URL...")
for ticker in tickers:
    # Construct the full URL for each CSV file
    url = f"{base_url}{ticker}.csv"
    try:
        # Load the CSV file directly from the URL
        temp_df = pd.read_csv(url)
        # Ensure Date is datetime if loading from CSV
        temp_df['timestamp'] = pd.to_datetime(temp_df['timestamp'])
        temp_df['ticker'] = ticker
        all_data.append(temp_df)
        print(f"Loaded {ticker} successfully!")
    except Exception as e:
        print(f"Error loading {ticker}: {e}")

# Combine all individual dataframes into one master dataframe
if all_data:
    # Use ignore_index=True to create a fresh index for the merged data
    df = pd.concat(all_data, ignore_index=True)
else:
    print("No data was loaded. Please check the URLs or internet connection.")

# Transform to Wide Format for Finance Analysis
df_pivot = df.pivot(index='timestamp', columns='ticker', values='close')

Loading data from public URL...
Loaded AAPL successfully!
Loaded JNJ successfully!
Loaded JPM successfully!
Loaded XOM successfully!


### This code for run in Local

In [2]:
import numpy as np
import pandas as pd
import os

# --- 1. Load Historical Data ---
tickers = ["AAPL", "JNJ", "JPM", "XOM"]
folder_name = "market_data"
all_data = []

# Load only specified files from the local folder
print(f"Loading tickers from: {folder_name}...")

for ticker in tickers:
    # Construct the file path for each ticker
    file_name = f"{ticker}.csv"
    file_path = os.path.join(folder_name, file_name)
    
    # Check if the file exists before attempting to read it
    if os.path.exists(file_path):
        try:
            # Read the CSV file
            temp_df = pd.read_csv(file_path)
             # Ensure Date is datetime if loading from CSV
            temp_df['timestamp'] = pd.to_datetime(temp_df['timestamp'])
            temp_df['ticker'] = ticker
            all_data.append(temp_df)
            print(f"Successfully loaded: {ticker}")
        except Exception as e:
            print(f"Error reading {file_name}: {e}")
    else:
        # Inform the user if a requested ticker file is missing
        print(f"Warning: File {file_name} not found in '{folder_name}'. Skipping.")

# Combine all individual dataframes into one master dataframe
if all_data:
    # Use ignore_index=True to create a fresh index for the merged data
    df = pd.concat(all_data, ignore_index=True)
else:
    print("No data was loaded. Please check the URLs or internet connection.")

# Transform to Wide Format for Finance Analysis
df_pivot = df.pivot(index='timestamp', columns='ticker', values='close')

Loading tickers from: market_data...
Successfully loaded: AAPL
Successfully loaded: JNJ
Successfully loaded: JPM
Successfully loaded: XOM


In [3]:
# Inspect data
df_pivot

ticker,AAPL,JNJ,JPM,XOM
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-01-16 05:00:00,183.63,160.52,167.99,97.69
2024-01-17 05:00:00,182.68,160.43,167.09,96.98
2024-01-18 05:00:00,188.63,161.21,167.42,96.80
2024-01-19 05:00:00,191.56,161.68,170.31,96.95
2024-01-22 05:00:00,193.89,162.47,170.11,96.82
...,...,...,...,...
2026-01-08 05:00:00,259.04,205.75,329.79,122.91
2026-01-09 05:00:00,259.37,204.39,329.19,124.61
2026-01-12 05:00:00,260.25,209.72,324.49,124.03
2026-01-13 05:00:00,261.05,213.65,310.90,126.54


In [4]:
# --- 2. EXPECTED RETURN (E[R]) ---
# Calculate Log Returns: ln(P_t / P_{t-1})
log_returns = np.log(df_pivot / df_pivot.shift(1)).dropna()

# Method 1: Mathematical Definition E[X] = sum(X) / n
expected_return_def = log_returns.sum() / len(log_returns)

# Method 2: Python built-in function
expected_return_func = log_returns.mean()

print("--- Comparison: Expected Returns (Annually) ---")
print(expected_return_func * 252)

--- Comparison: Expected Returns (Annually) ---
ticker
AAPL    0.174843
JNJ     0.155222
JPM     0.304700
XOM     0.144496
dtype: float64


In [5]:
# --- 3. VARIANCE & STD DEV ---
# Method 1: Mathematical Definition Var(X) = E[X^2] - (E[X])^2
variance_def = (log_returns**2).mean() - (log_returns.mean())**2

# Method 2: Python built-in function
# Note: ddof=0 is used to match the population variance definition in math
variance_func = log_returns.var(ddof=0)

print("--- Comparison: Volatility (Annually) ---")
print(np.sqrt(variance_func) * np.sqrt(252))

--- Comparison: Volatility (Annually) ---
ticker
AAPL    0.276182
JNJ     0.177937
JPM     0.245571
XOM     0.219428
dtype: float64


In [6]:
# --- 4. COVARIANCE & CORRELATION MATRIX ---
# Calculate the Covariance Matrix
cov_matrix = log_returns.cov()

# Calculate the Correlation Matrix
cor_matrix = log_returns.corr()

print("\n--- Covariance Matrix ---")
print(cov_matrix)
print("\n--- Correlation Matrix ---")
print(cor_matrix)


--- Covariance Matrix ---
ticker      AAPL       JNJ       JPM       XOM
ticker                                        
AAPL    0.000303  0.000008  0.000083  0.000056
JNJ     0.000008  0.000126  0.000019  0.000024
JPM     0.000083  0.000019  0.000240  0.000070
XOM     0.000056  0.000024  0.000070  0.000191

--- Correlation Matrix ---
ticker      AAPL       JNJ       JPM       XOM
ticker                                        
AAPL    1.000000  0.039730  0.307609  0.233967
JNJ     0.039730  1.000000  0.111412  0.157417
JPM     0.307609  0.111412  1.000000  0.328261
XOM     0.233967  0.157417  0.328261  1.000000


In [7]:
# --- 5. PORTFOLIO RISK CALCULATION ---
# Define weights (25% for each stock)
weights = np.array([0.25, 0.25, 0.25, 0.25])

# Formula: Var_p = w.T * Sigma * w
portfolio_variance_daily = weights.T @ cov_matrix @ weights
portfolio_volatility_daily = np.sqrt(portfolio_variance_daily)

# Annualize the volatility
portfolio_volatility_annual = portfolio_volatility_daily * np.sqrt(252)
print(f"Annualized Portfolio Volatility: {portfolio_volatility_annual:.4f}")

# Compare with individual volatilities
individual_vols = np.sqrt(np.diag(cov_matrix)) * np.sqrt(252)
print("\nIndividual Annualized Volatilities:")
print(individual_vols)

Annualized Portfolio Volatility: 0.1476

Individual Annualized Volatilities:
[0.27645808 0.17811493 0.24581655 0.21964695]
