# Portfolio Management Analysis

## Introduction to Portfolio Management - Coursera Course

This notebook contains analysis and exercises for the Introduction to Portfolio Management course.

### Learning Objectives:
- Understand portfolio theory fundamentals
- Analyze risk and return relationships
- Implement portfolio optimization techniques
- Evaluate portfolio performance metrics

---


## 1. Import Required Libraries

First, let's import the necessary Python libraries for our analysis.


In [None]:
# Data manipulation and analysis
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Financial analysis
import yfinance as yf  # For downloading financial data
from scipy import stats
from scipy.optimize import minimize

# Plot appearance: seaborn-flavoured matplotlib style plus the "husl" palette
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Display options: set both pandas display limits to None
for display_option in ('display.max_columns', 'display.width'):
    pd.set_option(display_option, None)

# Confirm the imports worked and record the library versions in use
print(
    "Libraries imported successfully!",
    f"Pandas version: {pd.__version__}",
    f"NumPy version: {np.__version__}",
    sep="\n",
)


: 

## 2. Load and Explore Data

Let's load the financial data files and explore their structure.


In [None]:
# Read the price history; the first CSV column becomes the (date-parsed) index
sample_prices = pd.read_csv(
    'data/sample_prices.csv',
    index_col=0,
    parse_dates=True,
)

# Overview: dimensions and the span of the index
print("Sample Prices Data:")
print(f"Shape: {sample_prices.shape}")
print(f"Date range: {sample_prices.index.min()} to {sample_prices.index.max()}")

# Each call prints a heading followed by the matching table on the next line
print("\nFirst few rows:", sample_prices.head(), sep="\n")
print("\nData types:", sample_prices.dtypes, sep="\n")
print("\nBasic statistics:", sample_prices.describe(), sep="\n")


## 3. Calculate Returns

Calculate simple and logarithmic returns from price data.


In [None]:
# Simple returns: period-over-period percentage change, first (NaN) row dropped
simple_returns = sample_prices.pct_change().dropna()

# Log returns: natural log of each price relative to the previous row's price
previous_prices = sample_prices.shift(1)
log_returns = np.log(sample_prices / previous_prices).dropna()

# Each call prints a heading followed by the matching table on the next line
print("Simple Returns:", simple_returns.head(), sep="\n")
print("\nLog Returns:", log_returns.head(), sep="\n")
print("\nReturns Statistics:", simple_returns.describe(), sep="\n")


## 4. Risk and Return Analysis

Analyze the risk-return characteristics of individual assets.


In [None]:
# Scale per-period mean and standard deviation to yearly figures,
# assuming 252 trading days per year
annualized_returns = 252 * simple_returns.mean()
annualized_volatility = np.sqrt(252) * simple_returns.std()

# One row per asset; the ratio column is return divided by volatility
# (no risk-free rate is subtracted)
risk_return_summary = pd.DataFrame({
    'Annualized Return': annualized_returns,
    'Annualized Volatility': annualized_volatility,
})
risk_return_summary['Sharpe Ratio'] = (
    risk_return_summary['Annualized Return']
    / risk_return_summary['Annualized Volatility']
)

print("Risk-Return Summary:")
print(risk_return_summary.round(4))

# Scatter plot: volatility on the x-axis, return on the y-axis
plt.figure(figsize=(10, 6))
plt.scatter(
    risk_return_summary['Annualized Volatility'],
    risk_return_summary['Annualized Return'],
    s=100,
    alpha=0.7,
)

# Label every point with its asset name, nudged 5 points up and to the right
for asset, volatility, annual_return in zip(
    risk_return_summary.index,
    risk_return_summary['Annualized Volatility'],
    risk_return_summary['Annualized Return'],
):
    plt.annotate(
        asset,
        (volatility, annual_return),
        xytext=(5, 5),
        textcoords='offset points',
    )

plt.xlabel('Annualized Volatility')
plt.ylabel('Annualized Return')
plt.title('Risk vs Return Analysis')
plt.grid(True, alpha=0.3)
plt.show()


## 5. Portfolio Optimization

Implement basic portfolio optimization techniques using modern portfolio theory.


In [None]:
# Portfolio optimization functions
def portfolio_performance(weights, returns, cov_matrix, periods_per_year=252):
    """
    Calculate annualized portfolio performance metrics.

    Parameters
    ----------
    weights : 1-D array of asset weights, in the same order as the
        columns of ``returns``.
    returns : object exposing ``.mean()`` that yields one mean per asset
        (the notebook passes a DataFrame of per-period returns).
    cov_matrix : covariance matrix of the per-period returns.
    periods_per_year : number of return periods in a year used for
        annualization. Defaults to 252 (daily trading data); pass 12 for
        monthly data.

    Returns
    -------
    tuple
        ``(portfolio_return, portfolio_volatility, sharpe_ratio)`` where the
        ratio is return divided by volatility (no risk-free rate is
        subtracted).
    """
    # Weighted average of per-period mean returns, scaled linearly to a year
    portfolio_return = np.sum(weights * returns.mean()) * periods_per_year

    # Quadratic form w' * (cov * periods) * w gives the annualized variance
    annualized_cov = cov_matrix * periods_per_year
    portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(annualized_cov, weights)))

    sharpe_ratio = portfolio_return / portfolio_volatility
    return portfolio_return, portfolio_volatility, sharpe_ratio

def negative_sharpe_ratio(weights, returns, cov_matrix):
    """
    Objective for a minimizer: the portfolio's Sharpe ratio with its sign
    flipped, so that minimizing this value maximizes the ratio.
    """
    _, _, sharpe = portfolio_performance(weights, returns, cov_matrix)
    return -sharpe

# Equal weight portfolio: every asset receives the same share
n_assets = len(simple_returns.columns)
equal_weights = np.array([1/n_assets] * n_assets)

# Covariance matrix of the per-period simple returns
cov_matrix = simple_returns.cov()

# Equal weight portfolio performance (baseline to compare the optimum against)
ew_return, ew_volatility, ew_sharpe = portfolio_performance(
    equal_weights, simple_returns, cov_matrix
)

print("Equal Weight Portfolio:")
print(f"Return: {ew_return:.4f}")
print(f"Volatility: {ew_volatility:.4f}")
print(f"Sharpe Ratio: {ew_sharpe:.4f}")

# Optimize portfolio for maximum Sharpe ratio.
# Constraint: weights sum to 1. Bounds: each weight stays within [0, 1].
constraints = {'type': 'eq', 'fun': lambda x: np.sum(x) - 1}
bounds = tuple((0, 1) for _ in range(n_assets))

# Start the search from the equal-weight allocation
optimal_result = minimize(negative_sharpe_ratio,
                          equal_weights,
                          args=(simple_returns, cov_matrix),
                          method='SLSQP',
                          bounds=bounds,
                          constraints=constraints)

# The solver reports failure through the result object rather than raising,
# so surface it instead of silently presenting unconverged weights.
if not optimal_result.success:
    print(f"\nWarning: optimizer did not converge ({optimal_result.message}); "
          "the weights below may not be optimal.")

optimal_weights = optimal_result.x
opt_return, opt_volatility, opt_sharpe = portfolio_performance(
    optimal_weights, simple_returns, cov_matrix
)

print("\nOptimal Portfolio:")
print(f"Return: {opt_return:.4f}")
print(f"Volatility: {opt_volatility:.4f}")
print(f"Sharpe Ratio: {opt_sharpe:.4f}")

print("\nOptimal Weights:")
for asset, weight in zip(simple_returns.columns, optimal_weights):
    print(f"{asset}: {weight:.4f}")


## 6. Portfolio Data Analysis - Lo 20 and Hi 20 Quintiles

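Analysis of the `Portfolios_Formed_on_ME_monthly_EW.csv` data, focusing on the lowest and highest quintile portfolios (Lo 20 and Hi 20). Note: the cells in this section appear in reverse execution order. Run the data-loading cell (last) first, then the annualized-return cell, and finally the comparison cell, because each depends on variables defined by the one after it.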

In [None]:
# Compare Lo 20 and Hi 20 portfolios.
# Requires portfolio_df, lo20_results and hi20_results to exist already.
# Everything, including figure creation, is guarded by the column check so
# that no empty figure is produced when either column is missing.
if 'Lo 20' in portfolio_df.columns and 'Hi 20' in portfolio_df.columns:
    print("\n" + "="*60)
    print("PORTFOLIO COMPARISON SUMMARY")
    print("="*60)

    # Create comparison table (values pre-formatted as display strings)
    comparison_df = pd.DataFrame({
        'Lo 20 (Lowest Quintile)': [
            f"{lo20_results['annualized_geometric']*100:.4f}%",
            f"{lo20_results['annualized_volatility']*100:.4f}%",
            f"{lo20_results['observations']} months"
        ],
        'Hi 20 (Highest Quintile)': [
            f"{hi20_results['annualized_geometric']*100:.4f}%",
            f"{hi20_results['annualized_volatility']*100:.4f}%",
            f"{hi20_results['observations']} months"
        ]
    }, index=['Annualized Return', 'Annualized Volatility', 'Data Points'])

    print(comparison_df)

    # Calculate size premium (small minus big)
    size_premium = lo20_results['annualized_geometric'] - hi20_results['annualized_geometric']
    print(f"\nSize Premium (Lo 20 - Hi 20): {size_premium*100:.4f}%")
    print("This represents the additional return from investing in small-cap stocks vs large-cap stocks.")

    # Visualize the returns over time
    plt.figure(figsize=(15, 8))

    # Clean the data: -99.99 is treated as a missing-value sentinel, and the
    # remaining values are converted from percent to decimal
    lo20_clean = portfolio_df['Lo 20'].replace(-99.99, np.nan).dropna() / 100
    hi20_clean = portfolio_df['Hi 20'].replace(-99.99, np.nan).dropna() / 100

    # Calculate cumulative returns (growth of one unit invested at the start)
    lo20_cumulative = (1 + lo20_clean).cumprod()
    hi20_cumulative = (1 + hi20_clean).cumprod()

    # Top panel: cumulative growth on a log scale
    plt.subplot(2, 1, 1)
    plt.plot(lo20_cumulative.index, lo20_cumulative, label='Lo 20 (Small Caps)', linewidth=2)
    plt.plot(hi20_cumulative.index, hi20_cumulative, label='Hi 20 (Large Caps)', linewidth=2)
    plt.title('Cumulative Returns: Lo 20 vs Hi 20 Portfolios', fontsize=14)
    plt.ylabel('Cumulative Return (Log Scale)')
    plt.yscale('log')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Bottom panel: per-period returns, converted back to percent for display
    plt.subplot(2, 1, 2)
    plt.plot(lo20_clean.index, lo20_clean * 100, label='Lo 20 Monthly Returns', alpha=0.7)
    plt.plot(hi20_clean.index, hi20_clean * 100, label='Hi 20 Monthly Returns', alpha=0.7)
    plt.title('Monthly Returns Over Time', fontsize=14)
    plt.ylabel('Monthly Return (%)')
    plt.xlabel('Date')
    plt.legend()
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

In [None]:
# Calculate annualized returns for Lo 20 and Hi 20 portfolios
# The data appears to be in percentage form, so we need to convert to decimal

def calculate_annualized_return(returns_series, series_name, missing_value=-99.99):
    """
    Summarize a monthly percentage-return series and annualize it.

    Prints a short report and returns the computed figures.

    Parameters
    ----------
    returns_series : pandas Series of monthly returns expressed in percent
        (e.g. 1.5 means 1.5%).
    series_name : label used in the printed report and in error messages.
    missing_value : sentinel marking missing observations; matching entries
        are dropped before any calculation. Defaults to -99.99.

    Returns
    -------
    dict
        Keys ``mean_monthly``, ``annualized_arithmetic``,
        ``annualized_geometric``, ``annualized_volatility`` (all in decimal
        form) and ``observations`` (count of valid months).

    Raises
    ------
    ValueError
        If no valid observations remain after removing missing values;
        annualizing over zero periods is undefined.
    """
    # Clean data: replace the missing-value sentinel with NaN and remove it
    clean_returns = returns_series.replace(missing_value, np.nan).dropna()

    # Fail early with a clear message instead of dividing by zero years below
    if len(clean_returns) == 0:
        raise ValueError(
            f"{series_name}: no valid return observations to annualize"
        )

    # Convert from percentage to decimal
    returns_decimal = clean_returns / 100

    print(f"\n{series_name} Portfolio Analysis:")
    print(f"Number of observations: {len(returns_decimal)}")
    print(f"Period: {len(returns_decimal)} months ({len(returns_decimal)/12:.1f} years)")
    print(f"Data range: {returns_decimal.index.min()} to {returns_decimal.index.max()}")

    # Method 1: arithmetic mean scaled by 12 months
    mean_monthly_return = returns_decimal.mean()
    annualized_return_arithmetic = mean_monthly_return * 12

    # Method 2: geometric (compound) return - total growth raised to 1/years
    compound_return = (1 + returns_decimal).prod()
    years = len(returns_decimal) / 12
    annualized_return_geometric = compound_return**(1/years) - 1

    # Volatility: monthly standard deviation scaled by sqrt(12)
    monthly_std = returns_decimal.std()
    annualized_volatility = monthly_std * np.sqrt(12)

    print(f"Average monthly return: {mean_monthly_return:.6f} ({mean_monthly_return*100:.4f}%)")
    print(f"Monthly volatility: {monthly_std:.6f} ({monthly_std*100:.4f}%)")
    print(f"Annualized return (arithmetic): {annualized_return_arithmetic:.6f} ({annualized_return_arithmetic*100:.4f}%)")
    print(f"Annualized return (geometric): {annualized_return_geometric:.6f} ({annualized_return_geometric*100:.4f}%)")
    print(f"Annualized volatility: {annualized_volatility:.6f} ({annualized_volatility*100:.4f}%)")

    return {
        'mean_monthly': mean_monthly_return,
        'annualized_arithmetic': annualized_return_arithmetic,
        'annualized_geometric': annualized_return_geometric,
        'annualized_volatility': annualized_volatility,
        'observations': len(returns_decimal)
    }

# Lowest quintile: run the analysis only when the column is present,
# then report its geometric annualized return as the headline figure
if 'Lo 20' in portfolio_df:
    lo20_results = calculate_annualized_return(
        portfolio_df['Lo 20'],
        'Lo 20',
    )
    print(f"\n*** ANSWER: The annualized return of the Lo 20 portfolio is {lo20_results['annualized_geometric']*100:.4f}% ***")

# Highest quintile: same analysis, no headline line
if 'Hi 20' in portfolio_df:
    hi20_results = calculate_annualized_return(
        portfolio_df['Hi 20'],
        'Hi 20',
    )

In [None]:
# Load the portfolio data
portfolio_df = pd.read_csv('data/Portfolios_Formed_on_ME_monthly_EW.csv')

# Display basic info about the dataset
print("Portfolio Dataset Info:")
print(f"Shape: {portfolio_df.shape}")
print("\nColumn names:")
print(portfolio_df.columns.tolist())
print("\nFirst few rows:")
print(portfolio_df.head())

# Convert first column (parsed as YYYYMM) to datetime and set it as the index.
# The column is reassigned by label rather than written through .iloc, so the
# column is replaced outright instead of datetimes being set into a column of
# a different dtype (deprecated in recent pandas).
date_column = portfolio_df.columns[0]
portfolio_df[date_column] = pd.to_datetime(
    portfolio_df[date_column].astype(str), format='%Y%m'
)
portfolio_df = portfolio_df.set_index(date_column)

print("\nData after processing:")
print(f"Date range: {portfolio_df.index.min()} to {portfolio_df.index.max()}")
print(f"Number of months: {len(portfolio_df)}")
print(f"Number of years: {len(portfolio_df)/12:.1f}")

# Check for Lo 20 and Hi 20 columns
print(f"\nLo 20 column exists: {'Lo 20' in portfolio_df.columns}")
print(f"Hi 20 column exists: {'Hi 20' in portfolio_df.columns}")

if 'Lo 20' in portfolio_df.columns:
    print("\nLo 20 sample data:")
    print(portfolio_df['Lo 20'].head(10))