<a href="https://colab.research.google.com/github/tanduong/N/blob/master/PorfolioOptimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install yfinance numpy pandas scipy matplotlib plotly joblib vnstock

In [None]:
from vnstock import Vnstock
from vnstock import Listing, Quote, Company, Finance, Trading, Screener
''

In [4]:
from joblib import Memory
import os

# Set up cache directory
memory = Memory(location='./cache', verbose=0)

@memory.cache
def get_ticker_data(ticker, start, end):
  try:
    stock = Vnstock().stock(symbol=ticker, source='VCI')
    df = stock.quote.history(start=start, end=end, interval='1D')
    data = df[['time', 'close']].copy()
    data.rename(columns={'time': 'Date', 'close': 'Close'}, inplace=True)
    data['Ticker'] = ticker
    return data
  except Exception as e:
    print(f"Error fetching data for {ticker}: {e}")
    return pd.DataFrame()

def get_data(tickers, start, end):
    data = pd.concat([get_ticker_data(ticker, start, end) for ticker in tickers])
    return data.pivot(index='Date', columns='Ticker', values='Close')

def format_weights(weights, tickers, threshold=0.001):
    nonzero = [(ticker, round(float(w), 4)) for ticker, w in zip(tickers, weights) if abs(w) > threshold]
    return ', '.join(f'{ticker}: {w:.2f}' for ticker, w in nonzero)

In [5]:
import io
import pandas as pd

custom_portfolio = pd.read_csv(io.StringIO('''
ACB,7000,$21.80,"$152,600.00",9.89%
FUEVFVND,300,$25.88,"$7,764.00",0.50%
GMD,1000,$42.20,"$42,200.00",2.74%
HPG,17500,$21.30,"$372,750.00",24.16%
MSN,0,$50.30,$0.00,0.00%
NVL,10000,$8.10,"$81,000.00",5.25%
PNJ,4000,$62.80,"$251,200.00",16.28%
REE,100,$61.00,"$6,100.00",0.40%
VCB,0,$52.50,$0.00,0.00%
VNM,12100,$52.00,"$629,200.00",40.78%
'''), header=None)

custom_portfolio = pd.DataFrame({'Ticker': custom_portfolio[0], 'Weight': custom_portfolio[4].str.replace('%', '').astype(float) / 100
})

custom_portfolio = custom_portfolio.set_index('Ticker')['Weight'].to_dict()

In [53]:
import numpy as np
import pandas as pd
import yfinance as yf
from scipy.optimize import minimize
import plotly.express as px
import plotly.graph_objects as go

custom_bounds = {
    'VCB': (0, 0.15),
}

MAX_WEIGHT = 0.1

def portfolio_performance(expected_returns, cov_matrix, weights):
    ret = np.dot(weights, expected_returns)
    vol = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
    return ret, vol

def minimize_volatility(tickers, expected_returns, cov_matrix, target_return):
    num_assets = len(tickers)
    bounds = tuple(custom_bounds.get(ticker, (0, MAX_WEIGHT)) for ticker in tickers)

    constraints = (
        {'type': 'eq', 'fun': lambda w: np.sum(w) - 1},
        {'type': 'eq', 'fun': lambda w: np.dot(w, expected_returns) - target_return}
    )
    initial_guess = num_assets * [1. / num_assets]

    result = minimize(
        lambda w: portfolio_performance(expected_returns, cov_matrix, w)[1],
        initial_guess,
        method='SLSQP',
        bounds=bounds,
        constraints=constraints
    )
    return result

def optimize_portfolio(tickers, expected_returns, cov_matrix, target_returns):
  results = []
  for target in target_returns:
      # Step 4: Optimize performance
      res = minimize_volatility(tickers, expected_returns, cov_matrix, target)
      if res.success:
          ret, vol = portfolio_performance(expected_returns, cov_matrix, res.x)
          results.append({
              'Return': ret,
              'Volatility': vol,
              'Weights': res.x,
              **{f'{ticker}_weight': round(w, 4) for ticker, w in zip(tickers, res.x)}
          })
  return pd.DataFrame(results)

def plot_portfolios(tickers, expected_returns, cov_matrix, results, custom_portfolio={}):
  single_asset_results = []

  for i, ticker in enumerate(tickers):
      w = np.zeros(len(tickers))
      w[i] = 1.0  # 100% in one asset
      ret, vol = portfolio_performance(expected_returns, cov_matrix, w)
      single_asset_results.append({
          'Return': ret,
          'Volatility': vol,
          'Label': f'{ticker}'
      })

  single_df = pd.DataFrame(single_asset_results)

  # Plot efficient frontier
  fig = go.Figure()

  fig.add_trace(go.Scatter(
      x=results['Volatility'], y=results['Return'],
      mode='markers',
      marker=dict(size=8, color='blue', line=dict(width=1, color='DarkSlateGrey')),
      name='Efficient Frontier',
      text=[f"Weights: {format_weights(row['Weights'], tickers)}" for _, row in results.iterrows()],
      hoverinfo='text+y+x'
  ))

  # Add single-stock points
  fig.add_trace(go.Scatter(
      x=single_df['Volatility'], y=single_df['Return'],
      mode='markers+text',
      marker=dict(size=10, color='red', symbol='diamond'),
      text=single_df['Label'],
      name='Single Stocks',
      hoverinfo='text+y+x',
      textposition='top center'
  ))

  fig.update_layout(
      title='Efficient Frontier with Single Stock Portfolios',
      xaxis_title='Risk (Std Dev)',
      yaxis_title='Expected Return',
      template='plotly_white',
      height=1000,
      width=1200
  )


  if custom_portfolio:
    weights = np.array([custom_portfolio.get(t, 0.0) for t in tickers])

    # Compute performance
    custom_return = np.dot(weights, expected_returns)
    custom_volatility = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))

    fig.add_trace(go.Scatter(
        x=[custom_volatility],
        y=[custom_return],
        mode='markers+text',
        marker=dict(size=12, color='green', symbol='star'),
        name='Custom Portfolio',
        textposition='bottom center',
        hoverinfo='text+y+x',
        text=["Custom Portfolio"],
        hovertext=[', '.join(f'{ticker}: {weight*100:.0f}%' for ticker, weight in custom_portfolio.items())],
    ))

  fig.show()

def plot_cov(cov_matrix):
  import plotly.express as px

  fig = px.imshow(
      cov_matrix,
      text_auto=".2f",
      color_continuous_scale='Blues',
      title='Covariance Matrix',
      labels=dict(color='Covariance')
  )
  fig.update_layout(width=1400, height=1400)
  fig.show()

In [7]:
def top_correlated_tickers(cov_matrix, ticker, top_n=10):
    # Convert covariance matrix to correlation matrix
    std_devs = np.sqrt(np.diag(cov_matrix))
    corr_matrix = cov_matrix / np.outer(std_devs, std_devs)
    corr_df = pd.DataFrame(corr_matrix, index=cov_matrix.index, columns=cov_matrix.columns)

    # Drop self-correlation and sort
    correlations = corr_df[ticker].drop(ticker).sort_values(ascending=False)

    return correlations.head(top_n)

## VN30

In [8]:
stock = Vnstock().stock(symbol='VN30F1M', source='VCI')
vn30tickers = list(stock.listing.symbols_by_group('VN30'))

2025-04-10 15:12:24 - vnstock.common.data.data_explorer - INFO - Không phải là mã chứng khoán, thông tin công ty và tài chính không khả dụng.
INFO:vnstock.common.data.data_explorer:Không phải là mã chứng khoán, thông tin công ty và tài chính không khả dụng.


In [9]:
# Step 1: Download historical price data (using 'Close')
# tickers = ['ACB', 'PNJ', 'VNM', 'HPG', 'MWG', 'REE', 'VIC', 'VCB', 'GAS', 'BID', 'HAG']

start = '2015-12-31'
end = '2025-04-10'
data = get_data(vn30tickers, start, end)
returns = data.pct_change().dropna()

# Step 2: Calculate expected returns and covariance matrix
expected_returns = returns.mean() * 252  # annualized
cov_matrix = returns.cov() * 252         # annualized

# Step 3: Generate efficient frontier
target_returns = np.linspace(-0.05, 0.5, 50)

results = optimize_portfolio(vn30tickers, expected_returns, cov_matrix, target_returns)


📋 Kết nối tài khoản Google Drive để lưu các thiết lập của dự án.
Dữ liệu phiên làm việc với Colab của bạn sẽ bị xóa nếu không lưu trữ vào Google Drive.

Mounted at /content/drive


The default fill_method='pad' in DataFrame.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.


In [10]:
plot_portfolios(vn30tickers, expected_returns, cov_matrix, results, custom_portfolio)

In [11]:
plot_cov(cov_matrix)

In [12]:
top_correlated_tickers(cov_matrix, 'HPG', top_n=10)

Unnamed: 0_level_0,HPG
Ticker,Unnamed: 1_level_1
SSI,0.621103
MBB,0.581193
TCB,0.542661
CTG,0.541581
GVR,0.513887
ACB,0.511417
STB,0.510508
VPB,0.506596
VIB,0.500855
TPB,0.490019


In [13]:
top_correlated_tickers(cov_matrix, 'VNM', top_n=10)

Unnamed: 0_level_0,VNM
Ticker,Unnamed: 1_level_1
BVH,0.38728
FPT,0.383886
VCB,0.372956
BID,0.366963
MSN,0.358456
ACB,0.346853
VRE,0.345042
MBB,0.342484
SSI,0.337693
PLX,0.33587


In [14]:
top_correlated_tickers(cov_matrix, 'ACB', top_n=10)

Unnamed: 0_level_0,ACB
Ticker,Unnamed: 1_level_1
MBB,0.738374
TCB,0.69104
CTG,0.662392
STB,0.629827
BID,0.610802
VIB,0.59776
VPB,0.592146
LPB,0.591753
HDB,0.587338
TPB,0.583482


In [15]:
# top_correlated_tickers(cov_matrix, 'PNJ', top_n=10)

## VN100

In [16]:
tickers = list(stock.listing.symbols_by_group('VN100'))
# tickers

In [17]:
expected_returns = None
cov_matrix = None
results = None
returns = None
data = None


# Step 1: Download historical price data (using 'Close')
# Should be longer than a year, should be few years
# start = '2015-12-31'
# end = '2025-04-10'

data = get_data(tickers, start, end)
returns = data.pct_change().dropna()

# Step 2: Calculate expected returns and covariance matrix
expected_returns = returns.mean() * 252  # annualized
cov_matrix = returns.cov() * 252         # annualized

Error fetching data for IMP: Failed to fetch data: 502 - Bad Gateway



The default fill_method='pad' in DataFrame.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.



In [18]:
# Step 3: Generate efficient frontier
target_returns = np.linspace(-0.05, 0.5, 30)
tickers = list(data.columns)
results = optimize_portfolio(tickers, expected_returns, cov_matrix, target_returns)

In [19]:
plot_portfolios(tickers, expected_returns, cov_matrix, results, custom_portfolio)

In [40]:
plot_cov(cov_matrix)

In [32]:
import plotly.graph_objects as go

def plot_price(data, ticker):
  fig = go.Figure()

  fig.add_trace(go.Scatter(
      x=data.index,          # datetime index
      y=data[ticker],       # close price
      mode='lines',
      name='Price',
      line=dict(color='blue')
  ))

  fig.update_layout(
      title=f'{ticker} Price Over Time',
      xaxis_title='Date',
      yaxis_title='Close Price',
      template='plotly_white',
      height=600,
      width=1400
  )

  fig.show()

plot_price(data, 'VIC')

In [33]:
plot_price(data, 'GMD')

In [34]:
plot_price(data, 'VNM')

In [35]:
plot_price(data, 'PNJ')

In [36]:
plot_price(data, 'VCB')

In [37]:
plot_price(data, 'ACB')

In [38]:
plot_price(data, 'FPT')

In [41]:
plot_price(data, 'GAS')

In [44]:
plot_price(data, 'HPG')

In [45]:
plot_price(data, 'NLG')

In [81]:
tickers = ['MBB', 'VCB', 'ACB', 'TCB', 'CTG', 'BID', 'TPB']

data = get_data(tickers, start, end)


In [82]:
returns = data.ffill().pct_change().dropna()

# # Step 2: Calculate expected returns and covariance matrix
expected_returns = returns.mean() * 252  # annualized
cov_matrix = returns.cov() * 252         # annualized

In [83]:
custom_bounds = {
    'VCB': (0, 0.7),
    'MBB': (0, 0.7),
    'ACB': (0, 0.7),
    'TCB': (0, 0.7),
    'CTG': (0, 0.7),
    'BID': (0, 0.7),
    'TCB': (0, 0.7)
}

target_returns = np.linspace(-0.05, 0.5, 30)
tickers = list(data.columns)
results = optimize_portfolio(tickers, expected_returns, cov_matrix, target_returns)

In [84]:
results

Unnamed: 0,Return,Volatility,Weights,ACB_weight,BID_weight,CTG_weight,MBB_weight,TCB_weight,TPB_weight,VCB_weight
0,0.139655,0.236743,"[0.04510152605431818, 0.0, 2.6020852139652106e...",0.0451,0.0,0.0,0.0,0.302,0.1,0.5529
1,0.158621,0.231636,"[0.2717709146038973, 0.0, 2.8189256484623115e-...",0.2718,0.0,0.0,0.1363,0.0,0.1,0.4919


In [85]:
plot_portfolios(tickers, expected_returns, cov_matrix, results, custom_portfolio)

## 1 years ago

In [102]:
expected_returns = None
cov_matrix = None
results = None
returns = None
data = None

tickers = list(stock.listing.symbols_by_group('VN30'))


# Step 1: Download historical price data (using 'Close')
# Should be longer than a year, should be few years
# start = '2015-12-31'
# end = '2024-04-10'

data = get_data(tickers, start, end)
returns = data.pct_change().dropna()

# Step 2: Calculate expected returns and covariance matrix
expected_returns = returns.mean() * 252  # annualized
cov_matrix = returns.cov() * 252         # annualized


The default fill_method='pad' in DataFrame.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.



In [103]:
# Step 3: Generate efficient frontier
target_returns = np.linspace(-0.05, 0.5, 30)

tickers = list(data.columns)

results = optimize_portfolio(tickers, expected_returns, cov_matrix, target_returns)

In [104]:
plot_portfolios(tickers, expected_returns, cov_matrix, results, custom_portfolio)

In [105]:
plot_cov(cov_matrix)

In [1]:
data['VIC']

NameError: name 'data' is not defined