In [1]:
from vnstock import Listing, Quote, Company, Finance
import pandas as pd
from typing import Callable
import warnings
import time

warnings.filterwarnings("ignore")

In [2]:
listing = Listing(source="VCI")

In [3]:
vn30_symbols = listing.symbols_by_group("VN30").to_list()

In [4]:
df_company = listing.symbols_by_industries()
df_company = df_company[df_company["symbol"].isin(vn30_symbols)].reset_index(drop=True)

company_cols = ["symbol", "organ_name", "icb_code1", "icb_code2", "icb_code3", "icb_code4"]
df_company = df_company[company_cols]
df_company.head(5)

Unnamed: 0,symbol,organ_name,icb_code1,icb_code2,icb_code3,icb_code4
0,BID,Ngân hàng Thương mại Cổ phần Đầu tư và Phát tr...,8301,8300,8350,8355
1,DGC,Công ty Cổ phần Tập đoàn Hóa chất Đức Giang,1000,1300,1350,1357
2,BCM,Tổng Công ty Đầu tư và Phát triển Công nghiệp ...,8000,8600,8630,8633
3,FPT,Công ty Cổ phần FPT,9000,9500,9530,9537
4,HDB,Ngân hàng Thương mại Cổ phần Phát Triển Thành ...,8301,8300,8350,8355


In [5]:
def get_company_objects(symbols: list[str], source: str = "VCI") -> list[Company]:
    company_objects = []
    for s in symbols:
        company_objects.append(Company(symbol=s, source=source))
    return company_objects


def get_company_details(func: Callable[[Company], pd.DataFrame], company_objects: list[Company], **kwargs) -> pd.DataFrame:
    list_df = []
    for c in company_objects:
        df = func(c, **kwargs)
        df["symbol"] = c.symbol
        list_df.append(df)

    df_all = pd.concat(list_df, axis=0, ignore_index=True)
    return df_all

In [6]:
vn30_companies = get_company_objects(vn30_symbols, "VCI")

In [7]:
df_company_details = get_company_details(Company.overview, vn30_companies)[["symbol", "issue_share"]]
df_company_details.head(5)

Unnamed: 0,symbol,issue_share
0,ACB,5136656599
1,BCM,1035000000
2,BID,7021361917
3,CTG,5369991748
4,DGC,379779286


In [8]:
df_company_v2 = df_company.merge(df_company_details, on="symbol", how="inner")
df_company_v2.head(5)

Unnamed: 0,symbol,organ_name,icb_code1,icb_code2,icb_code3,icb_code4,issue_share
0,BID,Ngân hàng Thương mại Cổ phần Đầu tư và Phát tr...,8301,8300,8350,8355,7021361917
1,DGC,Công ty Cổ phần Tập đoàn Hóa chất Đức Giang,1000,1300,1350,1357,379779286
2,BCM,Tổng Công ty Đầu tư và Phát triển Công nghiệp ...,8000,8600,8630,8633,1035000000
3,FPT,Công ty Cổ phần FPT,9000,9500,9530,9537,1481330122
4,HDB,Ngân hàng Thương mại Cổ phần Phát Triển Thành ...,8301,8300,8350,8355,3510142254


In [9]:
df_industry = listing.industries_icb()

industry_cols = ["icb_code", "level", "icb_name", "en_icb_name"]
df_industry = df_industry[industry_cols]
df_industry

Unnamed: 0,icb_code,level,icb_name,en_icb_name
0,0530,3,Sản xuất Dầu khí,Oil & Gas Producers
1,0570,3,"Thiết bị, Dịch vụ và Phân phối Dầu khí","Oil Equipment, Services & Distribution"
2,1350,3,Hóa chất,Chemicals
3,1730,3,Lâm nghiệp và Giấy,Forestry & Paper
4,1750,3,Kim loại,Industrial Metals & Mining
...,...,...,...,...
150,6000,1,Viễn thông,Telecommunications
151,7000,1,Tiện ích Cộng đồng,Utilities
152,8000,1,Tài chính,Financials
153,8301,1,Ngân hàng,Banks


In [10]:
def get_quote_objects(symbols: list[str], source: str = "VCI") -> list[Quote]:
    quote_objects = []
    for s in symbols:
        quote_objects.append(Quote(symbol=s, source=source))
    return quote_objects


def get_ohlcv(func: Callable[[Quote], pd.DataFrame], quote_objects: list[Quote], **kwargs) -> pd.DataFrame:
    list_df = []
    for q in quote_objects:
        df = func(q, **kwargs)
        df["symbol"] = q.symbol
        list_df.append(df)
        
    df_all = pd.concat(list_df, axis=0, ignore_index=True)
    cols = ["symbol", "time", "open", "high", "low", "close", "volume"]
    df_all = df_all[cols].sort_values(by=["time"], ascending=[True]).reset_index(drop=True)

    return df_all 

In [11]:
vn30_quotes = get_quote_objects(symbols=vn30_symbols, source="VCI")

In [12]:
time.sleep(60)

ohlcv_1d_args = {"start": "2022-01-01", "interval": "1D"}

df_ohlcv_1d = get_ohlcv(Quote.history, vn30_quotes, **ohlcv_1d_args)
df_ohlcv_1d

Unnamed: 0,symbol,time,open,high,low,close,volume
0,ACB,2022-01-04,16.32,16.32,16.06,16.13,4555000
1,CTG,2022-01-04,30.83,31.28,30.61,31.10,12031300
2,DGC,2022-01-04,65.16,65.57,64.79,64.87,643100
3,FPT,2022-01-04,47.90,48.31,47.85,47.95,1935000
4,GAS,2022-01-04,69.04,72.60,69.04,72.60,1730300
...,...,...,...,...,...,...,...
27265,VIB,2025-08-27,22.75,23.30,22.35,22.35,21032900
27266,VIC,2025-08-27,133.20,135.60,132.00,132.00,2351500
27267,VJC,2025-08-27,144.50,145.10,141.80,144.10,2022600
27268,MWG,2025-08-27,75.00,76.60,74.60,75.00,12565000


In [13]:
time.sleep(60)

ohlcv_1m_args = {"start": "2022-01-01", "interval": "1m"}

df_ohlcv_1m = get_ohlcv(Quote.history, vn30_quotes, **ohlcv_1m_args)
df_ohlcv_1m

Unnamed: 0,symbol,time,open,high,low,close,volume
0,ACB,2023-09-11 09:15:00,16.00,16.04,16.00,16.04,65100
1,VJC,2023-09-11 09:15:00,101.50,101.50,101.50,101.50,33700
2,VIC,2023-09-11 09:15:00,60.10,60.10,59.50,59.90,511100
3,BCM,2023-09-11 09:15:00,70.93,71.03,70.74,70.93,11300
4,VIB,2023-09-11 09:15:00,14.16,14.16,14.16,14.16,132800
...,...,...,...,...,...,...,...
3090797,PLX,2025-08-27 14:45:00,36.40,36.40,36.40,36.40,44000
3090798,SAB,2025-08-27 14:45:00,46.00,46.00,46.00,46.00,78100
3090799,SHB,2025-08-27 14:45:00,17.65,17.65,17.65,17.65,2869100
3090800,SSB,2025-08-27 14:45:00,21.40,21.40,21.40,21.40,185800


In [14]:
def get_finance_objects(symbols: list[str], source: str = "VCI") -> list[Finance]:
    finance_objects = []
    for s in symbols:
        finance_objects.append(Finance(symbol=s, source=source))
    return finance_objects


def get_finance_ratios(func: Callable[[Finance], pd.DataFrame], finance_objects: list[Finance], **kwargs) -> pd.DataFrame:
    list_df = []
    for f in finance_objects:
        df = func(f, **kwargs)
        list_df.append(df)
        
    df_all = pd.concat(list_df, axis=0, ignore_index=True)
    if ("Meta", "yearReport") in df_all.columns:
        df_all = df_all[df_all[("Meta", "yearReport")] >= 2020]
    df_all = df_all.sort_values(by=("Meta", "yearReport"), ascending=True).reset_index(drop=True)
    return df_all

In [15]:
vn30_finances = get_finance_objects(vn30_symbols, source="VCI")

In [16]:
time.sleep(90)

finance_args = {"period": "year", "lang": "en"}

df_ratio = get_finance_ratios(Finance.ratio, vn30_finances, **finance_args)
df_ratio

Unnamed: 0_level_0,Meta,Meta,Meta,Chỉ tiêu cơ cấu nguồn vốn,Chỉ tiêu cơ cấu nguồn vốn,Chỉ tiêu khả năng sinh lợi,Chỉ tiêu khả năng sinh lợi,Chỉ tiêu khả năng sinh lợi,Chỉ tiêu khả năng sinh lợi,Chỉ tiêu thanh khoản,...,Chỉ tiêu khả năng sinh lợi,Chỉ tiêu khả năng sinh lợi,Chỉ tiêu khả năng sinh lợi,Chỉ tiêu khả năng sinh lợi,Chỉ tiêu khả năng sinh lợi,Chỉ tiêu thanh khoản,Chỉ tiêu thanh khoản,Chỉ tiêu thanh khoản,Chỉ tiêu thanh khoản,Chỉ tiêu định giá
Unnamed: 0_level_1,ticker,yearReport,lengthReport,Fixed Asset-To-Equity,Owners' Equity/Charter Capital,Net Profit Margin (%),ROE (%),ROA (%),Dividend yield (%),Financial Leverage,...,EBIT Margin (%),Gross Profit Margin (%),ROIC (%),EBITDA (Bn. VND),EBIT (Bn. VND),Current Ratio,Cash Ratio,Quick Ratio,Interest Coverage,EV/EBITDA
0,ACB,2020,5,0.106712,0.690102,0.526866,0.243075,0.018557,0.000000,12.540286,...,,,,,,,,,,
1,BCM,2020,5,0.113407,1.601698,0.322476,0.120564,0.043164,0.012500,2.962832,...,0.293493,0.501210,0.059464,2.145587e+12,1.909650e+12,1.390992,0.082767,0.258938,-3.278584,36.986021
2,BID,2020,5,0.130855,1.134347,0.195454,0.091845,0.004805,0.005391,19.042690,...,,,,,,,,,,
3,CTG,2020,5,0.126698,1.591049,0.385620,0.169043,0.010656,0.023599,15.701336,...,,,,,,,,,,
4,DGC,2020,5,0.498955,1.070999,0.145382,0.241169,0.171103,0.009381,1.444683,...,0.158189,0.237183,0.189123,1.254348e+12,9.865402e+11,1.898409,0.156097,0.625280,-49.769735,19.655223
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,FPT,2024,5,0.414698,2.411855,0.125011,0.286909,0.118787,0.020000,2.015252,...,0.167200,0.377069,0.207368,1.304365e+13,1.050835e+13,1.307145,0.267407,0.594123,-19.049307,11.709826
146,VJC,2024,5,0.827529,2.893574,0.019476,0.086807,0.015068,0.000000,5.801561,...,0.043871,0.099289,0.053004,3.773966e+12,3.160673e+12,1.151348,0.165610,0.999644,-0.984035,30.722464
147,VNM,2024,5,0.346946,1.730870,0.152022,0.293621,0.174380,0.067002,1.521768,...,0.168406,0.414197,0.228934,1.249971e+13,1.040455e+13,2.034376,0.120653,0.458351,-37.235641,10.545662
148,VPB,2024,5,0.013742,1.856273,0.321489,0.111379,0.018361,0.014577,6.272932,...,,,,,,,,,,


In [17]:
df_ratio.columns = [col[1] if col[1] != "" else col[0] for col in df_ratio.columns]
df_ratio.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 37 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   ticker                           150 non-null    object 
 1   yearReport                       150 non-null    int64  
 2   lengthReport                     150 non-null    int64  
 3   Fixed Asset-To-Equity            150 non-null    float64
 4   Owners' Equity/Charter Capital   150 non-null    float64
 5   Net Profit Margin (%)            150 non-null    float64
 6   ROE (%)                          150 non-null    float64
 7   ROA (%)                          150 non-null    float64
 8   Dividend yield (%)               145 non-null    float64
 9   Financial Leverage               150 non-null    float64
 10  Market Capital (Bn. VND)         150 non-null    float64
 11  Outstanding Share (Mil. Shares)  150 non-null    float64
 12  P/E                   

In [24]:
df_ratio.columns

Index(['ticker', 'yearReport', 'lengthReport', 'Fixed Asset-To-Equity',
       'Owners' Equity/Charter Capital', 'Net Profit Margin (%)', 'ROE (%)',
       'ROA (%)', 'Dividend yield (%)', 'Financial Leverage',
       'Market Capital (Bn. VND)', 'Outstanding Share (Mil. Shares)', 'P/E',
       'P/B', 'P/S', 'P/Cash Flow', 'EPS (VND)', 'BVPS (VND)',
       '(ST+LT borrowings)/Equity', 'Debt/Equity', 'Asset Turnover',
       'Fixed Asset Turnover', 'Days Sales Outstanding',
       'Days Inventory Outstanding', 'Days Payable Outstanding', 'Cash Cycle',
       'Inventory Turnover', 'EBIT Margin (%)', 'Gross Profit Margin (%)',
       'ROIC (%)', 'EBITDA (Bn. VND)', 'EBIT (Bn. VND)', 'Current Ratio',
       'Cash Ratio', 'Quick Ratio', 'Interest Coverage', 'EV/EBITDA'],
      dtype='object')

In [23]:
df_ratio[df_ratio.isnull().any(axis=1)]

Unnamed: 0,ticker,yearReport,lengthReport,Fixed Asset-To-Equity,Owners' Equity/Charter Capital,Net Profit Margin (%),ROE (%),ROA (%),Dividend yield (%),Financial Leverage,...,EBIT Margin (%),Gross Profit Margin (%),ROIC (%),EBITDA (Bn. VND),EBIT (Bn. VND),Current Ratio,Cash Ratio,Quick Ratio,Interest Coverage,EV/EBITDA
0,ACB,2020,5,0.106712,0.690102,0.526866,0.243075,0.018557,0.000000,12.540286,...,,,,,,,,,,
2,BID,2020,5,0.130855,1.134347,0.195454,0.091845,0.004805,0.005391,19.042690,...,,,,,,,,,,
3,CTG,2020,5,0.126698,1.591049,0.385620,0.169043,0.010656,0.023599,15.701336,...,,,,,,,,,,
9,HDB,2020,5,0.042885,0.703791,0.357123,0.206148,0.016942,0.000000,12.918024,...,,,,,,,,,,
10,MBB,2020,5,0.086047,0.820997,0.407473,0.191277,0.018988,0.000000,9.879983,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136,SSB,2024,5,0.039055,1.230329,0.483226,0.147505,0.016275,,9.304922,...,,,,,,,,,,
140,SHB,2024,5,0.091756,1.428208,0.439049,0.172364,0.013530,0.028736,12.872606,...,,,,,,,,,,
141,LPB,2024,5,0.063034,1.450760,0.631484,0.251005,0.021815,0.054945,11.729357,...,,,,,,,,,,
142,MBB,2024,5,0.046390,1.918295,0.550001,0.214728,0.022135,0.010889,9.642962,...,,,,,,,,,,


In [19]:
df_company_v2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   symbol       30 non-null     object
 1   organ_name   30 non-null     object
 2   icb_code1    30 non-null     object
 3   icb_code2    30 non-null     object
 4   icb_code3    30 non-null     object
 5   icb_code4    30 non-null     object
 6   issue_share  30 non-null     int64 
dtypes: int64(1), object(6)
memory usage: 1.8+ KB


In [20]:
df_industry.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 155 entries, 0 to 154
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   icb_code     155 non-null    object
 1   level        155 non-null    int64 
 2   icb_name     155 non-null    object
 3   en_icb_name  155 non-null    object
dtypes: int64(1), object(3)
memory usage: 5.0+ KB


In [21]:
df_ohlcv_1d.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27270 entries, 0 to 27269
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   symbol  27270 non-null  object        
 1   time    27270 non-null  datetime64[ns]
 2   open    27270 non-null  float64       
 3   high    27270 non-null  float64       
 4   low     27270 non-null  float64       
 5   close   27270 non-null  float64       
 6   volume  27270 non-null  int64         
dtypes: datetime64[ns](1), float64(4), int64(1), object(1)
memory usage: 1.5+ MB


In [22]:
df_ohlcv_1m.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3090802 entries, 0 to 3090801
Data columns (total 7 columns):
 #   Column  Dtype         
---  ------  -----         
 0   symbol  object        
 1   time    datetime64[ns]
 2   open    float64       
 3   high    float64       
 4   low     float64       
 5   close   float64       
 6   volume  int64         
dtypes: datetime64[ns](1), float64(4), int64(1), object(1)
memory usage: 165.1+ MB
