In [17]:
import pandas as pd

# Load the company listing from disk.
df = pd.read_csv('vn_stock_companies.csv')

# Keep every company whose group_code is anything other than 'UpcomIndex'.
vnindex_df = df.loc[df['group_code'] != 'UpcomIndex']

# Pull the ticker column out as a plain Python list.
symbollist = vnindex_df['ticker'].tolist()

# Retain only symbols of at most 4 characters; per the original note this is
# meant to keep ordinary stock tickers and leave out fund-type codes.
filtered_symbollist = list(filter(lambda ticker: len(ticker) <= 4, symbollist))

In [18]:
from vnstock import *
import numpy as np
# Build one record per ticker: yearly net income, a per-year compound rate,
# a 5-row average ROE and a P/E value.
# NOTE(review): the whole body sits in a single try, so ANY failure (report
# fetch, row lookup, ratio fetch, missing column) skips the symbol entirely and
# no record is appended for it. A later cell rebuilds financial_data with a
# near-identical loop that appends a record even on failure.
financial_data = []  # List to store the financial data objects

for symbol in filtered_symbollist:
    try:
        data = financial_report(symbol=symbol, report_type='IncomeStatement', frequency='Yearly')
        
        # Remove the 'Q5 ' from years
        data.columns = [col.replace('Q5 ', '') for col in data.columns]
        
        # Find the first row (in frame order) whose "CHỈ TIÊU" label matches any
        # of the four regex alternatives. The last alternative is a substring of
        # the second and third, so it also matches those rows. `.index[0]` raises
        # IndexError when nothing matches.
        row_index = data[data['CHỈ TIÊU'].str.contains('Lợi nhuận của Cổ đông của Công ty mẹ|Lợi nhuận sau thuế của chủ sở hữu, tập đoàn|Lợi nhuận sau thuế phân bổ cho chủ sở hữu|Lợi nhuận sau thuế')].index[0]
        
        # Use the row index to get the net income data
        net_income = data.loc[row_index].to_dict()
        
        # Drop the label entry so only the per-column values remain
        del net_income['CHỈ TIÊU']
        
        # Every key from the sixth onward gets a compound rate.
        # NOTE(review): assumes the remaining keys are consecutive year strings in
        # ascending order; otherwise the str(y) lookups below raise KeyError.
        years = list(net_income.keys())[5:]
        compound_rate = {}
        
        for year in years:
            year_int = int(year)
            # Sum of the five years immediately before `year` (year-5 .. year-1).
            sum_net_income_5_years = sum(net_income[str(y)] for y in range(year_int - 5, year_int))
            # (value at `year` minus value five years earlier) divided by that sum.
            compound_rate[year] = (net_income[year] - net_income[str(year_int - 5)]) / sum_net_income_5_years
            
        # Calculate average 5 year ROE: mean of the first five rows of 'roe'.
        # Stays 0.0 when the column is missing, there are fewer than five rows,
        # or the mean is NaN. Note this rebinds the name `df`.
        df = financial_ratio(symbol, 'yearly', True)
        final_average_roe = 0.0
        if 'roe' in df.columns and len(df) >= 5:
            average_roe = df['roe'].head(5).mean()
            if not np.isnan(average_roe):
                final_average_roe = average_roe
        
        # Price to Earning ratio at index label 0 (presumably the latest period;
        # verify the row ordering returned by financial_ratio). Raises KeyError
        # when the column is absent.
        pe = df.loc[0, "priceToEarning"]
        
        financial_data.append({'ticker': symbol, 'net_income': net_income, 'compound_rate': compound_rate, 'average_5y_roe': final_average_roe, 'pe': pe})
    except Exception as e:
        # Report the failure and move on to the next symbol without a record.
        print(f"Error fetching data for symbol {symbol}: {str(e)}")
        continue


Error fetching data for symbol VTZ: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol GMH: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol HMR: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol PCH: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol NO1: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol CAG: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol AGG: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol ACG: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol APH: Excel file format cannot be determined, you must specify an 

In [19]:
from vnstock import *
import numpy as np
# Build one record per ticker: yearly net income, a per-year compound rate,
# a 5-row average ROE and a P/E value. The income-statement part and the ratio
# part are guarded separately, so a failure in one still yields a record with
# defaults for the failed part.
financial_data = []  # List to store the financial data objects

for symbol in filtered_symbollist:
    # Defaults keep the record well-formed when the income statement fails.
    net_income = {}
    compound_rate = {}
    try:
        data = financial_report(symbol=symbol, report_type='IncomeStatement', frequency='Yearly')

        # Remove the 'Q5 ' from years
        data.columns = [col.replace('Q5 ', '') for col in data.columns]

        # Find the first row (in frame order) whose "CHỈ TIÊU" label matches any
        # of the four regex alternatives. na=False treats a missing label as
        # "no match" so a NaN label cannot invalidate the boolean mask.
        # NOTE(review): the last alternative is a substring of the second and
        # third, so it also matches those rows; confirm that "first matching
        # row" is the intended precedence.
        is_net_income_row = data['CHỈ TIÊU'].str.contains(
            'Lợi nhuận của Cổ đông của Công ty mẹ|Lợi nhuận sau thuế của chủ sở hữu, tập đoàn|Lợi nhuận sau thuế phân bổ cho chủ sở hữu|Lợi nhuận sau thuế',
            na=False,
        )
        row_index = data[is_net_income_row].index[0]

        # Use the row index to get the net income data
        net_income = data.loc[row_index].to_dict()

        # Drop the label entry so only the per-column values remain
        del net_income['CHỈ TIÊU']

        # Every key from the sixth onward gets a compound rate.
        # NOTE(review): assumes the remaining keys are consecutive year strings in
        # ascending order; otherwise the str(y) lookups below raise KeyError.
        years = list(net_income.keys())[5:]

        for year in years:
            year_int = int(year)
            # Sum of the five years immediately before `year` (year-5 .. year-1).
            sum_net_income_5_years = sum(net_income[str(y)] for y in range(year_int - 5, year_int))
            if sum_net_income_5_years == 0:
                # The rate is undefined for a zero denominator; skip just this
                # year instead of aborting all remaining years for the symbol.
                continue
            # (value at `year` minus value five years earlier) divided by that sum.
            compound_rate[year] = (net_income[year] - net_income[str(year_int - 5)]) / sum_net_income_5_years
    except Exception as e:
        print(f"Error fetching data for symbol {symbol} in financial_report: {str(e)}")

    # Defaults used when the ratio table is unavailable or incomplete.
    final_average_roe = 0.0
    pe = None
    try:
        # Calculate average 5 year ROE: mean of the first five rows of 'roe'.
        # A dedicated name avoids rebinding the notebook-wide `df`.
        ratio_df = financial_ratio(symbol, 'yearly', True)

        if 'roe' in ratio_df.columns and len(ratio_df) >= 5:
            average_roe = ratio_df['roe'].head(5).mean()
            if not np.isnan(average_roe):
                final_average_roe = average_roe

        # Price to Earning ratio at index label 0 (presumably the latest period;
        # verify the row ordering returned by financial_ratio).
        pe = ratio_df.loc[0, "priceToEarning"]
    except Exception as e:
        print(f"Error fetching data for symbol {symbol} in financial_ratio: {str(e)}")

    financial_data.append({'ticker': symbol, 'net_income': net_income, 'compound_rate': compound_rate, 'average_5y_roe': final_average_roe, 'pe': pe})


Error fetching data for symbol VTZ in financial_report: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol VTZ in financial_ratio: 'priceToEarning'
Error fetching data for symbol GMH in financial_report: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol GMH in financial_ratio: 'priceToEarning'
Error fetching data for symbol HMR in financial_report: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol PCH in financial_report: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol PCH in financial_ratio: 'priceToEarning'
Error fetching data for symbol NO1 in financial_report: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol NO1 in financial_ratio: 'priceToEarning'
Error fetching data for symbol CAG in financial

In [20]:
# Attach to every record the mean of its most recent compound rates (up to five).
for entry in financial_data:
    compound_rates = entry['compound_rate']
    # Last five keys in insertion order (presumably the latest years; relies on
    # the rates having been inserted in ascending year order).
    latest_years = list(compound_rates.keys())[-5:]
    if latest_years:
        # Divide by the number of rates actually present: with fewer than five
        # entries a fixed divisor of 5 would understate the average.
        average_5y_compound_rate = sum(compound_rates[year] for year in latest_years) / len(latest_years)
    else:
        # No rates at all: keep a numeric 0.0 so the sort key below stays comparable.
        average_5y_compound_rate = 0.0
    entry['average_5y_compound_rate'] = average_5y_compound_rate

# Sort descending by average ROE, breaking ties by the average compound rate.
financial_data = sorted(financial_data, key=lambda x: (x['average_5y_roe'], x['average_5y_compound_rate']), reverse=True)


In [21]:
import json

# Destination of the JSON export of financial_data.
file_path = 'financial_data.json'

# Serialize the records to a JSON string, then write that string to the file.
with open(file_path, 'w') as file:
    file.write(json.dumps(financial_data))

In [22]:
import pandas as pd

# financial_data is a list of per-ticker dicts; each dict becomes one row.
df = pd.DataFrame(data=financial_data)

# Export the table as CSV, leaving out the index column.
df.to_csv(path_or_buf='financial_data.csv', index=False)

In [23]:
# Spot check: fetch the yearly income statement for one ticker and show it.
data = financial_report(
    report_type='IncomeStatement',
    frequency='Yearly',
    symbol="TKG",
)
data

Unnamed: 0,CHỈ TIÊU,2018,2019,2020,2021,2022
0,Doanh số,116530200000.0,134223700000.0,131865900000.0,154343500000.0,123000800000.0
1,Các khoản giảm trừ,0.0,0.0,-99385930.0,-4388141000.0,0.0
2,Doanh số thuần,116530200000.0,134223700000.0,131766500000.0,149955400000.0,123000800000.0
3,Giá vốn hàng bán,-108829100000.0,-125803500000.0,-119343900000.0,-136816300000.0,-115976600000.0
4,Lãi gộp,7701116000.0,8420174000.0,12422560000.0,13139080000.0,7024197000.0
5,Thu nhập tài chính,2159051.0,756458900.0,158287400.0,47398650.0,377562700.0
6,Chi phí tài chính,-2390387000.0,-2405859000.0,-2535799000.0,-2501399000.0,-2590073000.0
7,Trong đó: Chi phí lãi vay,-2390370000.0,-2402292000.0,-2514192000.0,-2430252000.0,-2504665000.0
8,Lãi/(lỗ) từ công ty liên doanh,0.0,0.0,0.0,0.0,0.0
9,Chi phí bán hàng,-472739800.0,-309885700.0,-2825141000.0,-3218656000.0,-1723653000.0


In [24]:
# Spot check: fetch the ratio table for one ticker ("TKG") to inspect it by hand.
# NOTE(review): 'yearly' and True are passed positionally; their meaning is
# defined by financial_ratio's signature, which is not visible here - confirm.
# This rebinds `df`, replacing whatever it held before this cell.
df = financial_ratio("TKG", 'yearly', True)
# Disabled lookup of the P/E value at index label 0:
#value = df.loc[0, "priceToEarning"]
# Bare last expression so the notebook renders the frame.
df