In [2]:
import pandas as pd

# Read the CSV file of listed companies
df = pd.read_csv('vn_stock_companies.csv')

# Keep every row whose group_code is anything other than 'UpcomIndex'.
# NOTE: this is an exclusion filter, not an allow-list; rows with a missing
# group_code also pass because NaN != 'UpcomIndex' evaluates to True.
vnindex_df = df[df['group_code'] != 'UpcomIndex']

# Get the list of tickers (left as-is, including any missing values)
symbollist = vnindex_df['ticker'].tolist()

# Keep stock tickers only, not fund tickers: the rule used here is
# "at most 4 characters". A missing ticker comes back from pandas as a float
# NaN, on which len() raises TypeError, so non-string entries are skipped.
filtered_symbollist = [
    symbol
    for symbol in symbollist
    if isinstance(symbol, str) and len(symbol) <= 4
]

In [3]:
from vnstock import *
import numpy as np
# Length (in years) of the growth window and of the ROE average.
# The output keys ('average_5y_roe') are named for this value.
WINDOW_YEARS = 5

# Labels that identify the net-income row, in order of preference.
# The last entry is a substring of several other labels, so it must only be
# used as a fallback once the more specific labels have been ruled out.
NET_INCOME_LABELS = (
    'Lợi nhuận của Cổ đông của Công ty mẹ',
    'Lợi nhuận sau thuế của chủ sở hữu, tập đoàn',
    'Lợi nhuận sau thuế phân bổ cho chủ sở hữu',
    'Lợi nhuận sau thuế',
)


def _find_net_income_row(report):
    """Return the index label of the net-income row of an income statement.

    The labels in ``NET_INCOME_LABELS`` are tried one at a time, in order, and
    the first row containing the first label that matches anything wins.
    (A single ``a|b|c|d`` regex followed by ``.index[0]`` would instead return
    the topmost row matching *any* alternative, letting the generic fallback
    shadow the specific labels.)

    Parameters
    ----------
    report : pandas.DataFrame
        Frame with a 'CHỈ TIÊU' column holding the line-item labels.

    Returns
    -------
    The index label of the selected row.

    Raises
    ------
    ValueError
        If no row contains any of the known labels.
    """
    labels = report['CHỈ TIÊU']
    for candidate in NET_INCOME_LABELS:
        # regex=False: the labels are literal text; na=False: a missing label
        # counts as "no match" instead of producing an unusable NaN mask.
        hits = labels.str.contains(candidate, regex=False, na=False)
        if hits.any():
            return report.index[hits][0]
    raise ValueError("no net income row found in income statement")


financial_data = []  # List to store the financial data objects

for symbol in filtered_symbollist:
    try:
        # financial_report / financial_ratio are presumably provided by the
        # star import from vnstock at the top of this cell.
        data = financial_report(symbol=symbol, report_type='IncomeStatement', frequency='Yearly')

        # Remove the 'Q5 ' prefix so the column names are plain years
        data.columns = [col.replace('Q5 ', '') for col in data.columns]

        # Locate the net-income row and turn it into {column name: value}
        row_index = _find_net_income_row(data)
        net_income = data.loc[row_index].to_dict()

        # Drop the label entry so that only the yearly values remain
        del net_income['CHỈ TIÊU']

        # Skip the first WINDOW_YEARS columns: they have no full look-back
        # window. Assumes the columns are consecutive years in ascending
        # order; a gap raises KeyError below and the symbol is reported.
        years = list(net_income.keys())[WINDOW_YEARS:]
        compound_rate = {}

        for year in years:
            year_int = int(year)
            base_year = str(year_int - WINDOW_YEARS)
            # Net income summed over the WINDOW_YEARS years before `year`
            sum_net_income_5_years = sum(
                net_income[str(y)] for y in range(year_int - WINDOW_YEARS, year_int)
            )
            # Change in net income versus the start of the window, relative
            # to the income accumulated over the window
            compound_rate[year] = (net_income[year] - net_income[base_year]) / sum_net_income_5_years

        # Calculate the average ROE over the first WINDOW_YEARS rows.
        # Kept in its own name so the companies DataFrame `df` loaded earlier
        # in the notebook is not overwritten by a per-symbol ratio table.
        ratio_df = financial_ratio(symbol, 'yearly', True)
        final_average_roe = 0.0
        if 'roe' in ratio_df.columns and len(ratio_df) >= WINDOW_YEARS:
            average_roe = ratio_df['roe'].head(WINDOW_YEARS).mean()
            if not np.isnan(average_roe):
                final_average_roe = average_roe

        # Price-to-earnings ratio from the row labelled 0
        # (presumably the most recent year - TODO confirm row order)
        pe = ratio_df.loc[0, "priceToEarning"]

        financial_data.append({
            'ticker': symbol,
            'net_income': net_income,
            'compound_rate': compound_rate,
            'average_5y_roe': final_average_roe,
            'pe': pe,
        })
    except Exception as e:
        # Best effort: report the failing symbol and move on to the next one
        print(f"Error fetching data for symbol {symbol}: {str(e)}")


Error fetching data for symbol VTZ: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol GMH: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol HMR: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol PCH: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol NO1: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol CAG: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol AGG: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol ACG: Excel file format cannot be determined, you must specify an engine manually.
Error fetching data for symbol APH: Excel file format cannot be determined, you must specify an 

In [4]:
def _nan_last(value):
    """Return ``value``, or -inf when it is NaN, for use in a descending sort.

    NaN compares False against everything, which makes tuple comparison
    inconsistent; mapping it to -inf places such entries last when
    ``reverse=True``. ``value != value`` is true only for NaN.
    """
    return float('-inf') if value != value else value


for entry in financial_data:
    compound_rates = entry['compound_rate']
    # The most recent (up to) five entries, in insertion order
    latest_years = list(compound_rates.keys())[-5:]
    if latest_years:
        # Divide by the number of rates actually present: with fewer than
        # five, dividing by a fixed 5 would understate the average.
        average_5y_compound_rate = (
            sum(compound_rates[year] for year in latest_years) / len(latest_years)
        )
    else:
        # No rate available at all: keep the neutral 0.0
        average_5y_compound_rate = 0.0
    entry['average_5y_compound_rate'] = average_5y_compound_rate

# Sort financial_data by highest ROE first, then by highest
# average_5y_compound_rate; NaN values are ranked last.
financial_data = sorted(
    financial_data,
    key=lambda x: (_nan_last(x['average_5y_roe']), _nan_last(x['average_5y_compound_rate'])),
    reverse=True,
)


In [5]:
import json

# financial_data must already be populated and sorted by the cells above.

# Destination of the JSON snapshot
file_path = 'financial_data.json'

# Serialise the whole list in one call; the context manager closes the
# handle once the dump is finished (or has failed).
with open(file_path, mode='w') as file:
    json.dump(obj=financial_data, fp=file)

In [7]:
import pandas as pd

# financial_data is a list of per-ticker dicts, so each dict becomes one row
df = pd.DataFrame(data=financial_data)

# Export the table to CSV, leaving out the positional index column
df.to_csv(path_or_buf='financial_data.csv', index=False)

In [10]:
# Spot check: fetch the ratio table for a single ticker
df = financial_ratio("SCS", 'yearly', True)
# Scalar lookup of the P/E figure on the row labelled 0
value = df.at[0, "priceToEarning"]
# Show the full table as the cell output
df

Unnamed: 0,ticker,quarter,year,priceToEarning,priceToBook,valueBeforeEbitda,dividend,roe,roa,daysReceivable,...,loanOnAsset,loanOnDeposit,depositOnEarnAsset,badDebtOnAsset,liquidityOnLiability,payableOnEquity,cancelDebt,ebitdaOnStockChange,bookValuePerShareChange,creditGrowth
0,SCS,5,2022,11.0,4.9,9.0,,0.482,0.435,337,...,,,,,,0.1,,-0.205,0.148,
1,SCS,5,2021,13.7,6.2,14.1,,0.498,0.449,238,...,,,,,,0.1,,0.188,0.223,
2,SCS,5,2020,13.3,6.1,12.2,,0.46,0.429,200,...,,,,,,0.1,,-0.101,-0.453,
3,SCS,5,2019,10.9,5.5,9.0,,0.532,0.491,32,...,,,,,,0.1,,0.123,0.135,
