Ranking the securities sector based on Profit and Health

Profit:
- ROE
- ROA
- NIM

Health:
- Loans-to-Equity
- Debt-to-Equity
- Top % Share
- Coefficient Variation of FVTPL

# 1. Import

### 1.1 Library

In [2]:
import pandas as pd
import numpy as np
import datetime as dt
import pyodbc
import pymssql
import sys

sys.path.append(r"F:\Tùng\Tung\Python\DashBoard\vnd_data")
import get_vnd_data as vnd

# ignore warnings
import warnings
warnings.filterwarnings('ignore')

# Customize the display of the table
pd.set_option('chained_assignment', None)

### 1.2 Import data

#### 1.2.1 Raw data

In [7]:
# Assign pathlink
path_income_securities = r"F:\Tùng\Tung\Python\BSC_DataRankingStocks\cache\is_securities.csv"
path_bs_securities = r"F:\Tùng\Tung\Python\BSC_DataRankingStocks\cache\bs_securities.csv"

# Import data, it includes Income statement and Balance Sheet
df_is = pd.read_csv(path_income_securities)
df_is.drop(['Unnamed: 0'], axis=1, inplace=True)
df_bs = pd.read_csv(path_bs_securities)
df_bs.drop(['Unnamed: 0'], axis=1, inplace=True)

# Preprocess data
df_is = df_is.loc[df_is['QUARTER'] != 0]
df_bs = df_bs.loc[df_bs['QUARTER'] != 0]
df_is.fillna(0, inplace=True)
df_bs.fillna(0, inplace=True)

# Assign the list of stocks
list_sec = df_is['SYMBOL'].unique()

In [8]:
# Briefly check the lastest data
print(f"There are : {len(df_is.loc[(df_is['YEAR'] == 2023) & (df_is['QUARTER'] == 1)])} stocks in Income Statement")
print(f"There are : {len(df_bs.loc[(df_bs['YEAR'] == 2023) & (df_bs['QUARTER'] == 1)])} stocks in Balance Sheet")

There are : 35 stocks in Income Statement
There are : 35 stocks in Balance Sheet


#### 1.2.2 Provision data 
Due to SQL lacks this field

In [9]:
""" Due to BSC SQL Server does not have the data about provision for losses from mortgage assets,
uncollectible receivables and borrowing expenses in the Income Statement. Therefore, this step is to
implement the data. Specifically, the data is collected from VND's resources."""

provision_for_losses = []

for i in list_sec:
    print(f"Stock: {i}")
    df_i = vnd.get_income_statement(i)
    df_i['fiscalDate'] = pd.to_datetime(df_i['fiscalDate'])
    df_i['Year'] = df_i['fiscalDate'].dt.year
    df_i['Quarter'] = df_i['fiscalDate'].dt.quarter
    df_i = df_i.loc[df_i['itemCode'] == 700053]
    
    provision_for_losses.append(df_i)

print("Finish: Successfully get the data")
provision_for_losses = pd.concat(provision_for_losses)

# Process and Remove unnecessary columns
provision_for_losses.drop([
    'reportType', 'modelType', 'fiscalDate', 'createdDate', 'modifiedDate',
    'itemCode'
],
                          axis=1,
                          inplace=True)
provision_for_losses.rename(columns={
    "code": "Symbol",
    "numericValue": "ProvisionForLosses"
},
                            inplace=True)
provision_for_losses.sort_values(by=['Symbol', 'Year', 'Quarter'],
                                 ascending=[True, True, True],
                                 inplace=True)


Stock: SSI
Stock: BVS
Stock: KLS
Stock: HCM
Stock: SHS
Stock: APS
Stock: TAS
Stock: CLS
Stock: VDS
Stock: SME
Stock: CTS
Stock: SVS
Stock: AGR
Stock: VIG
Stock: WSS
Stock: VIX
Stock: VND
Stock: APG
Stock: AVS
Stock: SBS
Stock: PHS
Stock: ORS
Stock: HBS
Stock: PSI
Stock: GBS
Stock: BSI
Stock: IVS
Stock: TVS
Stock: TVB
Stock: MBS
Stock: HAC
Stock: FTS
Stock: FSC
Stock: VCI
Stock: ART
Stock: DSC
Stock: HFT
Stock: EVS
Stock: BMS
Stock: TCI
Stock: CSI
Stock: VFS
Stock: AAS
Stock: VUA
Stock: ABW
Finish: Successfully get the data


In [12]:
provision_for_losses.loc[provision_for_losses['Symbol'] == 'VND']

Unnamed: 0,Symbol,ProvisionForLosses,Year,Quarter
4613,VND,0.0,2009,4
4529,VND,0.0,2010,1
4445,VND,0.0,2010,2
4361,VND,0.0,2010,3
4277,VND,0.0,2010,4
4193,VND,0.0,2011,1
4109,VND,0.0,2011,2
4025,VND,0.0,2011,3
3941,VND,0.0,2011,4
3857,VND,0.0,2012,1


#### 1.2.3 Add data for WSS
- Manually: Add data for WSS at Quarter: 4, Year: 2022 & Quarter 1 Year 2023

In [24]:
# """ The data lacks WSS from Quarter 4 - Year 2022 up to now. Therefore, this step is to add related data.
# When there's no problem about the data, may be, this step could be removed."""

# provision_for_losses = provision_for_losses.append(
#     [{
#         "Symbol": 'WSS',
#         'ProvisionForLosses': 0,
#         'Year': 2022,
#         'Quarter': 4
#     },
#     {
#         "Symbol": 'WSS',
#         'ProvisionForLosses': 0,
#         'Year': 2023,
#         'Quarter': 1
#     }],
#     ignore_index=True)


#### 1.2.3 Preprocess data

In [25]:
# Merge all the data belonging to the Income Statement
df_is = df_is.merge(provision_for_losses, how="inner", on=['Symbol', 'Year', 'Quarter'])

# Sort all the data by Symbol, Year and Quarter
df_is.sort_values(by=['Symbol', 'Year', 'Quarter'], ascending=[True, True, True], inplace=True)
df_bs.sort_values(by=['Symbol', 'Year', 'Quarter'], ascending=[True, True, True], inplace=True)

In [26]:
# Check data in the Income Statement (df_is) or Balance Sheet (df_bs)
df_bs.tail(3)
df_is.tail(3)

Unnamed: 0,Symbol,Year,Quarter,Sales,IncomeFVTPL,IncomeHTM,IncomeLoansReceivables,IncomeAFS,IncomeDerivatives,RevenueBrokerageServices,...,RevenueAdvisoryServices,RevenueAuctionTrustServices,RevenueCustodyServices,OtherRevenues,FVTPL,Revenues,InterestExpenses,NetIncome,NetIncome2,ProvisionForLosses
381,WSS,2022,3,2400719000.0,57259950.0,1557584000.0,137377094.0,0.0,0.0,357757859.0,...,236904546.0,0.0,53836008.0,0.0,-49577870000.0,648498400.0,0.0,-50129010000.0,-50129010000.0,0.0
389,WSS,2022,4,7953785000.0,3309535000.0,1472664000.0,205915638.0,0.0,0.0,501073806.0,...,103000000.0,0.0,57596277.0,2304000000.0,12452890000.0,661670100.0,0.0,10684820000.0,10684820000.0,7418329000.0
365,WSS,2023,1,7721269000.0,3766240000.0,2603359000.0,199220807.0,0.0,0.0,260902403.0,...,840700000.0,0.0,50846572.0,0.0,-19839050000.0,1152449000.0,0.0,-18276650000.0,-18276650000.0,0.0


## 2. Process data

### 2.1 Profit Rank

In [27]:
# Get the suitable columns from the balance sheet and the income statement
df_profit = pd.merge(df_bs[['Symbol', 'Year', 'Quarter', 'Assets', 'Equity', 'Loans']],
         df_is[[
             'Symbol', 'Year', 'Quarter', 'IncomeLoansReceivables',
             'InterestExpenses', 'ProvisionForLosses', 'NetIncome2'
         ]],
         on=['Symbol', 'Year', 'Quarter'])


#### 2.1.1 Calculate ratios

In [28]:
# Calculate ratios of Individual Stocks
df_profit['Equity_m'] = df_profit.groupby('Symbol')['Equity'].shift(4).to_list()
df_profit['Assets_m'] = df_profit.groupby('Symbol')['Assets'].shift(4).to_list()

df_profit['NetIncome2_ttm'] = df_profit.groupby('Symbol')['NetIncome2'].rolling(4).sum().to_list()

df_profit['Equity_m'] = df_profit[['Equity', 'Equity_m']].mean(axis=1)
df_profit['Assets_m'] = df_profit[['Assets_m', 'Assets_m']].mean(axis=1)

df_profit['ROE_ttm'] = df_profit['NetIncome2_ttm']/df_profit['Equity_m']
df_profit['ROA_ttm'] = df_profit['NetIncome2_ttm']/df_profit['Assets_m']
df_profit['NIM_securities'] = (df_profit['IncomeLoansReceivables'] - df_profit['InterestExpenses'] - df_profit['ProvisionForLosses'])/df_profit['Loans']

In [29]:
# Briefly check the data
df_profit.tail(10)

Unnamed: 0,Symbol,Year,Quarter,Assets,Equity,Loans,IncomeLoansReceivables,InterestExpenses,ProvisionForLosses,NetIncome2,Equity_m,Assets_m,NetIncome2_ttm,ROE_ttm,ROA_ttm,NIM_securities
999,WSS,2020,4,526173000000.0,522566800000.0,16188050000.0,771973890.0,0.0,592411000.0,22202930000.0,517584100000.0,561160900000.0,10377760000.0,0.02005,0.018493,0.011092
1000,WSS,2021,1,504197400000.0,495904800000.0,18064650000.0,779699160.0,0.0,0.0,-26661970000.0,497043800000.0,546837900000.0,-2278042000.0,-0.004583,-0.004166,0.043162
1001,WSS,2021,2,549130200000.0,540825000000.0,16723530000.0,834466650.0,0.0,579532500.0,44599940000.0,518523800000.0,500329700000.0,44282180000.0,0.0854,0.088506,0.015244
1002,WSS,2021,3,508391400000.0,503545600000.0,16940330000.0,836744191.0,0.0,0.0,-37567660000.0,501954700000.0,504333900000.0,2573235000.0,0.005126,0.005102,0.049394
1003,WSS,2021,4,537112300000.0,531923100000.0,27089500000.0,858760415.0,0.0,0.0,28377500000.0,527244900000.0,526173000000.0,8747812000.0,0.016592,0.016625,0.031701
1004,WSS,2022,1,524664600000.0,519397400000.0,6810713000.0,353393903.0,0.0,0.0,-12449350000.0,507651100000.0,504197400000.0,22960430000.0,0.045229,0.045539,0.051888
1005,WSS,2022,2,558317400000.0,553037300000.0,3502602000.0,158945289.0,0.0,-8637119000.0,41107130000.0,546931100000.0,549130200000.0,19467620000.0,0.035594,0.035452,2.511294
1006,WSS,2022,3,506619100000.0,502908300000.0,4747612000.0,137377094.0,0.0,0.0,-50129010000.0,503226900000.0,508391400000.0,6906277000.0,0.013724,0.013585,0.028936
1007,WSS,2022,4,520463900000.0,513593100000.0,5301289000.0,205915638.0,0.0,7418329000.0,10684820000.0,522758100000.0,537112300000.0,-10786410000.0,-0.020634,-0.020082,-1.360502
1008,WSS,2023,1,498907300000.0,494960000000.0,4408187000.0,199220807.0,0.0,0.0,-18276650000.0,507178700000.0,524664600000.0,-16613710000.0,-0.032757,-0.031665,0.045193


In [30]:
# Calculate ratios of the securities sector
df_sector = df_profit.groupby(["Year", "Quarter"]).agg({
    "NetIncome2": "sum",
    "Equity_m": "sum",
    "Assets_m": "sum",
    "IncomeLoansReceivables": "sum",
    "InterestExpenses": "sum",
    "ProvisionForLosses": "sum",
    "Loans": "sum"
}).reset_index()

df_sector['ROE_sector_ttm'] = df_sector['NetIncome2'] / df_sector['Equity_m']
df_sector['ROA_sector_ttm'] = df_sector['NetIncome2'] / df_sector['Assets_m']
df_sector['NIM_sector_securities'] = (
    df_sector['IncomeLoansReceivables'] - df_sector['InterestExpenses'] -
    df_sector['ProvisionForLosses']) / df_sector['Loans']

# Merge data from individual stocks and their sector
df_profit = pd.merge(df_profit,
                     df_sector[[
                         'Year', 'Quarter', 'ROE_sector_ttm', 'ROA_sector_ttm',
                         'NIM_sector_securities']],
                     how='outer',
                     on=['Year', 'Quarter'])

In [31]:
df_profit

Unnamed: 0,Symbol,Year,Quarter,Assets,Equity,Loans,IncomeLoansReceivables,InterestExpenses,ProvisionForLosses,NetIncome2,Equity_m,Assets_m,NetIncome2_ttm,ROE_ttm,ROA_ttm,NIM_securities,ROE_sector_ttm,ROA_sector_ttm,NIM_sector_securities
0,AAS,2017,2,2.994484e+11,2.975447e+11,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,-2.373963e+09,2.975447e+11,,,,,,0.038021,0.023874,0.018470
1,AGR,2017,2,1.829330e+12,1.764758e+12,8.101539e+11,1.960556e+10,3.901370e+08,-5.948486e+08,-1.579073e+08,1.697121e+12,1.643046e+12,6.102549e+10,0.035958,0.037142,0.024452,0.038021,0.023874,0.018470
2,APG,2017,2,1.514391e+11,1.409130e+11,4.602290e+09,2.671460e+09,1.889715e+08,-1.454166e+09,4.720197e+09,1.386270e+11,1.501515e+11,4.559289e+09,0.032889,0.030365,0.855369,0.038021,0.023874,0.018470
3,APS,2017,2,3.897218e+11,3.785342e+11,1.384310e+11,1.631061e+09,0.000000e+00,1.030957e+09,1.128620e+10,3.701326e+11,3.684263e+11,1.455369e+10,0.039320,0.039502,0.004335,0.038021,0.023874,0.018470
4,ART,2017,2,2.152634e+11,1.825072e+11,3.843098e+10,1.233068e+10,0.000000e+00,0.000000e+00,2.942111e+10,1.539758e+11,1.444918e+11,5.760131e+10,0.374093,0.398648,0.320853,0.038021,0.023874,0.018470
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1004,VIG,2020,3,2.094797e+11,1.922399e+11,1.286623e+10,4.825037e+07,0.000000e+00,0.000000e+00,-1.115782e+09,2.027308e+11,2.278003e+11,-1.452616e+09,-0.007165,-0.006377,0.003750,0.034145,0.017981,0.007058
1005,VIX,2020,3,1.931665e+12,1.616398e+12,5.355616e+11,1.464401e+10,5.072986e+09,0.000000e+00,1.495084e+11,1.511127e+12,1.886240e+12,2.105413e+11,0.139327,0.111620,0.017871,0.034145,0.017981,0.007058
1006,VND,2020,3,1.341835e+13,3.568174e+12,2.666850e+12,7.495514e+10,8.418405e+10,3.083944e+09,2.481300e+11,3.333416e+12,1.144596e+13,5.906641e+11,0.177195,0.051605,-0.004617,0.034145,0.017981,0.007058
1007,VUA,2020,3,3.489518e+11,3.411913e+11,6.316564e+10,2.108796e+08,0.000000e+00,0.000000e+00,9.441757e+09,3.449976e+11,3.545107e+11,1.159991e+09,0.003362,0.003272,0.003339,0.034145,0.017981,0.007058


#### 2.1.2 Scoring profit criteria
- Rank and score for profit criteria

In [32]:
# Rank
df_profit['roe_score'] = np.where(df_profit['ROE_ttm'] > df_profit['ROE_sector_ttm'], 1, 0)
df_profit['roa_score'] = np.where(df_profit['ROA_ttm'] > df_profit['ROA_sector_ttm'], 1, 0)
df_profit['nim_score'] = np.where(df_profit['NIM_securities'] > df_profit['NIM_sector_securities'], 1, 0)
df_profit['profit_score'] = round((df_profit['roe_score']+df_profit['roa_score']+df_profit['nim_score'])*4/3,2)

rank_profit = []

for _, items in df_profit.iterrows():
    if items['profit_score'] < 1:
        rank_profit.append("D")
    elif items['profit_score'] < 2:
        rank_profit.append("C")
    elif items['profit_score'] < 3:
        rank_profit.append("B")
    else:
        rank_profit.append("A")

df_profit['rank_profit'] = rank_profit
df_profit.tail(5)

Unnamed: 0,Symbol,Year,Quarter,Assets,Equity,Loans,IncomeLoansReceivables,InterestExpenses,ProvisionForLosses,NetIncome2,...,ROA_ttm,NIM_securities,ROE_sector_ttm,ROA_sector_ttm,NIM_sector_securities,roe_score,roa_score,nim_score,profit_score,rank_profit
1004,VIG,2020,3,209479700000.0,192239900000.0,12866230000.0,48250370.0,0.0,0.0,-1115782000.0,...,-0.006377,0.00375,0.034145,0.017981,0.007058,0,0,0,0.0,D
1005,VIX,2020,3,1931665000000.0,1616398000000.0,535561600000.0,14644010000.0,5072986000.0,0.0,149508400000.0,...,0.11162,0.017871,0.034145,0.017981,0.007058,1,1,1,4.0,A
1006,VND,2020,3,13418350000000.0,3568174000000.0,2666850000000.0,74955140000.0,84184050000.0,3083944000.0,248130000000.0,...,0.051605,-0.004617,0.034145,0.017981,0.007058,1,1,0,2.67,B
1007,VUA,2020,3,348951800000.0,341191300000.0,63165640000.0,210879600.0,0.0,0.0,9441757000.0,...,0.003272,0.003339,0.034145,0.017981,0.007058,0,0,0,0.0,D
1008,WSS,2020,3,504333900000.0,500363800000.0,15929810000.0,765880200.0,0.0,0.0,4141281000.0,...,0.138997,0.048078,0.034145,0.017981,0.007058,1,1,1,4.0,A


In [None]:
# # Statistics
# print("ROE_Score = 1: " + str(len(df_profit.loc[(df_profit['Year'] == 2023) & (df_profit['Quarter'] == 1) & (df_profit['roe_score'] == 1)])))
# print("ROE_Score = 0: " + str(len(df_profit.loc[(df_profit['Year'] == 2023) & (df_profit['Quarter'] == 1) & (df_profit['roe_score'] == 0)])))
# print("ROA_Score = 1: " + str(len(df_profit.loc[(df_profit['Year'] == 2023) & (df_profit['Quarter'] == 1) & (df_profit['roa_score'] == 1)])))
# print("ROA_Score = 0: " + str(len(df_profit.loc[(df_profit['Year'] == 2023) & (df_profit['Quarter'] == 1) & (df_profit['roa_score'] == 0)])))
# print("nim_score = 1: " + str(len(df_profit.loc[(df_profit['Year'] == 2023) & (df_profit['Quarter'] == 1) & (df_profit['nim_score'] == 1)])))
# print("nim_score = 0: " + str(len(df_profit.loc[(df_profit['Year'] == 2023) & (df_profit['Quarter'] == 1) & (df_profit['nim_score'] == 0)])))

In [None]:
# # Briefly check data given the year and quarter
# df_profit.loc[(df_profit['Year'] == 2023) & (df_profit['Quarter'] == 1)][['Symbol', 'Year', 'Quarter', 'roe_score', 'roa_score', 'nim_score', 'profit_score']]

### 2.2 Health Rank

#### 2.2.1 Merge data

In [33]:
# Get the suitable columns from the balance sheet and the income statement
df_health = pd.merge(
    df_bs[['Symbol', 'Year', 'Quarter', 'Loans', 'Debt', 'Equity']],
    df_is[[
        'Symbol', 'Year', 'Quarter', 'Sales', 'IncomeFVTPL', 'IncomeHTM',
        'IncomeLoansReceivables', 'IncomeAFS', 'IncomeDerivatives',
        'RevenueBrokerageServices', 'RevenueUnderwritingIssuuanceServices',
        'RevenueAdvisoryServices', 'RevenueAuctionTrustServices',
        'RevenueCustodyServices', 'OtherRevenues', 'FVTPL'
    ]],
    on=['Symbol', 'Year', 'Quarter'])


In [34]:
# Briefly check the data
df_health.tail(3)

Unnamed: 0,Symbol,Year,Quarter,Loans,Debt,Equity,Sales,IncomeFVTPL,IncomeHTM,IncomeLoansReceivables,IncomeAFS,IncomeDerivatives,RevenueBrokerageServices,RevenueUnderwritingIssuuanceServices,RevenueAdvisoryServices,RevenueAuctionTrustServices,RevenueCustodyServices,OtherRevenues,FVTPL
1006,WSS,2022,3,4747612000.0,0.0,502908300000.0,2400719000.0,57259950.0,1557584000.0,137377094.0,0.0,0.0,357757859.0,0.0,236904546.0,0.0,53836008.0,0.0,-49577870000.0
1007,WSS,2022,4,5301289000.0,0.0,513593100000.0,7953785000.0,3309535000.0,1472664000.0,205915638.0,0.0,0.0,501073806.0,0.0,103000000.0,0.0,57596277.0,2304000000.0,12452890000.0
1008,WSS,2023,1,4408187000.0,0.0,494960000000.0,7721269000.0,3766240000.0,2603359000.0,199220807.0,0.0,0.0,260902403.0,0.0,840700000.0,0.0,50846572.0,0.0,-19839050000.0


#### 2.2.2 Calculate ratios

- Loans-to-equity

In [35]:
# Calculate Loans-to-equity
df_health['Loans_8Q'] = df_health.groupby('Symbol')['Loans'].rolling(8).sum().to_list()
df_health['Equity_8Q'] = df_health.groupby('Symbol')['Equity'].rolling(8).sum().to_list()
# df_health['Equity_8Q'] = df_health[['Equity', 'Equity_8Q']].mean(axis=1)
df_health['LTE_8Q'] = df_health['Loans_8Q']/df_health['Equity_8Q']
df_health['LTE'] = df_health['Loans']/df_health['Equity']

In [36]:
# Function for scoring based on loans-to-equity
def score_lte(panel_data) -> pd.DataFrame():    
    score_lte = []
    for _, item in panel_data.iterrows():
        if item['LTE'] >= item['LTE_8Q']*1.6:
            score_lte.append(0)
        elif item['LTE'] >= item['LTE_8Q']*1.3:
            score_lte.append(0.5)
        elif item['LTE'] >= item['LTE_8Q']*0.7:
            score_lte.append(1)
        else:
            score_lte.append(0)
    
    panel_data['score_lte'] = score_lte
    
    
    return panel_data

In [37]:
df_health = score_lte(panel_data=df_health)

- Debt-to-Equity

In [38]:
# Calculate Debt-to-equity
df_health['debt_to_equity'] = df_health['Debt']/df_health['Equity']

# Calculate and compare based on median value of the sector
dte_median = df_health.groupby(
    ['Year',
     'Quarter'])['debt_to_equity'].median().reset_index(name='dte_median')
df_health = pd.merge(df_health,
               dte_median[['Year', 'Quarter', 'dte_median']],
               how='outer',
               on=['Year', 'Quarter'])
score_dte = []
for _, item in df_health.iterrows():
    if item['debt_to_equity'] > item['dte_median']:
        score_dte.append(0)
    elif item['debt_to_equity'] == item['dte_median']:
        score_dte.append(0.5)
    else:
        score_dte.append(1)

df_health['score_dte'] = score_dte


- Diversified sales

In [42]:
# Calculate the ratios of each lines which contribute to the sales
for i in range(7, 18):
    print("Calculating: " + df_health.columns[i])
    df_health[f"{df_health.columns[i]}_%"] = df_health[df_health.columns[i]]/df_health[df_health.columns[6]]

Calculating: IncomeFVTPL
Calculating: IncomeHTM
Calculating: IncomeLoansReceivables
Calculating: IncomeAFS
Calculating: IncomeDerivatives
Calculating: RevenueBrokerageServices
Calculating: RevenueUnderwritingIssuuanceServices
Calculating: RevenueAdvisoryServices
Calculating: RevenueAuctionTrustServices
Calculating: RevenueCustodyServices
Calculating: OtherRevenues


In [43]:
dict1 = df_health[[
    'IncomeFVTPL_%', 'IncomeHTM_%', 'IncomeLoansReceivables_%', 'IncomeAFS_%',
    'IncomeDerivatives_%', 'RevenueBrokerageServices_%',
    'RevenueUnderwritingIssuuanceServices_%', 'RevenueAdvisoryServices_%',
    'RevenueAuctionTrustServices_%', 'RevenueCustodyServices_%',
    'OtherRevenues_%'
]].to_dict('records')

In [44]:
def top_share(dictionary: dict, percent_sales: float):
    """ Count the number of top lines which contribute to the given percentage of sales
    ================================================================
    Parameters:
        dictionary: dict
        percent_sales: float
            Defaults equal to 0.8
    """
    df1 = pd.DataFrame.from_dict(data=dictionary, orient='index')
    df1.sort_values([0],ascending=False, inplace=True)
    df1['position_score'] = np.where(df1[0].cumsum()>=percent_sales, 1, 0)
    a = df1.loc[df1['position_score'] == 0].count()['position_score']
    
    return a


def score_share(a: int):
    """ Ranking based on top lines
    ================================================================
    Meanings:
        If there are at least 3 lines contributing to the given percentage of sales. Score = 1
        If there are at least 2 lines contributing to the given percentage of sales. Score = 0.5
        If there are at least 1 lines contributing to the given percentage of sales. Score = 0
    """
    if a>2:
        score = 1
    elif a>1:
        score = 0.5
    else:
        score = 0
    
    return score

In [45]:
# Scoring top_share
for i in range(0, len(dict1)):
    a = top_share(dict1[i], percent_sales=0.8)
    score = score_share(a)
    print(score)
    dict1[i]['score_share'] = score
    
df_health['score_share'] = pd.DataFrame(data=dict1)['score_share']

0
0.5
0
0
0
0
0.5
0.5
0
1
0
0.5
0.5
0
0
0
0.5
0
0.5
0
0
0
0
0.5
1
0
0
0
0.5
0.5
0.5
0
0
0.5
0.5
0
0.5
0.5
0
0
0
0.5
0.5
0
1
0
0
0.5
0
0.5
0.5
0.5
0
0.5
0.5
0
1
0
0.5
1
0
0
0
0.5
0.5
0
0
0
0.5
0
0
0.5
0
0
0
0
0.5
0.5
0
1
0
0
0.5
0.5
1
0.5
0.5
0
0.5
0.5
0.5
1
0
0.5
1
0
0
0
0.5
0.5
0.5
0
0
0.5
0
0
0.5
0.5
0.5
0
0
0.5
0.5
0
0.5
0
0.5
0.5
0
0.5
0.5
0.5
0
0
0.5
0
0
0
0
0.5
1
0
0
0
0
0.5
1
0
0
0.5
0
0.5
0
0
0.5
0
0
0.5
0.5
0
0.5
0
0.5
0
0.5
0
0.5
0.5
0
0.5
0
0
0.5
0
0
1
0
0
0
0.5
0
0.5
0
0
1
0
0.5
0
0.5
0
0
0.5
0
0.5
0.5
0
1
0
0.5
0.5
1
0
0.5
0.5
0
1
0
0
0.5
0
0.5
1
0
0
0.5
0.5
0
0.5
0
0
0.5
0
0.5
0
0.5
0
0
0
0
0.5
0.5
0.5
0
0
0
0
0
0
0.5
0.5
0
1
0
0
1
0
0.5
1
0
0
0
0.5
0
0.5
0
0
1
0
0
0
1
0
0
0.5
0.5
0.5
0.5
0.5
0
0
0
0
0.5
0.5
0.5
0
0
1
0
0
1
0
0.5
1
0.5
0
0
0.5
0
0.5
0
0
1
0
0
0
0
0
0
0
0
0
0.5
0.5
0.5
0
0
0
0
0
0.5
0
0
1
0
0
1
0
0
1
0.5
0
0.5
0.5
0
0.5
0
0
1
0
0.5
0
0.5
0
0
0.5
0
0
0.5
0
0
0
0
0.5
0
0.5
0.5
0
1
0
0.5
1
0
0
0.5
0
0.5
0
0.5
0.5
1
0
0
1
0
0
0
0.5
0.5
0
0
0
0.5
0.5
0.5
0.5
0


- Coefficient Variation: FVTPL

In [48]:
# Calculate the coefficient variation the income from financial assets recognized through profit/loss
def coef_variation_fvtpl(panel_data, window=12) -> pd.DataFrame():
    """ Calculate the coefficient variation the income from financial assets recognized through profit/loss (FVTPL)
    ================================================================
    Parameters:
        panel_data: pd.DataFrame()
        window: int
            The number of period to calculate coefficient variation for FVTPL
    """
    panel_data['FVTPL_m'] = panel_data.groupby('Symbol')['FVTPL'].rolling(window=window).mean().to_list()
    panel_data['FVTPL_std'] = panel_data.groupby('Symbol')['FVTPL'].rolling(window=window).std().to_list()
    panel_data[f'coef_var_{window}q'] = panel_data['FVTPL_m']/panel_data['FVTPL_std']
    
    # del panel_data['FVTPL_m']
    # del panel_data['FVTPL_std']
    
    return panel_data

In [49]:
# Score based on coeffiecient variation of FVTPL
def score_coef(panel_data) -> pd.DataFrame():
    """ Score based on coeffiecient variation of FVTPL
    ================================================================
    Parameters:
        panel_data: pd.DataFrame()
    
    """
    coef_var_12_med = panel_data.groupby([
        'Year', 'Quarter'
    ])['coef_var_12q'].median().reset_index(name='coef_var_12_med')
    panel_data = pd.merge(panel_data, coef_var_12_med, how='outer', on=['Year', 'Quarter'])
    
    score_coef_var = []
    for _, item in panel_data.iterrows():
        if item['coef_var_12q'] > item['coef_var_12_med']:
            score_coef_var.append(0)
        elif item['coef_var_12q'] == item['coef_var_12_med']:
            score_coef_var.append(0.5)
        else:
            score_coef_var.append(1)
    panel_data['score_coef'] = score_coef_var
    
    # del panel_data['coef_var_12_med']
    
    
    return panel_data

In [50]:
df_health = coef_variation_fvtpl(panel_data=df_health, window=12)
df_health = score_coef(panel_data=df_health)

#### 2.2.3 Scoring health criteria

In [51]:
df_health['health_score'] = (df_health['score_share'] +
                             df_health['score_lte'] + 
                             df_health['score_coef'] +
                             df_health['score_dte'])

rank_health = []

for _, items in df_health.iterrows():
    if items['health_score'] > 3:
        rank_health.append("Safe +")
    elif items['health_score'] > 2:
        rank_health.append("Safe")
    elif items['health_score'] > 1:
        rank_health.append("Warning")
    else:
        rank_health.append("Danger")

df_health['rank_health'] = rank_health

### 2.3 Merge data

#### 2.3.1 Merge data
- New profit rank: `df_proft`
- New health rank: `df_health`

In [None]:
# # Export raw ratios
# pd.merge(
#     df_profit,
#     df_health,
#     how='inner',
#     on=['Symbol', 'Year', 'Quarter', 'Equity', 'Loans']
# ).to_excel(r"E:\Tung\Python\BSC_DataRankingStocks\data_raw_ratios\securities_ratios.xlsx")

In [79]:
conn = pyodbc.connect(
    r'Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=V:\iBroker\stock_database.accdb;'
)
df_isr = pd.read_sql("SELECT * FROM income_statement_securities", con=conn)
conn.commit()

col_df_isr = "],[".join(i for i in df_isr.columns.to_list())

list_col_isr = [
    'Symbol', 'Year', 'Quarter', 'IncomeFVTPL_%', 'IncomeHTM_%',
    'IncomeLoansReceivables_%', 'IncomeAFS_%', 'IncomeDerivatives_%',
    'RevenueBrokerageServices_%', 'RevenueUnderwritingIssuuanceServices_%',
    'RevenueAdvisoryServices_%', 'RevenueAuctionTrustServices_%',
    'RevenueCustodyServices_%', 'OtherRevenues_%', 'FVTPL_m', 'FVTPL_std'
]

conn = pyodbc.connect(
    r'Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=V:\iBroker\stock_database.accdb;'
)
cursor = conn.cursor()
for _, row in df_health[list_col_isr].astype(str).iterrows():
    sql = "INSERT INTO income_statement_securities (["+col_df_isr+"]) VALUES "+ str(tuple(row))
    cursor.execute(sql)
    conn.commit()
    
print("Successfully saved data")


In [90]:
sfri = pd.merge(
    df_profit[['Symbol', 'Year', 'Quarter', 'NIM_securities']],
    df_health[['Symbol', 'Year', 'Quarter', 'LTE_8Q', 'LTE', 'debt_to_equity', 'coef_var_12q']],
    how='inner',
    on=['Symbol', 'Year', 'Quarter']
)

In [94]:
conn = pyodbc.connect(
    r'Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=V:\iBroker\stock_database.accdb;'
)
df_sfri = pd.read_sql("SELECT * FROM stock_financial_raio_securities", con=conn)

col_df_sfri = "],[".join(i for i in df_sfri.columns.to_list())

cursor = conn.cursor()
for _, row in sfri.astype(str).iterrows():
    sql = "INSERT INTO stock_financial_raio_securities (["+col_df_sfri+"]) VALUES "+ str(tuple(row))
    cursor.execute(sql)
    conn.commit()
    
print("Successfully saved data")

In [99]:
df_final = pd.merge(
    df_profit[['Symbol', 'Year', 'Quarter', 
               'roe_score', 'roa_score', 'nim_score', 
               'profit_score', 'rank_profit']], 
    df_health[['Symbol', 'Year', 'Quarter', 
               'score_lte', 'score_dte', 'score_share', 'score_coef', 
               'health_score', 'rank_health']], 
    how='inner', 
    on=['Symbol', 'Year', 'Quarter']
)

In [100]:
df_final.sort_values(by=['Symbol', 'Year', 'Quarter'], ascending=[True, True, True], inplace=True)

In [None]:
# # Calculate and compare based on average value of the sector
# dte_sector = df1.groupby(['Year', 'Quarter']).agg({
#     'Debt': "sum",
#     "Equity": "sum"
# }).reset_index()
# dte_sector['dte_sector'] = dte_sector['Debt'] / dte_sector['Equity']
# df1 = pd.merge(df1,
#                dte_sector[['Year', 'Quarter', 'dte_sector']],
#                how='outer',
#                on=['Year', 'Quarter'])
# score_dte = []
# for _, item in df1.iterrows():
#     if item['debt_to_equity'] > item['dte_sector']:
#         score_dte.append(0)
#     elif item['debt_to_equity'] == item['dte_sector']:
#         score_dte.append(0.5)
#     else:
#         score_dte.append(1)

# df1['score_dte'] = score_dte
# df1

#### 2.3.2 Import Raw data 
To get current growth and valuation score

In [101]:
# Get raw final result
conn = pyodbc.connect(
    r'Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=V:\iBroker\stock_database.accdb;'
)
df_raw = pd.read_sql('select * from ptsp_stock_fundamental_score', conn)

conn.close()

df_raw = df_raw.loc[df_raw['Symbol'].isin(list_sec)]
df_raw[['Year', 'Quarter']] = df_raw[['Year', 'Quarter']].astype(int)

#### 2.3.3 Merge all data
- New profit rank
- New health rank
- Current growth rank
- Currnet valuation rank

In [102]:
df_final = pd.merge(df_final,
                    df_raw[[
                        'Symbol', 'Year', 'Quarter', 'score_EPS_above_average',
                        'score_EPS_growth', 'score_EPS_above_sector',
                        'score_EPS_above_group', 'score_growth', 'rank_growth',
                        'score_PE_5Y', 'score_PB_5Y', 'score_PE_sector',
                        'score_PB_sector', 'score_valuation', 'rank_valuation',
                        'score_final', 'rank_final', 'Update'
                    ]],
                    how='inner',
                    on=['Symbol', 'Year', 'Quarter'])

In [103]:
# Change type of data to calculate final score
list_col = [
    'roe_score',
    'roa_score',
    'nim_score',
    'profit_score',
    'score_lte',
    'score_dte',
    'score_share',
    'score_coef',
    'health_score',
    'score_EPS_above_average',
    'score_EPS_growth',
    'score_EPS_above_sector',
    'score_EPS_above_group',
    'score_growth',
    'score_PE_5Y',
    'score_PB_5Y',
    'score_PE_sector',
    'score_PB_sector',
    'score_valuation',
]

for i in list_col:
    df_final[i] = df_final[i].astype(float)

In [104]:
df_final['score_final'] = round(
    np.mean(df_final[[
        'profit_score', 'health_score', 'score_growth', 'score_valuation'
    ]], axis=1), 2)

for _, items in df_final.iterrows():
    if items['score_final'] < 1:
        items['rank_final'] = "D"
    elif items['score_final'] < 2:
        items['rank_final'] = "C"
    elif items['score_final'] < 3:
        items['rank_final'] = "B" 
    else:
        items['rank_final'] = "A"

## 3. Save to DB Access

### 3.1 Get data fields in new table
- `ptsp_stock_fundamental_score_financial`

In [105]:
# "],[".join(i for i in df_db.columns.to_list())
col_df_db = '[Symbol],[Year],[Quarter],[score_roe_sector],[score_roa_sector],[score_nim_sector],[score_profit],[rank_profit],[score_lte],[score_dte],[score_diversified_sale],[score_coef_variation],[score_health],[rank_health],[score_EPS_above_average],[score_EPS_growth],[score_EPS_above_sector],[score_EPS_above_group],[score_growth],[rank_growth],[score_PE_5Y],[score_PB_5Y],[score_PE_sector],[score_PB_sector],[score_valuation],[rank_valuation],[score_final],[rank_final],[update]'

### 3.2 Save data to new table

In [106]:
conn = pyodbc.connect(
    r'Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=V:\iBroker\stock_database.accdb;'
)
cursor = conn.cursor()
for _, row in df_final.astype(str).iterrows():
    sql = "INSERT INTO ptsp_stock_fundamental_score_financial ("+col_df_db+") VALUES "+ str(tuple(row))
    cursor.execute(sql)
    conn.commit()
    
print("Successfully saved data")


Successfully saved data
