Ranking the securities sector based on Profit and Health

Profit:
- ROE
- ROA
- NIM

Health:
- Loans-to-Equity
- Debt-to-Equity
- Top % Share
- Coefficient Variation of FVTPL

# 1. Import

### 1.1 Library

In [240]:
import pandas as pd
import numpy as np
import datetime as dt
import pyodbc
import pymssql
import sys

sys.path.append(r"F:\Tùng\Tung\Python\DashBoard\vnd_data")
import get_vnd_data as vnd

# ignore warnings
import warnings
warnings.filterwarnings('ignore')

# Customize the display of the table
pd.set_option('chained_assignment', None)

In [241]:
# Get today
now = dt.datetime.today().strftime("%Y%m%d")
# now = "20230809"


### 1.2 Import data

#### 1.2.1 Raw data

In [242]:
# Assign pathlink
path_income_securities = r"F:\Tùng\Tung\Python\BSC_DataRankingStocks\cache\is_securities.csv"
path_bs_securities = r"F:\Tùng\Tung\Python\BSC_DataRankingStocks\cache\bs_securities.csv"

# Import data, it includes Income statement and Balance Sheet
df_is = pd.read_csv(path_income_securities)
df_is.drop(['Unnamed: 0'], axis=1, inplace=True)
df_bs = pd.read_csv(path_bs_securities)
df_bs.drop(['Unnamed: 0'], axis=1, inplace=True)

# Preprocess data
df_is = df_is.loc[df_is['Quarter'] != 0]
df_bs = df_bs.loc[df_bs['Quarter'] != 0]
df_is.fillna(0, inplace=True)
df_bs.fillna(0, inplace=True)

# Assign the list of stocks
list_sec = df_is['Symbol'].unique()

In [243]:
# # Briefly check the lastest data
# print(f"There are : {len(df_is.loc[(df_is['Year'] == 2023) & (df_is['Quarter'] == 2)])} stocks in Income Statement")
# print(f"There are : {len(df_bs.loc[(df_bs['Year'] == 2023) & (df_bs['Quarter'] == 2)])} stocks in Balance Sheet")

There are : 4 stocks in Income Statement
There are : 4 stocks in Balance Sheet


#### 1.2.2 Provision data 
Due to SQL lacks this field

In [244]:
""" Due to BSC SQL Server does not have the data about provision for losses from mortgage assets,
uncollectible receivables and borrowing expenses in the Income Statement. Therefore, this step is to
implement the data. Specifically, the data is collected from VND's resources."""

provision_for_losses = []

for i in list_sec:
    print(f"Stock: {i}")
    df_i = vnd.get_income_statement(i)
    df_i['fiscalDate'] = pd.to_datetime(df_i['fiscalDate'])
    df_i['Year'] = df_i['fiscalDate'].dt.year
    df_i['Quarter'] = df_i['fiscalDate'].dt.quarter
    df_i = df_i.loc[df_i['itemCode'] == 700053]
    
    provision_for_losses.append(df_i)

print("Finish: Successfully get the data")
provision_for_losses = pd.concat(provision_for_losses)

# Process and Remove unnecessary columns
provision_for_losses.drop(
    [
        'reportType', 'modelType', 'fiscalDate', 
        'createdDate', 'modifiedDate','itemCode'
    ],
    axis=1,
    inplace=True
)
provision_for_losses.rename(
    columns={
        "code": "Symbol",
        "numericValue": "ProvisionForLosses"
    },
    inplace=True
)
provision_for_losses.sort_values(
    by=['Symbol', 'Year', 'Quarter'],
    ascending=[True, True, True],
    inplace=True
)


Stock: AAS
Stock: ABW
Stock: AGR
Stock: APG
Stock: APS
Stock: ART
Stock: AVS
Stock: BMS
Stock: BSI
Stock: BVS
Stock: CLS
Stock: CSI
Stock: CTS
Stock: DSC
Stock: EVS
Stock: FSC
Stock: FTS
Stock: GBS
Stock: HAC
Stock: HBS
Stock: HCM
Stock: HFT
Stock: IVS
Stock: KLS
Stock: MBS
Stock: ORS
Stock: PHS
Stock: PSI
Stock: SBS
Stock: SHS
Stock: SME
Stock: SSI
Stock: SVS
Stock: TAS
Stock: TCI
Stock: TVB
Stock: TVS
Stock: VCI
Stock: VDS
Stock: VFS
Stock: VIG
Stock: VIX
Stock: VND
Stock: VUA
Stock: WSS
Finish: Successfully get the data


In [245]:
# provision_for_losses.loc[provision_for_losses['Symbol'] == 'WSS']

Unnamed: 0,Symbol,ProvisionForLosses,Year,Quarter
4697,WSS,0.0,2009,3
4613,WSS,0.0,2009,4
4529,WSS,0.0,2010,1
4445,WSS,0.0,2010,2
4361,WSS,0.0,2010,3
4277,WSS,0.0,2010,4
4193,WSS,0.0,2011,1
4109,WSS,0.0,2011,2
4025,WSS,0.0,2011,3
3941,WSS,0.0,2011,4


#### 1.2.3 Add data for WSS
- Manually: Add data for WSS at Quarter: 4, Year: 2022 & Quarter 1 Year 2023

In [246]:
# """ The data lacks WSS from Quarter 4 - Year 2022 up to now. Therefore, this step is to add related data.
# When there's no problem about the data, may be, this step could be removed."""

# provision_for_losses = provision_for_losses.append(
#     [{
#         "Symbol": 'WSS',
#         'ProvisionForLosses': 0,
#         'Year': 2022,
#         'Quarter': 4
#     },
#     {
#         "Symbol": 'WSS',
#         'ProvisionForLosses': 0,
#         'Year': 2023,
#         'Quarter': 1
#     }],
#     ignore_index=True)


#### 1.2.3 Preprocess data

In [247]:
# Merge all the data belonging to the Income Statement
df_is = df_is.merge(
    provision_for_losses, 
    how="inner", 
    on=['Symbol', 'Year', 'Quarter']
)

# Sort all the data by Symbol, Year and Quarter for df_is and df_bs
df_is.sort_values(
    by=['Symbol', 'Year', 'Quarter'], 
    ascending=[True, True, True], 
    inplace=True
)
df_bs.sort_values(
    by=['Symbol', 'Year', 'Quarter'], 
    ascending=[True, True, True], 
    inplace=True
)

In [1]:
# Check data in the Income Statement (df_is) or Balance Sheet (df_bs)
# df_bs.tail(3)
# df_is.tail(3)

## 2. Process data

### 2.1 Profit Rank

In [249]:
# Get the suitable columns from the balance sheet and the income statement
df_profit = pd.merge(
    left=df_bs[[
        'Symbol', 'Year', 'Quarter', 'Assets', 'Equity', 'Loans'
    ]],
    right=df_is[[
        'Symbol', 'Year', 'Quarter', 'IncomeLoansReceivables',
        'InterestExpenses', 'ProvisionForLosses', 'NetIncome2'
    ]],
    on=['Symbol', 'Year', 'Quarter']
)


In [250]:
df_profit.head(3)

Unnamed: 0,Symbol,Year,Quarter,Assets,Equity,Loans,IncomeLoansReceivables,InterestExpenses,ProvisionForLosses,NetIncome2
0,AAS,2017,2,299448400000.0,297544700000.0,0.0,0.0,0.0,0.0,-2373963000.0
1,AAS,2017,3,302846200000.0,300286500000.0,0.0,54793564.0,0.0,0.0,2741754000.0
2,AAS,2017,4,307354500000.0,304843600000.0,230354853.0,39111621.0,0.0,0.0,4557140000.0


#### 2.1.1 Calculate ratios

In [251]:
# Calculate ratios of Individual Stocks
df_profit['Equity_m'] = df_profit.groupby('Symbol')['Equity'].shift(4).to_list()
df_profit['Assets_m'] = df_profit.groupby('Symbol')['Assets'].shift(4).to_list()

df_profit['NetIncome2_ttm'] = df_profit.groupby('Symbol')['NetIncome2'].rolling(4).sum().to_list()

df_profit['Equity_m'] = df_profit[['Equity', 'Equity_m']].mean(axis=1)
df_profit['Assets_m'] = df_profit[['Assets_m', 'Assets_m']].mean(axis=1)

df_profit['ROE_ttm'] = df_profit['NetIncome2_ttm']/df_profit['Equity_m']
df_profit['ROA_ttm'] = df_profit['NetIncome2_ttm']/df_profit['Assets_m']
df_profit['nim_securities'] = (df_profit['IncomeLoansReceivables'] - df_profit['InterestExpenses'] - df_profit['ProvisionForLosses'])/df_profit['Loans']

In [4]:
# Briefly check the data
# df_profit.tail(3)

In [253]:
# Calculate ratios of the securities sector
df_sector = df_profit.groupby(["Year", "Quarter"]).agg({
    "NetIncome2": "sum",
    "Equity_m": "sum",
    "Assets_m": "sum",
    "IncomeLoansReceivables": "sum",
    "InterestExpenses": "sum",
    "ProvisionForLosses": "sum",
    "Loans": "sum"
}).reset_index()

df_sector['ROE_sector_ttm'] = df_sector['NetIncome2'] / df_sector['Equity_m']
df_sector['ROA_sector_ttm'] = df_sector['NetIncome2'] / df_sector['Assets_m']
df_sector['NIM_sector_securities'] = (
    df_sector['IncomeLoansReceivables'] - df_sector['InterestExpenses'] -
    df_sector['ProvisionForLosses']) / df_sector['Loans']

# Merge data from individual stocks and their sector
df_profit = pd.merge(
    df_profit,
    df_sector[[
        'Year', 'Quarter', 'ROE_sector_ttm', 'ROA_sector_ttm',
        'NIM_sector_securities'
    ]],
    how='outer',
    on=['Year', 'Quarter']
)

In [2]:
# df_profit.tail(3)

#### 2.1.2 Scoring profit criteria
- Rank and score for profit criteria

In [255]:
# Rank
df_profit['score_roe_sector'] = np.where(df_profit['ROE_ttm'] > df_profit['ROE_sector_ttm'], 1, 0)
df_profit['score_roa_sector'] = np.where(df_profit['ROA_ttm'] > df_profit['ROA_sector_ttm'], 1, 0)
df_profit['score_nim_sector'] = np.where(df_profit['nim_securities'] > df_profit['NIM_sector_securities'], 1, 0)
df_profit['score_profit'] = round((df_profit['score_roe_sector']+df_profit['score_roa_sector']+df_profit['score_nim_sector'])*4/3,2)

rank_profit = []

for _, items in df_profit.iterrows():
    if items['score_profit'] < 1:
        rank_profit.append("D")
    elif items['score_profit'] < 2:
        rank_profit.append("C")
    elif items['score_profit'] < 3:
        rank_profit.append("B")
    else:
        rank_profit.append("A")

df_profit['rank_profit'] = rank_profit

In [256]:
df_profit.tail(5)

Unnamed: 0,Symbol,Year,Quarter,Assets,Equity,Loans,IncomeLoansReceivables,InterestExpenses,ProvisionForLosses,NetIncome2,...,ROA_ttm,nim_securities,ROE_sector_ttm,ROA_sector_ttm,NIM_sector_securities,score_roe_sector,score_roa_sector,score_nim_sector,score_profit,rank_profit
1013,WSS,2020,3,504333900000.0,500363800000.0,15929810000.0,765880200.0,0.0,0.0,4141281000.0,...,0.138997,0.048078,0.034145,0.017981,0.007058,1,1,1,4.0,A
1014,AGR,2023,2,3044723000000.0,2434249000000.0,1371715000000.0,36952090000.0,1419507000.0,3505381000.0,41584840000.0,...,0.062883,0.023348,0.051786,0.034793,0.016892,1,1,1,4.0,A
1015,BMS,2023,2,1085576000000.0,802752400000.0,3758776000.0,12969790.0,1334864000.0,0.0,85350630000.0,...,0.02701,-0.351682,0.051786,0.034793,0.016892,0,0,0,0.0,D
1016,MBS,2023,2,10760720000000.0,4699000000000.0,5326076000000.0,139140200000.0,79527290000.0,0.0,123587800000.0,...,0.039367,0.011193,0.051786,0.034793,0.016892,1,1,0,2.67,B
1017,VIX,2023,2,8552368000000.0,8439075000000.0,1584911000000.0,49660400000.0,0.0,0.0,565562200000.0,...,0.065847,0.031333,0.051786,0.034793,0.016892,1,1,1,4.0,A


In [258]:
# # Briefly check data given the year and quarter
# df_profit.loc[(df_profit['Year'] == 2023) & (df_profit['Quarter'] == 1)][['Symbol', 'Year', 'Quarter', 'roe_score', 'roa_score', 'nim_score', 'profit_score']]

### 2.2 Health Rank

#### 2.2.1 Merge data

In [259]:
# Get the suitable columns from the balance sheet and the income statement
df_health = pd.merge(
    df_bs[['Symbol', 'Year', 'Quarter', 'Loans', 'Debt', 'Equity']],
    df_is[[
        'Symbol', 'Year', 'Quarter', 'Sales', 'IncomeFVTPL', 'IncomeHTM',
        'IncomeLoansReceivables', 'IncomeAFS', 'IncomeDerivatives',
        'RevenueBrokerageServices', 'RevenueUnderwritingIssuuanceServices',
        'RevenueAdvisoryServices', 'RevenueAuctionTrustServices',
        'RevenueCustodyServices', 'OtherRevenues', 'FVTPL'
    ]],
    on=['Symbol', 'Year', 'Quarter']
)


In [3]:
# Briefly check the data
# df_health.tail(3)

#### 2.2.2 Calculate ratios

- Loans-to-equity

In [261]:
# Calculate Loans-to-equity
df_health['Loans_8Q'] = df_health.groupby('Symbol')['Loans'].rolling(8).sum().to_list()
df_health['Equity_8Q'] = df_health.groupby('Symbol')['Equity'].rolling(8).sum().to_list()
# df_health['Equity_8Q'] = df_health[['Equity', 'Equity_8Q']].mean(axis=1)
df_health['lte_8q'] = df_health['Loans_8Q']/df_health['Equity_8Q']
df_health['lte'] = df_health['Loans']/df_health['Equity']

In [262]:
# Function for scoring based on loans-to-equity
def score_lte(panel_data) -> pd.DataFrame():    
    score_lte = []
    for _, item in panel_data.iterrows():
        if item['lte'] >= item['lte_8q']*1.6:
            score_lte.append(0)
        elif item['lte'] >= item['lte_8q']*1.3:
            score_lte.append(0.5)
        elif item['lte'] >= item['lte_8q']*0.7:
            score_lte.append(1)
        else:
            score_lte.append(0)
    
    panel_data['score_lte'] = score_lte
    
    
    return panel_data

In [263]:
df_health = score_lte(panel_data=df_health)

In [264]:
# df_health.head(3)

Unnamed: 0,Symbol,Year,Quarter,Loans,Debt,Equity,Sales,IncomeFVTPL,IncomeHTM,IncomeLoansReceivables,...,RevenueAdvisoryServices,RevenueAuctionTrustServices,RevenueCustodyServices,OtherRevenues,FVTPL,Loans_8Q,Equity_8Q,lte_8q,lte,score_lte
0,AAS,2017,2,0.0,0.0,297544700000.0,282191200.0,0.0,0.0,0.0,...,280000000.0,0.0,2191200.0,0.0,0.0,,,,0.0,0.0
1,AAS,2017,3,0.0,0.0,300286500000.0,5602235000.0,4885710000.0,0.0,54793564.0,...,345000000.0,0.0,20257010.0,0.0,4885710000.0,,,,0.0,0.0
2,AAS,2017,4,230354853.0,0.0,304843600000.0,8269968000.0,5177039000.0,0.0,39111621.0,...,90000000.0,0.0,33834600.0,2557447000.0,5177039000.0,,,,0.000756,0.0


- Debt-to-Equity

In [265]:
# Calculate Debt-to-equity
df_health['debt_to_equity'] = df_health['Debt']/df_health['Equity']

# Calculate and compare based on median value of the sector
dte_median = df_health.groupby(
    ['Year',
     'Quarter'])['debt_to_equity'].median().reset_index(name='dte_median')
df_health = pd.merge(
    df_health,
    dte_median[['Year', 'Quarter', 'dte_median']],
    how='outer',
    on=['Year', 'Quarter']
)
score_dte = []
for _, item in df_health.iterrows():
    if item['debt_to_equity'] > item['dte_median']:
        score_dte.append(0)
    elif item['debt_to_equity'] == item['dte_median']:
        score_dte.append(0.5)
    else:
        score_dte.append(1)

df_health['score_dte'] = score_dte


In [266]:
df_health.columns

Index(['Symbol', 'Year', 'Quarter', 'Loans', 'Debt', 'Equity', 'Sales',
       'IncomeFVTPL', 'IncomeHTM', 'IncomeLoansReceivables', 'IncomeAFS',
       'IncomeDerivatives', 'RevenueBrokerageServices',
       'RevenueUnderwritingIssuuanceServices', 'RevenueAdvisoryServices',
       'RevenueAuctionTrustServices', 'RevenueCustodyServices',
       'OtherRevenues', 'FVTPL', 'Loans_8Q', 'Equity_8Q', 'lte_8q', 'lte',
       'score_lte', 'debt_to_equity', 'dte_median', 'score_dte'],
      dtype='object')

- Diversified sales

In [267]:
# Calculate the ratios of each lines which contribute to the sales
for i in range(7, 18):
    print("Calculating: " + df_health.columns[i])
    df_health[f"{df_health.columns[i]}_%"] = df_health[df_health.columns[i]]/df_health[df_health.columns[6]]

Calculating: IncomeFVTPL
Calculating: IncomeHTM
Calculating: IncomeLoansReceivables
Calculating: IncomeAFS
Calculating: IncomeDerivatives
Calculating: RevenueBrokerageServices
Calculating: RevenueUnderwritingIssuuanceServices
Calculating: RevenueAdvisoryServices
Calculating: RevenueAuctionTrustServices
Calculating: RevenueCustodyServices
Calculating: OtherRevenues


In [268]:
dict1 = df_health[[
    'IncomeFVTPL_%', 'IncomeHTM_%', 'IncomeLoansReceivables_%', 'IncomeAFS_%',
    'IncomeDerivatives_%', 'RevenueBrokerageServices_%',
    'RevenueUnderwritingIssuuanceServices_%', 'RevenueAdvisoryServices_%',
    'RevenueAuctionTrustServices_%', 'RevenueCustodyServices_%',
    'OtherRevenues_%'
]].to_dict('records')

In [269]:
def top_share(dictionary: dict, percent_sales: float):
    """ Count the number of top lines which contribute to the given percentage of sales
    ================================================================
    Parameters:
        dictionary: dict
        percent_sales: float
            Defaults equal to 0.8
    """
    df1 = pd.DataFrame.from_dict(
        data=dictionary, 
        orient='index'
    )
    df1.sort_values(
        by=[0],
        ascending=False, 
        inplace=True
    )
    df1['position_score'] = np.where(df1[0].cumsum()>=percent_sales, 1, 0)
    a = df1.loc[df1['position_score'] == 0].count()['position_score']
    
    return a

In [270]:
def score_share(a: int):
    """ Ranking based on top lines
    ================================================================
    Meanings:
        If there are at least 3 lines contributing to the given percentage of sales. Score = 1
        If there are at least 2 lines contributing to the given percentage of sales. Score = 0.5
        If there are at least 1 lines contributing to the given percentage of sales. Score = 0
    """
    if a>2:
        score = 1
    elif a>1:
        score = 0.5
    else:
        score = 0
    
    return score

In [271]:
# Scoring top_share
for i in range(0, len(dict1)):
    a = top_share(dict1[i], percent_sales=0.8)
    score = score_share(a)
    print(score)
    dict1[i]['score_diversified_sale'] = score
    
df_health['score_diversified_sale'] = pd.DataFrame(data=dict1)['score_diversified_sale']

0
0.5
0
0
0
0
0.5
0.5
0
1
0
0.5
0.5
0
0
0
0.5
0
0.5
0
0
0
0
0.5
1
0
0
0
0.5
0.5
0.5
0
0
0.5
0.5
0
0.5
0.5
0
0
0
0.5
0.5
0
1
0
0
0.5
0
0.5
0.5
0.5
0
0.5
0.5
0
1
0
0.5
1
0
0
0
0.5
0.5
0
0
0
0.5
0
0
0.5
0
0
0
0
0.5
0.5
0
1
0
0
0.5
0.5
1
0.5
0.5
0
0.5
0.5
0.5
1
0
0.5
1
0
0
0
0.5
0.5
0.5
0
0
0.5
0
0
0.5
0.5
0.5
0
0
0.5
0.5
0
0.5
0
0.5
0.5
0
0.5
0.5
0.5
0
0
0.5
0
0
0
0
0.5
1
0
0
0
0
0.5
1
0
0
0.5
0
0.5
0
0
0.5
0
0
0.5
0.5
0
0.5
0
0.5
0
0.5
0
0.5
0.5
0
0.5
0
0
0.5
0
0
1
0
0
0
0.5
0
0.5
0
0
1
0
0.5
0
0.5
0
0
0.5
0
0.5
0.5
0
1
0
0.5
0.5
1
0
0.5
0.5
0
1
0
0
0.5
0
0.5
1
0
0
0.5
0.5
0
0.5
0
0
0.5
0
0.5
0
0.5
0
0
0
0
0.5
0.5
0.5
0
0
0
0
0
0
0.5
0.5
0
1
0
0
1
0
0.5
1
0
0
0
0.5
0
0.5
0
0
1
0
0
0
1
0
0
0.5
0.5
0.5
0.5
0.5
0
0
0
0
0.5
0.5
0.5
0
0
1
0
0
1
0
0.5
1
0.5
0
0
0.5
0
0.5
0
0
1
0
0
0
0
0
0
0
0
0
0.5
0.5
0.5
0
0
0
0
0
0.5
0
0
1
0
0
1
0
0
1
0.5
0
0.5
0.5
0
0.5
0
0
1
0
0.5
0
0.5
0
0
0.5
0
0
0.5
0
0
0
0
0.5
0
0.5
0.5
0
1
0
0.5
1
0
0
0.5
0
0.5
0
0.5
0.5
1
0
0
1
0
0
0
0.5
0.5
0
0
0
0.5
0.5
0.5
0.5
0


- Coefficient Variation: FVTPL

In [272]:
# Calculate the coefficient variation the income from financial assets recognized through profit/loss
def coef_variation_fvtpl(panel_data, window=12) -> pd.DataFrame():
    """ Calculate the coefficient variation the income from financial assets recognized through profit/loss (FVTPL)
    ================================================================
    Parameters:
        panel_data: pd.DataFrame()
        window: int
            The number of period to calculate coefficient variation for FVTPL
    """
    panel_data['FVTPL_m'] = panel_data.groupby('Symbol')['FVTPL'].rolling(window=window).mean().to_list()
    panel_data['FVTPL_std'] = panel_data.groupby('Symbol')['FVTPL'].rolling(window=window).std().to_list()
    panel_data[f'coef_var_{window}q'] = panel_data['FVTPL_m']/panel_data['FVTPL_std']
    
    # del panel_data['FVTPL_m']
    # del panel_data['FVTPL_std']
    
    return panel_data

In [273]:
# Score based on coeffiecient variation of FVTPL
def score_coef(panel_data) -> pd.DataFrame():
    """ Score based on coeffiecient variation of FVTPL
    ================================================================
    Parameters:
        panel_data: pd.DataFrame()
    
    """
    coef_var_12_med = panel_data.groupby([
        'Year', 'Quarter'
    ])['coef_var_12q'].median().reset_index(name='coef_var_12_med')
    panel_data = pd.merge(panel_data, coef_var_12_med, how='outer', on=['Year', 'Quarter'])
    
    score_coef_var = []
    for _, item in panel_data.iterrows():
        if item['coef_var_12q'] > item['coef_var_12_med']:
            score_coef_var.append(0)
        elif item['coef_var_12q'] == item['coef_var_12_med']:
            score_coef_var.append(0.5)
        else:
            score_coef_var.append(1)
    panel_data['score_coef_variation'] = score_coef_var
    
    # del panel_data['coef_var_12_med']
    
    
    return panel_data

In [274]:
df_health = coef_variation_fvtpl(panel_data=df_health, window=12)
df_health = score_coef(panel_data=df_health)

In [275]:
df_health.head(3)

Unnamed: 0,Symbol,Year,Quarter,Loans,Debt,Equity,Sales,IncomeFVTPL,IncomeHTM,IncomeLoansReceivables,...,RevenueAdvisoryServices_%,RevenueAuctionTrustServices_%,RevenueCustodyServices_%,OtherRevenues_%,score_diversified_sale,FVTPL_m,FVTPL_std,coef_var_12q,coef_var_12_med,score_coef_variation
0,AAS,2017,2,0.0,0.0,297544700000.0,282191200.0,0.0,0.0,0.0,...,0.992235,0.0,0.007765,0.0,0.0,,,,0.430842,1.0
1,AGR,2017,2,810153900000.0,55000000000.0,1764758000000.0,47541420000.0,10242950000.0,5155657000.0,19605560000.0,...,0.003972,0.0,0.024012,0.014447,0.5,,,,0.430842,1.0
2,APG,2017,2,4602290000.0,0.0,140913000000.0,8799204000.0,9115721.0,257570000.0,2671460000.0,...,0.631086,0.0,0.003806,0.0,0.0,,,,0.430842,1.0


#### 2.2.3 Scoring health criteria

In [276]:
df_health['score_health'] = (df_health['score_lte'] +
                             df_health['score_dte'] + 
                             df_health['score_diversified_sale'] +
                             df_health['score_coef_variation'])

rank_health = []

for _, items in df_health.iterrows():
    if items['score_health'] > 3:
        rank_health.append("Safe +")
    elif items['score_health'] > 2:
        rank_health.append("Safe")
    elif items['score_health'] > 1:
        rank_health.append("Warning")
    else:
        rank_health.append("Danger")

df_health['rank_health'] = rank_health

### 2.3 Merge data

#### 2.3.1 Merge data
- New profit rank: `df_proft`
- New health rank: `df_health`

In [277]:
# # Export raw ratios
# pd.merge(
#     df_profit,
#     df_health,
#     how='inner',
#     on=['Symbol', 'Year', 'Quarter', 'Equity', 'Loans']
# ).to_excel(r"E:\Tung\Python\BSC_DataRankingStocks\data_raw_ratios\securities_ratios.xlsx")

In [278]:
conn = pyodbc.connect(
    r'Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=V:\iBroker\stock_database.accdb;'
)
df_isr = pd.read_sql(
    sql="SELECT * FROM income_statement_securities", 
    con=conn
)
conn.commit()

In [279]:
df_health.head(3)

Unnamed: 0,Symbol,Year,Quarter,Loans,Debt,Equity,Sales,IncomeFVTPL,IncomeHTM,IncomeLoansReceivables,...,RevenueCustodyServices_%,OtherRevenues_%,score_diversified_sale,FVTPL_m,FVTPL_std,coef_var_12q,coef_var_12_med,score_coef_variation,score_health,rank_health
0,AAS,2017,2,0.0,0.0,297544700000.0,282191200.0,0.0,0.0,0.0,...,0.007765,0.0,0.0,,,,0.430842,1.0,2.0,Warning
1,AGR,2017,2,810153900000.0,55000000000.0,1764758000000.0,47541420000.0,10242950000.0,5155657000.0,19605560000.0,...,0.024012,0.014447,0.5,,,,0.430842,1.0,1.5,Warning
2,APG,2017,2,4602290000.0,0.0,140913000000.0,8799204000.0,9115721.0,257570000.0,2671460000.0,...,0.003806,0.0,0.0,,,,0.430842,1.0,2.0,Warning


In [280]:
list_col_isr = [
    'Symbol', 'Year', 'Quarter', 'IncomeFVTPL_%', 'IncomeHTM_%',
    'IncomeLoansReceivables_%', 'IncomeAFS_%', 'IncomeDerivatives_%',
    'RevenueBrokerageServices_%', 'RevenueUnderwritingIssuuanceServices_%',
    'RevenueAdvisoryServices_%', 'RevenueAuctionTrustServices_%',
    'RevenueCustodyServices_%', 'OtherRevenues_%', 'FVTPL_m', 'FVTPL_std',
]

In [281]:
df_isr_new = pd.concat(
    [
        df_isr[list_col_isr],
        df_health[list_col_isr].astype(str)
    ]
).drop_duplicates(keep=False)

In [282]:
df_isr_new['Update'] = now

In [283]:
df_isr_new

Unnamed: 0,Symbol,Year,Quarter,IncomeFVTPL_%,IncomeHTM_%,IncomeLoansReceivables_%,IncomeAFS_%,IncomeDerivatives_%,RevenueBrokerageServices_%,RevenueUnderwritingIssuuanceServices_%,RevenueAdvisoryServices_%,RevenueAuctionTrustServices_%,RevenueCustodyServices_%,OtherRevenues_%,FVTPL_m,FVTPL_std,Update


In [284]:
list_col_isr.append('Update')

In [285]:
col_df_isr = "],[".join(i for i in df_isr.columns.to_list())

In [286]:
conn = pyodbc.connect(
    r'Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=V:\iBroker\stock_database.accdb;'
)
cursor = conn.cursor()
for _, row in df_isr_new[list_col_isr].astype(str).iterrows():
    sql = "INSERT INTO income_statement_securities (["+col_df_isr+"]) VALUES "+ str(tuple(row))
    cursor.execute(sql)
    conn.commit()
    
print("Successfully saved data")


Successfully saved data


In [312]:
# TABLE: stock_financial_ratio_securities
# Create sfri (table) by merging df_profit and df_health
sfri = pd.merge(
    df_profit[[
        'Symbol', 'Year', 'Quarter', 
        'nim_securities'
    ]],
    df_health[[
        'Symbol', 'Year', 'Quarter', 
        'lte_8q', 'lte', 'debt_to_equity', 
        'coef_var_12q'
    ]],
    how='inner',
    on=['Symbol', 'Year', 'Quarter']
)

In [313]:
sfri

Unnamed: 0,Symbol,Year,Quarter,nim_securities,lte_8q,lte,debt_to_equity,coef_var_12q
0,AAS,2017,2,,,0.000000,0.000000,
1,AGR,2017,2,0.024452,,0.459074,0.031166,
2,APG,2017,2,0.855369,,0.032661,0.000000,
3,APS,2017,2,0.004335,,0.365703,0.000000,
4,ART,2017,2,0.320853,,0.210572,0.000000,
...,...,...,...,...,...,...,...,...
1013,WSS,2020,3,0.048078,0.034134,0.031836,0.000000,-0.072163
1014,AGR,2023,2,0.023348,0.596847,0.563507,0.156106,-0.249379
1015,BMS,2023,2,-0.351682,0.047773,0.004682,0.000000,-0.259350
1016,MBS,2023,2,0.011193,1.510873,1.133449,1.241413,-0.063039


In [314]:
# Connect to database and get the corresponding data 
conn = pyodbc.connect(
    r'Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=V:\iBroker\stock_database.accdb;'
)
df_sfri = pd.read_sql(
    sql="SELECT * FROM stock_financial_ratio_securities", 
    con=conn
)

In [316]:
# Assign variable for selected columns
sfri_column = df_sfri.columns[:-1].to_list()

In [317]:
df_sfri[sfri_column]

Unnamed: 0,Symbol,Year,Quarter,nim_securities,lte_8q,lte,debt_to_equity,coef_var_12q
0,AAS,2017,2,,,0.0,0.0,
1,AGR,2017,2,0.024452476174482655,,0.4590735885727527,0.031165741945450378,
2,APG,2017,2,0.8553686520837365,,0.032660502486529,0.0,
3,APS,2017,2,0.004335042874114701,,0.3657027052088879,0.0,
4,ART,2017,2,0.32085256732349693,,0.21057245867393576,0.0,
...,...,...,...,...,...,...,...,...
1013,WSS,2020,3,0.04807843064364548,0.03413356036770226,0.03183645223632248,0.0,-0.07216280321305198
1014,AGR,2023,2,0.023348288541550487,0.5968468914823468,0.5635066661566884,0.1561056683235182,-0.2493786642950388
1015,BMS,2023,2,-0.3516822216782862,0.047773020674003075,0.004682359776041949,0.0,-0.2593497122228974
1016,MBS,2023,2,0.011192645708747237,1.5108733195530053,1.1334488513302838,1.2414127196660163,-0.06303927151453322


In [318]:
# To insert new values by dropping duplicate values
df_sfri_new = pd.concat(
    [
        df_sfri[sfri_column],
        sfri.astype(str)
    ]
).drop_duplicates(keep=False)

In [319]:
df_sfri_new

Unnamed: 0,Symbol,Year,Quarter,nim_securities,lte_8q,lte,debt_to_equity,coef_var_12q


In [320]:
# Implement update day
df_sfri_new['Update'] = now

In [321]:
# Set up variable for inserting data to the database
col_df_sfri = "],[".join(i for i in df_sfri.columns.to_list())

'Symbol],[Year],[Quarter],[nim_securities],[lte_8q],[lte],[debt_to_equity],[coef_var_12q],[Update'

In [322]:
# Save data
cursor = conn.cursor()
for _, row in df_sfri_new.astype(str).iterrows():
    sql = "INSERT INTO stock_financial_ratio_securities (["+col_df_sfri+"]) VALUES "+ str(tuple(row))
    cursor.execute(sql)
    conn.commit()
    
print("Successfully saved data")

Successfully saved data


In [371]:
df_final = pd.merge(
    df_profit[['Symbol', 'Year', 'Quarter', 
               'score_roe_sector', 'score_roa_sector', 'score_nim_sector', 
               'score_profit', 'rank_profit']], 
    df_health[['Symbol', 'Year', 'Quarter', 
               'score_lte', 'score_dte', 
               'score_diversified_sale', 'score_coef_variation', 
               'score_health', 'rank_health']], 
    how='inner', 
    on=['Symbol', 'Year', 'Quarter']
)

In [372]:
df_final.sort_values(
    by=['Symbol', 'Year', 'Quarter'], 
    ascending=[True, True, True], 
    inplace=True
)

In [373]:
df_final

Unnamed: 0,Symbol,Year,Quarter,score_roe_sector,score_roa_sector,score_nim_sector,score_profit,rank_profit,score_lte,score_dte,score_diversified_sale,score_coef_variation,score_health,rank_health
0,AAS,2017,2,0,0,0,0.00,D,0.0,1.0,0.0,1.0,2.0,Warning
35,AAS,2017,3,0,0,1,1.33,C,0.0,1.0,0.0,1.0,2.0,Warning
70,AAS,2017,4,0,0,1,1.33,C,0.0,1.0,0.0,0.0,1.0,Danger
105,AAS,2018,1,0,0,0,0.00,D,0.0,1.0,0.0,0.0,1.0,Danger
141,AAS,2019,1,1,1,1,4.00,A,0.0,1.0,0.5,1.0,2.5,Safe
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
565,WSS,2022,1,0,1,1,2.67,B,0.0,1.0,0.5,1.0,2.5,Safe
600,WSS,2022,2,1,1,1,4.00,A,0.0,1.0,0.0,1.0,2.0,Warning
635,WSS,2022,3,1,1,1,4.00,A,0.0,1.0,0.5,1.0,2.5,Safe
670,WSS,2022,4,0,0,0,0.00,D,0.0,1.0,0.5,0.0,1.5,Warning


In [374]:
# # Calculate and compare based on average value of the sector
# dte_sector = df1.groupby(['Year', 'Quarter']).agg({
#     'Debt': "sum",
#     "Equity": "sum"
# }).reset_index()
# dte_sector['dte_sector'] = dte_sector['Debt'] / dte_sector['Equity']
# df1 = pd.merge(df1,
#                dte_sector[['Year', 'Quarter', 'dte_sector']],
#                how='outer',
#                on=['Year', 'Quarter'])
# score_dte = []
# for _, item in df1.iterrows():
#     if item['debt_to_equity'] > item['dte_sector']:
#         score_dte.append(0)
#     elif item['debt_to_equity'] == item['dte_sector']:
#         score_dte.append(0.5)
#     else:
#         score_dte.append(1)

# df1['score_dte'] = score_dte
# df1

#### 2.3.2 Import Raw data 
To get current growth and valuation score

In [375]:
# Connect to database and get the corresponding data 
conn = pyodbc.connect(
    r'Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=V:\iBroker\stock_database.accdb;'
)
df_raw = pd.read_sql(
    sql='select * from ptsp_stock_fundamental_score', 
    con=conn
)

conn.close()

In [376]:
df_raw = df_raw.loc[df_raw['Symbol'].isin(list_sec)]
df_raw[['Year', 'Quarter']] = df_raw[['Year', 'Quarter']].astype(int)

#### 2.3.3 Merge all data
- New profit rank
- New health rank
- Current growth rank
- Currnet valuation rank

In [377]:
df_final = pd.merge(
    df_final,
    df_raw[[
        'Symbol', 'Year', 'Quarter', 'score_EPS_above_average',
        'score_EPS_growth', 'score_EPS_above_sector',
        'score_EPS_above_group', 'score_growth', 'rank_growth',
        'score_PE_5Y', 'score_PB_5Y', 'score_PE_sector',
        'score_PB_sector', 'score_valuation', 'rank_valuation',
        'score_final', 'rank_final', 'Update'
    ]],
    how='inner',
    on=['Symbol', 'Year', 'Quarter']
)

In [378]:
# Change type of data to calculate final score
list_col = [
    'score_roe_sector',
    'score_roa_sector',
    'score_nim_sector',
    'score_profit',
    'score_lte',
    'score_dte',
    'score_diversified_sale',
    'score_coef_variation',
    'score_health',
    'score_EPS_above_average',
    'score_EPS_growth',
    'score_EPS_above_sector',
    'score_EPS_above_group',
    'score_growth',
    'score_PE_5Y',
    'score_PB_5Y',
    'score_PE_sector',
    'score_PB_sector',
    'score_valuation',
]

for i in list_col:
    df_final[i] = df_final[i].astype(float)

In [379]:
df_final['score_final'] = round(
    number=np.mean(
        df_final[[
            'score_profit', 
            'score_health', 
            'score_growth', 
            'score_valuation'
        ]], 
        axis=1
    ), 
    ndigits=2
)

for _, items in df_final.iterrows():
    if items['score_final'] < 1:
        items['rank_final'] = "D"
    elif items['score_final'] < 2:
        items['rank_final'] = "C"
    elif items['score_final'] < 3:
        items['rank_final'] = "B" 
    else:
        items['rank_final'] = "A"

In [380]:
df_final

Unnamed: 0,Symbol,Year,Quarter,score_roe_sector,score_roa_sector,score_nim_sector,score_profit,rank_profit,score_lte,score_dte,...,rank_growth,score_PE_5Y,score_PB_5Y,score_PE_sector,score_PB_sector,score_valuation,rank_valuation,score_final,rank_final,Update
0,AAS,2020,1,1.0,1.0,1.0,4.00,A,1.0,1.0,...,,0.0,0.0,,,0.0,D,2.33,D,20220422
1,AAS,2020,2,0.0,0.0,1.0,1.33,C,1.0,1.0,...,D,0.0,0.0,,0.0,0.0,D,0.83,C,20220422
2,AAS,2021,3,1.0,1.0,1.0,4.00,A,1.0,1.0,...,D,0.0,0.0,0.0,2.0,1.0,D,1.75,C,20220422
3,AAS,2021,4,1.0,1.0,1.0,4.00,A,0.5,1.0,...,B,0.0,0.0,0.0,1.0,0.5,D,2.25,B,20220422
4,AAS,2022,1,1.0,1.0,1.0,4.00,A,1.0,1.0,...,B,0.0,0.0,0.0,1.0,0.5,D,2.12,B,20220628
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
562,WSS,2022,1,0.0,1.0,1.0,2.67,B,0.0,1.0,...,A,2.0,1.0,0.0,0.0,1.5,C,2.42,C,20220628
563,WSS,2022,2,1.0,1.0,1.0,4.00,A,0.0,1.0,...,C,2.0,2.0,0.0,0.0,2.0,C,2.25,C,20221215
564,WSS,2022,3,1.0,1.0,1.0,4.00,A,0.0,1.0,...,D,2.0,2.0,0.0,0.0,2.0,C,2.12,D,20221227
565,WSS,2022,4,0.0,0.0,0.0,0.00,D,0.0,1.0,...,D,,2.0,,0.0,1.0,D,0.62,D,20230328


## 3. Save to DB Access

### 3.1 Get data fields in new table
- `ptsp_stock_fundamental_score_financial`

In [381]:
conn = pyodbc.connect(
    r'Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=V:\iBroker\stock_database.accdb;'
)
df_final_prev = pd.read_sql(
    sql='select * from ptsp_stock_fundamental_score_financial', 
    con=conn
)

conn.close()

In [382]:
df_final_prev = df_final_prev.loc[
    df_final_prev['score_lte'].notna()
]

In [383]:
df_final_prev

Unnamed: 0,Symbol,Year,Quarter,score_roe_sector,score_roa_sector,score_nim_sector,score_combined_ratio_sector,score_profit,rank_profit,z_LoanProvisionRatio,...,rank_growth,score_PE_5Y,score_PB_5Y,score_PE_sector,score_PB_sector,score_valuation,rank_valuation,score_final,rank_final,update
740,AAS,2020,1,1.0,1.0,1.0,,4.0,A,,...,,0.0,0.0,,,0.0,D,2.33,D,20230809
741,AAS,2020,2,0.0,0.0,1.0,,1.33,C,,...,D,0.0,0.0,,0.0,0.0,D,0.83,C,20230809
742,AAS,2021,3,1.0,1.0,1.0,,4.0,A,,...,D,0.0,0.0,0.0,2.0,1.0,D,1.75,C,20230809
743,AAS,2021,4,1.0,1.0,1.0,,4.0,A,,...,B,0.0,0.0,0.0,1.0,0.5,D,2.25,B,20230809
744,AAS,2022,1,1.0,1.0,1.0,,4.0,A,,...,B,0.0,0.0,0.0,1.0,0.5,D,2.12,B,20230809
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1302,WSS,2022,1,0.0,1.0,1.0,,2.67,B,,...,A,2.0,1.0,0.0,0.0,1.5,C,2.42,C,20230809
1303,WSS,2022,2,1.0,1.0,1.0,,4.0,A,,...,C,2.0,2.0,0.0,0.0,2.0,C,2.25,C,20230809
1304,WSS,2022,3,1.0,1.0,1.0,,4.0,A,,...,D,2.0,2.0,0.0,0.0,2.0,C,2.12,D,20230809
1305,WSS,2022,4,0.0,0.0,0.0,,0.0,D,,...,D,,2.0,,0.0,1.0,D,0.62,D,20230809


In [384]:
# Assign variable for selected columns
final_column = df_final.columns[:-1].to_list()

In [385]:
# To insert new values by dropping duplicate values
df_final_new = pd.concat(
    [
        df_final_prev[final_column],
        df_final[final_column].astype(str)
    ]
).drop_duplicates(keep=False)

In [386]:
df_final_new['Update'] = now

In [387]:
df_final_new

Unnamed: 0,Symbol,Year,Quarter,score_roe_sector,score_roa_sector,score_nim_sector,score_profit,rank_profit,score_lte,score_dte,...,rank_growth,score_PE_5Y,score_PB_5Y,score_PE_sector,score_PB_sector,score_valuation,rank_valuation,score_final,rank_final,Update


In [388]:
final_column.append('Update')

In [389]:
# "],[".join(i for i in df_db.columns.to_list())
col_df_db = '[Symbol],[Year],[Quarter],[score_roe_sector],[score_roa_sector],[score_nim_sector],[score_profit],[rank_profit],[score_lte],[score_dte],[score_diversified_sale],[score_coef_variation],[score_health],[rank_health],[score_EPS_above_average],[score_EPS_growth],[score_EPS_above_sector],[score_EPS_above_group],[score_growth],[rank_growth],[score_PE_5Y],[score_PB_5Y],[score_PE_sector],[score_PB_sector],[score_valuation],[rank_valuation],[score_final],[rank_final],[update]'

### 3.2 Save data to new table

In [390]:
conn = pyodbc.connect(
    r'Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=V:\iBroker\stock_database.accdb;'
)
cursor = conn.cursor()
for _, row in df_final_new[final_column].astype(str).iterrows():
    sql = "INSERT INTO ptsp_stock_fundamental_score_financial ("+col_df_db+") VALUES "+ str(tuple(row))
    cursor.execute(sql)
    conn.commit()
    
print("Successfully saved data")


Successfully saved data
