# INSURANCE BALANCE SHEET

Library

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import datetime as dt

Symbol

In [2]:
# Load the symbol lookup table and discard its 'Unnamed: 0' column
# (presumably a saved index - verify against symbol_id.csv)
sql_symbol = pd.read_csv("symbol_id.csv").drop(columns=['Unnamed: 0'])

# Build a SymbolID -> Symbol lookup dict
dict_sql_symbol = {
    symbol_id: symbol
    for symbol_id, symbol in zip(sql_symbol['SymbolID'], sql_symbol['Symbol'])
}

Pathlink

In [3]:
# Assign pathlink
# Single root folder for every input/output file; edit this one value to
# relocate the data instead of touching four separate absolute paths.
base_pathlink = r"/Users/rainmeteror/Desktop/BSC_Algo_Database_UAT/OneDrive_1_5-29-2023"

# Input workbooks: algo export, Stockbiz (sql) export and the field-mapping sheet
algo_pathlink = base_pathlink + "/insurance/algo_ibs.xlsx"
sql_pathlink = base_pathlink + "/insurance/sql_ibs.xlsx"
same_field_data = base_pathlink + "/insurance/ibs_datafield.xlsx"

# Output workbook that receives the comparison result
result_pathlink = base_pathlink + "/result/check_ibs.xlsx"


Algo Database

In [4]:
# Read data from algo database.
# The whole preparation is one method chain that returns a fresh frame, so
# nothing is mutated in place and no column is assigned on a filtered frame.
df_algo = (
    pd.read_excel(algo_pathlink)
    # Columns that take no part in the comparison
    .drop(columns=['Unnamed: 0', 'SCN', 'REPORTED_DATE', 'AUDITED', 'MONTH_IN_PERIOD',
                   'CREATED_DATE', 'UPDATED_DATE'])
    # Discard rows whose FREQ_CODE is 'Y'
    .loc[lambda frame: frame['FREQ_CODE'] != 'Y']
    # Derive the reporting period from REPORT_DATE (uses the .dt accessor,
    # so the column must already be datetime-typed)
    .assign(
        YEAR=lambda frame: frame['REPORT_DATE'].dt.year,
        QUARTER=lambda frame: frame['REPORT_DATE'].dt.quarter,
    )
    .drop(columns=['REPORT_DATE', 'FREQ_CODE'])
    # Order by period, then ticker
    .sort_values(by=['YEAR', 'QUARTER', 'SECURITY_CODE'], ascending=[True, True, True])
)

# Report the size of the prepared table
print("ALGO DATABASE INSURANCE: There are: " + str(len(df_algo)) + " rows")
print("ALGO DATABASE INSURANCE: There are: " + str(len(df_algo.columns)) + " columns")

ALGO DATABASE INSURANCE: There are: 168 rows
ALGO DATABASE INSURANCE: There are: 162 columns


In [5]:
# Preview the first row of the prepared algo table
df_algo.head(n=1)

Unnamed: 0,SECURITY_CODE,IBS_100,IBS_110,IBS_111,IBS_112,IBS_120,IBS_121,IBS_129,IBS_1291,IBS_130,...,IBS_4213,IBS_4214,IBS_4215,IBS_4216,IBS_4217,IBS_4218,IBS_4219,IBS_430,YEAR,QUARTER
0,ABI,2501333712678,80566546592,80252604892,313941700,1975247291700,0,0,1975247291700,124281297270,...,0,0,0.0,0,0.0,0.0,0,2679873082924,2020,1


Stockbiz Database

In [6]:
# Read data from sql (Stockbiz) database.
# Built as one method chain so the filtered frame is never mutated in place.
df_sql = (
    pd.read_excel(sql_pathlink)
    # Columns that take no part in the comparison
    .drop(columns=['Unnamed: 0', 'ReportID', 'LastUpdated', 'ApprovedBy', 'Currency', 'CreatedBy',
                   'LastUpdatedBy', 'ReportStatus', 'AssignedTo', 'CreatedAt'])
    # Keep rows with a non-zero Quarter and a Year after 2019
    .loc[lambda frame: (frame['Quarter'] != 0) & (frame['Year'] > 2019)]
    .sort_values(by=['Year', 'Quarter'], ascending=[True, True])
    # Change the name of tickers: translate SymbolID through the lookup dict
    # (IDs missing from the dict become NaN, as Series.map does by default)
    .assign(SymbolID=lambda frame: frame['SymbolID'].map(dict_sql_symbol))
    # Align the key column names with the algo table
    .rename(columns={"SymbolID": "SECURITY_CODE",
                     "Year": "YEAR",
                     "Quarter": "QUARTER"})
)

# Report the size of the prepared table.
# The labels previously read "BANKS" and called the column count "rows";
# this notebook handles insurance data and the second figure counts columns.
print("STOCKBIZ DATABASE INSURANCE: There are: " + str(len(df_sql)) + " rows")
print("STOCKBIZ DATABASE INSURANCE: There are: " + str(len(df_sql.columns)) + " columns")

STOCKBIZ DATABASE BANKS: There are: 143 rows
STOCKBIZ DATABASE BANKS: There are: 121 rows


In [7]:
# Preview the first row of the prepared Stockbiz table
df_sql.head(n=1)

Unnamed: 0,SECURITY_CODE,QUARTER,YEAR,ShortTermAssets,CashAndCashEquivalents,CashInHand,CashInBanks,CashInTransits,CashEquivalents,ShortTermInvestments,...,OtherFundsAndExpenses,BonusAndWelfareFund,BonusAndWelfareFundForInvestment,OperationFunds,FundForNonBusinessActivities,FundForNonBusinessActivitiesLastYear,FundForNonBusinessActivitiesThisYear,FundForInUseWelfareFixedAssets,MinorityInterests,TotalCapital
3,VNR,1,2020,5892072066920,41740082686,41740082686,0.0,,0.0,2776010082301,...,18509590000.0,18509590000.0,,,,,,,34594380000.0,7028435969065


In [8]:
# Distinct tickers present in the Stockbiz table, in order of first appearance
insurance_symbol = pd.unique(df_sql['SECURITY_CODE'])

In [9]:
# Report how many distinct tickers were found
print("There are: {} stocks".format(len(insurance_symbol)))

There are: 11 stocks


Process the two tables

In [10]:
# Load the field-mapping workbook and drop every row that has a missing cell
# (presumably fields without a counterpart in the other database - TODO confirm)
same_column_name = pd.read_excel(same_field_data).dropna(axis=0)

# Map each Stockbiz column name (ibs_sql) to its algo counterpart (ibs_algo)
dict_column_name = {
    sql_name: algo_name
    for sql_name, algo_name in zip(same_column_name['ibs_sql'], same_column_name['ibs_algo'])
}

In [11]:
# Preview the first row of the field mapping
same_column_name.head(n=1)

Unnamed: 0,ibs_algo,ibs_sql
0,IBS_100,ShortTermAssets


In [12]:
# Show the dtype of each df_sql column
df_sql.dtypes

SECURITY_CODE                            object
QUARTER                                   int64
YEAR                                      int64
ShortTermAssets                           int64
CashAndCashEquivalents                    int64
                                         ...   
FundForNonBusinessActivitiesLastYear    float64
FundForNonBusinessActivitiesThisYear    float64
FundForInUseWelfareFixedAssets          float64
MinorityInterests                       float64
TotalCapital                              int64
Length: 121, dtype: object

In [13]:
# Rename the Stockbiz columns to their algo field names, then replace every
# missing value with 0
df_sql = df_sql.rename(columns=dict_column_name).fillna(0)

# Cast the fields shared by both tables to the nullable integer dtype
df_sql[same_column_name['ibs_algo']] = (
    df_sql[same_column_name['ibs_algo']].astype('Int64')
)

In [14]:
# Cast the fields shared by both tables to the nullable integer dtype in df_algo
df_algo[same_column_name['ibs_algo']] = (
    df_algo[same_column_name['ibs_algo']].astype('Int64')
)

# Cast the period keys the same way, then keep only the tickers that also
# appear in the Stockbiz table (insurance_symbol)
df_algo = (
    df_algo
    .astype({"YEAR": "Int64", "QUARTER": "Int64"})
    .loc[lambda frame: frame['SECURITY_CODE'].isin(insurance_symbol)]
)

In [15]:
# Key columns first, followed by every algo field name listed in the mapping
final_column = ["SECURITY_CODE", "YEAR", "QUARTER"] + list(same_column_name['ibs_algo'])

In [16]:
# Restrict both tables to the same columns, in the same order
df_sql = df_sql.loc[:, final_column]
df_algo = df_algo.loc[:, final_column]

In [17]:
def compare_table(df1: pd.DataFrame, df2: pd.DataFrame, columns) -> pd.DataFrame:
    """Return the rows that appear in only one of two tables.

    Both frames are outer-merged on ``columns``; rows that match on every one
    of those columns are discarded.

    Args:
        df1: Left table.
        df2: Right table.
        columns: Column name(s) to merge on; they must exist in both tables.

    Returns:
        The unmatched rows, with a ``_merge`` column that is ``'left_only'``
        for rows found only in ``df1`` and ``'right_only'`` for rows found
        only in ``df2``.
    """
    # The return annotation is the DataFrame class itself; the previous
    # `pd.DataFrame()` built a throwaway empty frame at definition time.
    merged = pd.merge(df1, df2, on=columns, how='outer', indicator=True)
    return merged.query("_merge != 'both'")

In [18]:
# For every shared field, measure how many (ticker, year, quarter, value) rows
# are present in only one of the two tables.
final_result = {"field": [], "value_not_same": []}

# Loop-invariant denominator: combined row count of both tables
total_rows = len(df_algo) + len(df_sql)

# The first three entries of final_column are the key columns, so skip them
for field in final_column[3:]:
    check_columns = ["SECURITY_CODE", 'YEAR', 'QUARTER', field]
    result = compare_table(
        df1=df_sql[check_columns],
        df2=df_algo[check_columns],
        columns=check_columns)

    # Unmatched rows as a percentage of all rows; report 0.0 instead of
    # raising ZeroDivisionError when both tables are empty
    value_not_same = len(result) / total_rows * 100 if total_rows else 0.0

    final_result["field"].append(field)
    final_result["value_not_same"].append(value_not_same)

In [20]:
# Write the per-field mismatch percentages to the result workbook
pd.DataFrame(data=final_result).to_excel(excel_writer=result_pathlink)