# 1. Importing Required Modules

In [3]:
from zipfile import ZipFile 
import pandas as pd
import os
import tabula

# 2. Extract data file from zip files

## 2.a. Create a function for data extraction

In [2]:
def security_data_consolidator(path_folder):

    '''
    security_data_consolidator function reads all zip files containing txt data downloaded from KSEI website
    that were saved on a predefined folder, and concatenates all the txt files inside the zip files
    into a single file named ``<folder name>.txt`` inside that folder.

    The header line (first line) is kept only for the first non-empty txt file; it is dropped from
    every later one. The output file is rebuilt from scratch on every call and anything in the folder
    that is not a zip archive (including the output of a previous run) is ignored, so the function
    can be re-run safely.

    Parameters
    ----------
    path_folder: the path or folder containing zip files

    Returns
    -------
    None
    '''

    # Function-scope imports keep this cell self-contained: the notebook's import
    # cell only brings in ``os`` and ``ZipFile``.
    import io
    from zipfile import is_zipfile

    securities_mf_path = path_folder

    # The output is named after the folder itself ('Dataset_X' -> 'Dataset_X/Dataset_X.txt').
    # Using the base name keeps the location correct for nested or absolute folder paths too.
    folder_name = os.path.basename(os.path.normpath(securities_mf_path))
    securities_mf_all_path = os.path.join(securities_mf_path, folder_name + '.txt')

    # Collect the archives BEFORE the output file is (re)created, in sorted order so the
    # result does not depend on the arbitrary ordering of os.listdir.
    candidate_paths = [
        os.path.join(securities_mf_path, file_name)
        for file_name in sorted(os.listdir(securities_mf_path))
    ]
    # Skip sub-folders and non-zip files (e.g. the consolidated txt of a previous run).
    zip_paths = [
        candidate for candidate in candidate_paths
        if os.path.isfile(candidate) and is_zipfile(candidate)
    ]

    # Becomes True once the first non-empty file (header included) has been written
    header_written = False

    # 'w' truncates any previous output so re-running never duplicates rows
    with open(securities_mf_all_path, 'w', encoding="utf-8") as all_mf_file:
        for zip_path in zip_paths:
            with ZipFile(zip_path, 'r') as mf_zip:
                for member in mf_zip.infolist():
                    if member.is_dir():
                        continue

                    # Read the member straight from the archive; no temporary folder needed.
                    # TextIOWrapper applies the same universal-newline handling as open(..., 'r').
                    with io.TextIOWrapper(mf_zip.open(member), encoding="utf-8") as securities_mf_txt:
                        file_content = securities_mf_txt.read()

                    # Only the header line on the first file needs to be added to the new file
                    if header_written:
                        file_content = file_content.partition('\n')[2]
                    if not file_content:
                        continue
                    header_written = True

                    # Guarantee a line break between files so the last row of one file
                    # is never glued to the first row of the next one
                    if not file_content.endswith('\n'):
                        file_content += '\n'
                    all_mf_file.write(file_content)

    print(f'Success! File output: {securities_mf_all_path}')

## 2.b. Extract data from securities master file 

In [3]:
# Consolidate the zip archives stored in the 'Dataset_Masterfile_Efek' folder into one txt file.
security_data_consolidator('Dataset_Masterfile_Efek')

Success! File output: Dataset_Masterfile_Efek\Dataset_Masterfile_Efek.txt


## 2.c. Extract data from securities ownership master file

In [4]:
# Consolidate the zip archives stored in the 'Dataset_Ownership_Efek' folder into one txt file.
security_data_consolidator('Dataset_Ownership_Efek')

Success! File output: Dataset_Ownership_Efek\Dataset_Ownership_Efek.txt


# 3. Read the combined data

## 3.1 Read security master file data

In [78]:
# Load the consolidated master file; fields are separated by '|'.
securities_mf = pd.read_csv('Dataset_Masterfile_Efek/Dataset_Masterfile_Efek.txt', sep='|')
securities_mf.head()

Unnamed: 0,Date,Code,Description,Type,Isin Code,Issuer,Status,Stock Exchange,Listing Date,Currency,...,Nominal Value,Num. of Sec,Originated Amt,Current Amt,Total Scripless,Local (%),Foreign (%),Total (%),Sector,Closing Price
0,31-MAY-2021,TRIM,TRIMEGAH SEKURITAS INDONESIA Tbk,EQUITY,ID1000052608,"TRIMEGAH SEKURITAS INDONESIA Tbk, PT",ACTIVE,IDX,,IDR,...,50,7109300000,,,7109300000,21,79,100,SECURITIES COMPANY,130
1,31-MAY-2021,TRIN,PERINTIS TRINITI PROPERTI Tbk,EQUITY,ID1000153604,"PERINTIS TRINITI PROPERTI Tbk, PT",ACTIVE,IDX,15-JAN-2020,IDR,...,100,4373364228,,,3973864228,91,0,91,PROPERTY AND REAL ESTATE,157
2,31-MAY-2021,TRIO,TRIKOMSEL OKE Tbk,EQUITY,ID1000112501,PT TRIKOMSEL OKE TBK,ACTIVE,IDX,14-APR-2009,IDR,...,100,26007494645,,,25928896198,51,49,100,RETAIL TRADE,426
3,31-MAY-2021,TRIS,TRISULA INTERNATIONAL Tbk,EQUITY,ID1000123102,"TRISULA INTERNATIONAL Tbk, PT",ACTIVE,IDX,28-JUN-2012,IDR,...,100,3141443806,,,364610758,8,4,12,"TEXTILE, GARMENT",144
4,31-MAY-2021,TRJA,TRANSKON JAYA Tbk,EQUITY,ID1000156805,"TRANSKON JAYA Tbk, PT",ACTIVE,IDX,27-AUG-2020,IDR,...,100,1510200000,,,375000000,25,0,25,TRANSPORTATION,168


## 3.2. Filter to only display bond instruments

Identify the type of financial securities in the master file. 

In [6]:
# Show the distinct values of the Type column.
securities_mf['Type'].unique()

array(['EQUITY', 'CORPORATE BOND', 'GOVERNMENT BOND', 'WARRANT', 'RIGHT',
       'TERM NOTES', 'NEGOTIABLE CERTIFICATE OF DEPOSIT',
       'COMMERCIAL PAPER', 'SBSN', 'SPN', 'SUKUK', 'EBA',
       'DANA INVESTASI REAL ESTATE', 'EQUITY CROWDFUNDING (ECF)',
       'DEBT CROWD FUNDING', 'SUKUK CROWD FUNDING', 'STRUCTURED WARRANT',
       'BANK INDONESIA RUPIAH SECURITIES (SRBI)',
       'Bank Indonesia Foreign Currency Securities (SVBI)',
       'Bank Indonesia Foreign Currency Sukuk (SUVBI)', 'PERPETUAL BONDS'],
      dtype=object)

In [5]:
# Values of the Type column that are treated as bond instruments in this notebook.
bonds_type = [
    'CORPORATE BOND',
    'GOVERNMENT BOND',
    'SBSN',
    'SPN',
    'SUKUK',
    'DEBT CROWD FUNDING',
    'SUKUK CROWD FUNDING',
    'BANK INDONESIA RUPIAH SECURITIES (SRBI)',
    'Bank Indonesia Foreign Currency Securities (SVBI)',
    'Bank Indonesia Foreign Currency Sukuk (SUVBI)',
    'PERPETUAL BONDS',
]

Filter the master file to only display bonds data

In [79]:
# Keep only the master-file rows whose Type is one of the bond types, renumbering rows from 0.
is_bond_master_row = securities_mf['Type'].isin(bonds_type)
securities_mf_bond = securities_mf.loc[is_bond_master_row].reset_index(drop=True)
securities_mf_bond.head()

Unnamed: 0,Date,Code,Description,Type,Isin Code,Issuer,Status,Stock Exchange,Listing Date,Currency,...,Nominal Value,Num. of Sec,Originated Amt,Current Amt,Total Scripless,Local (%),Foreign (%),Total (%),Sector,Closing Price
0,31-MAY-2021,ABSM01A,OBLIGASI AB SINAR MAS MULTIFINANCE I TAHUN 202...,CORPORATE BOND,IDA0001097A7,"AB SINAR MAS MULTIFINANCE, PT",ACTIVE,IDX,07-SEP-2020,IDR,...,,,115000000000,115000000000,115000000000,100,0,100,FINANCIAL INSTITUTION,1
1,31-MAY-2021,ABSM01B,OBLIGASI AB SINAR MAS MULTIFINANCE I TAHUN 202...,CORPORATE BOND,IDA0001097B5,"AB SINAR MAS MULTIFINANCE, PT",ACTIVE,IDX,07-SEP-2020,IDR,...,,,25000000000,25000000000,25000000000,100,0,100,FINANCIAL INSTITUTION,1
2,31-MAY-2021,ABSM01C,OBLIGASI AB SINAR MAS MULTIFINANCE I TAHUN 202...,CORPORATE BOND,IDA0001097C3,"AB SINAR MAS MULTIFINANCE, PT",ACTIVE,IDX,07-SEP-2020,IDR,...,,,35000000000,35000000000,35000000000,100,0,100,FINANCIAL INSTITUTION,1
3,31-MAY-2021,ADCP01A,OBLIGASI I ADHI COMMUTER PROPERTI TAHUN 2021 S...,CORPORATE BOND,IDA0001151A2,"ADHI COMMUTER PROPERTI, PT",ACTIVE,IDX,21-MAY-2021,IDR,...,,,491000000000,491000000000,491000000000,100,0,100,PROPERTY AND REAL ESTATE,1
4,31-MAY-2021,ADCP01B,OBLIGASI I ADHI COMMUTER PROPERTI TAHUN 2021 S...,CORPORATE BOND,IDA0001151B0,"ADHI COMMUTER PROPERTI, PT",ACTIVE,IDX,21-MAY-2021,IDR,...,,,9000000000,9000000000,9000000000,100,0,100,PROPERTY AND REAL ESTATE,1


In [11]:
# Summarise the columns, dtypes and non-null counts of the bond master data.
securities_mf_bond.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43405 entries, 0 to 43404
Data columns (total 28 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Date             43405 non-null  object 
 1   Code             43405 non-null  object 
 2   Description      43405 non-null  object 
 3   Type             43405 non-null  object 
 4   Isin Code        43405 non-null  object 
 5   Issuer           43405 non-null  object 
 6   Status           43405 non-null  object 
 7   Stock Exchange   39433 non-null  object 
 8   Listing Date     39397 non-null  object 
 9   Currency         43405 non-null  object 
 10  Form             43405 non-null  object 
 11  Eff. Date Isin   50 non-null     object 
 12  Maturity Date    43405 non-null  object 
 13  Expire Date      0 non-null      object 
 14  Exercise Price   0 non-null      float64
 15  Interest         38526 non-null  float64
 16  Interest Type    43242 non-null  object 
 17  Interest Fre

## 3.3 Read Security Ownership Data

In [80]:
# Load the consolidated ownership file; fields are separated by '|'.
securities_os = pd.read_csv('Dataset_Ownership_Efek/Dataset_Ownership_Efek.txt', sep='|')
securities_os.head()

Unnamed: 0,Date,Code,Type,Sec. Num,Price,Local IS,Local CP,Local PF,Local IB,Local ID,...,Foreign IS,Foreign CP,Foreign PF,Foreign IB,Foreign ID,Foreign MF,Foreign SC,Foreign FD,Foreign OT,Total.1
0,31-MAY-2021,AALI,EQUITY,1924688333,8825,114938014,11274445,19468622,49700,100544313,...,1403910,3855240,7316838,13323164,1423464,32725069,26900419,208822,6914300,94071226
1,31-MAY-2021,ABBA,EQUITY,2755125000,246,0,1692656565,0,0,566614547,...,0,2120900,0,14969700,1800,0,1900,0,0,17094300
2,31-MAY-2021,ABDA,EQUITY,620806680,7050,21885,47538682,0,98,47919272,...,0,426136555,0,97403500,400,0,0,0,62,523540517
3,31-MAY-2021,ABMM,EQUITY,2753165000,825,2677000,6965900,1600,0,50690300,...,0,57405300,0,320433900,406600,10351700,0,0,151862900,540460400
4,31-MAY-2021,ACES,EQUITY,17150000000,1495,328564522,99277100,20041945,0,272974411,...,21176900,715615629,1246731977,511051505,1561100,2235817184,196662025,4720799,941505231,5874842350


## 3.4. Filter instrument ownership for bond data only

Filter ownership data to only display bond data

In [81]:
# Keep only the ownership rows whose Type is one of the bond types, renumbering rows from 0.
is_bond_ownership_row = securities_os['Type'].isin(bonds_type)
securities_os_bond = securities_os.loc[is_bond_ownership_row].reset_index(drop=True)
securities_os_bond.head()

Unnamed: 0,Date,Code,Type,Sec. Num,Price,Local IS,Local CP,Local PF,Local IB,Local ID,...,Foreign IS,Foreign CP,Foreign PF,Foreign IB,Foreign ID,Foreign MF,Foreign SC,Foreign FD,Foreign OT,Total.1
0,31-MAY-2021,ABSM01A,CORPORATE BOND,115000000000,1,750000000,500000000,0,0,15650000000,...,0,0,0,0,0,0,0,0,0,0
1,31-MAY-2021,ABSM01B,CORPORATE BOND,25000000000,1,1000000000,0,0,0,4000000000,...,0,0,0,0,0,0,0,0,0,0
2,31-MAY-2021,ABSM01C,CORPORATE BOND,35000000000,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,31-MAY-2021,ADCP01A,CORPORATE BOND,491000000000,1,0,100000000000,10000000000,0,0,...,0,0,0,0,0,0,0,0,0,0
4,31-MAY-2021,ADCP01B,CORPORATE BOND,9000000000,1,5000000000,0,2000000000,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
# Summarise the columns, dtypes and non-null counts of the bond ownership data.
securities_os_bond.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40198 entries, 0 to 40197
Data columns (total 25 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Date        40198 non-null  object 
 1   Code        40198 non-null  object 
 2   Type        40198 non-null  object 
 3   Sec. Num    38501 non-null  float64
 4   Price       40198 non-null  int64  
 5   Local IS    40198 non-null  float64
 6   Local CP    40198 non-null  float64
 7   Local PF    40198 non-null  int64  
 8   Local IB    40198 non-null  int64  
 9   Local ID    40198 non-null  int64  
 10  Local MF    40198 non-null  int64  
 11  Local SC    40198 non-null  int64  
 12  Local FD    40198 non-null  int64  
 13  Local OT    40198 non-null  int64  
 14  Total       40198 non-null  float64
 15  Foreign IS  40198 non-null  int64  
 16  Foreign CP  40198 non-null  int64  
 17  Foreign PF  40198 non-null  int64  
 18  Foreign IB  40198 non-null  int64  
 19  Foreign ID  40198 non-nul

# 4. Merge ownership and master data

## 4.1. Merge Data

Both dataframes are outer-joined on the Date and Code columns.

In [82]:
# Outer join keeps rows that exist in only one of the two sources.
# The shared 'Type' column is suffixed: Type_x (master file) and Type_y (ownership).
bond_data = pd.merge(
    securities_mf_bond,
    securities_os_bond,
    how='outer',
    on=['Date', 'Code'],
)
bond_data.head()

Unnamed: 0,Date,Code,Description,Type_x,Isin Code,Issuer,Status,Stock Exchange,Listing Date,Currency,...,Foreign IS,Foreign CP,Foreign PF,Foreign IB,Foreign ID,Foreign MF,Foreign SC,Foreign FD,Foreign OT,Total.1
0,31-MAY-2021,ABSM01A,OBLIGASI AB SINAR MAS MULTIFINANCE I TAHUN 202...,CORPORATE BOND,IDA0001097A7,"AB SINAR MAS MULTIFINANCE, PT",ACTIVE,IDX,07-SEP-2020,IDR,...,0,0,0,0,0,0,0,0,0,0
1,31-MAY-2021,ABSM01B,OBLIGASI AB SINAR MAS MULTIFINANCE I TAHUN 202...,CORPORATE BOND,IDA0001097B5,"AB SINAR MAS MULTIFINANCE, PT",ACTIVE,IDX,07-SEP-2020,IDR,...,0,0,0,0,0,0,0,0,0,0
2,31-MAY-2021,ABSM01C,OBLIGASI AB SINAR MAS MULTIFINANCE I TAHUN 202...,CORPORATE BOND,IDA0001097C3,"AB SINAR MAS MULTIFINANCE, PT",ACTIVE,IDX,07-SEP-2020,IDR,...,0,0,0,0,0,0,0,0,0,0
3,31-MAY-2021,ADCP01A,OBLIGASI I ADHI COMMUTER PROPERTI TAHUN 2021 S...,CORPORATE BOND,IDA0001151A2,"ADHI COMMUTER PROPERTI, PT",ACTIVE,IDX,21-MAY-2021,IDR,...,0,0,0,0,0,0,0,0,0,0
4,31-MAY-2021,ADCP01B,OBLIGASI I ADHI COMMUTER PROPERTI TAHUN 2021 S...,CORPORATE BOND,IDA0001151B0,"ADHI COMMUTER PROPERTI, PT",ACTIVE,IDX,21-MAY-2021,IDR,...,0,0,0,0,0,0,0,0,0,0


## 4.2 Rename total ownership columns 

Rename the columns Total and Total.1, which were originally duplicate column names in the ownership data. Column 'Total' represents the total instrument ownership by Indonesian citizens, while column 'Total.1' represents the total instrument ownership by non-Indonesian citizens.

In [83]:
# Give the two ownership totals self-explanatory names.
bond_data = bond_data.rename(
    columns={
        'Total': 'Total_Local_Ownership',
        'Total.1': 'Total_Foreign_Ownership',
    }
)

## 4.3 Add total outstanding column

In [84]:
# Total outstanding = local ownership + foreign ownership (stays NaN when either side is missing).
local_ownership_total = bond_data['Total_Local_Ownership']
foreign_ownership_total = bond_data['Total_Foreign_Ownership']
bond_data['Total_Outstanding'] = local_ownership_total.add(foreign_ownership_total)

# Display floats without decimals or scientific notation.
pd.set_option('display.float_format', '{:.0f}'.format)
bond_data.head()

Unnamed: 0,Date,Code,Description,Type_x,Isin Code,Issuer,Status,Stock Exchange,Listing Date,Currency,...,Foreign CP,Foreign PF,Foreign IB,Foreign ID,Foreign MF,Foreign SC,Foreign FD,Foreign OT,Total_Foreign_Ownership,Total_Outstanding
0,31-MAY-2021,ABSM01A,OBLIGASI AB SINAR MAS MULTIFINANCE I TAHUN 202...,CORPORATE BOND,IDA0001097A7,"AB SINAR MAS MULTIFINANCE, PT",ACTIVE,IDX,07-SEP-2020,IDR,...,0,0,0,0,0,0,0,0,0,115000000000
1,31-MAY-2021,ABSM01B,OBLIGASI AB SINAR MAS MULTIFINANCE I TAHUN 202...,CORPORATE BOND,IDA0001097B5,"AB SINAR MAS MULTIFINANCE, PT",ACTIVE,IDX,07-SEP-2020,IDR,...,0,0,0,0,0,0,0,0,0,25000000000
2,31-MAY-2021,ABSM01C,OBLIGASI AB SINAR MAS MULTIFINANCE I TAHUN 202...,CORPORATE BOND,IDA0001097C3,"AB SINAR MAS MULTIFINANCE, PT",ACTIVE,IDX,07-SEP-2020,IDR,...,0,0,0,0,0,0,0,0,0,35000000000
3,31-MAY-2021,ADCP01A,OBLIGASI I ADHI COMMUTER PROPERTI TAHUN 2021 S...,CORPORATE BOND,IDA0001151A2,"ADHI COMMUTER PROPERTI, PT",ACTIVE,IDX,21-MAY-2021,IDR,...,0,0,0,0,0,0,0,0,0,491000000000
4,31-MAY-2021,ADCP01B,OBLIGASI I ADHI COMMUTER PROPERTI TAHUN 2021 S...,CORPORATE BOND,IDA0001151B0,"ADHI COMMUTER PROPERTI, PT",ACTIVE,IDX,21-MAY-2021,IDR,...,0,0,0,0,0,0,0,0,0,9000000000


In [18]:
# Summarise the columns, dtypes and non-null counts of the merged bond data.
bond_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44246 entries, 0 to 44245
Data columns (total 52 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Date                     44246 non-null  object 
 1   Code                     44246 non-null  object 
 2   Description              43405 non-null  object 
 3   Type_x                   43405 non-null  object 
 4   Isin Code                43405 non-null  object 
 5   Issuer                   43405 non-null  object 
 6   Status                   43405 non-null  object 
 7   Stock Exchange           39433 non-null  object 
 8   Listing Date             39397 non-null  object 
 9   Currency                 43405 non-null  object 
 10  Form                     43405 non-null  object 
 11  Eff. Date Isin           50 non-null     object 
 12  Maturity Date            43405 non-null  object 
 13  Expire Date              0 non-null      object 
 14  Exercise Price        

In [55]:
# Spot-check: first merged rows whose Code contains 'PBSG'.
bond_data[bond_data['Code'].str.contains('PBSG')].head()

Unnamed: 0,Date,Code,Description,Type_x,Isin Code,Issuer,Status,Stock Exchange,Listing Date,Currency,...,Foreign IS,Foreign CP,Foreign PF,Foreign IB,Foreign ID,Foreign MF,Foreign SC,Foreign FD,Foreign OT,Total.1
18177,30-SEP-2022,PBSG001,SBSN Seri PBSG001,SBSN,IDP000005308,PEMERINTAH REPUBLIK INDONESIA,ACTIVE,IDX,23-SEP-2022,IDR,...,,,,,,,,,,
19154,31-OCT-2022,PBSG001,SBSN Seri PBSG001,SBSN,IDP000005308,PEMERINTAH REPUBLIK INDONESIA,ACTIVE,IDX,23-SEP-2022,IDR,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20341,30-NOV-2022,PBSG001,SBSN Seri PBSG001,SBSN,IDP000005308,PEMERINTAH REPUBLIK INDONESIA,ACTIVE,IDX,23-SEP-2022,IDR,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21530,30-DEC-2022,PBSG001,SBSN Seri PBSG001,SBSN,IDP000005308,PEMERINTAH REPUBLIK INDONESIA,ACTIVE,IDX,23-SEP-2022,IDR,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
22856,31-JAN-2023,PBSG001,SBSN Seri PBSG001,SBSN,IDP000005308,PEMERINTAH REPUBLIK INDONESIA,ACTIVE,IDX,23-SEP-2022,IDR,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# 5. Filter GSS Bonds

In [85]:
# Codes that are flagged as green sukuk by exact Code match in the GSS filter.
green_sukuk = [
    'ST006',
    'ST007',
    'ST008',
    'ST009',
    'ST010T4',
    'ST011T2',
    'ST011T4',
    'ST012T4',
    'PBSG001',
]

In [65]:
# Spot-check: ownership-side type (Type_y) of the merged rows whose Code contains 'SWR'.
bond_data[bond_data['Code'].str.contains('SWR')]['Type_y']

462       NaN
12172     NaN
13309     NaN
14475     NaN
30690     NaN
         ... 
44096    SBSN
44148    SBSN
44149    SBSN
44209    SBSN
44210    SBSN
Name: Type_y, Length: 78, dtype: object

In [86]:
# Build one boolean mask per selection criterion.
# na=False states explicitly that rows with a missing Description / Type_y do not match
# (instead of relying on how pandas coerces NaN inside | and &), and regex=False
# matches each keyword literally.
gss_description = bond_data['Description']

# First keyword: sustainable or berkelanjutan
gss_is_sustainable = (
    gss_description.str.contains('BKLJ', regex=False, na=False)
    | gss_description.str.contains('BERKELANJUTAN', regex=False, na=False)
)
# Second keyword: green (with whitespace after)
gss_is_green = gss_description.str.contains('GREEN ', regex=False, na=False)
# Third keyword: environment or lingkungan
gss_is_environment = gss_description.str.contains('LINGKUNGAN', regex=False, na=False)
# Sovereign sukuk is not listed in master data, therefore it has to be identified manually
gss_is_green_sukuk = bond_data['Code'].isin(green_sukuk)
gss_is_swr_sbsn = (
    bond_data['Code'].str.contains('SWR', regex=False, na=False)
    & bond_data['Type_y'].str.contains('SBSN', regex=False, na=False)
)
# Ownership data is not null, representing that the bond has been traded in public
gss_has_ownership = bond_data['Foreign OT'].notnull()

gss_bond_data = bond_data[
    (gss_is_sustainable | gss_is_green | gss_is_environment | gss_is_green_sukuk | gss_is_swr_sbsn)
    & gss_has_ownership
].reset_index(drop=True)
gss_bond_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29441 entries, 0 to 29440
Data columns (total 52 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Date                     29441 non-null  object 
 1   Code                     29441 non-null  object 
 2   Description              29283 non-null  object 
 3   Type_x                   29283 non-null  object 
 4   Isin Code                29283 non-null  object 
 5   Issuer                   29283 non-null  object 
 6   Status                   29283 non-null  object 
 7   Stock Exchange           29171 non-null  object 
 8   Listing Date             29171 non-null  object 
 9   Currency                 29283 non-null  object 
 10  Form                     29283 non-null  object 
 11  Eff. Date Isin           0 non-null      object 
 12  Maturity Date            29283 non-null  object 
 13  Expire Date              0 non-null      object 
 14  Exercise Price        

# 6. Add label columns for identification of GSS Bonds Data

## 6.1 Create criteria function for labelling GSS Bonds Type

In [95]:
def gss_bond_type_labeller(row):

    '''
    gss_bond_type_labeller function labels each row in gss bonds dataframe based on the predefined criteria.

    Criteria, checked in this order:
    1. 'Green Bonds'          - Type_y contains 'SBSN' and Code contains 'ST0' or 'PBSG',
                                or Description contains 'GREEN ' or 'LINGKUNGAN'.
    2. 'Social Bonds'         - Description contains 'SOSIAL', or Code contains 'SWR'
                                and Type_y contains 'SBSN'.
    3. 'Sustainability Bonds' - everything else.

    Missing (non-string) values of Description, Type_y or Code are treated as empty text,
    so they simply never match a keyword instead of raising TypeError.

    Parameters
    ----------
    row: the row of a dataframe (anything indexable by 'Description', 'Type_y' and 'Code')

    Returns
    -------
    row_label
    '''
    # Missing cells hold NaN (a float); `'x' in nan` would raise TypeError,
    # so normalise every field to a string before the substring checks.
    description = row['Description'] if isinstance(row['Description'], str) else ''
    security_type = row['Type_y'] if isinstance(row['Type_y'], str) else ''
    code = row['Code'] if isinstance(row['Code'], str) else ''

    is_sbsn = 'SBSN' in security_type

    if (is_sbsn and ('ST0' in code or 'PBSG' in code)) or 'GREEN ' in description or 'LINGKUNGAN' in description:
        row_label = 'Green Bonds'
    elif 'SOSIAL' in description or ('SWR' in code and is_sbsn):
        row_label = 'Social Bonds'
    else:
        row_label = 'Sustainability Bonds'

    return row_label

## 6.2 Create criteria function for labelling GSS Bond issuer type

In [88]:
def gss_bond_issuer_labeller(row):

    '''
    gss_bond_issuer_labeller function labels each row in gss bonds dataframe based on the predefined criteria.

    Criteria, checked in this order:
    1. 'Government'             - Type_y contains 'SBSN'.
    2. 'State-owned Enterprise' - Issuer contains 'PERSERO' or 'BANK PEMBANGUNAN DAERAH'.
    3. 'Private Corporation'    - everything else.

    Missing (non-string) values of Type_y or Issuer are treated as empty text, so they never
    match a keyword instead of raising TypeError; such rows fall through to the default label.

    Parameters
    ----------
    row: the row of a dataframe (anything indexable by 'Type_y' and 'Issuer')

    Returns
    -------
    row_label
    '''
    # Missing cells hold NaN (a float); `'x' in nan` would raise TypeError,
    # so normalise both fields to strings before the substring checks.
    security_type = row['Type_y'] if isinstance(row['Type_y'], str) else ''
    issuer = row['Issuer'] if isinstance(row['Issuer'], str) else ''

    if 'SBSN' in security_type:
        row_label = 'Government'
    elif 'PERSERO' in issuer or 'BANK PEMBANGUNAN DAERAH' in issuer:
        row_label = 'State-owned Enterprise'
    else:
        row_label = 'Private Corporation'
    return row_label

## 6.3 Apply row labelling

In [90]:
# Summarise the GSS bond frame before the label columns are added.
gss_bond_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29441 entries, 0 to 29440
Data columns (total 52 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Date                     29441 non-null  object 
 1   Code                     29441 non-null  object 
 2   Description              29283 non-null  object 
 3   Type_x                   29283 non-null  object 
 4   Isin Code                29283 non-null  object 
 5   Issuer                   29283 non-null  object 
 6   Status                   29283 non-null  object 
 7   Stock Exchange           29171 non-null  object 
 8   Listing Date             29171 non-null  object 
 9   Currency                 29283 non-null  object 
 10  Form                     29283 non-null  object 
 11  Eff. Date Isin           0 non-null      object 
 12  Maturity Date            29283 non-null  object 
 13  Expire Date              0 non-null      object 
 14  Exercise Price        

In [96]:
# Apply both row-wise labellers and attach their results as two new columns.
gss_bond_data = gss_bond_data.assign(
    GSS_Bond_Type=lambda frame: frame.apply(gss_bond_type_labeller, axis=1),
    GSS_Bond_Issuer_Type=lambda frame: frame.apply(gss_bond_issuer_labeller, axis=1),
)
gss_bond_data.head()

Unnamed: 0,Date,Code,Description,Type_x,Isin Code,Issuer,Status,Stock Exchange,Listing Date,Currency,...,Foreign IB,Foreign ID,Foreign MF,Foreign SC,Foreign FD,Foreign OT,Total_Foreign_Ownership,Total_Outstanding,GSS_Bond_Type,GSS_Bond_Issuer_Type
0,31-MAY-2021,ADHI02ACN2,OBLIGASI BERKELANJUTAN II ADHI KARYA TAHAP II ...,CORPORATE BOND,IDA0000984A7,"ADHI KARYA (PERSERO) Tbk, PT",ACTIVE,IDX,26-JUN-2019,IDR,...,0,0,0,0,0,0,0,556000000000,Sustainability Bonds,State-owned Enterprise
1,31-MAY-2021,ADHI02BCN2,OBLIGASI BERKELANJUTAN II ADHI KARYA TAHAP II ...,CORPORATE BOND,IDA0000984B5,"ADHI KARYA (PERSERO) Tbk, PT",ACTIVE,IDX,26-JUN-2019,IDR,...,0,0,0,0,0,0,0,473500000000,Sustainability Bonds,State-owned Enterprise
2,31-MAY-2021,ADHI02CN1,OBL BERKELANJUTAN II ADHI KARYA TAHAP I TAHUN ...,CORPORATE BOND,IDA000082002,"ADHI KARYA (PERSERO) Tbk, PT",ACTIVE,IDX,03-JUL-2017,IDR,...,0,0,0,0,0,0,0,2997000000000,Sustainability Bonds,State-owned Enterprise
3,31-MAY-2021,ADHI03CN1,OBLIGASI BERKELANJUTAN III ADHI KARYA TAHAP I ...,CORPORATE BOND,IDA000111900,"ADHI KARYA (PERSERO) Tbk, PT",ACTIVE,IDX,19-NOV-2020,IDR,...,0,0,0,0,0,0,0,289600000000,Sustainability Bonds,State-owned Enterprise
4,31-MAY-2021,ADMF03CCN4,OBL BKLJT III ADIRA FINANCE TAHAP IV TH 2016 SR C,CORPORATE BOND,IDA0000749C0,"ADIRA DINAMIKA MULTI FINANCE Tbk, PT",ACTIVE,IDX,27-JUL-2016,IDR,...,0,0,0,0,0,0,0,431000000000,Sustainability Bonds,Private Corporation


In [58]:
# Summarise the GSS bond frame again, now including the two label columns.
gss_bond_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29369 entries, 0 to 29368
Data columns (total 54 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Date                     29369 non-null  object 
 1   Code                     29369 non-null  object 
 2   Description              29283 non-null  object 
 3   Type_x                   29283 non-null  object 
 4   Isin Code                29283 non-null  object 
 5   Issuer                   29283 non-null  object 
 6   Status                   29283 non-null  object 
 7   Stock Exchange           29171 non-null  object 
 8   Listing Date             29171 non-null  object 
 9   Currency                 29283 non-null  object 
 10  Form                     29283 non-null  object 
 11  Eff. Date Isin           0 non-null      object 
 12  Maturity Date            29283 non-null  object 
 13  Expire Date              0 non-null      object 
 14  Exercise Price        

# 7. Export GSS Bonds data

In [98]:
# Write the full GSS bond frame to a '#'-separated text file, without the row index.
gss_bond_data.to_csv('Indonesia_GSS_Bonds_Data.txt', sep='#',index=False)

In [97]:
# Spot-check: labelled GSS rows whose Code contains 'SWR'.
gss_bond_data[gss_bond_data['Code'].str.contains('SWR')]

Unnamed: 0,Date,Code,Description,Type_x,Isin Code,Issuer,Status,Stock Exchange,Listing Date,Currency,...,Foreign IB,Foreign ID,Foreign MF,Foreign SC,Foreign FD,Foreign OT,Total_Foreign_Ownership,Total_Outstanding,GSS_Bond_Type,GSS_Bond_Issuer_Type
29285,31-MAY-2021,SWR001,,,,,,,,,...,0,0,0,0,0,0,0,14812000000,Social Bonds,Government
29288,30-JUN-2021,SWR001,,,,,,,,,...,0,0,0,0,0,0,0,14812000000,Social Bonds,Government
29289,30-JUN-2021,SWR002,,,,,,,,,...,0,0,0,0,0,0,0,8028000000,Social Bonds,Government
29292,30-JUL-2021,SWR001,,,,,,,,,...,0,0,0,0,0,0,0,14812000000,Social Bonds,Government
29293,30-JUL-2021,SWR002,,,,,,,,,...,0,0,0,0,0,0,0,8028000000,Social Bonds,Government
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29428,29-FEB-2024,SWR004,,,,,,,,,...,0,0,0,0,0,0,0,16220000000,Social Bonds,Government
29433,28-MAR-2024,SWR003,,,,,,,,,...,0,0,0,0,0,0,0,23744000000,Social Bonds,Government
29434,28-MAR-2024,SWR004,,,,,,,,,...,0,0,0,0,0,0,0,16220000000,Social Bonds,Government
29439,30-APR-2024,SWR003,,,,,,,,,...,0,0,0,0,0,0,0,23744000000,Social Bonds,Government


In [39]:
# List all column names of the GSS bond frame.
gss_bond_data.columns

Index(['Date', 'Code', 'Description', 'Type_x', 'Isin Code', 'Issuer',
       'Status', 'Stock Exchange', 'Listing Date', 'Currency', 'Form',
       'Eff. Date Isin', 'Maturity Date', 'Expire Date', 'Exercise Price',
       'Interest', 'Interest Type', 'Interest Freq', 'Nominal Value',
       'Num. of Sec', 'Originated Amt', 'Current Amt', 'Total Scripless',
       'Local (%)', 'Foreign (%)', 'Total (%)', 'Sector', 'Closing Price',
       'Type_y', 'Sec. Num', 'Price', 'Local IS', 'Local CP', 'Local PF',
       'Local IB', 'Local ID', 'Local MF', 'Local SC', 'Local FD', 'Local OT',
       'Total_Local_Ownership', 'Foreign IS', 'Foreign CP', 'Foreign PF',
       'Foreign IB', 'Foreign ID', 'Foreign MF', 'Foreign SC', 'Foreign FD',
       'Foreign OT', 'Total_Foreign_Ownership', 'Total_Outstanding',
       'GSS_Bond_Type', 'GSS_Bond_Issuer_Type'],
      dtype='object')

In [50]:
# Columns left out of the reduced GSS export.
gss_export_dropped_columns = [
    'Type_y', 'Status', 'Nominal Value', 'Num. of Sec', 'Originated Amt',
    'Current Amt', 'Total Scripless', 'Local (%)', 'Foreign (%)', 'Total (%)',
    'Eff. Date Isin', 'Maturity Date', 'Expire Date', 'Exercise Price',
    'Description', 'Interest Type', 'Interest Freq', 'Stock Exchange',
    'Currency', 'Form',
]

# Write the reduced frame to a '#'-separated text file, without the row index.
gss_bond_data.drop(columns=gss_export_dropped_columns).to_csv(
    'Indonesia_GSS_Bonds_Data_test.txt', sep='#', index=False
)

# 8. Create dataset for comparing GSS bonds with all bond instruments in the market

In [35]:
# Write the full merged bond frame to a '#'-separated text file, without the row index.
bond_data.to_csv('Indonesia_Bonds_Data.txt', sep='#',index=False)

In [42]:
# Columns left out of the reduced all-bonds export.
bond_export_dropped_columns = [
    'Type_y', 'Status', 'Nominal Value', 'Num. of Sec', 'Originated Amt',
    'Current Amt', 'Total Scripless', 'Local (%)', 'Foreign (%)', 'Total (%)',
    'Eff. Date Isin', 'Maturity Date', 'Expire Date', 'Exercise Price',
    'Description', 'Interest Type', 'Interest Freq', 'Stock Exchange',
    'Currency', 'Form',
]

# Write the reduced frame to a '#'-separated text file, without the row index.
bond_data.drop(columns=bond_export_dropped_columns).to_csv(
    'Indonesia_Bonds_Data_test.txt', sep='#', index=False
)

In [53]:
# Spot-check: merged rows whose Code contains 'SWR002'.
bond_data[bond_data['Code'].str.contains('SWR002')]

Unnamed: 0,Date,Code,Description,Type_x,Isin Code,Issuer,Status,Stock Exchange,Listing Date,Currency,...,Foreign CP,Foreign PF,Foreign IB,Foreign ID,Foreign MF,Foreign SC,Foreign FD,Foreign OT,Total_Foreign_Ownership,Total_Outstanding
462,31-MAY-2021,SWR002,SUKUK WAKAF SERI SWR002,SBSN,IDJ000018403,PEMERINTAH REPUBLIK INDONESIA,ACTIVE,,,IDR,...,,,,,,,,,,
43426,30-JUN-2021,SWR002,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8028000000.0
43438,30-JUL-2021,SWR002,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8028000000.0
43448,31-AUG-2021,SWR002,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8028000000.0
43459,30-SEP-2021,SWR002,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8028000000.0
43471,29-OCT-2021,SWR002,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8028000000.0
43483,30-NOV-2021,SWR002,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8028000000.0
43492,30-DEC-2021,SWR002,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8028000000.0
43502,31-JAN-2022,SWR002,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8028000000.0
43511,25-FEB-2022,SWR002,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8028000000.0
