In [1]:
import requests
from bs4 import BeautifulSoup
import json
from pdfminer.high_level import extract_text
from io import BytesIO
import pandas as pd

1.0 Obtain "Total Issued Shares"

In [2]:
def crawl_monthly_return(url,code):
    """
    Crawls the provided URL to extract total shares on the monthly return report

    Args:
        url (str): The base URL for the pdf to be crawled.
        code (str): The stock code for the PDF to be crawled. ("1787.HK")

    Returns:
        json: A json object containing the stock code and the total shares
    """    

    '''
    Part 1
    # packages used: pdfminer
    # Fetch the PDF from the web and extract text from the file
    '''
    response = requests.get(url)

    # Load the PDF into BytesIO
    pdf_file = BytesIO(response.content)

    # Extract text from PDF
    pdf_contents = extract_text(pdf_file)

    #store contents to list
    contents = list(filter(None, pdf_contents.split('\n')))
        
    '''
    Part 2
    Extracting Part II data from the PDF string 'contents'
    Extracted raw Part II data is stored in List (Filtered)
    '''
            
    Filtered = []
  
    # Function to Select Part II of the PDF
    isData = False   
    for x in contents:
        if 'Movements in Issued Shares' in x:
            isData = True
        if 'Details of Movements in Issued Shares' in x:
            isData = False
        if (isData == True):
            Filtered.append(x)

    print(Filtered)  # test line, prints the Part II text in (List) form

    '''
    Part 3
    Filter out the useful data from Part II text
    Extracted data stored in Dictionary (data)
    '''
  
    data = {}
    isHshares = 0    #parameter to check whether the stock is 港股
    sharesAmount = 0

    '''
    Loop through the List of texts in Part II of pdf, and then extract the total number of H shares.
    Extracted data MUST satisfy:
      1. stock type = 'H', or 'Not applicable'
      2. double check the stock code on the PDF, and the code is the same as input
    After finishing the data extraction, add a '0' to Stock code
      E.g. '1477.HK' -> '01477'
    '''
    for x in range(len(Filtered)):
        if (Filtered[x] == 'Type of shares'):
            if (Filtered[x+1] ==  'H' or Filtered[x+1] == 'Not applicable'):
                isHshares = 1
            else:
                isHshares = 0
        if (Filtered[x] == 'Stock code'):
            if (isHshares == 1 and Filtered[x+1] == ('0'+code).replace('.HK','')):
                isHshares = 1
            else:
                isHshares = 0

        if (Filtered[x] == 'Balance at close of the month' and isHshares == 1):
            data["stock_code"] = ('0'+code).replace('.HK','')
            sharesAmount += int(Filtered[x+1].replace(",", ""))

    # store data 
    data["total_issued_shares"] = sharesAmount   

    """ JSON Example: 
        {
            "stock_code": "01477",
            "total_issued_shares": 690903850
        }
    """
    return data

#data = crawl_monthly_return("https://www1.hkexnews.hk/listedco/listconews/sehk/2024/0102/2024010201366.pdf", "1787.HK")
#print(data["total_issued_shares"])

1.1 Obtain SDI data

In [3]:
# Fetch and extract data from the URLs in the SDI column
def crawl_sdi(url):
    """
    Crawls the provided URL to extract data on substantial shareholders and notices.

    Args:
        url (str): The base URL for the sdi page to be crawled.

    Returns:
        json: A json object containing extracted information, including a list of substantial
            shareholders and notices. 

    Raises:
        requests.exceptions.RequestException: If an error occurs during the request.
    """
    def fetch_form(urls, name_field):
        data_list = []
        for url in urls:
            if url:
                response = requests.get(url)
                response.raise_for_status()
                soup = BeautifulSoup(response.text, 'html.parser')
                
                # Find the data table
                data_table = soup.find('table', {'id': 'grdPaging'})
                if data_table:
                    for sub_row in data_table.find_all('tr')[1:]:  # Skip header
                        sub_cols = sub_row.find_all('td')
                        if len(sub_cols) >= 2:
                            name = sub_cols[1].get_text(strip=True)
                            url = base_url + sub_cols[0].find('a')['href'] if sub_cols[0].find('a') else None
                            
                            shares = None
                            sum_of_derivatives = 0
                            event_date = None
                            
                            if url:
                                url_response = requests.get(url)
                                url_response.raise_for_status()
                                url_soup = BeautifulSoup(url_response.text, 'html.parser')

                                date_span = url_soup.find('span', id='lblDEventDate')
                                event_date = date_span.get_text(strip=True).split('(')[0] if date_span else None

                                shares_table = url_soup.find('table', {'id': 'grdSh_AEvt'})
                                if shares_table:
                                    shares = []
                                    for row in shares_table.find_all('tr')[1:]:  # Skip header
                                        cols = row.find_all('td')
                                        total_number_of_shares = int(cols[1].get_text(strip=True).replace(',', ''))
                                        percentage = float(cols[2].get_text(strip=True))
                                        shares.append({"total_number_of_shares": total_number_of_shares, "percentage_figure": percentage})

                                derivatives_table = url_soup.find('table', {'id': 'grdDer_Dir'})
                                if derivatives_table:
                                    for row in derivatives_table.find_all('tr')[1:]:  # Skip header
                                        cols = row.find_all('td')
                                        derivative_str = cols[len(cols) - 1].get_text(strip=True).replace(',', '')
                                        if derivative_str.lstrip('-').isdigit():
                                            sum_of_derivatives += int(derivative_str)
                                
                            data_list.append({
                                name_field: name,
                                "date_of_relevant_event": event_date,
                                "long_position": shares,
                                "total_number_of_derivatives": sum_of_derivatives,
                            })
        return data_list

    # Base URL for the extracted links
    base_url = 'https://di.hkex.com.hk/di/'
    
    # Prepare lists for extracted information
    substantial_shareholders_urls, notices_urls = [], []
    substantial_shareholders_data, notices_data = [], []
    
    try:
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        table = soup.find('table', {'id': 'grdPaging'})

        if not table:
            print(f"No table found for {url}")
            return 

        for row in table.find_all('tr')[1:]:  # Skip header
            cols = row.find_all('td')
            if len(cols) < 3: continue

            stock_code = cols[0].get_text(strip=True)
            corporation_name = cols[1].get_text(strip=True)
            links = [a['href'] for a in cols[2].find_all('a')]

            substantial_shareholders_urls.append(base_url + links[1] if len(links) > 1 else None)
            notices_urls.append(base_url + links[5] if len(links) > 5 else None)

            # Fetch substantial shareholders data
            substantial_shareholders_data = fetch_form(substantial_shareholders_urls, "name_of_substantial_shareholder")
            # Fetch notices data
            notices_data = fetch_form(notices_urls, "name_of_noticed_shareholder")

            # Prepare the record
            record = {
                'stock_code': stock_code,
                'name_of_listed_corporation': corporation_name,
                'consolidated_list_of_substantial_shareholders': substantial_shareholders_data,
                'list_of_all_notices': notices_data,
            }

        print(f"Data extracted from {url}.")

    except requests.exceptions.RequestException as e:
        print(f"Error fetching {url}: {e}")
    
    print("Data extraction completed. JSON object generated.")

    """ JSON Example: 
        {
            "stock_code": "01477",
            "name_of_listed_corporation": "Ocumension Therapeutics - B",
            "consolidated_list_of_substantial_shareholders": [
                {
                    "name_of_substantial_shareholder": "6 Dimensions Capital GP, LLC",
                    "date_of_relevant_event": "21/12/2021",
                    "long_position": [
                    {
                        "total_number_of_shares": 126200000,
                        "percentage_figure": 18.92
                    }
                    ],
                    "total_number_of_derivatives": 0
                },
                {
                    "name_of_substantial_shareholder": "CHEN Ziqing",
                    "date_of_relevant_event": "21/12/2021",
                    "long_position": [
                    {
                        "total_number_of_shares": 126200000,
                        "percentage_figure": 18.92
                    }
                    ],
                    "total_number_of_derivatives": 0
                },
            ],
            "list_of_all_notices": [
                {
                    "name_of_noticed_shareholder": "Hu Zhaopeng",
                    "date_of_relevant_event": "13/12/2023",
                    "long_position": [
                    {
                        "total_number_of_shares": 4204658,
                        "percentage_figure": 0.6
                    }
                    ],
                    "total_number_of_derivatives": 564885
                },
                {
                    "name_of_noticed_shareholder": "Hu Zhaopeng",
                    "date_of_relevant_event": "11/12/2023",
                    "long_position": [
                    {
                        "total_number_of_shares": 4206585,
                        "percentage_figure": 0.6
                    }
                    ],
                    "total_number_of_derivatives": 564885
                }
            ]
        }  
    """  
    return record

#data = crawl_sdi("https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=1477&src=MAIN&lang=EN&g_lang=en")
#print(json.dumps(data, indent=4))

1.2 Obtain financial report's data (RAG)

In [4]:
# Output :
rag_data = {
    "stock_code": "01477",
     "consolidated_list_of_substantial_shareholders": [
        {
            "name_of_substantial_shareholder": "6 Dimensions Capital GP, LLC",
            "date_of_relevant_event": "21/12/2021",
            "long_position": [
                    {
                        "total_number_of_shares": 126200000,
                    }
            ],    
            "theresold": True
        }
    ]
}

1.3 Execute

In [5]:
import pandas as pd
from tqdm import tqdm
from datetime import datetime

# Load the CSV file
data = pd.read_csv('provided_data/faf_documents.csv')
required = pd.read_csv('provided_data/sample_submission.csv')
required_codes = required['ID'].to_numpy()
stock_codes = []
outputs = []

# Initialize tqdm for the progress bar
for index, row in tqdm(data.iterrows(), total=len(data), desc="Processing"):
    
    if not (row["RIC"] in required_codes):
        continue
    
    stock_codes.append(row["RIC"])
    
    # crawl data
    total_issued_shares = crawl_monthly_return(row["Monthly Return"], row["RIC"])["total_issued_shares"]
    sdi_data = crawl_sdi(row["SDI"])

    # calculate threshold
    threshold_shares = total_issued_shares * 0.05

    # initialize answer
    freefloat = total_issued_shares

    # Get substantial 
    for substantial_shareholder in sdi_data["consolidated_list_of_substantial_shareholders"]:
        shares = substantial_shareholder["long_position"][0]["total_number_of_shares"] - substantial_shareholder["total_number_of_derivatives"]
        if shares >= threshold_shares:
            freefloat -= shares

    # Get unique notices
    # Dictionary to store the most recent notice for each shareholder
    notices_dict = {} 
    for notice in sdi_data["list_of_all_notices"]:
        name = notice["name_of_noticed_shareholder"]
        if notice["date_of_relevant_event"] is None:
            continue
        date = datetime.strptime(notice["date_of_relevant_event"], "%d/%m/%Y")
        if name not in notices_dict or date > notices_dict[name]["date"]:
            notices_dict[name] = notice
            notices_dict[name]["date"] = date
            
    # Convert the dictionary back to a list
    unique_notices = list(notices_dict.values())
    for notice in unique_notices:
        shares = notice["long_position"][0]["total_number_of_shares"] - notice["total_number_of_derivatives"]
        if shares >= threshold_shares:
            freefloat -= shares

    freefloat /= total_issued_shares
    
    outputs.append(freefloat)

# Create a DataFrame with the collected data
result_df = pd.DataFrame({
    "ID": stock_codes,
    "outputs": outputs
})

# Save the DataFrame to a CSV file
result_df.to_csv('output.csv', index=False)


Processing:   0%|          | 0/51 [00:00<?, ?it/s]

['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '00002', 'Description', 'Not applicable', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '2,526,450,570', '0', '2,526,450,570', 'FF301', 'Page 2 of 6', 'v', '1.0.2']


Processing:  24%|██▎       | 12/51 [00:01<00:03, 10.62it/s]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=2&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['II. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'H', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '00038', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '391,940,000', '391,940,000', '2. Class of shares', 'Ordinary shares', 'Type of shares', 'A', 'Listed on SEHK (Note 1)', 'No', 'Stock code', '601038', 'Description', 'A shares listed on SSE', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '731,705,275', '731,705,275', 'Page 2 of 6', 'v', '1.0.2']
Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=38&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\

Processing:  27%|██▋       | 14/51 [00:02<00:09,  3.98it/s]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=69&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '00136', 'Description', 'Balance at close of preceding month', '11,585,897,545', 'Increase / decrease (-)', 'Balance at close of the month', '11,585,897,545', 'FF301', 'Page 2 of 8', 'v', '1.0.2']


Processing:  29%|██▉       | 15/51 [00:05<00:19,  1.83it/s]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=136&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '00271', 'Description', 'Balance at close of preceding month', '1,240,668,945', 'Increase / decrease (-)', 'Balance at close of the month', '1,240,668,945', 'FF301', 'Page 2 of 6', 'v', '1.0.2']


Processing:  31%|███▏      | 16/51 [00:07<00:23,  1.47it/s]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=271&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '00933', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '9,722,276,727', '0', '9,722,276,727', 'FF301', 'Page 2 of 7', 'v', '1.0.2']


Processing:  33%|███▎      | 17/51 [00:10<00:36,  1.08s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=933&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['II. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '00981', 'Description', 'Hong Kong Shares', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '5,972,933,178', '13,410', '5,972,946,588', '2. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'No', 'Stock code', '688981', 'Description', 'The common shares of the Company listed on the Science and Technology Innovation Board of the ', 'Shanghai Stock Exchange, which are subscribed in RMB by investors in the PRC.', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '1,973,609,172', '0', '1,973,609,172

Processing:  35%|███▌      | 18/51 [00:13<00:53,  1.62s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=981&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '00990', 'Description', 'Balance at close of preceding month', '13,471,344,631', 'Increase / decrease (-)', 'Balance at close of the month', '13,471,344,631', 'FF301', 'Page 2 of 6', 'v', '1.0.2']


Processing:  37%|███▋      | 19/51 [00:14<00:44,  1.40s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=990&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['II. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'H', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '01055', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '4,643,997,308', '0', '4,643,997,308', '2. Class of shares', 'Ordinary shares', 'Type of shares', 'A', 'Listed on SEHK (Note 1)', 'No', 'Stock code', '600029', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '13,476,909,012', '486', '13,476,909,498', 'Page 2 of 7', 'v', '1.0.2']


Processing:  39%|███▉      | 20/51 [00:16<00:44,  1.45s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=1055&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '01182', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '349,519,567', '349,519,567', 'FF301', 'Page 2 of 6', 'v', '1.0.2']


Processing:  41%|████      | 21/51 [00:16<00:38,  1.29s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/10/2023&sced=31/12/2023&sc=1182&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '01310', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '1,311,599,356', '0', '1,311,599,356', 'FF301', 'Page 2 of 7', 'v', '1.0.2']


Processing:  43%|████▎     | 22/51 [00:18<00:41,  1.43s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/09/2023&sced=31/12/2023&sc=1310&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['II. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '01347', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '1,308,804,360', '34,334', '1,308,838,694', '2. Class of shares', 'Ordinary shares', 'Type of shares', 'A', 'Listed on SEHK (Note 1)', 'No', 'Stock code', '688347', 'Description', 'Shares listed on the STAR Market of the Shanghai Stock Exchange', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '407,750,000', '407,750,000', 'Page 2 of 6', 'v', '1.0.2']


Processing:  45%|████▌     | 23/51 [00:20<00:44,  1.59s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=1347&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['II. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'H', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '01456', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '442,640,000', '442,640,000', '2. Class of shares', 'Ordinary shares', 'Type of shares', 'A', 'Listed on SEHK (Note 1)', 'No', 'Stock code', '601456', 'Description', 'Shanghai Stock Exchange', 'Balance at close of preceding month', '2,389,133,168', 'Increase / decrease (-)', 'Balance at close of the month', '2,389,133,168', 'Page 2 of 6', 'v', '1.0.2']


Processing:  47%|████▋     | 24/51 [00:22<00:42,  1.58s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=1456&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '01681', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '811,016,860', '0', '811,016,860', 'FF301', 'Page 2 of 6', 'v', '1.0.2']


Processing:  49%|████▉     | 25/51 [00:23<00:38,  1.49s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=1681&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '01833', 'Description', 'Balance at close of preceding month', '1,118,812,900', 'Increase / decrease (-)', 'Balance at close of the month', '1,118,812,900', 'FF301', 'Page 2 of 6', 'v', '1.0.2']


Processing:  51%|█████     | 26/51 [00:26<00:44,  1.80s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=1833&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '01836', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '795,079,500', '120,500', '795,200,000', 'FF301', 'Page 2 of 6', 'v', '1.0.2']


Processing:  53%|█████▎    | 27/51 [00:27<00:41,  1.72s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=1836&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['II. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'H', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '01880', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '116,383,500', '0', '116,383,500', '2. Class of shares', 'Ordinary shares', 'Type of shares', 'A', 'Listed on SEHK (Note 1)', 'No', 'Stock code', '601888', 'Description', 'Listed on the main board of the Shanghai Stock Exchange', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '1,952,475,544', '0', '1,952,475,544', 'Page 2 of 6', 'v', '1.0.2']


Processing:  55%|█████▍    | 28/51 [00:45<02:28,  6.47s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=1880&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '01909', 'Description', 'Ordinary Shares', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '3,840,000,000', '0', '3,840,000,000', 'FF301', 'Page 2 of 6', 'v', '1.0.2']


Processing:  57%|█████▋    | 29/51 [00:49<02:03,  5.62s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=1909&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '01929', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '10,000,000,000', '-12,263,200', '9,987,736,800', 'FF301', 'Page 2 of 6', 'v', '1.0.2']


Processing:  59%|█████▉    | 30/51 [00:49<01:28,  4.22s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/10/2023&sced=31/12/2023&sc=1929&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['II. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'H', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '01988', 'Description', 'H Shares', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '8,320,295,289', '0', '8,320,295,289', '2. Class of shares', 'Ordinary shares', 'Type of shares', 'A', 'Listed on SEHK (Note 1)', 'No', 'Stock code', '600016', 'Description', 'A Shares', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '35,462,123,213', '0', '35,462,123,213', '3. Class of shares', 'Preference shares', 'Type of shares', 'Other type(specify in description)', 'Listed on SEHK (Note 1)', 'No', 'Stock code', '360037', 'Description', 'Domestic Preference Shares',

Processing:  61%|██████    | 31/51 [00:53<01:20,  4.02s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=1988&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '02013', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '2,794,594,990', '0', '2,794,594,990', 'FF301', 'Page 2 of 7', 'v', '1.0.2']


Processing:  63%|██████▎   | 32/51 [00:59<01:29,  4.71s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=2013&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'FF301', 'Stock code', '02020', 'Description', 'Multi-counter stock code', '82020', 'RMB', 'Description', 'N/A', 'N/A', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '2,832,623,500', '0', '2,832,623,500', 'Page 2 of 7', 'v', '1.0.2']


Processing:  65%|██████▍   | 33/51 [01:00<01:04,  3.59s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=2020&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['II. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'H', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '02145', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '197,343,760', '0', '197,343,760', '2. Class of shares', 'Ordinary shares', 'Type of shares', 'Other type(specify in description)', 'Listed on SEHK (Note 1)', 'No', 'Stock code', 'N/A', 'Description', 'Domestic Shares', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '200,614,140', '0', '200,614,140', 'Page 3 of 7', 'v', '1.0.2']


Processing:  67%|██████▋   | 34/51 [01:02<00:53,  3.12s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=2145&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '02162', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '279,735,566', '279,735,566', 'FF301', 'Page 2 of 6', 'v', '1.0.2']


Processing:  69%|██████▊   | 35/51 [01:03<00:40,  2.51s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=2162&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '02273', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '246,589,092', '0', '246,589,092', 'FF301', 'Page 2 of 7', 'v', '1.0.2']


Processing:  71%|███████   | 36/51 [01:05<00:32,  2.14s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=2273&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '02362', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '12,502,082,051', '0', '12,502,082,051', 'FF301', 'Page 2 of 8', 'v', '1.0.2']


Processing:  73%|███████▎  | 37/51 [01:05<00:24,  1.73s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=2362&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '02416', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '574,260,070', '1,659,390', '575,919,460', 'FF301', 'Page 2 of 7', 'v', '1.0.2']


Processing:  75%|███████▍  | 38/51 [01:08<00:23,  1.84s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=2416&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '02469', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '2,253,657,730', '27,723,100', '2,281,380,830', 'FF301', 'Page 2 of 7', 'v', '1.0.2']


Processing:  76%|███████▋  | 39/51 [01:16<00:44,  3.71s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=2469&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '03383', 'Description', 'N/A', 'Balance at close of preceding month', '5,046,047,500', 'Increase / decrease (-)', 'Balance at close of the month', '5,046,047,500', 'FF301', 'Page 2 of 6', 'v', '1.0.2']


Processing:  78%|███████▊  | 40/51 [01:25<00:58,  5.29s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=3383&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '03668', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '1,320,439,437', '0', '1,320,439,437', 'FF301', 'Page 2 of 6', 'v', '1.0.2']


Processing:  80%|████████  | 41/51 [01:28<00:46,  4.66s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=3668&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '03888', 'Description', 'Ordinary Shares', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '1,365,543,317', '0', '1,365,543,317', 'FF301', 'Page 2 of 8', 'v', '1.0.2']


Processing:  82%|████████▏ | 42/51 [01:29<00:31,  3.53s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=3888&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '03900', 'Description', 'Balance at close of preceding month', '2,531,998,690', 'Increase / decrease (-)', 'Balance at close of the month', '2,531,998,690', 'FF301', 'Page 2 of 7', 'v', '1.0.2']


Processing:  84%|████████▍ | 43/51 [01:32<00:27,  3.46s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=3900&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '03918', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '4,422,990,160', '0', '4,422,990,160', 'FF301', 'Page 2 of 8', 'v', '1.0.2']


Processing:  86%|████████▋ | 44/51 [01:35<00:23,  3.39s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=3918&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['II. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'A', 'Listed on SEHK (Note 1)', 'No', 'Stock code', '600958', 'Description', 'Shanghai Stock Exchange', 'Balance at close of preceding month', '7,469,482,864', 'Increase / decrease (-)', 'Balance at close of the month', '7,469,482,864', '2. Class of shares', 'Ordinary shares', 'Type of shares', 'H', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '03958', 'Description', 'H Shares', 'Balance at close of preceding month', '1,027,162,428', 'Increase / decrease (-)', 'Balance at close of the month', '1,027,162,428', 'Page 2 of 6', 'v', '1.0.2']


Processing:  88%|████████▊ | 45/51 [01:37<00:16,  2.81s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=3958&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '06889', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '704,867,696', '704,867,696', 'FF301', 'Page 2 of 6', 'v', '1.0.2']


Processing:  90%|█████████ | 46/51 [01:39<00:12,  2.55s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/10/2023&sced=31/12/2023&sc=6889&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '09636', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '466,087,000', '0', '466,087,000', 'FF301', 'Page 2 of 6', 'v', '1.0.2']


Processing:  92%|█████████▏| 47/51 [01:40<00:08,  2.20s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=9636&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '09858', 'Description', 'Balance at close of preceding month', '3,795,404,000', 'Increase / decrease (-)', 'Balance at close of the month', '3,795,404,000', 'FF301', 'Page 2 of 6', 'v', '1.0.2']


Processing:  94%|█████████▍| 48/51 [01:43<00:06,  2.30s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=9858&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '09860', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '727,354,791', '727,354,791', 'FF301', 'Page 2 of 6', 'v', '1.0.2']


Processing:  96%|█████████▌| 49/51 [01:46<00:05,  2.61s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=9860&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '09923', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '445,992,842', '0', '445,992,842', 'FF301', 'Page 2 of 9', 'v', '1.0.2']


Processing:  98%|█████████▊| 50/51 [01:52<00:03,  3.77s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=9923&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.
['\x0cII. Movements in Issued Shares', '1. Class of shares', 'Ordinary shares', 'Type of shares', 'Not applicable', 'Listed on SEHK (Note 1)', 'Yes', 'Stock code', '09930', 'Description', 'Balance at close of preceding month', 'Increase / decrease (-)', 'Balance at close of the month', '3,197,244,000', '0', '3,197,244,000', 'FF301', 'Page 2 of 6', 'v', '1.0.2']


Processing: 100%|██████████| 51/51 [01:53<00:00,  2.23s/it]

Data extracted from https://di.hkex.com.hk/di/NSSrchCorpList.aspx?sa1=cl&scsd=01/07/2023&sced=31/12/2023&sc=9930&src=MAIN&lang=EN&g_lang=en.
Data extraction completed. JSON object generated.





2.0 Data Washing

In [6]:
# 1. sdi_take the newest one
# 2. take union

2.1 

In [7]:
#for rag_holder in rag_data["consolidated_list_of_substantial_shareholders"]:
    # 1. Check if SDI data exist
       # If Exist, then get the newer one by comparing the event data
       # If not, just use the rag one
       
    # 2. Check whether it overshot the thersold or not
        # If percentage figure >= 5%, it is non freefloat
        # Else reject the data
    # 3. Output the washed JSON object
    

3.0 Calculation

In [8]:
# Total issued shares - non-free-float shares = answer