# Imports

In [1]:
from utils import ixbrl_frs_102_2024
from bs4 import BeautifulSoup
import json

# Functions

In [2]:
def retrieve_all_ix_financial_data_minus(xhtml_content_path, verbose=True):
    """
    Retrieve tagged iXBRL values from an XHTML file, applying sign and scale.

    For every non-empty label in ``ixbrl_frs_102_2024`` the document is searched
    for elements whose ``name`` attribute ends with ``:<label>``. The stripped
    text of each match is collected in document order after:

    * prefixing ``-`` when the element carries ``sign="-"``;
    * multiplying by ``10 ** scale`` when the element carries a positive
      integer ``scale`` attribute (Inline XBRL's displayed-value multiplier);
    * falling back to the previous heuristic (multiply by 1000 when
      ``decimals="-3"``) only when no ``scale`` attribute is present.

    Args:
        xhtml_content_path (str): The path to the XHTML file.
        verbose (bool): When True (the default, matching the previous
            behaviour) print every label, matched element and running value
            list. Pass False to keep notebook output short.

    Returns:
        dict: Maps each non-empty label to a list of value strings (empty list
            when nothing matched). Scaled values are formatted with thousands
            separators, e.g. ``'31,716,000'``.
    """
    # Read the whole document up front; the file handle is closed by the context manager
    with open(xhtml_content_path, 'r', encoding='utf-8') as file:
        xhtml_content = file.read()

    soup = BeautifulSoup(xhtml_content, 'html.parser')

    dict_name = {}

    for label in ixbrl_frs_102_2024:
        # Skip empty / None entries in the label list
        if not label:
            continue

        if verbose:
            print(f'label: {label}')

        # The leading ':' anchors the match on the namespace separator so that a
        # label cannot match as the tail of a longer, different label
        elements = soup.select(f'[name$=":{label}"]')

        temp_list = []
        for item in elements:
            if verbose:
                print(f'item: {item}')

            value = item.text.strip()

            # sign="-" marks a negative fact whose displayed text has no minus sign
            if item.get('sign') == '-':
                value = '-' + value

            # Work out the power of ten by which the displayed value must be multiplied.
            # `scale` is the attribute that defines this; `decimals` only states precision.
            scale = item.get('scale')
            if scale is not None:
                try:
                    exponent = int(scale)
                except ValueError:
                    exponent = 0
            elif item.get('decimals') == '-3':
                # NOTE(review): legacy heuristic kept for documents without `scale`;
                # confirm whether any such filings really report in thousands.
                exponent = 3
            else:
                exponent = 0

            if exponent > 0:
                try:
                    value = '{:,}'.format(int(value.replace(',', '')) * 10 ** exponent)
                except ValueError:
                    # Non-numeric display text (e.g. a dash for nil) used to abort the
                    # whole extraction; keep the raw text and report it instead
                    print(f"Could not scale non-numeric value '{value}' for label '{label}'; left unchanged.")

            temp_list.append(value)
            if verbose:
                print(f'temp list: {temp_list}')

        dict_name[label] = temp_list

    return dict_name

## Micro, Small or Medium selection criteria

Data obtained from:   
https://www.gov.uk/government/publications/life-of-a-company-annual-requirements/life-of-a-company-part-1-accounts

Micro companies (a company is treated as micro when it meets at least 2 of the following conditions)
- a turnover of £632,000 or less
- £316,000 or less on its balance sheet
- 10 employees or less


Qualifying as a small company   
For accounting periods beginning on or after 1 January 2016, a small company must meet at least 2 of the following conditions:   

- annual turnover must be not more than £10.2 million
- the balance sheet total must be not more than £5.1 million
- the average number of employees must be not more than 50

Qualifying as a medium-sized company

To be a medium-sized company, you must meet at least 2 of the following conditions:

- the annual turnover must be no more than £36 million
- the balance sheet total must be no more than £18 million
- the average number of employees must be no more than 250


In [4]:
def _to_int(value):
    """Parse a comma-grouped integer string such as '1,234' or '-5,000'."""
    return int(value.replace(',', ''))


def _value_at(values, position):
    """Return values[position], or 0 when the list is too short to have it."""
    return values[position] if len(values) > position else 0


def company_size(ixbrl_data):
    """
    Determine the size of a company based on the UK Companies House criteria.

    The latest period is read from index 0 of each value list and the previous
    period from index 1 (the order shown by the extracted data, latest first).
    A size band is met when at least 2 of its 3 limits (turnover, balance sheet
    total, average employees) are satisfied by the latest-period figures; bands
    are tested from smallest to largest.

    The balance sheet total is computed as fixed assets + current assets. When
    'FixedAssets' is missing it is rebuilt by summing the first two values of
    each fixed-asset component label.

    Args:
        ixbrl_data (dict): Maps iXBRL labels to lists of comma-formatted
            integer strings, as extracted from the XHTML content.

    Returns:
        str: The size of the company ('micro', 'small', 'medium', or 'large').

    Raises:
        ValueError: If a turnover, current-assets, employees or FixedAssets
            value is not an integer string (component values are reported and
            skipped instead).
    """
    # Upper limits per size band, ordered from smallest to largest
    size_bands = (
        ('micro', {'turnover': 632000, 'balance_sheet': 316000, 'employees': 10}),
        ('small', {'turnover': 10200000, 'balance_sheet': 5100000, 'employees': 50}),
        ('medium', {'turnover': 36000000, 'balance_sheet': 18000000, 'employees': 250}),
    )

    # Labels summed to rebuild fixed assets when the total is not tagged.
    # NOTE(review): some of these may be sub-totals of others (e.g. investments),
    # which would double count — confirm against the taxonomy.
    fixed_assets_components = (
        'IntangibleAssets',
        'PropertyPlantEquipment',
        'InvestmentsFixedAssets',
        'InvestmentsInGroupUndertakings',
        'LoansToGroupUndertakings',
        'InvestmentsInAssociatesJointVenturesParticipatingInterests',
        'LoansToAssociatesJointVenturesParticipatingInterests',
        'OtherInvestmentsOtherThanLoans',
        'OtherLoansClassifiedUnderInvestments',
        'OwnShares',
        'InvestmentProperty',
        'BiologicalAssetsNon-current',
    )

    raw_turnover = ixbrl_data.get('TurnoverRevenue', [])
    raw_fixed_assets = ixbrl_data.get('FixedAssets', [])
    raw_current_assets = ixbrl_data.get('CurrentAssets', [])
    raw_employees = ixbrl_data.get('AverageNumberEmployeesDuringPeriod', [])

    # Tell the reader which inputs are missing before any figures are shown
    if not raw_turnover:
        print("TurnoverRevenue list is empty.")
    if not raw_fixed_assets:
        print("FixedAssets list is empty.")
    if not raw_current_assets:
        print("CurrentAssets list is empty.")
    if not raw_employees:
        print("AverageNumberEmployeesDuringPeriod list is empty.")

    turnover = [_to_int(value) for value in raw_turnover]
    current_assets = [_to_int(value) for value in raw_current_assets]
    employees = [_to_int(value) for value in raw_employees]

    if not raw_fixed_assets:
        fixed_assets = [0, 0]  # [latest, previous]
        for component in fixed_assets_components:
            # Only the first two items are used; they generally hold the totals
            # for the latest and previous period. enumerate() keeps each value in
            # its own period even when both periods carry the same figure.
            for period, value in enumerate(ixbrl_data.get(component, [])[:2]):
                try:
                    fixed_assets[period] += _to_int(value)
                except ValueError:
                    print(f"ValueError: invalid literal for int() with base 10: '{value}' in label '{component}'")
        print("Fixed Assets value was missing and calculated using components.")
    else:
        fixed_assets = [_to_int(value) for value in raw_fixed_assets[:2]]  # Use only the first two items

    # Balance sheet total per period, pairing fixed and current assets by position
    # so both figures always come from the same period; missing figures count as 0
    balance_sheet = [
        _value_at(fixed_assets, period) + _value_at(current_assets, period)
        for period in (0, 1)
    ]

    # Print values in the same column, with thousands separators for readability
    print('-'*70)
    print(f"{'Turnover Revenue:':<35} {[f'{value:,}' for value in turnover]}")
    print(f"{'Fixed Assets:':<35} {[f'{value:,}' for value in fixed_assets]}")
    print(f"{'Current Assets:':<35} {[f'{value:,}' for value in current_assets]}")
    print(f"{'Average Number Employees | Period:':<35} {[f'{value:,}' for value in employees]}")
    print(f"{'Balance Sheet Total:':<35} {[f'{value:,}' for value in balance_sheet]}")
    print('-'*70)

    # Latest-period figures drive the classification. A missing turnover or
    # employee figure cannot satisfy a condition; the balance sheet total is
    # always evaluated (it is 0 when no asset data was found).
    latest_turnover = turnover[0] if turnover else None
    latest_employees = employees[0] if employees else None
    latest_balance_sheet = balance_sheet[0]

    for size, limits in size_bands:
        conditions_met = sum((
            latest_turnover is not None and latest_turnover <= limits['turnover'],
            latest_balance_sheet <= limits['balance_sheet'],
            latest_employees is not None and latest_employees <= limits['employees'],
        ))
        if conditions_met >= 2:
            print(f"Size of Entity: {size.capitalize()}")
            return size

    # No band satisfied at least 2 of its 3 limits
    print("Size of Entity: Large")
    return 'large'


# Load an xhtml file and retrieve ixbrl labels

In [5]:
# Path to the local XHTML (iXBRL) accounts file, relative to the notebook's working directory
file_path = 'xhtml/00981987_AA_2023-06-29_CHARTERHOUSE HOLDINGS PLC.xhtml'

# Pass the file path to the extraction function; it opens and parses the file itself
# and returns a dict mapping each label to its list of extracted value strings
ixbrl_data = retrieve_all_ix_financial_data_minus(file_path)

# Calculate Size

In [6]:
# Classify the company from the extracted values; prints the figures used and
# returns one of 'micro', 'small', 'medium' or 'large'
size = company_size(ixbrl_data)


FixedAssets list is empty.
Fixed Assets value was missing and calculated using components.
----------------------------------------------------------------------
Turnover Revenue:                   ['31,716,000', '29,644,000']
Fixed Assets:                       ['7,839,000', '8,340,000']
Current Assets:                     ['25,245,000', '23,684,000']
Average Number Employees | Period:  ['94', '92']
Balance Sheet Total:                ['33,084,000', '32,024,000']
----------------------------------------------------------------------
Size of Entity: Medium
