In [5]:
import docx
import csv
import os
import json
import requests
from print_shareholder_info import print_shareholder_info
from dotenv import load_dotenv

In [21]:
load_dotenv()

document = docx.Document()

API_KEY = os.environ.get("COMPANY_HOUSE_API_KEY")
OFFICER_NAME = os.environ.get("OFFICER_NAME")
OFFICER_DOB = os.environ.get("OFFICER_DOB")

URL = f'https://api.company-information.service.gov.uk/search/officers?q="{OFFICER_NAME}"'

headers = {"Authorization": API_KEY}

headers

{'Authorization': 'eb613ae5-33b1-4aaf-8f23-18ad86769709'}

In [9]:
# Add a title to the document
document.add_heading(f"Associated companies for: {OFFICER_NAME}", level=1)
current_heading = document.add_heading("Current appointments", level=1)
current_end = document.add_paragraph("-")
former_heading = document.add_heading("Former appointments", level=1)
former_end = document.add_paragraph("--")

former_end

<docx.text.paragraph.Paragraph at 0x215361157d0>

In [15]:
# Load the SIC codes and descriptions from the CSV file
with open(
    "./SIC07_CH_condensed_list_en.csv", 
newline="",
) as csvfile:
    sic_codes_reader = csv.reader(csvfile, delimiter=",", quotechar='"')
    sic_mapping = {row[0]: row[1] for row in sic_codes_reader}

sic_mapping

{'SIC Code': 'Description',
 '01110': 'Growing of cereals (except rice), leguminous crops and oil seeds',
 '01120': 'Growing of rice',
 '01130': 'Growing of vegetables and melons, roots and tubers',
 '01140': 'Growing of sugar cane',
 '01150': 'Growing of tobacco',
 '01160': 'Growing of fibre crops',
 '01190': 'Growing of other non-perennial crops',
 '01210': 'Growing of grapes',
 '01220': 'Growing of tropical and subtropical fruits',
 '01230': 'Growing of citrus fruits',
 '01240': 'Growing of pome fruits and stone fruits',
 '01250': 'Growing of other tree and bush fruits and nuts',
 '01260': 'Growing of oleaginous fruits',
 '01270': 'Growing of beverage crops',
 '01280': 'Growing of spices, aromatic, drug and pharmaceutical crops',
 '01290': 'Growing of other perennial crops',
 '01300': 'Plant propagation',
 '01410': 'Raising of dairy cattle',
 '01420': 'Raising of other cattle and buffaloes',
 '01430': 'Raising of horses and other equines',
 '01440': 'Raising of camels and camelids',

In [22]:
# Make the API request
response = requests.get(URL, headers=headers)
DATA = response.json()

DATA

{'items': [{'appointment_count': 1,
   'matches': {'title': [1, 5, 7, 12, 14, 21], 'snippet': []},
   'snippet': '',
   'address': {'premises': 'One Microsoft Way',
    'locality': 'Wa 98052',
    'country': 'United States',
    'address_line_1': 'Redmond'},
   'address_snippet': 'One Microsoft Way, Redmond, Wa 98052, United States',
   'date_of_birth': {'year': 1963, 'month': 1},
   'links': {'self': '/officers/mI2cWvy4QIMWxs3EvB7Es6FrXGc/appointments'},
   'description_identifiers': ['appointment-count', 'born-on'],
   'description': 'Total number of appointments 1 - Born January 1963',
   'title': 'Keith Ranger DOLLIVER',
   'kind': 'searchresults#officer'},
  {'appointment_count': 1,
   'snippet': '',
   'matches': {'snippet': [], 'title': [1, 5, 7, 12, 14, 21]},
   'address': {'locality': 'Washington 98006',
    'address_line_1': '5580 179th Avenue Se',
    'address_line_2': 'Bellevue',
    'region': 'Usa'},
   'address_snippet': '5580 179th Avenue Se, Bellevue, Washington 98006, 

In [23]:
# only keep officers with exact name match
exact_name_matches = []
for item in DATA["items"]:
    if item["title"].lower() == OFFICER_NAME.lower():
        exact_name_matches.append(item)

exact_name_dob_matches = []
for item in DATA["items"]:
    if (
        item["title"].lower() == OFFICER_NAME.lower()
        and isinstance(item.get("date_of_birth"), dict)
        and str(item["date_of_birth"]["year"])
        + "-"
        + str(item["date_of_birth"]["month"]).zfill(2)
        == OFFICER_DOB[:7]
    ):
        exact_name_dob_matches.append(item)

exact_name_dob_matches

[{'appointment_count': 1,
  'matches': {'title': [1, 5, 7, 12, 14, 21], 'snippet': []},
  'snippet': '',
  'address': {'premises': 'One Microsoft Way',
   'locality': 'Wa 98052',
   'country': 'United States',
   'address_line_1': 'Redmond'},
  'address_snippet': 'One Microsoft Way, Redmond, Wa 98052, United States',
  'date_of_birth': {'year': 1963, 'month': 1},
  'links': {'self': '/officers/mI2cWvy4QIMWxs3EvB7Es6FrXGc/appointments'},
  'description_identifiers': ['appointment-count', 'born-on'],
  'description': 'Total number of appointments 1 - Born January 1963',
  'title': 'Keith Ranger DOLLIVER',
  'kind': 'searchresults#officer'},
 {'appointment_count': 1,
  'snippet': '',
  'matches': {'snippet': [], 'title': [1, 5, 7, 12, 14, 21]},
  'address': {'locality': 'Washington 98006',
   'address_line_1': '5580 179th Avenue Se',
   'address_line_2': 'Bellevue',
   'region': 'Usa'},
  'address_snippet': '5580 179th Avenue Se, Bellevue, Washington 98006, Usa',
  'links': {'self': '/off

In [29]:
new_matches = json.dumps(exact_name_dob_matches)

new_matches

'[{"appointment_count": 1, "matches": {"title": [1, 5, 7, 12, 14, 21], "snippet": []}, "snippet": "", "address": {"premises": "One Microsoft Way", "locality": "Wa 98052", "country": "United States", "address_line_1": "Redmond"}, "address_snippet": "One Microsoft Way, Redmond, Wa 98052, United States", "date_of_birth": {"year": 1963, "month": 1}, "links": {"self": "/officers/mI2cWvy4QIMWxs3EvB7Es6FrXGc/appointments"}, "description_identifiers": ["appointment-count", "born-on"], "description": "Total number of appointments 1 - Born January 1963", "title": "Keith Ranger DOLLIVER", "kind": "searchresults#officer"}, {"appointment_count": 1, "snippet": "", "matches": {"snippet": [], "title": [1, 5, 7, 12, 14, 21]}, "address": {"locality": "Washington 98006", "address_line_1": "5580 179th Avenue Se", "address_line_2": "Bellevue", "region": "Usa"}, "address_snippet": "5580 179th Avenue Se, Bellevue, Washington 98006, Usa", "links": {"self": "/officers/hgf_hGSfHEbv9YaGnc7D8ZW6Ojo/appointments"}

In [56]:
for match in exact_name_dob_matches:
    print(json.dumps(match))

{"appointment_count": 1, "matches": {"title": [1, 5, 7, 12, 14, 21], "snippet": []}, "snippet": "", "address": {"premises": "One Microsoft Way", "locality": "Wa 98052", "country": "United States", "address_line_1": "Redmond"}, "address_snippet": "One Microsoft Way, Redmond, Wa 98052, United States", "date_of_birth": {"year": 1963, "month": 1}, "links": {"self": "/officers/mI2cWvy4QIMWxs3EvB7Es6FrXGc/appointments"}, "description_identifiers": ["appointment-count", "born-on"], "description": "Total number of appointments 1 - Born January 1963", "title": "Keith Ranger DOLLIVER", "kind": "searchresults#officer"}
{"appointment_count": 1, "snippet": "", "matches": {"snippet": [], "title": [1, 5, 7, 12, 14, 21]}, "address": {"locality": "Washington 98006", "address_line_1": "5580 179th Avenue Se", "address_line_2": "Bellevue", "region": "Usa"}, "address_snippet": "5580 179th Avenue Se, Bellevue, Washington 98006, Usa", "links": {"self": "/officers/hgf_hGSfHEbv9YaGnc7D8ZW6Ojo/appointments"}, "

In [75]:
officer_data_cache = []  # Store data for all matches

for match in exact_name_dob_matches:
    match_data = []  # Store data for the current match
    page_no = 1
    items_per_page = 50
    start_index = 0
    remaining_results = 1
    total_results = -1
    multiple_pages = False

    link = match['links']['self']

    while remaining_results > 0:
        officer_url = f"https://api.company-information.service.gov.uk{link}?page={page_no}&items_per_page={items_per_page}&start_index={start_index}"
        officer_response = requests.get(officer_url, headers=headers)
        current_data = officer_response.json()  # gets the current data
        match_data.append(current_data)  # adds it to the match data cache

        with open(
                f"{OFFICER_NAME} page {page_no}.json", "w", encoding="utf-8"
        ) as json_file:  # open a files to dump the data named by the page
            json.dump(current_data, json_file, ensure_ascii=False, indent=4)

        if total_results < 0:  # if you haven't checked the total results
            total_results = current_data["total_results"]  # fetch the number
            remaining_results = total_results  # set the remaining results to total
            multiple_pages = total_results > items_per_page

        remaining_results -= items_per_page
        start_index += items_per_page

        if remaining_results < items_per_page:
            items_per_page = remaining_results

        page_no += 1

    officer_data_cache.append(match_data)
    print(officer_data_cache)
    
    if multiple_pages:
        for current_page in range(1, page_no, 1):
            with open(
                    f"{OFFICER_NAME} page {current_page}.json"
            ) as json_file:
                for line in json_file:
                    converted_data = json.dumps(line)
                    if 'items' in converted_data:
                        officer_data_cache[converted_data['items']].update(
                            converted_data
                        )  # asuming both file has same ids otherwise use try catch

        officer_data = list(officer_data.values())
    else:
        with open(f"{OFFICER_NAME} page 1.json") as json_file:
            officer_data = json.load(json_file)

        # Loop through the officer's appointments and print the company name, number, and nature of business
        for current_page in range(0, page_no, 1):
            for appointment in officer_data["items"]:
                company_name = appointment["appointed_to"]["company_name"]
                company_number = appointment["appointed_to"]["company_number"]
                appointed_on = appointment["appointed_on"]
                officer_role = appointment["officer_role"]
                company_status = appointment["appointed_to"]["company_status"]
                psc_name = ""
                company_title = f"{company_name} ({company_number})"

                # Get the company profile URL
                company_profile_url = f"https://api.company-information.service.gov.uk/company/{company_number}"
                company_profile_response = requests.get(
                    company_profile_url, headers=headers
                )
                company_profile_data = company_profile_response.json()

                # Get the SIC code and look up the activity in the mapping dictionary
                sic_code = company_profile_data.get("sic_codes", ["N/A"])[0]
                activity = sic_mapping.get(sic_code, "Unknown")

                # Get company status, incorporation date, dissolution date

                company_inc = company_profile_data.get("date_of_creation")
                if "date_of_cessation" in company_profile_data:
                    company_dis = company_profile_data["date_of_cessation"]
                else:
                    company_dis = None

                # Get the company's persons with significant control data
                company_psc_url = f"https://api.company-information.service.gov.uk/company/{company_number}/persons-with-significant-control"
                company_psc_response = requests.get(company_psc_url, headers=headers)
                company_psc_data = company_psc_response.json()

                # print(json.dumps(company_psc_data, indent=4))

                # Loop through the company's persons with significant control and print the name
                psc_names = []
                if "items" in company_psc_data and company_psc_data["items"]:
                    for psc in company_psc_data["items"]:
                        if "name" in psc:
                            psc_name = psc["name"]
                            psc_names.append(psc_name)
                else:
                    print("No PSC data available for this company.")

                if "resigned_on" in appointment:
                    resigned_on = appointment["resigned_on"]
                    if len(psc_names) == 1:
                        for psc_name in psc_names:
                            psc_statement = f"The company has a person with significant control named {psc_name}."
                        new_paragraph = former_end.insert_paragraph_before(
                            f"{company_name} ({company_number}) \n{OFFICER_NAME} was appointed {officer_role} of {company_name} ({company_number}) on {appointed_on} and resigned on {resigned_on}. The nature of business is {activity}. {psc_statement}"
                        )

                    elif len(psc_names) > 1:
                        last_name = psc_names.pop()
                        full_list = ", ".join(psc_names)
                        psc_statement = f"The company has the following persons with significant control: {full_list} and {last_name}."
                        new_paragraph = former_end.insert_paragraph_before(
                            f"{company_name} ({company_number}) \n{OFFICER_NAME} was appointed {officer_role} of {company_name} ({company_number}) on {appointed_on} and resigned on {resigned_on}. The nature of business is {activity}. {psc_statement}"
                        )

                    else:
                        psc_statement = (
                            "The company has no persons with significant control"
                        )
                        new_paragraph = former_end.insert_paragraph_before(
                            f"{company_name} ({company_number}) \n{OFFICER_NAME} was appointed {officer_role} of {company_name} on {appointed_on} and resigned on {resigned_on}. The nature of business is {activity}. {psc_statement}"
                        )

                else:
                    if len(psc_names) == 1:
                        for psc_name in psc_names:
                            psc_statement = f"The company has a person with significant control named {psc_name}."
                            if "active" in company_status:
                                new_paragraph = current_end.insert_paragraph_before(
                                    f"{company_name} ({company_number}) \n{OFFICER_NAME} has been serving as {officer_role} of {company_name} since {appointed_on}. The nature of business is {activity}. {psc_statement}"
                                )
                            else:
                                new_paragraph = former_end.insert_paragraph_before(
                                    f"{company_name} ({company_number}) \n{OFFICER_NAME} served as {officer_role} of {company_name} between {appointed_on} and {company_dis}. The nature of business was {activity}. {psc_statement}"
                                )
                    elif len(psc_names) > 1:
                        last_name = psc_names.pop()
                        full_list = ", ".join(psc_names)
                        psc_statement = f"The company has the following persons with significant control: {full_list} and {last_name}."
                        if "active" in company_status:
                            new_paragraph = current_end.insert_paragraph_before(
                                f"{company_name} ({company_number}) \n{OFFICER_NAME} has been serving as {officer_role} of {company_name} since {appointed_on}. The nature of business is {activity}. {psc_statement}"
                            )
                        else:
                            new_paragraph = former_end.insert_paragraph_before(
                                f"{company_name} ({company_number}) \n{OFFICER_NAME} served as {officer_role} of {company_name} between {appointed_on} and {company_dis}. The nature of business is {activity}. {psc_statement}"
                            )

                    else:
                        psc_statement = (
                            "The company has no persons with significant control"
                        )
                        if "active" in company_status:
                            new_paragraph = current_end.insert_paragraph_before(
                                f"{company_name} ({company_number}) \n{OFFICER_NAME} has been serving as {officer_role} of {company_name} since {appointed_on}. The nature of business is {activity}. {psc_statement}"
                            )
                        else:
                            new_paragraph = former_end.insert_paragraph_before(
                                f"{company_name} ({company_number}) \n{OFFICER_NAME} served as {officer_role} of {company_name} between {appointed_on} and {company_dis}. The nature of business was {activity}. {psc_statement}"
                            )

[[{'name': 'Keith Ranger DOLLIVER', 'kind': 'personal-appointment', 'total_results': 1, 'is_corporate_officer': False, 'items_per_page': 50, 'date_of_birth': {'month': 1, 'year': 1963}, 'etag': '5adf497546280219eb30b84eff6f9171ab13659c', 'start_index': 0, 'links': {'self': '/officers/mI2cWvy4QIMWxs3EvB7Es6FrXGc/appointments'}, 'items': [{'occupation': 'Attorney', 'country_of_residence': 'United States', 'links': {'company': '/company/06441873'}, 'officer_role': 'director', 'address': {'premises': 'One Microsoft Way', 'locality': 'Wa 98052', 'address_line_1': 'Redmond', 'country': 'United States'}, 'appointed_to': {'company_number': '06441873', 'company_status': 'active', 'company_name': 'LINKEDIN TECHNOLOGY UK LIMITED'}, 'appointed_on': '2017-01-23', 'name': 'Keith Ranger DOLLIVER', 'name_elements': {'other_forenames': 'Ranger', 'surname': 'DOLLIVER', 'forename': 'Keith'}, 'nationality': 'American'}]}]]
[[{'name': 'Keith Ranger DOLLIVER', 'kind': 'personal-appointment', 'total_results'

TypeError: string indices must be integers, not 'str'