In [None]:
import json
import os
import statistics
import time
from datetime import datetime, timedelta

import folium
import pandas as pd
import pgeocode
import requests
from pandas import DataFrame

### Searching for companies by SIC code

In [None]:
class CHouse:
    """Small client for the Companies House REST API with a shared call throttle.

    Every public method that talks to the API first calls ``api_guard`` so
    that the number of requests stays below a fixed budget. The throttle
    state lives on the class, so all instances share one budget.

    NOTE(review): the 599-call / 310-second values look tuned for a limit of
    600 requests per five minutes -- confirm against the API documentation.
    """

    # Throttle state shared by all instances.
    api_calls = 0                  # calls counted since the last reset
    total_api_calls = 0            # calls counted since the class was defined
    current_time = datetime.now()  # despite the name: time of the latest api_guard() call

    # Throttle / network tuning (previously inline magic numbers).
    MAX_CALLS_BEFORE_COOL_DOWN = 599
    COOL_DOWN_SECONDS = 310
    IDLE_RESET_SECONDS = 420
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self, api_key):
        """Store the API key that is sent as the HTTP basic-auth user name."""
        self.api_key = api_key

    @classmethod
    def api_guard(cls):
        """Account for one upcoming API call, sleeping first if the budget is used up.

        The counter is reset when more than ``IDLE_RESET_SECONDS`` passed since
        the previous guarded call. When the counter has reached
        ``MAX_CALLS_BEFORE_COOL_DOWN`` the method blocks for
        ``COOL_DOWN_SECONDS`` and then starts a fresh window.
        """
        call_time = datetime.now()
        idle_limit = timedelta(seconds=CHouse.IDLE_RESET_SECONDS)

        # A long pause between calls means the previous window has expired.
        if call_time > (CHouse.current_time + idle_limit):
            CHouse.api_calls = 0

        if CHouse.api_calls >= CHouse.MAX_CALLS_BEFORE_COOL_DOWN:
            print("API cool down 5 min")
            time.sleep(CHouse.COOL_DOWN_SECONDS)
            print(f"Restarting, current total API calls: {CHouse.total_api_calls}")
            CHouse.api_calls = 0

        # The caller is about to issue a request, so it is always counted --
        # including the first request after a cool-down, which used to be missed.
        CHouse.api_calls += 1
        CHouse.total_api_calls += 1
        CHouse.current_time = datetime.now()

    def _get_json(self, url: str):
        """GET ``url`` with the stored API key and return the decoded JSON body.

        The body is decoded whatever the HTTP status is, so error payloads are
        returned to the caller rather than raised. A timeout is set so a
        stalled connection cannot block the notebook forever.
        """
        response = requests.get(
            url,
            auth=(self.api_key, ''),
            timeout=CHouse.REQUEST_TIMEOUT_SECONDS,
        )
        return json.loads(response.text)

    def _enrich_geo_loc(self, df: DataFrame) -> DataFrame:
        """Add ``Latitude`` and ``Longitude`` columns derived from each postcode.

        Reads ``postal_code`` from the dicts in ``registered_office_address``
        and looks it up with ``pgeocode``. Rows without a usable postcode, or
        whose lookup fails, get NaN in both columns.

        The columns are added to ``df`` in place; the same object is returned.
        """
        nomi = pgeocode.Nominatim('gb')
        missing = float('nan')
        output_lat = []
        output_lon = []

        # Iterate over values, not labels, so any DataFrame index works.
        for address in df['registered_office_address']:
            postal_code = address.get('postal_code') if isinstance(address, dict) else None
            latitude, longitude = missing, missing
            if postal_code:
                try:
                    # One lookup per row; fields are read by label.
                    location = nomi.query_postal_code(postal_code)
                    latitude, longitude = location['latitude'], location['longitude']
                except Exception:
                    # Best effort: a postcode that cannot be resolved stays missing.
                    latitude, longitude = missing, missing
            output_lat.append(latitude)
            output_lon.append(longitude)

        df['Latitude'] = output_lat
        df['Longitude'] = output_lon

        return df

    def _founder_api(self, company_number: str) -> tuple:
        """Fetch the persons with significant control (PSC) of one company.

        Returns three parallel lists ``(birth_year, name, residency)`` with one
        entry per PSC item. A field that is absent from an item is stored as
        ``None`` so that the lists stay aligned.

        Raises:
            KeyError: if the response carries no ``items`` key.
        """
        url_founder = "https://api.company-information.service.gov.uk/company/{}/persons-with-significant-control"
        search_result = self._get_json(url_founder.format(company_number))

        birth_year = []
        name = []
        residency = []
        for person in search_result['items']:
            birth_year.append((person.get('date_of_birth') or {}).get('year'))
            name.append(person.get('name'))
            residency.append(person.get('country_of_residence'))

        return birth_year, name, residency

    def filter_sic(self, sic_code: int, status: str, index: int) -> DataFrame:
        """Page through the advanced company search for one SIC code and status.

        Args:
            sic_code: value sent as the ``sic_codes`` query parameter.
            status: value sent as the ``company_status`` query parameter.
            index: ``start_index`` of the first page to request.

        Returns:
            A DataFrame with one row per company and a fixed set of columns;
            ``date_of_creation`` is converted to datetime. The frame is empty
            (but has all columns) when nothing was found.

        Raises:
            KeyError: if a response is neither the end sentinel nor a payload
                with an ``items`` key.
        """
        url_companies = "https://api.company-information.service.gov.uk/advanced-search/companies?sic_codes={}&start_index={}&company_status={}"
        columns = ['company_name', 'company_number', 'company_type', 'date_of_creation', 'registered_office_address', 'sic_codes']
        output = []
        while True:
            CHouse.api_guard()
            search_result = self._get_json(url_companies.format(sic_code, index, status))
            # End-of-results sentinel kept from the original implementation.
            # NOTE(review): a 4-key payload is assumed to mean "no more results" -- confirm.
            if len(search_result) == 4:
                break
            items = search_result['items']
            # An empty page would otherwise make the loop run forever.
            if not items:
                break
            output.extend(items)
            index += len(items)

        # Build the frame once; reindex tolerates absent columns and an empty result.
        df = pd.DataFrame(output).reindex(columns=columns)
        df['date_of_creation'] = pd.to_datetime(df['date_of_creation'], format='%Y-%m-%d')
        return df

    def create_map(self, df: DataFrame) -> object:
        """Return a folium map with one marker per company that could be geocoded.

        ``df`` gains ``Latitude`` and ``Longitude`` columns as a side effect.
        Rows without coordinates are skipped.
        """
        df_temp = self._enrich_geo_loc(df)
        m = folium.Map(location=[51.5072, 0])

        rows = zip(df_temp['Latitude'], df_temp['Longitude'], df_temp['company_name'])
        for latitude, longitude, company_name in rows:
            if pd.isna(latitude) or pd.isna(longitude):
                continue
            folium.Marker(
                [latitude, longitude], popup=f"<i>{company_name}</i>"
            ).add_to(m)
        return m

    def enrich_founder(self, df: DataFrame) -> None:
        """Add PSC columns to ``df`` in place, one API lookup per company.

        Added columns: the list of birth years, their median, their minimum,
        the list of names and the list of countries of residence. When a
        lookup fails the three list columns hold ``None``. The median and
        minimum are NaN whenever no birth year is known (failed lookup, no
        PSC entries, or entries without a date of birth).
        """
        result_year = []
        result_name = []
        result_residency = []

        for company_number in df['company_number']:
            try:
                CHouse.api_guard()
                years, names, residencies = self._founder_api(company_number)
            except Exception:
                # Best effort per company; unlike a bare except this still lets
                # KeyboardInterrupt stop a long-running enrichment.
                years, names, residencies = None, None, None
            result_year.append(years)
            result_name.append(names)
            result_residency.append(residencies)

        missing = float('nan')
        result_year_median = []
        result_year_min = []
        for years in result_year:
            known_years = [year for year in (years or []) if year is not None]
            result_year_median.append(statistics.median(known_years) if known_years else missing)
            result_year_min.append(min(known_years) if known_years else missing)

        df['Significant Person Birth Year/s'] = result_year
        df['Median Person Birth Year'] = result_year_median
        df['Oldest Person Birth Year'] = result_year_min
        df['Significant Person Name/s'] = result_name
        df['Significant Person Residency'] = result_residency

    def retrieve_filings(self, company_number: str) -> dict:
        """Return the decoded filing-history response for one company."""
        url_filings = "https://api.company-information.service.gov.uk/company/{}/filing-history?items_per_page=200"

        CHouse.api_guard()
        return self._get_json(url_filings.format(company_number))

    def export_excel(self, df: DataFrame, path: str = "CompaniesExport.xlsx") -> None:
        """Write ``df`` to an Excel workbook at ``path`` (default: the original file name)."""
        df.to_excel(path)

In [None]:
w = CHouse()

In [None]:
table = w.filter_sic(62011, 'active', 8700)

In [None]:
table

In [None]:
w.create_map(table)

In [None]:
w.enrich_founder(table)
table

In [None]:
w.export_excel(table)

In [None]:
w.retrieve_filings("08403673")