In [None]:
!pip install --user pandas openpyxl

In [15]:
import pandas as pd

# Workbook holding the instrument list; the later pd.read_excel cells reuse this path.
file_path = 'Instrument list_51.xlsx'
# Open the workbook as an ExcelFile handle.
# NOTE(review): no later cell in this notebook reads from `spreadsheet` (they call
# pd.read_excel(file_path, ...) directly), and the handle is never closed — confirm
# whether it is still needed.
spreadsheet = pd.ExcelFile(file_path)

  warn("Workbook contains no default style, apply openpyxl's default")


In [16]:
# Read the '1.0 All Equity' sheet, skipping the first 5 rows of the sheet.
# The real column names are still inside the data at this point: a later cell
# takes them from position 2 of this frame and drops rows 0-2.
df = pd.read_excel(file_path, sheet_name='1.0 All Equity', skiprows=5)

In [17]:
# Read the '1.0 All Equity' sheet, skipping the first 5 rows of the sheet.
# NOTE(review): this is the identical call made by the preceding cell, so `df` is
# simply rebuilt from the file here; one of the two cells looks redundant.
df = pd.read_excel(file_path, sheet_name='1.0 All Equity', skiprows=5)

  warn("Workbook contains no default style, apply openpyxl's default")


In [18]:
# Use the row at position 2 as the column labels and discard the rows labelled
# 0, 1 and 2 (the header row itself plus the two rows above it).
# Not re-runnable: a second execution finds labels 0-2 already gone.
header_row = df.iloc[2]
df = df.drop(index=[0, 1, 2])
df.columns = header_row

In [19]:
# Columns kept from the equity sheet. 'MiFIR Indentifier Name' (spelled as in the
# workbook) is only needed for the filter below and is removed afterwards.
columns_of_interest = [
    'TIDM',
    'Issuer Name',
    'Instrument Name',
    'ISIN',
    'ICB Industry',
    'ICB Super-Sector Name',
    'Country of Incorporation',
    'MiFIR Indentifier Name',
]

# Narrow to the columns above, keep only rows classified as "Shares", then drop
# the classification column now that it has served its purpose.
selected_columns = df[columns_of_interest]
is_share = selected_columns['MiFIR Indentifier Name'] == 'Shares'
filtered_df = selected_columns[is_share].drop(columns=['MiFIR Indentifier Name'])

In [20]:
import yfinance as yf

In [21]:
import sqlite3
import json
import yfinance as yf
from tqdm import tqdm, tqdm_notebook
import time

def initialize_database(db_path='uk_company_info.db'):
    """Create the ``companies`` table in the SQLite database if it is missing.

    Safe to call repeatedly: the table is created with ``IF NOT EXISTS``.

    Args:
        db_path: Path of the SQLite file to open. Defaults to
            'uk_company_info.db', the file used by the rest of the notebook.

    Raises:
        sqlite3.Error: If the database cannot be opened or the statement fails.
    """
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        c.execute('''
              CREATE TABLE IF NOT EXISTS companies
              (id INTEGER PRIMARY KEY,
              company_sec_cik TEXT,
              company_name TEXT,
              company_ticker TEXT,
              company_exchange TEXT,
              company_isin TEXT,
              country TEXT,
              industry TEXT,
              sector TEXT,
              longBusinessSummary TEXT,
              retrieval_string TEXT)
              ''')
        conn.commit()
    finally:
        # Release the file handle even when execute()/commit() raises.
        conn.close()

def retrieve_info(ticker):
    """Look up ``ticker`` through yfinance and return its info mapping.

    The result is only accepted when it contains at least one of the keys
    'country', 'industry', 'city' or 'symbol'. Any exception raised during the
    lookup is swallowed on purpose so that callers can try another identifier.

    Returns:
        The info mapping, or None when the lookup failed or none of the
        expected keys is present.
    """
    expected_keys = ('country', 'industry', 'city', 'symbol')
    try:
        info = yf.Ticker(ticker).info
        if any(key in info for key in expected_keys):
            return info
    except Exception:
        # Best-effort lookup: a failure is reported to the caller as None.
        return None
    return None

def get_company_info(company_sec_cik, company_name, company_ticker, company_exchange, company_isin):
    """Fetch descriptive company data, trying several identifiers in turn.

    Candidates are tried in this order: the ISIN, the ticker with an ".L"
    suffix (a doubled dot such as "BT..L" is collapsed to "BT.L"), and the bare
    ticker. Blank or non-string values, such as the NaN pandas yields for an
    empty cell, are skipped so a missing ticker no longer aborts the ISIN lookup.

    Args:
        company_sec_cik: Stored unchanged in the result.
        company_name: Stored unchanged in the result.
        company_ticker: Ticker used to build two of the lookup candidates.
        company_exchange: Stored unchanged in the result.
        company_isin: ISIN, tried first.

    Returns:
        A dict with the passed-in fields plus 'country', 'industry', 'sector',
        'longBusinessSummary' (each 'N/A' when absent) and 'retrieval_string'
        (the identifier that succeeded); None if every candidate failed.
    """
    identifiers = []
    if isinstance(company_isin, str) and company_isin.strip():
        identifiers.append(company_isin)
    if isinstance(company_ticker, str) and company_ticker.strip():
        identifiers.append((company_ticker + ".L").replace("..", "."))
        identifiers.append(company_ticker)

    company_info = None
    retrieval_string = None

    for identifier in identifiers:
        company_info = retrieve_info(identifier)
        if company_info:
            retrieval_string = identifier
            break
        # Pause between attempts to pace the outgoing requests.
        time.sleep(0.25)

    if not company_info:
        print(identifiers, "returned none")
        return None

    return {
        'company_sec_cik': company_sec_cik,
        'company_name': company_name,
        'company_ticker': company_ticker,
        'company_exchange': company_exchange,
        'company_isin': company_isin,
        'country': company_info.get('country', 'N/A'),
        'industry': company_info.get('industry', 'N/A'),
        'sector': company_info.get('sector', 'N/A'),
        'longBusinessSummary': company_info.get('longBusinessSummary', 'N/A'),
        'retrieval_string': retrieval_string,
    }

def insert_company_info(company, db_path='uk_company_info.db'):
    """Insert one company record into the ``companies`` table.

    Args:
        company: Mapping with the keys 'company_sec_cik', 'company_name',
            'company_ticker', 'company_exchange', 'company_isin', 'country',
            'industry', 'sector', 'longBusinessSummary' and 'retrieval_string'.
        db_path: Path of the SQLite file to write to. Defaults to
            'uk_company_info.db', the file used by the rest of the notebook.

    Raises:
        KeyError: If ``company`` lacks one of the expected keys.
        sqlite3.Error: If the insert fails (e.g. the table does not exist).
    """
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        c.execute('''
              INSERT INTO companies (company_sec_cik, company_name, company_ticker, company_exchange, company_isin, country, industry, sector, longBusinessSummary, retrieval_string)
              VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
              ''', (company['company_sec_cik'], company['company_name'], company['company_ticker'], company['company_exchange'],
                    company['company_isin'], company['country'], company['industry'], company['sector'], company['longBusinessSummary'], company['retrieval_string']))
        conn.commit()
    finally:
        # Close the connection even when the insert raises, so a failed row
        # does not leave a handle open on the database file.
        conn.close()

In [22]:
# Make sure uk_company_info.db has a `companies` table before the loop below
# inserts into it (the table is created with CREATE TABLE IF NOT EXISTS).
initialize_database()

In [23]:
import logging

# Silence yfinance's own logger: get_company_info already reports identifiers
# that could not be resolved, and failed lookups are expected while it tries
# several candidates per company.
logger = logging.getLogger('yfinance')
logger.disabled = True
logger.propagate = False

# Look up every filtered instrument and store the ones that resolve.
for index, row in tqdm(filtered_df.iterrows(), total=filtered_df.shape[0], desc="Processing companies"):
    company_sec_cik = "N/A"
    company_name = row["Issuer Name"]
    company_ticker = row["TIDM"]
    company_exchange = "LSE"
    company_isin = row["ISIN"]

    try:
        company_info = get_company_info(company_sec_cik, company_name, company_ticker, company_exchange, company_isin)
        if company_info is not None:
            # Only touch the result once we know the lookup succeeded; indexing
            # a None result raised TypeError and hid the failure message below.
            company_info["company_isin"] = company_isin
            insert_company_info(company_info)
        else:
            print(f"Failed to retrieve info for {company_name} ({company_ticker}, {company_isin})")
    except Exception as e:
        # Keep going with the next company, but say what went wrong instead of
        # printing an empty line.
        print(f"Error while processing {company_name} ({company_ticker}, {company_isin}): {e!r}")
    # Pause between companies to pace the outgoing requests.
    time.sleep(0.25)

Processing companies:  43%|████▎     | 782/1804 [17:43<39:03,  2.29s/it]  

['GB0009065284', '44IO.L', '44IO'] returned none



Processing companies: 100%|██████████| 1804/1804 [56:07<00:00,  1.87s/it]  
