In [20]:
import pandas as pd 
import yfinance as yf
import json 
import time
import sqlite3
from tqdm import tqdm

In [7]:
file_path = "t7-xetr-allTradableInstruments.csv"

# Load the instrument list. Fields are separated by ';' and the first two
# lines of the file are skipped, so the third line supplies the column names.
data = pd.read_csv(file_path, sep=';', skiprows=2)

Unnamed: 0,Product Status,Instrument Status,Instrument,ISIN,Product ID,Instrument ID,WKN,Mnemonic,MIC Code,CCP eligible Code,...,Single Sided Quote Support,Liquidity Class,Cover Indicator,VolatilityCorridorOpeningAuction,VolatilityCorridorIntradayAuction,VolatilityCorridorClosingAuction,VolatilityCorridorContinuous,DisableOnBookTrading,Maximum Order Quantity,Maximum Order Value
0,Active,Active,STRABAG SE,AT000000STR1,432891,7026002,000A0M23V,XD4,XETR,Y,...,SSQ_ON_ENTRY_ALLOWED,2.0,,,,,,N,1883870,73000000
1,Active,Active,FACC AG INH.AKT.,AT00000FACC2,52092,2504163,000A1147K,1FC,XETR,Y,...,SSQ_ON_ENTRY_ALLOWED,2.0,,,,,,N,8935128,73000000
2,Active,Active,RAIFFEISEN BK INTL INH.,AT0000606306,52094,2504165,000A0D9SU,RAW,XETR,Y,...,SSQ_ON_ENTRY_ALLOWED,1.0,,,,,,N,20350877,348000000
3,Active,Active,PORR AG,AT0000609607,52095,2504166,000850185,ABS2,XETR,Y,...,SSQ_ON_ENTRY_ALLOWED,2.0,,,,,,N,5320699,73000000
4,Active,Active,LENZING AG,AT0000644505,52097,2504168,000852927,LEN,XETR,Y,...,SSQ_ON_ENTRY_ALLOWED,2.0,,,,,,N,2208774,73000000


In [8]:
# Keep only the instruments whose "Instrument Status" is "Active".
is_active = data["Instrument Status"].eq("Active")
filtered_data = data.loc[is_active]

# Project onto the three columns needed downstream and give them the
# snake_case names used by the rest of the notebook.
column_names = {
    "Instrument": "instrument_name",
    "ISIN": "instrument_isin",
    "Mnemonic": "instrument_ticker",
}
selected_data = filtered_data[list(column_names)].rename(columns=column_names)

Unnamed: 0,instrument_name,instrument_isin,instrument_ticker
0,STRABAG SE,AT000000STR1,XD4
1,FACC AG INH.AKT.,AT00000FACC2,1FC
2,RAIFFEISEN BK INTL INH.,AT0000606306,RAW
3,PORR AG,AT0000609607,ABS2
4,LENZING AG,AT0000644505,LEN


In [23]:
def initialize_database(db_path='de_company_info.db'):
    """Create the ``companies`` table in a SQLite database if it does not exist.

    Safe to call repeatedly: the statement uses ``CREATE TABLE IF NOT EXISTS``.

    Args:
        db_path: Path of the SQLite database file. Defaults to
            ``'de_company_info.db'``, the file the other cells write to.
    """
    conn = sqlite3.connect(db_path)
    try:
        # ``with conn`` commits on success and rolls back if the statement
        # raises; the ``finally`` guarantees the connection is closed either way.
        with conn:
            conn.execute('''
                  CREATE TABLE IF NOT EXISTS companies
                  (id INTEGER PRIMARY KEY,
                  company_sec_cik TEXT,
                  company_name TEXT,
                  company_ticker TEXT,
                  company_exchange TEXT,
                  company_isin TEXT,
                  country TEXT,
                  industry TEXT,
                  sector TEXT,
                  longBusinessSummary TEXT,
                  retrieval_string TEXT)
                  ''')
    finally:
        conn.close()

# Make sure the `companies` table exists before any rows are inserted.
initialize_database()

In [17]:
def retrieve_info(ticker):
    """Fetch the yfinance ``info`` mapping for one identifier.

    Args:
        ticker: Identifier passed to ``yf.Ticker`` (the caller passes ISINs
            as well as ticker symbols).

    Returns:
        The ``info`` mapping when it contains at least one of the keys
        ``country``, ``industry``, ``city`` or ``symbol``; otherwise ``None``.
        ``None`` is also returned when the lookup raises.
    """
    import logging  # local import so this cell does not depend on a later one

    marker_keys = ('country', 'industry', 'city', 'symbol')
    try:
        company_info = yf.Ticker(ticker).info
        if any(key in company_info for key in marker_keys):
            return company_info
    except Exception as exc:
        # Best-effort lookup: the caller tries several identifiers, so a
        # failure here is not fatal. Record it instead of discarding it so
        # that network or rate-limit problems can be diagnosed.
        logging.getLogger(__name__).debug(
            "Lookup failed for %r: %r", ticker, exc
        )
    return None

def get_company_info(company_ticker, company_isin, company_name):
    """Look up one company and return a dict shaped like a ``companies`` row.

    Identifiers are tried in this order until ``retrieve_info`` returns data:
    the ISIN, the ticker with a ``.DE`` suffix, then the bare ticker. A
    0.25 s pause follows every unsuccessful attempt.

    Args:
        company_ticker: Exchange mnemonic of the instrument.
        company_isin: ISIN of the instrument.
        company_name: Instrument name; copied into the result unchanged.

    Returns:
        A dict with the keys ``company_sec_cik``, ``company_name``,
        ``company_ticker``, ``company_exchange``, ``company_isin``,
        ``country``, ``industry``, ``sector``, ``longBusinessSummary`` and
        ``retrieval_string`` (the identifier that succeeded), or ``None``
        when no identifier produced data.
    """
    # Only usable strings become identifiers; a missing CSV cell (e.g. NaN)
    # would otherwise break the string concatenation below.
    identifiers = []
    if isinstance(company_isin, str) and company_isin:
        identifiers.append(company_isin)
    if isinstance(company_ticker, str) and company_ticker:
        # A mnemonic ending in "." would otherwise yield "X..DE".
        identifiers.append((company_ticker + ".DE").replace("..", "."))
        identifiers.append(company_ticker)

    company_info = None
    retrieval_string = None

    for identifier in identifiers:
        company_info = retrieve_info(identifier)
        if company_info:
            retrieval_string = identifier
            break
        time.sleep(0.25)

    # Check for failure BEFORE touching company_info: calling .get() on None
    # would raise AttributeError instead of reporting the miss.
    if not company_info:
        print(identifiers, "returned none")
        return None

    return {
        'company_sec_cik': "N/A",
        'company_name': company_name,
        # Store the symbol reported by the data source, not the input mnemonic.
        'company_ticker': company_info.get("symbol"),
        'company_exchange': company_info.get("exchange", "N/A"),
        'company_isin': company_isin,
        'country': company_info.get('country', 'N/A'),
        'industry': company_info.get('industry', 'N/A'),
        'sector': company_info.get('sector', 'N/A'),
        'longBusinessSummary': company_info.get('longBusinessSummary', 'N/A'),
        'retrieval_string': retrieval_string,
    }

In [21]:
def insert_company_info(company, db_path='de_company_info.db'):
    """Insert one company record into the ``companies`` table.

    Args:
        company: Mapping that provides the keys ``company_sec_cik``,
            ``company_name``, ``company_ticker``, ``company_exchange``,
            ``company_isin``, ``country``, ``industry``, ``sector``,
            ``longBusinessSummary`` and ``retrieval_string``.
        db_path: Path of the SQLite database file. Defaults to
            ``'de_company_info.db'``.

    Raises:
        KeyError: If ``company`` lacks one of the required keys.
    """
    columns = (
        'company_sec_cik', 'company_name', 'company_ticker', 'company_exchange',
        'company_isin', 'country', 'industry', 'sector', 'longBusinessSummary',
        'retrieval_string',
    )
    # Resolve the values first so a malformed record fails before a
    # connection is opened.
    values = tuple(company[column] for column in columns)

    conn = sqlite3.connect(db_path)
    try:
        # ``with conn`` commits on success and rolls back on error; the
        # ``finally`` closes the connection even if the insert raises.
        with conn:
            conn.execute('''
                  INSERT INTO companies (company_sec_cik, company_name, company_ticker, company_exchange, company_isin, country, industry, sector, longBusinessSummary, retrieval_string)
                  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                  ''', values)
    finally:
        conn.close()

In [24]:
import logging

# Silence yfinance's own logger so its messages do not flood the progress bar.
logger = logging.getLogger('yfinance')
logger.disabled = True
logger.propagate = False

# Look up every selected instrument and store each successful result.
for index, row in tqdm(selected_data.iterrows(), total=selected_data.shape[0], desc="Processing companies"):

    company_name = row["instrument_name"]
    company_isin = row["instrument_isin"]
    company_ticker = row["instrument_ticker"]

    try:
        company_info = get_company_info(company_ticker, company_isin, company_name)
        # Test for None BEFORE subscripting: assigning into None raises
        # TypeError, which made the failure branch below unreachable.
        if company_info is not None:
            company_info["company_isin"] = company_isin
            insert_company_info(company_info)
        else:
            print(f"Failed to retrieve info for {company_name} ({company_ticker}, {company_isin})")
    except Exception as e:
        # Keep the long-running loop alive, but say which instrument failed
        # and why instead of printing an empty line.
        print(f"Error while processing {company_name} ({company_ticker}, {company_isin}): {e!r}")
    # Pause between instruments to space out requests.
    time.sleep(0.25)


Processing companies:   2%|▏         | 87/3564 [03:12<7:26:55,  7.71s/it]




Processing companies:   2%|▏         | 89/3564 [03:28<7:22:12,  7.64s/it]




Processing companies:  17%|█▋        | 603/3564 [13:45<1:18:02,  1.58s/it]




Processing companies:  21%|██        | 739/3564 [17:22<1:07:50,  1.44s/it]




Processing companies:  21%|██        | 740/3564 [17:22<51:00,  1.08s/it]  




Processing companies:  21%|██        | 741/3564 [17:23<39:15,  1.20it/s]




Processing companies:  21%|██        | 742/3564 [17:23<31:01,  1.52it/s]




Processing companies:  21%|██        | 743/3564 [17:23<25:15,  1.86it/s]




Processing companies:  21%|██        | 744/3564 [17:23<21:13,  2.21it/s]




Processing companies:  27%|██▋       | 967/3564 [27:39<1:51:07,  2.57s/it]




Processing companies:  27%|██▋       | 976/3564 [28:07<1:48:20,  2.51s/it]




Processing companies: 100%|██████████| 3564/3564 [2:17:27<00:00,  2.31s/it]  
