# Summary

Enhancement on chroma vector database:
1. Use a higher performing sentence transformer all-mpnet-base-v2 (previously all-MiniLM-L6-v2).
2. Updated chunking strategy for SEC filings markdown files to minimize splitting tables into different chunks.
3. Updated metadata for both SEC filings and earnings call databases, allowing them to be used in downstream retrieval process.
4. Reduced chunk size (from 1024 to 256) to help mitigate downstream decoder model rate limit.

In [None]:
!pip install langchain langchain_community langchain-chroma langchain_text_splitters -q
!pip install transformers sentence-transformers -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m990.0/990.0 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m373.5/373.5 kB[0m [31m23.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.8/139.8 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m581.4/581.4 kB[0m [31m31.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/92.2 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m36.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.8/62.8 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━

In [None]:
import os
import itertools
import pickle
from copy import deepcopy

In [None]:
from google.colab import drive
import os

# Step 1: Mount Google Drive
drive.mount('/content/drive')

# Step 2: Change directory to where your notebook is located
notebook_directory = '/content/drive/My Drive/Colab Notebooks/2024 NLP GenAI/NLP/project'
os.chdir(notebook_directory)

# Step 3: Verify the current working directory
print("Current working directory:", os.getcwd())

Mounted at /content/drive
Current working directory: /content/drive/My Drive/Colab Notebooks/2024 NLP GenAI/NLP/project


# Experiment: Chunking Tables

Problem: Markdown files have a lot of tables. When performing chunking, these tables present two challenges. First, tables may be split into different chunks, resulting in a loss of context. Second, markdown tables take a lot of space, which exacerbates the first problem.

In [None]:
# Try: identify tables in markdown files
def chunk_markdown_file(filepath):
    chunks = []  # List to hold chunks of the file
    current_chunk = []  # Current chunk being processed
    in_table = False  # Flag to track if currently inside a table

    with open(filepath, 'r') as file:
        lines = file.readlines()

    for i, line in enumerate(lines):
        # Check for potential start of a table
        if not in_table and '|' in line:
            # Look ahead to see if the next line is a separator, indicating a table start
            if i + 1 < len(lines) and set(lines[i + 1].strip()) <= {'-', '|', ':'}:
                in_table = True
                # If there's content in the current chunk, save it and start a new chunk
                if current_chunk:
                    chunks.append('\n'.join(current_chunk))
                    current_chunk = []
        elif in_table:
            # If in a table and the line doesn't conform to table row format, it's the end of the table
            if '|' not in line:
                in_table = False
                # Add the table to chunks
                chunks.append('\n'.join(current_chunk))
                current_chunk = []

        # Add the current line to the chunk
        current_chunk.append(line.strip())

    # Add any remaining content to chunks
    if current_chunk:
        chunks.append('\n'.join(current_chunk))

    return chunks


In [None]:
filepath = "./output/SEC_EDGAR_FILINGS_MD/AMD-2024/amd-20240330-10-Q1/amd-20240330-10-Q1.md"
chunks = chunk_markdown_file(filepath)
for i, chunk in enumerate(chunks):
    print(f"Chunk {i+1}:\n{chunk}\n{'-'*20}\n")

Chunk 1:
# United States Securities And Exchange Commission Washington, D.C. 20549

## Form 10-Q

(Mark One)
QUARTERLY REPORT PURSUANT TO SECTION 13 OR 15 (d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the quarterly period ended March 30, 2024 OR
TRANSITION REPORT PURSUANT TO SECTION 13 OR 15 (d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the transition period from to Commission File Number **001-07882**

# Advanced Micro Devices, Inc. (Exact Name Of Registrant As Specified In Its Charter)

Delaware **94-1692300**
(State or other jurisdiction of incorporation or organization)(I.R.S. Employer Identification No.)
2485 Augustine **Drive**
Santa Clara, California **95054**
(Address of principal executive offices)(Zip Code)
(408) **749-4000**
Registrant's telephone number, including area code N/A
(Former name, former address and former fiscal year, if changed since last report)
Securities registered pursuant to Section 12(b) of the Act:
Title of each class Trading Symbol(s) Name of each exc

In [None]:
# Try: convert markdown table to csv to save footprint
def markdown_table_to_csv(table_chunk):
    lines = table_chunk.strip().split('\n')  # Split the chunk into lines
    csv_lines = []  # List to hold lines of the CSV

    for i, line in enumerate(lines):
        # Skip the separator row
        if i == 1:
            continue
        # Split the line by pipe, trim whitespace, and filter out empty strings
        columns = [col.strip() for col in line.split('|') if col.strip()]
        # Join the columns with commas and add to the list of CSV lines
        csv_lines.append(','.join(columns))

    # Join all CSV lines with newline characters
    csv_chunk = '\n'.join(csv_lines)
    return csv_chunk

# Example Markdown table chunk
markdown_table = """
| Header1 | Header2 |
|---------|---------|
| Row1Col1| Row1Col2|
| Row2Col1| Row2Col2|
"""

# Convert to CSV
csv_output = markdown_table_to_csv(markdown_table)
print(csv_output)

Header1,Header2
Row1Col1,Row1Col2
Row2Col1,Row2Col2


In [None]:
chunks[3]

'| Condensed Consolidated Statements of Operations (Unaudited)   | Three Months Ended   |       |    |        |\n|---------------------------------------------------------------|----------------------|-------|----|--------|\n| March 30, 2024                                                | April 1, 2023        |       |    |        |\n| (In millions, except per share amounts)                       |                      |       |    |        |\n| Net revenue                                                   | $                    | 5,473 | $  | 5,353  |\n| Cost of sales                                                 | 2,683                | 2,689 |    |        |\n| Amortization of acquisition-related intangibles               | 230                  | 305   |    |        |\n| Total cost of sales                                           | 2,913                | 2,994 |    |        |\n| Gross profit                                                  | 2,560                | 2,359 |    |  

In [None]:
len(chunks[3])

2774

In [None]:
chunk_csv = markdown_table_to_csv(chunks[3])
chunk_csv

'Condensed Consolidated Statements of Operations (Unaudited),Three Months Ended\nMarch 30, 2024,April 1, 2023\n(In millions, except per share amounts)\nNet revenue,$,5,473,$,5,353\nCost of sales,2,683,2,689\nAmortization of acquisition-related intangibles,230,305\nTotal cost of sales,2,913,2,994\nGross profit,2,560,2,359\nResearch and development,1,525,1,411\nMarketing, general and administrative,620,585\nAmortization of acquisition-related intangibles,392,518\nLicensing gain,(13),(10)\nOperating income (loss),36,(145)\nInterest expense,(25),(25)\nOther income (expense), net,53,43\nIncome (loss) before income taxes and equity income,64,(127)\nIncome tax provision (benefit),(52),13\nEquity income in investee,7,1\nNet income (loss),$,123,$,(139)\nEarnings (loss) per share Basic,$,0.08,$,(0.09)\nDiluted,$,0.07,$,(0.09)\nShares used in per share calculation Basic,1,617,1,611\nDiluted,1,639,1,611\nSee accompanying notes.'

In [None]:
len(chunk_csv)

905

Converting markdown to csv can reduce the chunk length significantly. Two concerns:
- Does it lose context? I read that GPT-3.5 has a fairly good understanding of csv and markdown formats, so I am not too concerned.
- When the big table chunk is further chunked, will it lose context? Yes for sure. One practice I read online:  create a new column that, for that row, repeats each header and the value of the cell in the row, eg header1: row1cell1, header 2: row1cell2... store these new cells as the embedding.

In [None]:
# Try: chunk markdown tables by rows and add headers to each row
def markdown_table_to_row_chunks(table_chunk):
    lines = table_chunk.strip().split('\n')  # Split the chunk into lines
    row_chunks = []  # List to hold row chunks

    # Extract headers
    headers = [header.strip() for header in lines[0].split('|') if header.strip()]

    for i, line in enumerate(lines):
        # Skip the header and separator row
        if i == 0 or i == 1:
            continue
        # Split the line by pipe, trim whitespace, and filter out empty strings
        cells = [cell.strip() for cell in line.split('|') if cell.strip()]
        # Combine each header with its corresponding cell value
        row_chunk = [f"{header}: {cell}" for header, cell in zip(headers, cells)]
        # Join the combined header and cell values with a comma and add to the list of row chunks
        row_chunks.append(', '.join(row_chunk))

    return row_chunks

# Example Markdown table chunk
markdown_table = """
| Header1 | Header2 |
|---------|---------|
| Row1Col1| Row1Col2|
| Row2Col1| Row2Col2|
"""

# Convert to row chunks
row_chunks = markdown_table_to_row_chunks(markdown_table)
for chunk in row_chunks:
    print(chunk)

Header1: Row1Col1, Header2: Row1Col2
Header1: Row2Col1, Header2: Row2Col2


In [None]:
chunk_csv_row = markdown_table_to_row_chunks(chunks[3])
chunk_csv_row

['Condensed Consolidated Statements of Operations (Unaudited): March 30, 2024, Three Months Ended: April 1, 2023',
 'Condensed Consolidated Statements of Operations (Unaudited): (In millions, except per share amounts)',
 'Condensed Consolidated Statements of Operations (Unaudited): Net revenue, Three Months Ended: $',
 'Condensed Consolidated Statements of Operations (Unaudited): Cost of sales, Three Months Ended: 2,683',
 'Condensed Consolidated Statements of Operations (Unaudited): Amortization of acquisition-related intangibles, Three Months Ended: 230',
 'Condensed Consolidated Statements of Operations (Unaudited): Total cost of sales, Three Months Ended: 2,913',
 'Condensed Consolidated Statements of Operations (Unaudited): Gross profit, Three Months Ended: 2,560',
 'Condensed Consolidated Statements of Operations (Unaudited): Research and development, Three Months Ended: 1,525',
 'Condensed Consolidated Statements of Operations (Unaudited): Marketing, general and administrative, 

Observation: The idea of chunking tables to rows sounded good but doesn't seem to apply well to this use case because the tables in the markdown files are not perfectly aligned, causing the row chunks to miss information or do not provide useful information.

# Project code


Chunking strategy for markdown files:
- Chunk the markdown file by the beginning and ending of tables, tracking if a chunk contains table. If a chunk is below the min_length threshold, drop the chunk.
- For non-table chunks, perform RecursiveCharacterTextSplitter with low overlap.
- For table chunks, perform table chunks, first convert the table chunk from markdown to csv format (significantly reducing chunk length), then perform RecursiveCharacterTextSplitter with high overlap (for the purpose of retaining table coherency).
- metadata: ticker, year, filing_type, is_table (boolean).

Chunking strategy for earnings call pickle files:
- Earnings call data doesn't have tables, so the chunking is conducted using RecursiveCharacterTextSplitter with low overlap.
- metadata: ticker, year, quarter, speaker

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_text_splitters import MarkdownHeaderTextSplitter
from langchain.schema import Document

# use a higher performing sentence transformer (previously all-MiniLM-L6-v2)
model_name = "all-mpnet-base-v2"
emb_fn = SentenceTransformerEmbeddings(model_name=model_name)

  warn_deprecated(
  from tqdm.autonotebook import tqdm, trange
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Config
# ticker = ['AMD']
ticker = ['NVDA', 'AMD', 'INTC', 'QCOM', 'AVGO', 'DELL']
year = ['2024']
combinations = list(itertools.product(ticker, year))

chunk_size = 512

### DB for markdown based SEC filings

In [None]:
markdown_dir = "./output/SEC_EDGAR_FILINGS_MD"

md_content_list = []

for combo in combinations:
  ticker, year = combo
  for md_dirs in os.listdir(os.path.join(markdown_dir,f"{ticker}-{year}")):
    md_file_path = os.path.join(markdown_dir,f"{ticker}-{year}",md_dirs,f"{md_dirs}.md")
    print(f'{ticker}-{year}-{md_dirs}')
    with open(md_file_path, 'r') as file:
      content = file.read()
    md_content_list.append([content, ticker, year, '-'.join(md_dirs.split('-')[-2:])])

NVDA-2024-nvda-20240428-10-Q2
NVDA-2024-nvda-20240128-10-K
AMD-2024-amd-20240330-10-Q1
INTC-2024-intc-20240330-10-Q1
QCOM-2024-qcom-20240324-10-Q1
AVGO-2024-avgo-20240505-10-Q2
AVGO-2024-avgo-20240204-10-Q1
DELL-2024-dell-20240503-10-Q2


In [None]:
def chunk_markdown_file(md_content, metadata={}, min_length=10):
    chunks = []  # List to hold chunks of the file with metadata
    current_chunk = []  # Current chunk being processed
    in_table = False  # Flag to track if currently inside a table

    lines = md_content.strip().split('\n')

    for i, line in enumerate(lines):
        # Check for potential start of a table
        if not in_table and '|' in line:
            # Look ahead to see if the next line is a separator, indicating a table start
            if i + 1 < len(lines) and set(lines[i + 1].strip()) <= {'-', '|', ':'}:
                in_table = True
                # If there's content in the current chunk, save it and start a new chunk
                if current_chunk and len('\n'.join(current_chunk)) >= min_length:
                    chunks.append({'content': '\n'.join(current_chunk), 'is_table': False})
                current_chunk = []
        elif in_table:
            # If in a table and the line doesn't conform to table row format, it's the end of the table
            if '|' not in line:
                in_table = False
                # Add the table to chunks
                if current_chunk:
                    chunks.append({'content': '\n'.join(current_chunk), 'is_table': True})
                current_chunk = []
        # Add the current line to the chunk
        current_chunk.append(line)

    # Add any remaining content to chunks
    if current_chunk and len('\n'.join(current_chunk)) >= min_length:
        chunks.append({'content': '\n'.join(current_chunk), 'is_table': in_table})

    chunks_docs = []
    for chunk in chunks:
        doc = Document(page_content=chunk['content'], metadata=deepcopy(metadata))
        doc.metadata.update({'is_table':chunk['is_table']})
        chunks_docs.append(doc)

    return chunks_docs

def markdown_table_to_csv(table_chunk_doc):
    lines = table_chunk_doc.page_content.strip().split('\n')  # Split the chunk into lines
    csv_lines = []  # List to hold lines of the CSV

    for i, line in enumerate(lines):
        # Skip the separator row
        if i == 1:
            continue
        # Split the line by pipe, trim whitespace, and filter out empty strings
        columns = [col.strip() for col in line.split('|') if col.strip()]
        # Join the columns with commas and add to the list of CSV lines
        csv_lines.append(','.join(columns))

    # Join all CSV lines with newline characters
    csv_chunk = '\n'.join(csv_lines)

    table_chunk_doc.page_content = csv_chunk
    return table_chunk_doc

def process_chunks(chunks_docs, chunk_size=512):
    processed_chunks_docs = []

    for chunk_doc in chunks_docs:
        if chunk_doc.metadata['is_table']:
            table_splitter = RecursiveCharacterTextSplitter(separators=[], chunk_size=chunk_size, chunk_overlap=(chunk_size//2))
            table_chunks_docs = table_splitter.split_documents([markdown_table_to_csv(chunk_doc)])
            processed_chunks_docs.extend(table_chunks_docs)
        else:
            text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n", ". ", " "],
                                                           chunk_size=chunk_size, chunk_overlap=50)
            text_chunks_docs = text_splitter.split_documents([chunk_doc])
            processed_chunks_docs.extend(text_chunks_docs)

    return processed_chunks_docs


In [None]:
# manual check
# # Step 1: Chunk the Markdown content
# chunks_docs = chunk_markdown_file(md_content_list[0][0])

# # Step 2: Process the chunks
# processed_chunks_docs = process_chunks(chunks_docs, chunk_size=512)

# processed_chunks_docs[0]

Document(metadata={'is_table': False}, page_content='# United States Securities And Exchange Commission Washington, D.C. 20549\n\n## Form 10-Q\n\n(Mark One)\nQUARTERLY REPORT PURSUANT TO SECTION 13 OR 15 (d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the quarterly period ended March 30, 2024 OR\nTRANSITION REPORT PURSUANT TO SECTION 13 OR 15 (d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the transition period from to Commission File Number **001-07882**\n\n# Advanced Micro Devices, Inc. (Exact Name Of Registrant As Specified In Its Charter)')

In [None]:
sec_markdown_docs = []

for md_content in md_content_list:
    metadata = {"ticker":md_content[1], "year":md_content[2], "filing_type":md_content[3]}
    chunks_docs = chunk_markdown_file(md_content[0], metadata=metadata, min_length=10)
    processed_chunks_docs = process_chunks(chunks_docs, chunk_size=chunk_size)
    sec_markdown_docs.extend(processed_chunks_docs)

sec_markdown_docs[0]

Document(metadata={'ticker': 'NVDA', 'year': '2024', 'filing_type': '10-Q2', 'is_table': False}, page_content='![0_image_0.png](0_image_0.png)\n\nFORM **10-Q**\nQUARTERLY REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the quarterly period ended April 28, **2024**\nOR\nTRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n\n![0_image_1.png](0_image_1.png)\n\n![0_image_2.png](0_image_2.png)\n\n![0_image_3.png](0_image_3.png)\n\n![0_image_4.png](0_image_4.png)\n\n![0_image_5.png](0_image_5.png)\n\n![0_image_6.png](0_image_6.png)\n\n![0_image_7.png](0_image_7.png)')

In [None]:
%%time
sec_filings_md_db = Chroma.from_documents(sec_markdown_docs, emb_fn,
                                          persist_directory="./sec-filings-md-db_v2",
                                          collection_name="sec_filings_md_v2")

CPU times: user 51.3 s, sys: 871 ms, total: 52.2 s
Wall time: 54.8 s


### DB for Earnings Call

In [None]:
earnings_call_dir = "./output/earnings_call"

earnings_call_list = []

for combo in combinations:
  ticker, year = combo
  ec_file_path = os.path.join(earnings_call_dir,f"{ticker}-{year}","earnings_calls.pkl")
  print(f'{ticker}-{year}-earnings_call')
  with open(ec_file_path, 'rb') as file:
    loaded_earnings_call = pickle.load(file)
    for doc in loaded_earnings_call['earnings_docs']:
      doc.metadata.update({'ticker': ticker, 'year': year})
      earnings_call_list.append(doc)

NVDA-2024-earnings_call
AMD-2024-earnings_call
INTC-2024-earnings_call
QCOM-2024-earnings_call
AVGO-2024-earnings_call
DELL-2024-earnings_call


In [None]:
text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n", ". ", " "],
                                                    chunk_size=chunk_size,
                                                    chunk_overlap=50)
earnings_call_docs = text_splitter.split_documents(earnings_call_list)

In [None]:
%%time
earnings_call_db = Chroma.from_documents(earnings_call_docs, emb_fn,
                                         persist_directory="./earnings-call-db_v2",
                                         collection_name="earnings_call_v2")

CPU times: user 13 s, sys: 218 ms, total: 13.2 s
Wall time: 13.7 s


### Queries

In [None]:
company_ticker = 'AMD'

query_1 = (
    f"Industry and Market Analysis: "
    f"1. What are the main trends and drivers in company {company_ticker}? "
    f"2. Who are the major competitors in this industry? "
    f"3. What is the company's market share relative to its competitors?"
)

In [None]:
relevant_docs = sec_filings_md_db.similarity_search(query_1, k=1)
relevant_docs[0]

Document(metadata={'filing_type': '10-Q1', 'is_table': False, 'ticker': 'AMD', 'year': '2024'}, page_content='Basis of *Presentation.* The accompanying unaudited condensed consolidated financial statements of AMD have been prepared in accordance with U.S. generally accepted accounting principles (U.S. GAAP) for interim financial information and the instructions to Form 10-Q and Article 10 of Regulation S-X')

In [None]:
relevant_docs = earnings_call_db.similarity_search(query_1, k=3)
relevant_docs[0]

Document(metadata={'quarter': 'Q1', 'speaker': 'Aaron Rakers', 'ticker': 'AMD', 'year': '2024'}, page_content="Yes. Okay. And then as a quick follow-up, going back to an earlier question on server demand, more traditional server. As you see the ramp of maybe share opportunities in more traditional enterprise, I'm curious how you would characterize the growth that you expect to see a more traditional server CPU market as we move through '24 or even longer term, how you'd characterize that growth trend?")

### Load and examine the persistent databases

In [None]:
embedding_function = SentenceTransformerEmbeddings(model_name=model_name)
load_sec_filings_md_db = Chroma(persist_directory="./sec-filings-md-db_v2",
                                embedding_function=embedding_function,
                                collection_name="sec_filings_md_v2")
load_earnings_call_db = Chroma(persist_directory="./earnings-call-db_v2",
                                embedding_function=embedding_function,
                                collection_name="earnings_call_v2")

In [None]:
in_session_db_output = sec_filings_md_db.similarity_search(query_1, k=1)
loaded_db_output = load_sec_filings_md_db.similarity_search(query_1, k=1)
print(in_session_db_output[0] == loaded_db_output[0])

in_session_db_output = earnings_call_db.similarity_search(query_1, k=1)
loaded_db_output = load_earnings_call_db.similarity_search(query_1, k=1)
print(in_session_db_output[0] == loaded_db_output[0])

True
True
