## Import Libraries

In [105]:
!pip install langchain-groq



In [106]:
import pandas as pd
import numpy as np
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, split, posexplode, udf, explode
from pyspark.sql.types import ArrayType, DoubleType, StringType
from pyspark.sql import functions as F
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_groq import ChatGroq
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

In [107]:
# Set your Groq API key
import os
os.environ["GROQ_API_KEY"] =  "PASS YOUR API KEY"

## Read the dataset

In [None]:
# Create Spark session
spark = SparkSession.builder.appName("GENAI-Task").getOrCreate()

In [None]:
path = '.\GenAI\Gen-AI-Data.csv'

# Define path
df = spark.read \
    .option("header", True) \
    .option("inferSchema", True) \
    .option("multiLine", True) \
    .option("quote", '"') \
    .option("escape", '"') \
    .option("mode", "PERMISSIVE") \
    .csv(path)

# Filter for specific company identifier
# df = df.filter("cik == 718413")

# Show the first few rows
df.show()

+----------------+-------+----+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+
|        filename|    cik|year|           section_1|          section_1A|          section_1B|           section_2|           section_3|           section_4|           section_5|           section_6|           section_7|          section_7A|           section_8|           section_9|          section_9A|          section_9B|          section_10|          section_11|          section_12|          section_13|          section_14|          section_15|
+----------------+-------+----+--------------------+--------------------+--------------------+--

## Unpivoted version of Orginal DataFrame -  where each row represents one section of a 10-K document.

In [110]:
section_cols = [f"section_{i}" if i not in ['9A', '9B'] else f"section_{i}" for i in list(range(1, 9)) + ['9A', '9B'] + list(range(10, 16))]
df_melted = df.select(
    col("filename"),
    col("cik"),
    col("year"),
    F.explode(F.array([
        F.struct(F.lit(c).alias("section_name"), col(c).alias("text")) for c in section_cols
    ])).alias("exploded")
).select(
    "filename", "cik", "year", "exploded.section_name", "exploded.text"
)
df_melted.show()

+----------------+-------+----+------------+--------------------+
|        filename|    cik|year|section_name|                text|
+----------------+-------+----+------------+--------------------+
|1566373_2018.htm|1566373|2018|   section_1|Item 1. Business\...|
|1566373_2018.htm|1566373|2018|   section_2|Item 2. Propertie...|
|1566373_2018.htm|1566373|2018|   section_3|Item 3. Legal Pro...|
|1566373_2018.htm|1566373|2018|   section_4|Item 4. Mine Safe...|
|1566373_2018.htm|1566373|2018|   section_5|Item 5. Market fo...|
|1566373_2018.htm|1566373|2018|   section_6|Item 6. Selected ...|
|1566373_2018.htm|1566373|2018|   section_7|Item 7. Managemen...|
|1566373_2018.htm|1566373|2018|   section_8|Item 8. Financial...|
|1566373_2018.htm|1566373|2018|  section_9A|Item 9A. Controls...|
|1566373_2018.htm|1566373|2018|  section_9B|Item 9B. Other In...|
|1566373_2018.htm|1566373|2018|  section_10|Item 10. Director...|
|1566373_2018.htm|1566373|2018|  section_11|Item 11. Executiv...|
|1566373_2

## Check Shape Of DataFrame

In [111]:
# Number of rows
num_rows = df_melted.count()

# Number of columns
num_cols = len(df_melted.columns)

print(f"Shape: ({num_rows}, {num_cols})")

Shape: (48, 5)


## Implementing chunking on all sections data

In [112]:
# Step 1: Define UDF to chunk list of words into 500-word chunks
def chunk_words(words, chunk_size=1000):
    return [' '.join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]

chunk_words_udf = udf(chunk_words, ArrayType(StringType()))

# Step 2: Tokenize text
df_tokenized = df_melted.withColumn("words", split(col("text"), " "))

# Step 3: Create chunks from words
df_chunked = df_tokenized.withColumn("chunks", chunk_words_udf(col("words")))

df_final = df_chunked.select(
    "filename", "cik", "year", "section_name",
    posexplode("chunks").alias("chunk_index", "chunk_text")
)

df_final.show()

# Number of rows
num_rows = df_final.count()

# Number of columns
num_cols = len(df_final.columns)

print(f"Shape: ({num_rows}, {num_cols})")

+----------------+-------+----+------------+-----------+--------------------+
|        filename|    cik|year|section_name|chunk_index|          chunk_text|
+----------------+-------+----+------------+-----------+--------------------+
|1566373_2018.htm|1566373|2018|   section_1|          0|Item 1. Business\...|
|1566373_2018.htm|1566373|2018|   section_1|          1|inarigivir 400mg ...|
|1566373_2018.htm|1566373|2018|   section_1|          2|clinical trial in...|
|1566373_2018.htm|1566373|2018|   section_1|          3|interferon recept...|
|1566373_2018.htm|1566373|2018|   section_1|          4|serum HBV RNA, ar...|
|1566373_2018.htm|1566373|2018|   section_1|          5|will evaluate in ...|
|1566373_2018.htm|1566373|2018|   section_1|          6|modulating the ho...|
|1566373_2018.htm|1566373|2018|   section_1|          7|Europe, Hong Kong...|
|1566373_2018.htm|1566373|2018|   section_1|          8|any of our compou...|
|1566373_2018.htm|1566373|2018|   section_1|          9|other co

## Extracting Attributes For Year 2020

In [113]:
model = SentenceTransformer('all-MiniLM-L6-v2')

template = """
You are a financial analyst. Use the following 10-K report chunks to answer the question briefly and precisely.
Respond in a few words only. No sentences, no explanations.

Query: {query}

Chunk 1:
{chunk_1}

Chunk 2:
{chunk_2}

Chunk 3:
{chunk_3}

Answer (few words only):
"""

def run_llm_query(query:str, year:int, template:str, model):

  # Convert PySpark chunks to Pandas
  chunks_pd = df_final.select( "filename", "cik", "year","section_name","chunk_index","chunk_text").toPandas()

  # Filter Based On year
  chunks_pd = chunks_pd[chunks_pd['year'] == year]

  # Creating Embedding Of Chunks
  chunks_pd["augmented_text"] = str(chunks_pd["cik"]) + "-" + chunks_pd["section_name"] + ": " + chunks_pd["chunk_text"]
  chunks_pd["embedding"] = chunks_pd["augmented_text"].apply(lambda x: model.encode(x).tolist())

  # Embed the query
  query_embedding = model.encode([query])[0]

  # Compute cosine similarity and get top 3 chunks
  chunks_pd["similarity"] = chunks_pd["embedding"].apply(lambda x: cosine_similarity([query_embedding], [x])[0][0])
  top_chunks = chunks_pd.sort_values(by="similarity", ascending=False).head(3)



  prompt = PromptTemplate(
    input_variables=["query", "chunk_1", "chunk_2", "chunk_3"],
    template=template
    )
  llm = ChatGroq(model="llama3-70b-8192", temperature=0.0,max_tokens = 100)
  chain = LLMChain(llm=llm, prompt=prompt)
  result = chain.run({
      "query": query,
      "chunk_1": top_chunks.iloc[0]["chunk_text"],
      "chunk_2": top_chunks.iloc[1]["chunk_text"],
      "chunk_3": top_chunks.iloc[2]["chunk_text"]
  })
  return result, top_chunks

**Note** : LLM-based RAG was conducted on three different companies, each from a different year between 2018 and 2020, as no single company had filings available for all three years.

|Year | cik |
|-----|-----|
|2018| 1566373|
|2019|88121|
|2020|718413|

##  Attribute 1 : Business Information Of Company

In [114]:
## ---------------------- FOR YEAR 2020 - cik 718413 -----------------

# Attribute 1 : Business Information Of Company
query = "What is the business of the company as mentioned in the 10-K filing?"
year = 2020

# Run your function
answer, top_chunks = run_llm_query(query=query, year=year, template=template, model = model)

# Show the output
print("LLM Answer:")
print(answer)

LLM Answer:
Banking and financial services.


* Actual Answer : Banking and Financial Services

In [116]:
top_chunks["chunk_text"].iloc[0]

'Item 1. The Business\nOrganization and Operation\nThe Company. The Company was organized under the laws of the State of Vermont in 1982 and became a registered bank holding company under the Bank Holding Company Act of 1956, as amended, in October 1983 when it acquired all of the voting shares of the Bank, headquartered in Derby, Vermont. The Bank is the only subsidiary of the Company and principally all of the Company’s business operations are presently conducted through it. Therefore, the following narrative and the other information about the Company contained in this report are based primarily on the Bank’s operations.\nThe Bank; Banking Services. Community National Bank was organized in 1851 as the Peoples Bank, and was subsequently reorganized as the National Bank of Derby Line in 1865. In 1975, after 110 continuous years of operation as the National Bank of Derby Line, the Bank acquired the Island Pond National Bank and changed its name to “Community National Bank.” On December

In [117]:
## ---------------------- FOR YEAR 2019 - cik 88121 -----------------

# Attribute 1 : Business Information Of Company
query = "What is the business of the company as mentioned in the 10-K filing?"
year = 2019

# Run your function
answer, top_chunks = run_llm_query(query=query, year=year, template=template, model=model)

# Show the output
print("LLM Answer:")
print(answer)

LLM Answer:
Diverse global agribusiness and transportation company.


* Actual Answer : Diverse Global Agribusiness and Transportation Company

In [121]:
# Show top chunk on which LLM answer is based on
top_chunks["chunk_text"].iloc[1]

'locations and applied auditor judgment to determine the locations at which procedures were to be performed. We tested certain internal controls over the Company’s revenue process, including controls related to the recognition and consolidation of global revenue amounts. We tested a sample of individual revenue transactions by comparing the amounts recognized by the Company to relevant underlying documentation such as contracts. In addition, we evaluated the overall sufficiency of audit evidence obtained over revenue.\nWe have served as the Company’s auditor since 1959.\nKansas City, Missouri\nFebruary 19, 2020\nSEABOARD CORPORATION\nConsolidated Statements of Comprehensive Income\nSee accompanying notes to consolidated financial statements.\nSEABOARD CORPORATION\nConsolidated Balance Sheets\nSee accompanying notes to consolidated financial statements.\nSEABOARD CORPORATION\nConsolidated Statements of Cash Flows\nSee accompanying notes to consolidated financial statements.\nSEABOARD CO

In [122]:
## ---------------------- FOR YEAR 2018 cik - 1566373 -----------------

# Attribute 1 : Business Information Of Company
query = "What is the business of the company as mentioned in the 10-K filing?"
year = 2018

# Run your function
answer, top_chunks = run_llm_query(query=query, year=year, template=template, model=model)

# Show the output
print("LLM Answer:")
print(answer)

LLM Answer:
Clinical-stage biopharmaceutical company.


* Actual Answer: Clinical Stage Biopharmaceutical Company.

In [123]:
# Show top chunk on which LLM answer is based on
top_chunks['chunk_text'].iloc[0]

'Item 1. Business\nOverview\nWe are a clinical-stage biopharmaceutical company engaged in the discovery and development of a novel class of therapeutics for the treatment of viral infections, inflammatory diseases and certain cancers using our proprietary small molecule nucleotide platform. We design our compounds to selectively target and modulate the activity of specific proteins implicated in various disease states. We are developing our lead product candidate, inarigivir soproxil, or inarigivir, for the treatment of chronic hepatitis B virus, or HBV. We have designed our antiviral product candidates, including inarigivir, to selectively activate within infected hepatic cells the cellular protein, retinoic acid-inducible gene 1 (RIG-I), to inhibit viral replication and to cause the induction of intracellular interferon signaling pathways for antiviral defense. We believe that inarigivir, as a RIG-I agonist, could play an important role in antiviral therapy as a result of its dual me

## Attribute 2: Company Legal Proceedings

In [124]:
## ---------------------- FOR YEAR 2020 - cik 718413 ---------------------
query = "What legal proceedings has the company disclosed?"
year = 2020

# Run your function
answer, top_chunks = run_llm_query(query=query, year=year, template=template, model=model)

# Show the output
print("LLM Answer:")
print(answer)

LLM Answer:
Routine litigation incidental to banking business.


Actual Answer : Routine litigation incidental to banking business.

In [125]:
# Show top chunk on which LLM answer is based on
top_chunks['chunk_text'].iloc[0]

'Item 3. Legal Proceedings\nThere are no pending legal proceedings to which the Company or the Bank is a party or of which any of its property is the subject, other than routine litigation incidental to its banking business, none of which, in the opinion of management, is material to the Company’s consolidated operations or financial condition.\nItem 4.'

In [82]:
## ---------------------- FOR YEAR 2019 - cik 88121 -----------------
query = "What legal proceedings has the company disclosed?"
year = 2019

# Run your function
answer, top_chunks = run_llm_query(query=query, year=year, template=template, model=model)

# Show the output
print("LLM Answer:")
print(answer)

LLM Answer:
Cereoil, Nolston, AFMLS.


Actual Answer : S.A. (“Cereoil”) ,  Asset Forfeiture and Money Laundering Section (“AFMLS”) , Nolston S.A. (“Nolston”)

In [83]:
# Show top chunk on which LLM answer is based on
top_chunks['chunk_text'].iloc[1]

'S.A. (“Cereoil”) filed a suit in the Bankruptcy Court of First Instance in Uruguay that was served during the second quarter of 2018 naming as parties Seaboard and Seaboard’s subsidiaries, Seaboard Overseas Limited (“SOL”) and Seaboard Uruguay Holdings Ltd. (“Seaboard Uruguay”). Seaboard has a 45% indirect ownership of Cereoil. The suit seeks an order requiring Seaboard, SOL and Seaboard Uruguay to reimburse Cereoil the amount of $22 million, contending that deliveries of soybeans to SOL pursuant to purchase agreements should be set aside as fraudulent conveyances. Seaboard intends to defend this case vigorously. It is impossible at this stage to determine the probability of a favorable or unfavorable outcome resulting from this suit. In the event of an adverse ruling, Seaboard and its two subsidiaries could be ordered to pay the amount of $22 million. Any award in this case would offset against any award in the additional case described below filed by the Trustee on April 27, 2018.\n

In [84]:
## ---------------------- FOR YEAR 2018 - cik 1566373 -----------------
query = "What legal proceedings has the company disclosed?"
year = 2018

# Run your function
answer, top_chunks = run_llm_query(query=query, year=year, template=template, model=model)

# Show the output
print("LLM Answer:")
print(answer)

LLM Answer:
None disclosed.


* Actual Answer : No Information available

In [85]:
# Show top chunk on which LLM answer is based on
top_chunks

Unnamed: 0,filename,cik,year,section_name,chunk_index,chunk_text,augmented_text,embedding,similarity
0,1566373_2018.htm,1566373,2018,section_1,0,Item 1. Business\nOverview\nWe are a clinical-...,0 1566373\n1 1566373\n2 1566373\n3...,"[0.009221972897648811, -0.0379352867603302, -0...",-0.079253
1,1566373_2018.htm,1566373,2018,section_1,1,inarigivir 400mg in different patient populati...,0 1566373\n1 1566373\n2 1566373\n3...,"[0.009221972897648811, -0.0379352867603302, -0...",-0.079253
2,1566373_2018.htm,1566373,2018,section_1,2,clinical trial in cancer later in 2019. SB 112...,0 1566373\n1 1566373\n2 1566373\n3...,"[0.009221972897648811, -0.0379352867603302, -0...",-0.079253


## Attribute 3 : Risk Factors for investors

In [130]:
## ---------------------- FOR YEAR 2020 - cik 718413 -----------------
query = "What are the principal risk factors for investors?"
year = 2020

# Run your function
answer, top_chunks = run_llm_query(query=query, year=year, template=template, model=model)

# Show the output
print("LLM Answer:")
print(answer)

LLM Answer:
Competition, regulation, and market risks.


* Actual Answer  - Competition, regulation, and market risks.

In [132]:
# Show top chunk on which LLM answer is based on
top_chunks["chunk_text"].iloc[1]

'the market areas of the three owner financial institutions and leases space from them in some of their branch offices, including at the Bank’s facilities in Barre and Lyndonville, Vermont.\nStatutory Business Trust. In 2007, the Company formed CMTV Statutory Trust I (the Trust), a Delaware statutory business trust, for the purpose of issuing $12.5 million of trust preferred securities and lending the proceeds to the Company. This funding provided a portion of the cash consideration paid by the Company in the acquisition of LyndonBank and provided additional regulatory capital. The Trust is a variable interest entity for which the Company is not the primary beneficiary, within the meaning of applicable accounting standards. Accordingly, the Trust is not consolidated with the Company for financial reporting purposes.\nCompetition\nAll of the Bank’s full-service banking offices are located in northern and central Vermont. The Bank’s main office is located in Derby, in Orleans County. In 

In [133]:
## ---------------------- FOR YEAR 2019 - cik 88121  -----------------
query = "What are the principal risk factors for investors?"
year = 2019

# Run your function
answer, top_chunks = run_llm_query(query=query, year=year, template=template, model=model)

# Show the output
print("LLM Answer:")
print(answer)

LLM Answer:
Assumptions, estimates, and judgments.


* Actual Answer - Assumption, estimates, judgement

In [136]:
# Show top chunk on which LLM answer is based on
top_chunks

Unnamed: 0,filename,cik,year,section_name,chunk_index,chunk_text,augmented_text,embedding,similarity
72,88121_2019.htm,88121,2019,section_8,5,"investments, Seaboard also has trading securit...",52 88121\n53 88121\n54 88121\n55 8...,"[-0.0389004684984684, 0.023023409768939018, -0...",0.263155
66,88121_2019.htm,88121,2019,section_7,6,in estimate could result in a material adverse...,52 88121\n53 88121\n54 88121\n55 8...,"[-0.01666143164038658, 0.11231815069913864, -0...",0.250062
71,88121_2019.htm,88121,2019,section_8,4,gains/losses on these equity investments is re...,52 88121\n53 88121\n54 88121\n55 8...,"[-0.04535048082470894, -0.01630735956132412, -...",0.241675


In [137]:
## ---------------------- FOR YEAR 2018 - cik 1566373 -----------------

query = "What are the principal risk factors for investors?"
year = 2018

# Run your function
answer, top_chunks = run_llm_query(query=query, year=year, template=template, model=model)

# Show the output
print("LLM Answer:")
print(answer)

LLM Answer:
Risks of clinical trials, regulatory approval, and competition.


Actual Answer : Risks of clinical trials, regulatory approval, and competition.

In [138]:
# Show top chunk on which LLM answer is based on
top_chunks

Unnamed: 0,filename,cik,year,section_name,chunk_index,chunk_text,augmented_text,embedding,similarity
0,1566373_2018.htm,1566373,2018,section_1,0,Item 1. Business\nOverview\nWe are a clinical-...,0 1566373\n1 1566373\n2 1566373\n3...,"[0.009221972897648811, -0.0379352867603302, -0...",-0.051749
1,1566373_2018.htm,1566373,2018,section_1,1,inarigivir 400mg in different patient populati...,0 1566373\n1 1566373\n2 1566373\n3...,"[0.009221972897648811, -0.0379352867603302, -0...",-0.051749
2,1566373_2018.htm,1566373,2018,section_1,2,clinical trial in cancer later in 2019. SB 112...,0 1566373\n1 1566373\n2 1566373\n3...,"[0.009221972897648811, -0.0379352867603302, -0...",-0.051749


## Attribute 4 : Control Over Financial Reporting

In [92]:
## ---------------------- FOR YEAR 2020 - cik 718413  -----------------

query = "What did management conclude about the effectiveness of internal controls over financial reporting?"
year = 2020

# Run your function
answer, top_chunks = run_llm_query(query=query, year=year, template=template, model=model)

# Show the output
print("LLM Answer:")
print(answer)

LLM Answer:
Effective


Actual Answer : Effective

In [93]:
# Show top chunk on which LLM answer is based on
top_chunks["chunk_text"].iloc[0]

'Item 9A. Controls and Procedures\nDisclosure Controls and Procedures\nManagement is responsible for establishing and maintaining effective disclosure controls and procedures, as defined in Rule 13a-15(e) under the Securities Exchange Act of 1934 (the Exchange Act). As of December 31, 2020, an evaluation was performed under the supervision and with the participation of management, including the principal executive officer and principal financial officer, of the effectiveness of the design and operation of the Company’s disclosure controls and procedures. Based on that evaluation, management concluded that its disclosure controls and procedures as of December 31, 2020 were effective in ensuring that material information required to be disclosed in the reports it files with the Commission under the Exchange Act was recorded, processed, summarized, and reported on a timely basis.\nFor this purpose, the term “disclosure controls and procedures” means controls and other procedures of the Co

In [94]:
## ---------------------- FOR YEAR 2019 - cik 88121  -----------------

query = "What did management conclude about the effectiveness of internal controls over financial reporting?"
year = 2019

# Run your function
answer, top_chunks = run_llm_query(query=query, year=year, template=template, model=model)

# Show the output
print("LLM Answer:")
print(answer)

LLM Answer:
Effective.


* Actual Answer : Effective

In [95]:
# Show top chunk on which LLM answer is based on
top_chunks

Unnamed: 0,filename,cik,year,section_name,chunk_index,chunk_text,augmented_text,embedding,similarity
68,88121_2019.htm,88121,2019,section_8,1,locations and applied auditor judgment to dete...,52 88121\n53 88121\n54 88121\n55 8...,"[-0.060247424989938736, 0.04971352964639664, -...",0.363626
67,88121_2019.htm,88121,2019,section_8,0,Item 8. Financial Statements and Supplementary...,52 88121\n53 88121\n54 88121\n55 8...,"[-0.05201123654842377, 0.0006078851292841136, ...",0.260508
63,88121_2019.htm,88121,2019,section_7,3,by realized margins or losses as revenue is re...,52 88121\n53 88121\n54 88121\n55 8...,"[-0.07151684165000916, 0.05800343304872513, 0....",0.256393


In [96]:
## ---------------------- FOR YEAR 2018 - cik 1566373 -----------------
query = "What did management conclude about the effectiveness of internal controls over financial reporting?"
year = 2018

# Run your function
answer, top_chunks = run_llm_query(query=query, year=year, template=template, model=model)

# Show the output
print("LLM Answer:")
print(answer)

LLM Answer:
Effective.


**Note** : This information is not mentioned in the company's 10-K filing, and the LLM have not correctly identified its absence.

* Actual Answer : Information Not Provided

## Attribute 5: Company dividends payment

In [97]:
## ---------------------- FOR YEAR 2020 - cik 718413 -----------------
query = "Does the company pay dividends on its common stock?"
year = 2020

# Run your function
answer, top_chunks = run_llm_query(query=query, year=year, template=template, model=model)

# Show the output
print("LLM Answer:")
print(answer)

LLM Answer:
No


* Actual Answer : No

In [98]:
# Show top chunk on which LLM answer is based on
top_chunks["chunk_text"].iloc[0]

'Item 5. Market for Registrant’s Common Equity, Related Stockholder Matters and Issuer Purchases of Equity Securities\nInformation on the trading market in, market price of, and dividends paid on, the Company’s common stock is incorporated by reference to the section of the 2020 Annual Report under the caption “Common Stock Performance by Quarter” immediately following the “Management’s Discussion and Analysis of Financial Condition and Results of Operations”, filed as Exhibit 13 to this report. The balance of the information required by item 201 of Regulation S-K is omitted in accordance with the regulatory relief available to smaller reporting companies under applicable SEC disclosure rules, as amended in 2018 Release Nos. 33-10513 and 34-83550.\nThe following table provides information as to the purchases of the Company’s common stock during the three months ended December 31, 2020, by the Company or by any affiliated purchaser (as defined in SEC Rule 10b-18). During the monthly per

In [99]:
## ---------------------- FOR YEAR 2019 - cik 88121 -----------------
query = "Does the company pay dividends on its common stock?"
year = 2019

# Run your function
answer, top_chunks = run_llm_query(query=query, year=year, template=template, model=model)

# Show the output
print("LLM Answer:")
print(answer)

LLM Answer:
Yes.


* Actual Answer : Yes

In [100]:
# Show top chunk on which LLM answer is based on
top_chunks["chunk_text"].iloc[0]

'Item 5. Market for Registrant’s Common Equity, Related Stockholder Matters and Issuer Purchases of Equity Securities\nSeaboard’s common stock is traded on the NYSE American under the symbol SEB. Seaboard had 2,308 stockholders of record of its common stock as of January 31, 2020.\nStock Performance Chart\nThe SEC requires a five-year comparison of stock performance for Seaboard with that of an appropriate broad equity market index and similar industry index. Since there is no single industry index to compare stock performance, the companies comprising the Dow Jones U.S. Food Products and Dow Jones U.S. Marine Transportation Industry indices (the “Peer Group”) were chosen as the second comparison.\nThe following graph shows a five-year comparison of cumulative total return for Seaboard Corporation, the NYSE American Index and the companies comprising the Peer Group, weighted by market capitalization for the five fiscal years commencing December 31, 2014 and ending December 31, 2019.\nT

In [101]:
## ---------------------- FOR YEAR 2018 -  cik 1566373 -----------------
query = "Does the company pay dividends on its common stock?"
year = 2018

# Run your function
answer, top_chunks = run_llm_query(query=query, year=year, template=template, model=model)

# Show the output
print("LLM Answer:")
print(answer)

LLM Answer:
No.


* Actual Answer : No

**Note** : None of the top retrieved chunks contain dividend payment information, and the LLM has accurately recognized that this detail is not present in the 10-K filing.

In [102]:
# Show top chunk on which LLM answer is based on
top_chunks

Unnamed: 0,filename,cik,year,section_name,chunk_index,chunk_text,augmented_text,embedding,similarity
0,1566373_2018.htm,1566373,2018,section_1,0,Item 1. Business\nOverview\nWe are a clinical-...,0 1566373\n1 1566373\n2 1566373\n3...,"[0.009221972897648811, -0.0379352867603302, -0...",-0.032054
1,1566373_2018.htm,1566373,2018,section_1,1,inarigivir 400mg in different patient populati...,0 1566373\n1 1566373\n2 1566373\n3...,"[0.009221972897648811, -0.0379352867603302, -0...",-0.032054
2,1566373_2018.htm,1566373,2018,section_1,2,clinical trial in cancer later in 2019. SB 112...,0 1566373\n1 1566373\n2 1566373\n3...,"[0.009221972897648811, -0.0379352867603302, -0...",-0.032054
