### Loading text files (PDFs parsed into Markdown `.md` files)

In [9]:
from langchain_community.document_loaders import UnstructuredMarkdownLoader
import os

# Directory containing the reports, already parsed into Markdown files.
dir_path = './Reports'

# Accumulates the Document chunks produced from every report.
Data_Book = []

# os.listdir() returns entries in arbitrary order. Sort the names so that
# Data_Book has the same order on every run: the checkpointed graph-building
# step resumes by position in Data_Book, so an unstable order would make a
# resumed run skip some chunks and process others twice.
for filename in sorted(os.listdir(dir_path)):
    if filename.endswith('.md'):
        file_path = os.path.join(dir_path, filename)
        md_loader = UnstructuredMarkdownLoader(file_path)
        # load_and_split() returns a list of Document chunks for this file.
        Data_Book.extend(md_loader.load_and_split())

In [10]:
# Total number of Document chunks collected in Data_Book.
len(Data_Book)

890

In [3]:
# Peek at the first two Document objects (metadata and page_content).
Data_Book[:2]

[Document(metadata={'source': './Reports/2022 Q3 NVDA.md'}, page_content='UNITED STATES SECURITIES AND EXCHANGE COMMISSION\n\nWashington, D.C. 20549\n\nFORM 10-Q\n\n☒ QUARTERLY REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n\nFor the quarterly period ended October 30, 2022\n\nOR\n\n☐ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n\nCommission file number: 0-23985\n\nNVIDIA\n\n(Exact name of registrant as specified in its charter) NVIDIA CORPORATION\n\nDelaware 94-3177549 (State or Other Jurisdiction of Incorporation or Organization) (I.R.S. Employer Identification No.)\n\n2788 San Tomas Expressway\n\nSanta Clara, California 95051\n\n(408) 486-2000\n\n(Address, including zip code, and telephone number, including area code, of principal executive offices)\n\nN/A\n\n(Former name, former address and former fiscal year if changed since last report)\n\nSecurities registered pursuant to Section 12(b) of the Act:\n\nTit

In [4]:
# Text content of the first chunk only.
Data_Book[0].page_content

'UNITED STATES SECURITIES AND EXCHANGE COMMISSION\n\nWashington, D.C. 20549\n\nFORM 10-Q\n\n☒ QUARTERLY REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n\nFor the quarterly period ended October 30, 2022\n\nOR\n\n☐ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n\nCommission file number: 0-23985\n\nNVIDIA\n\n(Exact name of registrant as specified in its charter) NVIDIA CORPORATION\n\nDelaware 94-3177549 (State or Other Jurisdiction of Incorporation or Organization) (I.R.S. Employer Identification No.)\n\n2788 San Tomas Expressway\n\nSanta Clara, California 95051\n\n(408) 486-2000\n\n(Address, including zip code, and telephone number, including area code, of principal executive offices)\n\nN/A\n\n(Former name, former address and former fiscal year if changed since last report)\n\nSecurities registered pursuant to Section 12(b) of the Act:\n\nTitle of each class Trading Symbol(s) Name of each exchange on which registe

### LLM using Azure OpenAI

In [1]:
from langchain_openai import AzureChatOpenAI
from dotenv import load_dotenv
# Load AzureAI and Langsmith credentials into the environment.
load_dotenv()

# Settings for the Azure-hosted chat model; `llm` is shared by the
# graph-index creation step and the question-answering chain.
azure_chat_settings = dict(
    azure_deployment="gpt-35-turbo",
    api_version="2024-08-01-preview",
    temperature=0.7,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)
llm = AzureChatOpenAI(**azure_chat_settings)

### Graph and GraphIndex Creation

In [None]:
import os
import pickle
from tqdm import tqdm
from langchain.indexes import GraphIndexCreator

checkpoint_file = 'processed_docs_checkpoint.pkl'
CHECKPOINT_EVERY = 10  # write the checkpoint each time this many more documents are done


def _save_checkpoint(indexes, path):
    """Pickle ``indexes`` to ``path`` without ever leaving a half-written file.

    The data is written to a temporary sibling file first and then moved over
    ``path`` with os.replace(), so an interruption during the dump cannot
    destroy the previous checkpoint.
    """
    tmp_path = path + '.tmp'
    with open(tmp_path, 'wb') as f:
        pickle.dump(indexes, f)
    os.replace(tmp_path, path)


# Resume from a previous run when a checkpoint exists.
# NOTE: unpickling executes code stored in the file -- only load a checkpoint
# that this notebook wrote itself.
if os.path.exists(checkpoint_file):
    with open(checkpoint_file, 'rb') as f:
        processed_indexes = pickle.load(f)
else:
    processed_indexes = []

# processed_indexes[k] is the index built from Data_Book[k], so the number of
# stored indexes is also the position of the next document to process.
start_index = len(processed_indexes)

if start_index > len(Data_Book):
    # A checkpoint longer than the corpus cannot belong to this Data_Book.
    raise ValueError(
        f"Checkpoint '{checkpoint_file}' holds {start_index} entries but Data_Book "
        f"has only {len(Data_Book)} documents; delete the stale checkpoint and re-run."
    )

index_creator = GraphIndexCreator(llm=llm)
for doc in tqdm(Data_Book[start_index:], desc="Processing Documents", initial=start_index, total=len(Data_Book)):
    index = index_creator.from_text(doc.page_content)
    processed_indexes.append(index)

    # Save progress every CHECKPOINT_EVERY documents, and once more when the
    # last document has been processed.
    if len(processed_indexes) % CHECKPOINT_EVERY == 0 or len(processed_indexes) == len(Data_Book):
        _save_checkpoint(processed_indexes, checkpoint_file)

graphs = processed_indexes

### compose(G, H) in networkx
`nx.compose(G, H)` combines the nodes and edges of G and H into a single graph.

**Returns:** a new graph with the same type as G.

In [13]:
import networkx as nx

# Merge the per-document graphs into one combined graph.
if not graphs:
    # Fail with a clear message instead of an IndexError on graphs[0].
    raise ValueError("No graphs to compose: `graphs` is empty.")

# nx.compose_all is the n-ary form of nx.compose: it unions the nodes and edges
# of every graph in the list (attributes from later graphs win, as with
# repeated compose calls) without re-copying the growing result for each
# additional graph.
graph_nx = nx.compose_all([g._graph for g in graphs])

In [14]:
# The recorded repr of this object shows the class is defined in
# langchain_community.graphs.networkx_graph, so import it from there rather
# than through the legacy `langchain.graphs` path.
from langchain_community.graphs.networkx_graph import NetworkxEntityGraph

# Wrap the combined networkx graph in LangChain's entity-graph type.
graph = NetworkxEntityGraph(graph_nx)
graph

<langchain_community.graphs.networkx_graph.NetworkxEntityGraph at 0x7f1ad50c8940>

## Drawing and Saving the Graph

In [None]:
# graph.draw_graphviz(path="Data_Book.pdf", prog='fdp')

In [None]:
# import pickle
# pickle.dump(graph, open('Sec-10Q-Graph.pickle', 'wb'))

In [2]:
import pickle

# Load the entity graph from 'Sec-10Q-Graph.pickle'. The `with` block closes
# the file handle as soon as the load finishes.
# NOTE: unpickling executes code stored in the file -- only load a pickle that
# you created yourself.
with open('Sec-10Q-Graph.pickle', 'rb') as f:
    graph = pickle.load(f)

### GraphQAChain

In [3]:
from langchain.chains import GraphQAChain

# Question-answering chain over the entity graph. With verbose=True each call
# prints the entities it extracted and the graph context it retrieved.
chain = GraphQAChain.from_llm(llm=llm, graph=graph, verbose=True)

In [4]:
# Question 1: trend in Apple's total net sales.
question = """
How has Apple's total net sales changed over time?
"""

# NOTE(review): in the recorded run only the entity "Apple" was extracted and a
# single triple was retrieved, so the chain answered "I don't know."
chain.invoke(question)



[1m> Entering new GraphQAChain chain...[0m
Entities Extracted:
[32;1m[1;3mApple[0m
Full Context:
[32;1m[1;3mApple withdrew from the case[0m

[1m> Finished chain.[0m


{'query': "\nHow has Apple's total net sales changed over time?\n",
 'result': "I don't know."}

In [8]:
# Question 2: trend in Apple's Services segment revenue.
question = """
Can any trends be identified in Apple's Services segment revenue over the reported periods?
"""

# In the recorded run the entities "Apple" and "Services" were extracted, and
# the retrieved context consisted mostly of "Services" triples.
chain.invoke(question)



[1m> Entering new GraphQAChain chain...[0m
Entities Extracted:
[32;1m[1;3mApple, Services[0m
Full Context:
[32;1m[1;3mApple withdrew from the case
Services net sales increased during the third quarter of 2023
Services $ 20,907 $ 19,821
Services Services net sales increased
Services $ 14,842 $ 29,551
Services 71.0 % 72.6 %
Services are a part of Expansion
Services is 20,766
Services is 19,516
Services is 6,057
Services is 5,393
Services net sales $20,766
Services compared to same quarter
Services due primarily to higher net sales
Services dollar in millions $ 14,709
Services dollar in millions $ 14,123[0m

[1m> Finished chain.[0m


{'query': "\nCan any trends be identified in Apple's Services segment revenue over the reported periods?\n",
 'result': "Yes, the trend in Apple's Services segment revenue over the reported periods is an increase."}

In [9]:
# Question 3: changes in Apple's liquidity position or cash flows.
question = """
Are there any notable changes in Apple's liquidity position or cash flows as reported in these 10-Qs?
"""

# NOTE(review): in the recorded run only the entity "Apple" was extracted and a
# single triple was retrieved, so the chain answered "I don't know."
chain.invoke(question)



[1m> Entering new GraphQAChain chain...[0m
Entities Extracted:
[32;1m[1;3mApple[0m
Full Context:
[32;1m[1;3mApple withdrew from the case[0m

[1m> Finished chain.[0m


{'query': "\nAre there any notable changes in Apple's liquidity position or cash flows as reported in these 10-Qs?\n",
 'result': "I don't know."}