In [None]:
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_core.documents import Document
from langchain_ollama import OllamaLLM
import os



# Opportunity KG

## Input data

In [None]:
input_txt_folder = "../input/inputtxt"

# Import opportunity file 
for filename in os.listdir(input_txt_folder):
    if filename.endswith('opportunityfileexample.txt'):
        file_path = os.path.join(input_txt_folder, filename)
        print(f"Processing {filename}...")

        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()
                print(type(content))
        except Exception as e:
            print(f"Error processing {filename}: {e}")

documents = [Document(page_content=content)]
display(documents)

Processing input_paper.txt...
<class 'str'>


[Document(metadata={}, page_content='Chapter 1\nIntroduction\n1.1\nBackground of Large Language Models (LLMs)\nLarge Language Models (LLMs) represent a significant leap in computational systems capable of under-\nstanding and generating human language. Building on traditional language models (LMs) like N-gram\nmodels [1], LLMs address limitations such as rare word handling, overfitting, and capturing complex\nlinguistic patterns. Notable examples, such as GPT-3 and GPT-4 [2], leverage the self-attention mecha-\nnism within Transformer architectures to efficiently manage sequential data and understand long-range\ndependencies. Key advancements include in-context learning for generating coherent text from prompts\nand Reinforcement Learning from Human Feedback (RLHF) [3] for refining models using human re-\nsponses. Techniques like prompt engineering, question-answering, and conversational interactions have\nsignificantly advanced the field of natural language processing (NLP) [4].\n1.2\

## LLM and Graph Transformer

In [28]:
llm = OllamaLLM(model="llama3.2:3b")
graph_transformer = LLMGraphTransformer(llm=llm, ignore_tool_usage=True)

In [29]:
graph = await graph_transformer.aconvert_to_graph_documents(documents)

In [30]:
print(graph)

[GraphDocument(nodes=[], relationships=[], source=Document(metadata={}, page_content='Chapter 1\nIntroduction\n1.1\nBackground of Large Language Models (LLMs)\nLarge Language Models (LLMs) represent a significant leap in computational systems capable of under-\nstanding and generating human language. Building on traditional language models (LMs) like N-gram\nmodels [1], LLMs address limitations such as rare word handling, overfitting, and capturing complex\nlinguistic patterns. Notable examples, such as GPT-3 and GPT-4 [2], leverage the self-attention mecha-\nnism within Transformer architectures to efficiently manage sequential data and understand long-range\ndependencies. Key advancements include in-context learning for generating coherent text from prompts\nand Reinforcement Learning from Human Feedback (RLHF) [3] for refining models using human re-\nsponses. Techniques like prompt engineering, question-answering, and conversational interactions have\nsignificantly advanced the fiel

### Saving the graph

In [13]:
import json

output_data = [
    {
        "page_content": doc.page_content,
        "metadata": doc.metadata
    }
    for doc in graph
]

with open("KG_fromtext.json", "w", encoding="utf-8") as f:
    json.dump(output_data, f, ensure_ascii=False, indent=2)

AttributeError: 'GraphDocument' object has no attribute 'page_content'

In [22]:
print(f"Nodes:{graph[0].nodes}")
print(f"Relationships:{graph[0].relationships}")

Nodes:[]
Relationships:[]


## Visualize the graph

In [None]:
from pyvis.network import Network

def visualize_graph(graph_documents):

    # Create network
    net = Network(height="1200px", width="100%", directed=True,
                      notebook=False, bgcolor="#222222", font_color="white")
    
    nodes = graph_documents[0].nodes
    relationships = graph_documents[0].relationships

    # Build lookup for valid nodes
    node_dict = {node.id: node for node in nodes}
    
    # Filter out invalid edges and collect valid node IDs
    valid_edges = []
    valid_node_ids = set()
    for rel in relationships:
        if rel.source.id in node_dict and rel.target.id in node_dict:
            valid_edges.append(rel)
            valid_node_ids.update([rel.source.id, rel.target.id])


    # Track which nodes are part of any relationship
    connected_node_ids = set()
    for rel in relationships:
        connected_node_ids.add(rel.source.id)
        connected_node_ids.add(rel.target.id)

    # Add valid nodes
    for node_id in valid_node_ids:
        node = node_dict[node_id]
        try:
            net.add_node(node.id, label=node.id, title=node.type, group=node.type)
        except:
            continue  # skip if error

    # Add valid edges
    for rel in valid_edges:
        try:
            net.add_edge(rel.source.id, rel.target.id, label=rel.type.lower())
        except:
            continue  # skip if error

    # Configure physics
    net.set_options("""
            {
                "physics": {
                    "forceAtlas2Based": {
                        "gravitationalConstant": -100,
                        "centralGravity": 0.01,
                        "springLength": 200,
                        "springConstant": 0.08
                    },
                    "minVelocity": 0.75,
                    "solver": "forceAtlas2Based"
                }
            }
            """)
        
    output_file = "knowledge_graph.html"
    net.save_graph(output_file)
    print(f"Graph saved to {os.path.abspath(output_file)}")

    # Try to open in browser
    try:
        import webbrowser
        webbrowser.open(f"file://{os.path.abspath(output_file)}")
    except:
        print("Could not open browser automatically")
        
# Run the function
visualize_graph(graph)