In [None]:
%pip install langchain openai wikipedia tiktoken neo4j python-dotenv transformers

In [1]:
import re, os, json
import pandas as pd
import boto3
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="wikipedia")

from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain.document_loaders import WikipediaLoader
from langchain.embeddings import BedrockEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from langchain.document_loaders import TextLoader
from langchain.vectorstores.neo4j_vector import SearchType

from py2neo import Graph
from neo4j import GraphDatabase
from graphdatascience import GraphDataScience

In [2]:
uri = "bolt://3.35.174.93:7687"
username = "neo4j"
password = "password"
graph = Graph(uri, auth=(username, password))
gds = GraphDataScience(
    uri,
    auth=(username, password),
    aura_ds=False
)

### 모듈 구성

In [6]:
boto3_bedrock = boto3.client('bedrock-runtime', region_name='us-east-1')
bedrock_embeddings = BedrockEmbeddings(model_id="amazon.titan-embed-text-v1", client=boto3_bedrock, region_name='us-east-1')

# Neo4j 데이터 초기화
def init_graph_data():
    graph.run("MATCH (n) DETACH DELETE n")

def bert_len(text):
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    tokens = tokenizer.encode(text, max_length=512, truncation=True)
    return len(tokens)

# wikipedia 문서 로드 및 chunk 단위로 Split
def chunk_document(query, chunk_size, chunk_overlap):
    raw_documents = WikipediaLoader(query=query, doc_content_chars_max=20000, load_max_docs=1).load()
    text_splitter = RecursiveCharacterTextSplitter(
              chunk_size = chunk_size,
              chunk_overlap  = chunk_overlap,
              length_function = bert_len,
              separators=['\n\n', '\n', ' ', ''],
          )
    documents = text_splitter.create_documents([raw_documents[0].page_content])
    return documents

def chunk_text(query, chunk_size, chunk_overlap):
    raw_documents = WikipediaLoader(query=query, doc_content_chars_max=20000, load_max_docs=1).load()
    text_splitter = RecursiveCharacterTextSplitter(
              chunk_size = chunk_size,
              chunk_overlap  = chunk_overlap,
              length_function = bert_len,
              separators=['\n\n', '\n', ' ', ''],
          )
    documents = text_splitter.split_text(raw_documents[0].page_content)
    return documents


# embedding을 graph에 로드
def vector_load(text):
    neo4j_vector = Neo4jVector.from_documents(
        text,
        bedrock_embeddings,
        url=uri,
        username=username,
        password=password
    )
    return neo4j_vector

# vector 검색 결과 확인
def search_context(neo4j_vector, question):
    vector_results = neo4j_vector.similarity_search(question, k=1)
    vector_result = vector_results[0].page_content
    return vector_result

### chunk size = 1000

In [7]:
init_graph_data()

query = "Amazon S3"
chunk_size = 1000
chunk_overlap = 30

docs = chunk_document(query, chunk_size, chunk_overlap)
long_vector = vector_load(docs)

In [8]:
question = "What are the different storage classes offered by Amazon S3? Please provide a list of all available storage classes."
print(search_context(long_vector, question))

Amazon S3 or Amazon Simple Storage Service is a service offered by Amazon Web Services (AWS) that provides object storage through a web service interface. Amazon S3 uses the same scalable storage infrastructure that Amazon.com uses to run its e-commerce network. Amazon S3 can store any type of object, which allows uses like storage for Internet applications, backups, disaster recovery, data archives, data lakes for analytics, and hybrid cloud storage. AWS launched Amazon S3 in the United States on March 14, 2006, then in Europe in November 2007.


== Technical details ==


=== Design ===
Amazon S3 manages data with an object storage architecture which aims to provide scalability, high availability, and low latency with high durability. The basic storage units of Amazon S3 are objects which are organized into buckets. Each object is identified by a unique, user-assigned key. Buckets can be managed using the console provided by Amazon S3, programmatically with the AWS SDK, or the REST ap

### chunk size = 200

In [9]:
init_graph_data()

query = "Amazon S3"
chunk_size = 200
chunk_overlap = 30

docs = chunk_document(query, chunk_size, chunk_overlap)
short_vector = vector_load(docs)

<img src="./image/chunks.png">

#### 불완전한 정보

In [10]:
print(search_context(short_vector, question))

=== Amazon S3 storage classes ===
Amazon S3 offers nine different storage classes with different levels of durability, availability, and performance requirements.
Amazon S3 Standard is the default. It is general purpose storage for frequently accessed data.
Amazon S3 Express One Zone is a single-digit millisecond latency storage for frequently accessed data and latency-sensitive applications. It stores data only in one availability zone.
Amazon S3 Standard-Infrequent Access (Standard-IA) is designed for less frequently accessed data, such as backups and disaster recovery data.
Amazon S3 One Zone-Infrequent Access (One Zone-IA) performs like the Standard-IA, but stores data only in one availability zone.


### Chunk 단위 임베딩으로 그래프 로드

In [13]:
def chunks(xs, n=3):
    n = max(1, n)
    return [xs[i:i + n] for i in range(0, len(xs), n)]
    
def create_text_embedding_entries(query, chunk_size, chunk_overlap):
    docs = chunk_text(query, chunk_size, chunk_overlap)
    service_name = query
    res = []    
    seq_id = -1
    
    for d in chunks(docs):
        embeddings = bedrock_embeddings.embed_documents(d)
        for i in range(len(d)):
            seq_id += 1
            res.append({'name': service_name,
                        'seqId': seq_id,
                        'contextId': service_name + str(seq_id),  # unique 
                        'textEmbedding': embeddings[i],  # chunked
                        'text': d[i]  })
    return res

In [14]:
query = 'Amazon S3'
vector = create_text_embedding_entries(query, chunk_size, chunk_overlap)
pd.DataFrame(vector)

Unnamed: 0,name,seqId,contextId,textEmbedding,text
0,Amazon S3,0,Amazon S30,"[0.9296875, -0.48632812, 0.61328125, -0.230468...",Amazon S3 or Amazon Simple Storage Service is ...
1,Amazon S3,1,Amazon S31,"[0.6953125, 0.47460938, -0.26757812, 0.5429687...",=== Design ===
2,Amazon S3,2,Amazon S32,"[0.35351562, -0.29492188, 0.5625, -0.13378906,...",Amazon S3 manages data with an object storage ...
3,Amazon S3,3,Amazon S33,"[0.07080078, -0.28125, 0.5546875, -0.4296875, ...",The Amazon AWS authentication mechanism allows...
4,Amazon S3,4,Amazon S34,"[0.09277344, -0.31640625, 0.4296875, -0.455078...",=== Amazon S3 storage classes ===\nAmazon S3 o...
5,Amazon S3,5,Amazon S35,"[0.46679688, -0.48828125, 0.6875, -0.10107422,...",Amazon S3 Intelligent-Tiering moves objects au...
6,Amazon S3,6,Amazon S36,"[0.2578125, -0.20019531, 0.62890625, -0.470703...",=== File size limits ===\nAn object in S3 can ...
7,Amazon S3,7,Amazon S37,"[0.62890625, -0.40429688, 0.55859375, -0.31640...",=== Notable users ===\nPhoto hosting service S...
8,Amazon S3,8,Amazon S38,"[0.6015625, -0.578125, 0.21289062, -0.16992188...","Reddit is hosted on Amazon S3.\nBitcasa, and T..."
9,Amazon S3,9,Amazon S39,"[0.6171875, -0.47460938, 0.70703125, -0.371093...",=== S3 API and competing services ===\nThe bro...


### -임베딩 Vector 로드

In [15]:
def update_embeddings(emb):
    total = len(emb)
    count = 0
    for d in chunks(emb, 100):
        gds.run_cypher('''
        UNWIND $records AS record
        MERGE(s:Service {name:record.name})
        CREATE(c:Chunk {chunkid:record.contextId, seqid:record.seqId, text:record.text})
        MERGE(s)-[:CHUNKED]->(c)
        with c, record
        CALL db.create.setVectorProperty(c, 'embedding', record.textEmbedding)
        YIELD node
        RETURN distinct 'done'
        ''', params = {'records':d})
        count += len(d)

In [16]:
init_graph_data()
update_embeddings(vector)

<img src="./image/embed.png">

In [17]:
from langchain.chains import GraphCypherQAChain
from langchain.chat_models import BedrockChat
from langchain.graphs import Neo4jGraph
from langchain.prompts.prompt import PromptTemplate
from langchain.schema import StrOutputParser
from langchain.memory import ConversationBufferMemory
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import Bedrock

### -Without RAG
학습 시점에 제공된 Public 정보를 활용. 

Outdated 된 정보를 제공하거나 Hallucination 발생할 수 있음.

In [18]:
boto3_bedrock = boto3.client('bedrock-runtime', region_name='us-east-1')
llm = Bedrock(model_id="anthropic.claude-v2:1", client=boto3_bedrock, model_kwargs={'max_tokens_to_sample':10240, "temperature": 0})

answer = llm("What are the different storage classes offered by Amazon S3? Please provide a list of all available storage classes.")
print(answer)

 Amazon S3 offers the following storage classes:

- S3 Standard - High durability and availability for frequently accessed data. This is the default storage class.

- S3 Standard-Infrequent Access (S3 Standard-IA) - For data that is less frequently accessed but requires high availability when needed. Lower storage price than S3 Standard but charges retrieval fees.

- S3 One Zone-Infrequent Access (S3 One Zone-IA) - Same as S3 Standard-IA but stores data in a single Availability Zone. Lower cost but less availability guarantees.

- S3 Intelligent-Tiering - Automatically moves data between four access tiers based on changing access patterns to optimize costs.

- S3 Glacier Instant Retrieval - Low cost storage for archiving with milliseconds retrieval time.

- S3 Glacier Flexible Retrieval - Archival storage where data retrieval time ranges from minutes to hours. Lowest cost.

- S3 Glacier Deep Archive - Lowest cost storage class for long term archival with retrieval times of 12 hours.

-

### -Vector 검색으로 Context 추출

embedding을 검색하기 위한 'vector' 인덱스가 자동으로 생성되어 있으며, 인덱스를 활용해 빠르게 검색합니다

In [19]:
vector_search = """
WITH $embedding AS e
CALL db.index.vector.queryNodes('vector', $k, e) yield node, score
RETURN node.text AS result
"""

In [20]:
graph_instance = Neo4jGraph(url=uri, username=username, password=password)

question = "What are the different storage classes offered by Amazon S3? Please provide a list of all available storage classes."
embedding = bedrock_embeddings.embed_query(question)
context = graph_instance.query(
    vector_search, {'embedding': embedding, 'k': 3})
context = [el['result'] for el in context]

In [21]:
print(context)

['=== Amazon S3 storage classes ===\nAmazon S3 offers nine different storage classes with different levels of durability, availability, and performance requirements.\nAmazon S3 Standard is the default. It is general purpose storage for frequently accessed data.\nAmazon S3 Express One Zone is a single-digit millisecond latency storage for frequently accessed data and latency-sensitive applications. It stores data only in one availability zone.\nAmazon S3 Standard-Infrequent Access (Standard-IA) is designed for less frequently accessed data, such as backups and disaster recovery data.\nAmazon S3 One Zone-Infrequent Access (One Zone-IA) performs like the Standard-IA, but stores data only in one availability zone.', 'Amazon S3 manages data with an object storage architecture which aims to provide scalability, high availability, and low latency with high durability. The basic storage units of Amazon S3 are objects which are organized into buckets. Each object is identified by a unique, user

### -답변 생성

In [22]:
prompt_template = """
You are a helpful, respectful, and honest assistant, dedicated to providing valuable and accurate information.
Guidance for answers below
    Answer the question only using the in the context given below, and not with the prior knowledge.
    If you don't see answer in the context just Reply "Sorry , the answer is not in the context so I don't know".

Now read this context and answer the question. 
{context}

Based on the provided context above and information from the retriever source, provide a detailed answer to the below question
{question}

If the information is not available in the context , respond with "don't know."

Assistant: """

prompt = PromptTemplate.from_template(prompt_template)

### -불완전한 답변

In [23]:
answer = llm(prompt.format(context=context, question=question))
print(answer)

 Based on the context provided, the different storage classes offered by Amazon S3 are:

- Amazon S3 Standard
- Amazon S3 Express One Zone  
- Amazon S3 Standard-Infrequent Access (Standard-IA)
- Amazon S3 One Zone-Infrequent Access (One Zone-IA)

The context states that "Amazon S3 offers nine different storage classes with different levels of durability, availability, and performance requirements." However, it only lists out the four storage classes mentioned above. Since the full list of nine storage classes is not provided in the context, I do not have enough information to provide the complete list.


### -Parent Document Retrieval

In [26]:
vector_search = """
WITH $embedding AS e
CALL db.index.vector.queryNodes('vector', $k, e) yield node, score
MATCH (s:Service)-[:CHUNKED]->(node)
WITH s
MATCH (s)-[:CHUNKED]->(docs:Chunk)
WITH s, docs
ORDER BY docs.seqid ASC
RETURN s AS service, COLLECT(docs.text) AS result
"""

In [27]:
graph_instance = Neo4jGraph(url=uri, username=username, password=password)

question = "What are the different storage classes offered by Amazon S3? Please provide a list of all available storage classes."
embedding = bedrock_embeddings.embed_query(question)
context = graph_instance.query(
    vector_search, {'embedding': embedding, 'k': 1})
context = [el['result'] for el in context]

In [28]:
context

[['Amazon S3 or Amazon Simple Storage Service is a service offered by Amazon Web Services (AWS) that provides object storage through a web service interface. Amazon S3 uses the same scalable storage infrastructure that Amazon.com uses to run its e-commerce network. Amazon S3 can store any type of object, which allows uses like storage for Internet applications, backups, disaster recovery, data archives, data lakes for analytics, and hybrid cloud storage. AWS launched Amazon S3 in the United States on March 14, 2006, then in Europe in November 2007.\n\n\n== Technical details ==',
  '=== Design ===',
  'Amazon S3 manages data with an object storage architecture which aims to provide scalability, high availability, and low latency with high durability. The basic storage units of Amazon S3 are objects which are organized into buckets. Each object is identified by a unique, user-assigned key. Buckets can be managed using the console provided by Amazon S3, programmatically with the AWS SDK, 

### -완전한 답변

In [29]:
answer = llm(prompt.format(context=context, question=question))
print(answer)

 According to the context, Amazon S3 offers the following storage classes:

1. Amazon S3 Standard 
2. Amazon S3 Express One Zone
3. Amazon S3 Standard-Infrequent Access (Standard-IA)  
4. Amazon S3 One Zone-Infrequent Access (One Zone-IA)
5. Amazon S3 Intelligent-Tiering 
6. Amazon S3 on Outposts  
7. Amazon S3 Glacier Instant Retrieval
8. Amazon S3 Glacier Flexible Retrieval  
9. Amazon S3 Glacier Deep Archive


### Topology 구성

In [79]:
# ----- Run the below Cypher Query ----
# MATCH (s1:Service {name: 'Amazon S3'})
# MERGE (s2:Service {name: 'Amazon EBS'})
# MERGE (ec2:Service {name: 'Amazon EC2'})
# MERGE (ec2)-[:hasStorageType]->(s1)
# MERGE (ec2)-[:hasStorageType]->(s2)

<img src="./image/query1.png">
<img src="./image/topo1.png">

In [31]:
query = 'Amazon EC2'
vector = create_text_embedding_entries(query, chunk_size, chunk_overlap)
pd.DataFrame(vector)

Unnamed: 0,name,seqId,contextId,textEmbedding,text
0,Amazon EC2,0,Amazon EC20,"[0.29492188, 0.07910156, 0.87890625, -0.193359...",Amazon Elastic Compute Cloud (EC2) is a part o...
1,Amazon EC2,1,Amazon EC21,"[0.10253906, 0.33398438, 0.55859375, -0.208984...",Amazon announced a limited public beta test of...
2,Amazon EC2,2,Amazon EC22,"[0.16796875, -0.032714844, 0.7421875, -0.27734...","a service level agreement for EC2,\nMicrosoft ..."
3,Amazon EC2,3,Amazon EC23,"[0.024902344, -0.1328125, 0.46875, -0.28710938...","== Instance types ==\nInitially, EC2 used Xen ..."
4,Amazon EC2,4,Amazon EC24,"[-0.328125, 0.068847656, 0.21484375, -0.102050...","Compute Optimized: C5, C5n, C4\nMemory Optimiz..."
5,Amazon EC2,5,Amazon EC25,"[-0.16894531, 0.047607422, 0.69921875, 0.14550...","=== Cost ===\nAs of April 2018, Amazon charged..."
6,Amazon EC2,6,Amazon EC26,"[-0.3046875, -0.01940918, 0.80859375, -0.11718...",=== Free tier ===\nAs of December 2010 Amazon ...
7,Amazon EC2,7,Amazon EC27,"[-0.51171875, 0.06982422, 0.45898438, -0.15820...",=== Reserved instances ===\nReserved instances...
8,Amazon EC2,8,Amazon EC28,"[-0.20898438, -0.06689453, 0.30859375, -0.3652...","In September 2016, AWS announced several enhan..."
9,Amazon EC2,9,Amazon EC29,"[-0.0026397705, 0.24902344, 0.546875, -0.22851...",=== Spot instances ===\nCloud providers mainta...


In [35]:
update_embeddings(vector)

In [33]:
query = 'Amazon EBS'
ebs_vector = create_text_embedding_entries(query, chunk_size, chunk_overlap)
pd.DataFrame(ebs_vector)

Unnamed: 0,name,seqId,contextId,textEmbedding,text
0,Amazon EBS,0,Amazon EBS0,"[0.09326172, -0.18554688, 0.9140625, -0.324218...",Amazon Elastic Block Store (EBS) provides raw ...
1,Amazon EBS,1,Amazon EBS1,"[0.25585938, -0.20117188, 0.703125, -0.4433593...","== Use case ==\nIn a typical use case, using E..."
2,Amazon EBS,2,Amazon EBS2,"[0.28515625, -0.09863281, 0.63671875, -0.74218...",== Volume types ==\nThe following table shows ...
3,Amazon EBS,3,Amazon EBS3,"[0.29882812, -0.13964844, 0.81640625, -0.38281...",== Features ==\nAmazon EBS provides several fe...
4,Amazon EBS,4,Amazon EBS4,"[0.87109375, -0.39453125, 0.99609375, -0.20800...",== See also ==\nAmazon Elastic File System (EF...


In [34]:
update_embeddings(ebs_vector)

<img src="./image/topo2.png">

### -Vector Similarity 검색

In [41]:
vector_search = """
WITH $embedding AS e
CALL db.index.vector.queryNodes('vector', $k, e) yield node, score
MATCH (s:Service)-[:CHUNKED]->(node)
WITH s
MATCH (s)-[:CHUNKED]->(docs:Chunk)
WITH s, docs
ORDER BY docs.seqid ASC
RETURN s AS service, COLLECT(docs.text) AS result
"""

In [105]:
question = """
Tell me about the different storage types available for Amazon EC2. 
I need detailed information on the characteristics and use cases for each storage type. 
Additionally, I want to know which storage type is best suited for perfgormance.
"""
embedding = bedrock_embeddings.embed_query(question)
context = graph_instance.query(
    vector_search, {'embedding': embedding, 'k': 1})
context = [el['result'] for el in context]

In [103]:
context

[['Amazon Elastic Compute Cloud (EC2) is a part of Amazon.com\'s cloud-computing platform, Amazon Web Services (AWS), that allows users to rent virtual computers on which to run their own computer applications. EC2 encourages scalable deployment of applications by providing a web service through which a user can boot an Amazon Machine Image (AMI) to configure a virtual machine, which Amazon calls an "instance", containing any software desired. A user can create, launch, and terminate server-instances as needed, paying by the second for active servers – hence the term "elastic". EC2 provides users with control over the geographical location of instances that allows for latency optimization and high levels of redundancy. In November 2010, Amazon switched its own retail website platform to EC2 and AWS.\n\n\n== History ==',
  'Amazon announced a limited public beta test of EC2 on August 25, 2006, offering access on a first-come, first-served basis.\nAmazon added two new instance types (Lar

### -Context 기반 답변

In [104]:
vector_answer = llm(prompt.format(context=context, question=question))
print(vector_answer)

 Based on the context provided, here is the detailed information on the different storage types available for Amazon EC2:

The two main storage options for EC2 instances are:

1. Instance-store volumes:
- These are temporary storage volumes that persist only during the lifetime of the EC2 instance. 
- When the EC2 instance is stopped or terminated, the data on instance-store volumes is lost.
- Use cases: Good for temporary data like buffers, caches, scratch data etc. Not suitable for persistent data.

2. Elastic Block Store (EBS) Volumes:  
- EBS provides persistent block-level storage volumes that can be attached to EC2 instances. 
- EBS volumes persist independently from the EC2 instance, and act like regular hard drives.
- Can be up to 16TB in size.
- Data on EBS volumes persists when EC2 instance is stopped/terminated.
- Use cases: Great for persistent storage of data like databases, application data etc.

Some key characteristics of EBS:
- Volumes can be detached from one EC2 inst

### -Graph QAChain 활용

In [96]:
graph_instance = Neo4jGraph(url=uri, username=username, password=password)
print(graph_instance.schema)


        Node properties are the following:
        [{'labels': 'Chunk', 'properties': [{'property': 'seqid', 'type': 'INTEGER'}, {'property': 'embedding', 'type': 'LIST'}, {'property': 'text', 'type': 'STRING'}, {'property': 'chunkid', 'type': 'STRING'}]}, {'labels': 'Service', 'properties': [{'property': 'name', 'type': 'STRING'}]}]
        Relationship properties are the following:
        []
        The relationships are the following:
        ['(:Service)-[:CHUNKED]->(:Chunk)', '(:Service)-[:hasStorageType]->(:Service)']
        


In [111]:
CYPHER_GENERATION_TEMPLATE = """

You are an excellent Assistant for generating Cypher Query Language for graph searches. 
You will be exploring a graph that represents the connections between AWS services.

<instruction>
Guidance for answers below
Each Service has relationships with other Services.
 - (s1:Service)-[:]->(s2:Service)
Each Service node is connected to Chunks containing detailed information about the service:
 - (s1:Service)-[:CHUNKED]->(c:Chunk)
Each Chunk has detailed information about the service in its text.
 - (c:Chunk {{c.text}})
 - RETURN c.text
</instruction>

<example>
Here are a few examples of generated Cypher statements for particular questions:
# Question :
Explain storage types does Amazon EC2 have.
# Generated Cypher :
MATCH (ec2:Service {{name:"Amazon EC2"}})-[:hasStorageType]->(s:Service)
WITH s
MATCH (s:Service)-[:CHUNKED]->(c:Chunk)
RETURN COLLECT(DISTINCT c.text) AS Text

# Question :
Explain the cost of each storage type of Amazon EC2.
# Generated Cypher :
MATCH (ec2:Service {{name:"Amazon EC2"}})-[:hasStorageType]->(s:Service)
WITH s
MATCH (s:Service)-[:CHUNKED]->(c:Chunk)
WHERE c.text CONTAINS "cost"
RETURN COLLECT(DISTINCT c.text) AS Text
</example>

Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
<schema>
{schema}
</schema>

The question is: 
{question} 
"""

CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=['schema', 'question'], validate_template=True, template=CYPHER_GENERATION_TEMPLATE
)

In [112]:
chat_llm = BedrockChat(model_id="anthropic.claude-v2:1", model_kwargs={"temperature": 0}, region_name='us-east-1')

chain = GraphCypherQAChain.from_llm(
    chat_llm,
    graph=graph_instance, 
    cypher_prompt=CYPHER_GENERATION_PROMPT,
    verbose=True,
    validate_cypher=True,
    return_direct=True
)

def chat(question):
    r = chain(question)
    summary_prompt_tpl = f"""Human: 
    Fact: {json.dumps(r['result'])}

    * Describe the above fact as if you are answering this question "{r['query']}"
    * Don't omit any information related to the question
    * When the fact is not empty, assume the question is valid and the answer is true
    * Do not return helpful or extra text or apologies
    * List the results in rich text format if there are more than one results
    Assistant:
    """
    return llm(summary_prompt_tpl)

def chat_response(input_text):
    try:
        return chat(input_text)
    except:
        return "I'm sorry, there was an error retrieving the information you requested."

In [114]:
chat_answer = chat(question) 
print(chat_answer)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m
MATCH (ec2:Service {name:"Amazon EC2"})-[:hasStorageType]->(storage:Service)
WITH storage 
MATCH (storage)-[:CHUNKED]->(chunk:Chunk)
WHERE chunk.text CONTAINS "storage" OR chunk.text CONTAINS "use" OR chunk.text CONTAINS "performance"
RETURN COLLECT(DISTINCT chunk.text) AS text
[0m

[1m> Finished chain.[0m
 Based on the facts provided, here are the details on the different storage types available for Amazon EC2:

<b>Amazon Elastic Block Store (EBS)</b>

EBS provides raw block-level storage that can be attached to Amazon EC2 instances. It offers two major categories of storage options:

1. SSD-backed storage for transactional workloads like databases and boot volumes. Performance depends primarily on IOPS. Volume types include:

- General Purpose SSD (gp2): General purpose SSD volume that balances price and performance. 
- Provisioned IOPS SSD (io1): Highest performance SSD volume designed for missio

### -정보를 통합해서 답변

In [131]:
def summarize_answer(question, fact1, fact2):
    
    summary_prompt_tpl = f"""Human: 
    <fact1> 
    {fact1}
    </fact1>
    <fact2>
    {fact2}
    </fact2>
    
    * Describe the above fact as if you are answering this question "{question}"
    * Combine the multiple facts related to the question.
    * When the fact is not empty, assume the question is valid and the answer is true
    * Do not return helpful or extra text or apologies
    * List the results in rich text format if there are more than one results
    Assistant:
    """
    return llm(summary_prompt_tpl)

In [132]:
facts = {
    "fact1": vector_answer,
    "fact2": chat_answer
}
final_answer = summarize_answer(question, vector_answer, chat_answer)
print(final_answer)

 Here is the detailed information on the different storage types available for Amazon EC2:

<b>Instance Store Volumes</b>
- Temporary block-level storage volumes
- Persist only during the EC2 instance lifetime 
- Data lost when instance is stopped or terminated
- Use Cases: 
    - Buffers, caches, scratch data
    - Not suitable for persistent data

<b>Elastic Block Store (EBS) Volumes</b> 
- Persistent block storage volumes 
- Data persists independently from EC2 instance
- Can be up to 16 TB in size
- Data persists when instance is stopped/terminated
- Use Cases:
    - Databases, application data 
    - Great for persistent storage

<b>EBS Volume Types</b>
1. SSD-backed storage 
    - General Purpose SSD (gp2): Balances price and performance
    - Provisioned IOPS SSD (io1): Highest performance SSD for mission-critical applications
2. Disk-backed storage
    - Throughput Optimized HDD (st1): Frequently accessed, throughput intensive workloads 
    - Cold HDD (sc1): Less frequently ac