In [1]:
from PIL import Image
import io
import base64
from typing import Any, Dict, List, Optional
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.core.query_pipeline import QueryPipeline, InputComponent, ArgPackComponent
from llama_index.core.prompts import PromptTemplate
from llama_index.llms.openai import OpenAI
from llama_index.postprocessor.colbert_rerank import ColbertRerank
from llama_index.core.llms import ChatMessage
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.query_pipeline import CustomQueryComponent
from llama_index.core.schema import NodeWithScore
from pydantic import BaseModel
from llama_index.core.bridge.pydantic import Field
from llama_index.core.output_parsers import PydanticOutputParser

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from typing import Dict, Any

class AnswerFormat(BaseModel):
    """Object representing a single knowledge pdf file."""
    answer: str = "None"
    
    @classmethod
    def schema(cls, by_alias: bool = True) -> Dict[str, Any]:
        schema = super().model_json_schema(by_alias)
        properties = schema.get("properties", {})

        # Manually adding descriptions
        properties["answer"]["description"] = "Your Answer to the given query"

        return schema

In [3]:
storage_context = StorageContext.from_defaults(persist_dir="./index/piazza")
index = load_index_from_storage(storage_context)

In [38]:
from llama_index.core.output_parsers import PydanticOutputParser

input_component = InputComponent()
output_parser = PydanticOutputParser(AnswerFormat)
prompt_str = """\
You are given a context with all piazza posts and answers of a certain course.
Answer the following questions: {query_str}
"""
json_prompt_str = output_parser.format(prompt_str)
llm = OpenAI(
    model="gpt-4o",
    temperature=0.2,
)
retriever = index.as_retriever(similarity_top_k=10)
reranker = ColbertRerank(top_n=3, keep_retrieval_score=True)
DEFAULT_CONTEXT_PROMPT = json_prompt_str + (
    "Here is some context that may be relevant:\n"
    "-----\n"
    "{node_context}\n"
    "-----\n"
)
output_parser = PydanticOutputParser(AnswerFormat)


In [5]:
print(json_prompt_str)

You are given a context with all piazza posts and answers of a certain course.
Answer the following questions: {query_str}



Here's a JSON schema to follow:
{{"description": "Object representing a single knowledge pdf file.", "properties": {{"answer": {{"default": "None", "title": "Answer", "type": "string", "description": "Your Answer to the given query"}}}}, "title": "AnswerFormat", "type": "object"}}

Output a valid JSON object but do not repeat the schema.



In [6]:
print(DEFAULT_CONTEXT_PROMPT)

You are given a context with all piazza posts and answers of a certain course.
Answer the following questions: {query_str}



Here's a JSON schema to follow:
{{"description": "Object representing a single knowledge pdf file.", "properties": {{"answer": {{"default": "None", "title": "Answer", "type": "string", "description": "Your Answer to the given query"}}}}, "title": "AnswerFormat", "type": "object"}}

Output a valid JSON object but do not repeat the schema.
Here is some context that may be relevant:
-----
{node_context}
-----



In [48]:
class Response(CustomQueryComponent):
    llm: OpenAI = Field(..., description="OpenAI LLM")
    system_prompt: Optional[str] = Field(
        default=None, description="System prompt to use for the LLM"
    )
    context_prompt: str = Field(
        default=DEFAULT_CONTEXT_PROMPT,
        description="Context prompt to use for the LLM",
    )

    def _validate_component_inputs(
        self, input: Dict[str, Any]
    ) -> Dict[str, Any]:
        return input

    @property
    def _input_keys(self) -> set:
        # Removed "chat_history" from the input keys
        return {"nodes", "query_str"}

    @property
    def _output_keys(self) -> set:
        return {"response"}

    def _prepare_context(
        self,
        # Removed chat_history parameter
        nodes: List[NodeWithScore],
        query_str: str,
    ) -> List[ChatMessage]:
        node_context = ""
        for idx, node in enumerate(nodes):
            print(node.metadata)
            print(node.text)
            node_text = node.get_content(metadata_mode="llm")
            node_context += f"Context Chunk {idx}:\n{node_text}\n\n"

        formatted_context = self.context_prompt.format(
            node_context=node_context, query_str=query_str
        )
        user_message = ChatMessage(role="user", content=formatted_context)

        # print(formatted_context)

        # Removed appending to chat_history
        context = [user_message]

        if self.system_prompt is not None:
            # Adjusted to use context instead of chat_history
            context = [
                ChatMessage(role="system", content=self.system_prompt)
            ] + context

        return context

    def _run_component(self, **kwargs) -> Dict[str, Any]:
        # Removed chat_history from kwargs
        nodes = kwargs["nodes"]
        query_str = kwargs["query_str"]

        prepared_context = self._prepare_context(
            # Adjusted call to _prepare_context
            nodes, query_str
        )
        print(prepared_context)
        response = self.llm.chat(prepared_context)
        return {"response": response}

    async def _arun_component(self, **kwargs: Any) -> Dict[str, Any]:
        # Removed chat_history from kwargs
        nodes = kwargs["nodes"]
        query_str = kwargs["query_str"]

        prepared_context = self._prepare_context(
            # Adjusted call to _prepare_context
            nodes, query_str
        )

        response = await self.llm.achat(prepared_context)

        return {"response": response}


In [49]:
response_component = Response(
    llm=llm,
    system_prompt=(
        "You are a Virtual Teaching Assistant. Answer questions in the style of a human TA."
        "If you can find the answer in the context, don't modify too much, if you are able to find multiple answers, structure them and summerize them."
    )
)

In [10]:
import pandas as pd

def pretty_print(df):
    return display(HTML(df.to_html().replace("\\n", "<br>")))

def visualize_retrieved_nodes(nodes) -> None:
    result_dicts = []
    for node in nodes:
        result_dict = {"Score": node.score, "Text": node.node.get_text()}
        result_dicts.append(result_dict)

    pretty_print(pd.DataFrame(result_dicts))

In [50]:
pipeline = QueryPipeline(
    modules={
        "input": input_component,
        "query_retriever": retriever,
        "reranker": reranker,
        "response_component": response_component
        # "output_parser": output_parser
    },
    verbose=True,
)

pipeline.add_link("input", "query_retriever", src_key="query_str")
pipeline.add_link("query_retriever", "reranker", dest_key="nodes")
pipeline.add_link(
    "input", "reranker", src_key="query_str", dest_key="query_str"
)
pipeline.add_link("reranker", "response_component", dest_key="nodes")
pipeline.add_link("input", "response_component", dest_key="query_str")
# pipeline.add_link("response_component", "output_parser")

In [51]:
response = pipeline.run(query_str="Connectivity issues with instances deployed on AWS")

[1;3;38;2;155;135;227m> Running module input with input: 
query_str: Connectivity issues with instances deployed on AWS

[0m[1;3;38;2;155;135;227m> Running module query_retriever with input: 
input: Connectivity issues with instances deployed on AWS

[0m[1;3;38;2;155;135;227m> Running module reranker with input: 
query_str: Connectivity issues with instances deployed on AWS
nodes: [NodeWithScore(node=TextNode(id_='50df09cc-eede-450b-b142-38a400b85359', embedding=None, metadata={'filename': '/home/jason/coursistant/DR/txt/announcement.txt'}, excluded_embed_metadata_keys=[], excl...

[0m[1;3;38;2;155;135;227m> Running module response_component with input: 
query_str: Connectivity issues with instances deployed on AWS
nodes: [NodeWithScore(node=TextNode(id_='50df09cc-eede-450b-b142-38a400b85359', embedding=None, metadata={'filename': '/home/jason/coursistant/DR/txt/announcement.txt', 'retrieval_score': 0.8286686360507944}...

[0m{'filename': '/home/jason/coursistant/DR/txt/announc

In [42]:
for node in nodes:
    print(node.id_)
    print(node.node.get_content()[:120])
    print("reranking score: ", node.score)
    print("retrieval score: ", node.node.metadata["retrieval_score"])
    print("**********")

50df09cc-eede-450b-b142-38a400b85359
Announcement: Monday (12/04): 3:00 pm - 5:00 pm (Science and Engineering Library Room 209)

Wednesday (12/06): 8:00 pm -
reranking score:  0.7618685960769653
retrieval score:  0.8286686360507944
**********
f7c8213e-7268-4cb3-8d2e-a7298ec71a4d
=P

The reason why the professor said you have to delete 'one' to add 'the other' is that the two rules in the provided 
reranking score:  0.6718893051147461
retrieval score:  0.7533819370971339
**********
861a760f-d5cc-4f99-a5d5-bd375bcfd74f
Thanks.
Answer: You may try directly copying the ssh command from the aws ec2 instance webpage to make sure your command
reranking score:  0.6224735379219055
retrieval score:  0.7831712575785905
**********


In [52]:
print(response)

assistant: ```json
{
  "answer": "Many students have experienced connectivity issues with instances deployed on AWS. Here are some potential solutions and troubleshooting steps:

1. Ensure you are using Ubuntu 20 and the VyOS rolling version (rolling 1.3 or 1.3.3).
2. Verify that your VPC's route tables are set to 'main' and the target is set to 'any (0.0.0.0/0)'.
3. Check if your security rules allow TCP requests from any network.
4. Review the network (subnet) topology to ensure each instance is configured correctly.
5. Confirm whether the Elastic IP is correctly associated with the intended interface.
6. Double-check your SSH key and the connection method (use the connection command copied from the 'Connect' page on the AWS website).
7. If you suspect issues with the remote operating system, assign a public IP directly to a specific port when creating the EC2 instance and check if you can SSH/ping into it.
8. If you cannot SSH into the software router using a public IP, try connecti

In [47]:
from pyvis.network import Network

net = Network(notebook=True, cdn_resources="in_line", directed=True)
net.from_nx(pipeline.dag)
net.show("rag_dag.html")

rag_dag.html
