In [1]:
import os
from operator import itemgetter
from textwrap import dedent

import pandas as pd
from langchain_openai import ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.runnables import chain, RunnablePassthrough, Runnable
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate
from langchain_core.output_parsers import StrOutputParser


@chain
def rag_parser(docs) -> str:
    """Render retrieved documents as a block of question/answer exchanges.

    Args:
        docs: Iterable of documents; each one exposes ``page_content``
            (used as the question) and a ``metadata`` mapping with an
            ``'Answer'`` entry.

    Returns:
        One "Human: ... / AI Assistant: ..." exchange per document, each
        followed by a blank line. An empty string when ``docs`` is empty.
    """
    exchanges = [
        f"Human: {doc.page_content}\nAI Assistant: {doc.metadata['Answer']}\n\n"
        for doc in docs
    ]
    return "".join(exchanges)


def build_standard_chat_prompt_template(kwargs) -> Runnable:
    """Build a chat prompt from optional system and human prompt specs.

    Args:
        kwargs: Mapping that may contain the keys ``'system'`` and
            ``'human'``. Each value is a dict of keyword arguments passed
            straight to ``PromptTemplate`` (e.g. ``template``,
            ``input_variables``, ``partial_variables``). Missing or empty
            entries are skipped.

    Returns:
        A ``ChatPromptTemplate`` holding the system message (if given)
        followed by the human message (if given).

    Note:
        A ``'messages'`` entry is ignored. Previously it fell through to the
        human branch, which either duplicated the human message or raised
        ``KeyError`` when ``'human'`` was absent.
    """
    # Insertion order fixes the message order: system first, then human.
    message_classes = {
        'system': SystemMessagePromptTemplate,
        'human': HumanMessagePromptTemplate,
    }

    messages = []

    for key, message_cls in message_classes.items():
        content = kwargs.get(key)
        if not content:
            continue
        # Each entry is built from its own spec, never from another key's.
        messages.append(message_cls(prompt=PromptTemplate(**content)))

    return ChatPromptTemplate.from_messages(messages)



# Prepare the vector database
df = pd.read_csv("immatics_faq.csv")

embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# One Document per FAQ row: the question is the indexed text and the
# matching answer travels along as metadata.
documents = [
    Document(page_content=row['Question'], metadata={"Answer": row["Answer"]})
    for _, row in df.iterrows()
]

vectorstore = FAISS.from_documents(documents, embedding=embedding)

# Similarity search returning the 3 closest questions.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

  embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Move the working directory two levels up before importing from `src`.
# NOTE(review): presumably this lands on the repository root so that `src`
# is importable and later relative paths resolve from there - confirm.
# Caution: the path is relative, so re-running this cell moves up again.
os.chdir("../../")

# Imported here, after the directory change above, rather than with the
# other imports at the top.
from src.initialization import credential_init

# NOTE(review): presumably loads API credentials (e.g. OPENAI_API_KEY) into
# the environment - confirm against src/initialization.
credential_init()

In [3]:
# Chat model used for generation. Temperature is a range from 0-2; the
# higher the value, the higher the `creativity`.
model = ChatOpenAI(
    openai_api_key=os.environ['OPENAI_API_KEY'],
    model_name="gpt-4o-mini",
    temperature=0,
)

system_template = "You are a helpful AI assisitant answering the question the best you can."
human_template = dedent(
    """
    {text}

    context:
    {context}
    """
)

# Keyword arguments for the human PromptTemplate: the question plus the
# retrieved context.
human_prompt_spec = {
    "template": human_template,
    "input_variables": ['text', 'context'],
}
input_ = {
    "system": {"template": system_template},
    "human": human_prompt_spec,
}

chat_prompt_template = build_standard_chat_prompt_template(input_)

# Take the question text, retrieve similar FAQ entries and render them as
# the `context` value of the prompt.
context_chain = itemgetter('text') | retriever | rag_parser
pipeline_ = RunnablePassthrough.assign(context=context_chain) | chat_prompt_template

In [4]:
# Run retrieval + prompt construction for a sample question (no model call yet).
output = pipeline_.invoke({"text": "Tell me something about this company."})

In [5]:
# `output` is the prompt value produced by `pipeline_`. Render each of its
# messages as "<Role>: <content>", separated by blank lines.
rendered_messages = []
for msg in output.messages:
    role = type(msg).__name__.replace('Message', '')
    rendered_messages.append(f"{role}: {msg.content.strip()}")

formatted_string = "\n\n".join(rendered_messages)

print(formatted_string)

System: You are a helpful AI assisitant answering the question the best you can.

Human: Tell me something about this company.

context:
Human: Where is the company incorporated?
AI Assistant: We are incorporated under the laws of the Netherlands.

Human: When was the company founded?
AI Assistant: Immatics was founded in 2000 as a spin out from H.G. Rammensee’s laboratory at University Tübingen, Germany. In 2015, Immatics and MD Anderson Cancer Center launched Immatics US, Inc. in Houston Texas.

Human: Who are Immatics' independent auditors?
AI Assistant: PriceWaterhouseCoopers GmbH Wirtschaftsprüfungsgesellschaft (PwC)
Friedrich-Ebert-Anlage 35-37
60327 Frankfurt am Main


In [22]:
@chain
def create_prompt_str(output):
    """Flatten a chat prompt value into one readable string.

    Args:
        output: Object exposing a ``messages`` iterable; each message has a
            ``content`` string.

    Returns:
        One "<Role>: <content>" entry per message, joined by blank lines.
        The role is the message class name with "Message" removed, and the
        content is stripped of surrounding whitespace.
    """
    entries = []
    for msg in output.messages:
        role = type(msg).__name__.replace('Message', '')
        entries.append(f"{role}: {msg.content.strip()}")

    return "\n\n".join(entries)

In [7]:
from langchain_core.output_parsers import StrOutputParser

# Same retrieval + prompt steps as before, now followed by the model call
# and conversion of the reply to a plain string.
retrieval_step = RunnablePassthrough.assign(
    context=itemgetter('text') | retriever | rag_parser
)
pipeline_ = retrieval_step | chat_prompt_template | model | StrOutputParser()

output = pipeline_.invoke({"text": "Tell me something about this company."})

print(output)

Immatics is a biotechnology company that focuses on the development of innovative cancer immunotherapies. Founded in 2000 as a spin-off from H.G. Rammensee’s laboratory at the University of Tübingen in Germany, the company has made significant strides in the field of cancer treatment. In 2015, Immatics expanded its operations by launching Immatics US, Inc. in Houston, Texas, in collaboration with the MD Anderson Cancer Center, which is renowned for its cancer research and treatment. The company is incorporated under the laws of the Netherlands and is audited by PriceWaterhouseCoopers GmbH Wirtschaftsprüfungsgesellschaft (PwC).


## Create 10 alternative answers

Instead of asking the model to come up with 10 alternative questions from the given data, we ask it to reformulate the original question into 10 more specific variations.

In [8]:
from typing import List

from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser



# Schema for a single reformulated question; element type of `Output.name`.
# NOTE(review): documented with `#` comments rather than a docstring, since
# a model docstring may end up in the schema used for the format
# instructions - confirm before adding one.
class Question(BaseModel):

    # Text of the reformulated question.
    name: str = Field(description="A reformulate question.")

# Schema the output parser expects from the model: a list of `Question` items.
# NOTE(review): documented with `#` comments rather than a docstring, since
# a model docstring may end up in the schema used for the format
# instructions - confirm before adding one.
class Output(BaseModel):

    # The reformulated questions; the field description asks for 10 of them.
    name: List[Question] = Field(description="10 reformulated questions focusing on different field. Each of the question should be more specific")


# Parser that turns the model reply into an `Output` instance; its format
# instructions are injected into the human prompt below.
output_parser = PydanticOutputParser(pydantic_object=Output)
format_instructions = output_parser.get_format_instructions()

system_template = "You are a helpful AI assisitant answering the question the best you can."
human_template = dedent(
    """
    {text}

    output format instruction: {format_instructions}
    """
)

# `format_instructions` is pre-filled as a partial variable, so only `text`
# has to be supplied when the pipeline is invoked.
human_prompt_spec = {
    "template": human_template,
    "input_variables": ['text'],
    "partial_variables": {"format_instructions": format_instructions},
}
input_ = {
    "system": {"template": system_template},
    "human": human_prompt_spec,
}

chat_prompt_template = build_standard_chat_prompt_template(input_)

questions_generation_pipepline = chat_prompt_template | model | output_parser

In [35]:
# Rebuild the retrieval-augmented question-answering prompt, because
# `chat_prompt_template` was rebound for question generation above.
system_template = "You are a helpful AI assisitant answering the question the best you can."
human_template = dedent(
    """
    {text}

    context:
    {context}
    """
)

human_prompt_spec = {
    "template": human_template,
    "input_variables": ['text', 'context'],
}
input_ = {
    "system": {"template": system_template},
    "human": human_prompt_spec,
}

chat_prompt_template = build_standard_chat_prompt_template(input_)

# Retrieve context for the question, fill the prompt, call the model and
# return the reply as a string.
context_chain = itemgetter('text') | retriever | rag_parser
qa_pipeline = (
    RunnablePassthrough.assign(context=context_chain)
    | chat_prompt_template
    | model
    | StrOutputParser()
)

In [36]:
# Seed question to be reformulated.
text = "Tell me something about this company."

# The pipeline ends in `output_parser`, so the result is parsed into the `Output` schema.
new_questions = questions_generation_pipepline.invoke({"text": text})

In [37]:
# `new_questions.name` is the list of questions; each item's `.name` is its text.
for new_question in new_questions.name:
    print(new_question.name)

What are the core products and services offered by this company?
How does this company differentiate itself from its competitors in the market?
What is the company's mission and vision statement?
Can you provide information about the company's founding and history?
What are the key markets or industries that this company operates in?
How does this company approach sustainability and corporate social responsibility?
What recent innovations or developments has this company introduced?
What is the company's organizational structure and leadership team?
How does this company engage with its customers and gather feedback?
What are the future growth plans or strategic goals for this company?


In [38]:
# One input dict per reformulated question, in the {"text": ...} shape the pipelines take.
batches = [{"text": new_question.name} for new_question in new_questions.name]

In [39]:
# Answer every reformulated question with a single batch call.
answers = qa_pipeline.batch(batches)

In [40]:
# Display the generated answers.
answers

['Immatics is a biotechnology company that focuses on the development of innovative cancer immunotherapies. The core products and services offered by Immatics include:\n\n1. **T-cell Engager Therapies**: These are designed to harness the body’s immune system to target and destroy cancer cells.\n\n2. **Adoptive Cell Therapy**: This involves the extraction and modification of a patient’s own T-cells to enhance their ability to fight cancer.\n\n3. **Target Discovery and Validation**: Immatics utilizes its proprietary technology platforms to identify and validate novel tumor-associated antigens that can be targeted by immunotherapies.\n\n4. **Clinical Development**: The company is involved in the clinical development of its therapies, conducting trials to evaluate their safety and efficacy in treating various types of cancer.\n\n5. **Collaborations and Partnerships**: Immatics engages in partnerships with other organizations, including academic institutions and pharmaceutical companies, to

## Add an extra step that extracts the prompt for each generated question, so that the perplexity can be computed.

In [41]:
# Retrieval + prompt construction only, so the exact prompt given to the
# model can be kept next to its answer.
prompt_pipeline = (
    RunnablePassthrough.assign(context=itemgetter('text') | retriever | rag_parser)
    | chat_prompt_template
)

# Both steps read the prompt built by `prompt_pipeline`: one sends it to the
# model, the other renders it as a plain string (replacing the `prompt` entry).
answer_step = itemgetter("prompt") | model | StrOutputParser()
prompt_text_step = itemgetter("prompt") | create_prompt_str

qa_pipeline = (
    RunnablePassthrough.assign(prompt=prompt_pipeline)
    | RunnablePassthrough.assign(answer=answer_step, prompt=prompt_text_step)
)

answers = qa_pipeline.batch(batches)

In [42]:
# Inspect the first result of the batch.
answers[0]

{'text': 'What are the core products and services offered by this company?',
 'prompt': 'System: You are a helpful AI assisitant answering the question the best you can.\n\nHuman: What are the core products and services offered by this company?\n\ncontext:\nHuman: Where is the company incorporated?\nAI Assistant: We are incorporated under the laws of the Netherlands.\n\nHuman: When was the company founded?\nAI Assistant: Immatics was founded in 2000 as a spin out from H.G. Rammensee’s laboratory at University Tübingen, Germany. In 2015, Immatics and MD Anderson Cancer Center launched Immatics US, Inc. in Houston Texas.\n\nHuman: How is Immatics’ stock traded?\nAI Assistant: Our common stock is listed on the Nasdaq Global Market under the ticker symbol IMTX.',
 'answer': 'Immatics is a biotechnology company that focuses on the development of innovative cancer immunotherapies. The core products and services offered by Immatics include:\n\n1. **T-cell Engagers**: These are engineered prot

In [43]:
# Print the first result's prompt string so its line breaks are rendered.
print(answers[0]['prompt'])

System: You are a helpful AI assisitant answering the question the best you can.

Human: What are the core products and services offered by this company?

context:
Human: Where is the company incorporated?
AI Assistant: We are incorporated under the laws of the Netherlands.

Human: When was the company founded?
AI Assistant: Immatics was founded in 2000 as a spin out from H.G. Rammensee’s laboratory at University Tübingen, Germany. In 2015, Immatics and MD Anderson Cancer Center launched Immatics US, Inc. in Houston Texas.

Human: How is Immatics’ stock traded?
AI Assistant: Our common stock is listed on the Nasdaq Global Market under the ticker symbol IMTX.


## Save the results to a file so that the conditional perplexity can be evaluated

In [44]:
# Keep only the prompt and the answer of each result, one row per question.
columns = ['prompt', 'answer']
data = [[answer[column] for column in columns] for answer in answers]

answer_df = pd.DataFrame(data=data, columns=columns)

In [45]:
# Display the prompt/answer table.
answer_df

Unnamed: 0,prompt,answer
0,System: You are a helpful AI assisitant answer...,Immatics is a biotechnology company that focus...
1,System: You are a helpful AI assisitant answer...,Immatics differentiates itself from its compet...
2,System: You are a helpful AI assisitant answer...,To provide you with the company's mission and ...
3,System: You are a helpful AI assisitant answer...,Immatics was founded in 2000 as a spin-out fro...
4,System: You are a helpful AI assisitant answer...,Immatics operates primarily in the biotechnolo...
5,System: You are a helpful AI assisitant answer...,Immatics is committed to sustainability and co...
6,System: You are a helpful AI assisitant answer...,To find the most recent innovations or develop...
7,System: You are a helpful AI assisitant answer...,Immatics has a structured organizational frame...
8,System: You are a helpful AI assisitant answer...,To understand how Immatics engages with its cu...
9,System: You are a helpful AI assisitant answer...,To provide you with the most accurate informat...


In [46]:
# Write the table without the index column. The path is relative to the
# current working directory, which an earlier cell changed with os.chdir.
answer_df.to_csv("./tutorial/LLM+Langchain/prompt_answer.csv", index=False)