In [None]:
import urllib
import warnings
from pathlib import Path as p
from pprint import pprint

import pandas as pd
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import VertexAIEmbeddings
from langchain.llms import VertexAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma


In [None]:
# Text-generation model handed to every `load_qa_chain(...)` call in this notebook.
# NOTE(review): both clients are constructed before the `vertexai.init(...)` cell
# further down -- confirm they resolve the intended project and region.
vertex_llm_text = VertexAI(
    model_name="text-bison@001",
)

# Embedding model used when building the Chroma index for similarity search.
vertex_embeddings = VertexAIEmbeddings(
    model_name="textembedding-gecko@001",
)


In [None]:
from pathlib import Path

dataset_path = 'train.jsonl'

# Number of JSONL lines to show; raise it to inspect more of the file.
PREVIEW_LINE_COUNT = 3

# Preview only the first few lines instead of pretty-printing the whole file as
# one giant string, which floods the notebook output. The file is decoded as
# UTF-8 explicitly so the preview does not depend on the platform's locale.
with Path(dataset_path).open(encoding="utf-8") as dataset_file:
    for line_index, raw_line in enumerate(dataset_file):
        if line_index >= PREVIEW_LINE_COUNT:
            break
        pprint(raw_line.rstrip("\n"))



In [None]:
from vertexai.language_models import TextGenerationModel


In [None]:
from langchain.document_loaders import JSONLoader

# From here on the notebook works with the validation split.
dataset_path = 'validation.jsonl'

# The file is read in JSON Lines mode; the jq expression selects the `text`
# field of every `sents` entry under every `answers` entry of a record.
loader = JSONLoader(
    json_lines=True,
    text_content=False,
    jq_schema='.answers[].sents[].text',
    file_path=dataset_path,
)

# `data` is the list of loaded documents used by every later section.
data = loader.load()


In [None]:
# Show a small sample rather than every loaded document, so the cell output
# stays readable; widen the slice to inspect more.
data[:5]

In [None]:
# Google Cloud project and region passed to `vertexai.init(...)`.
PROJECT_ID, REGION = 'alkali-gworks', 'us-central1'

In [None]:
import vertexai

# Bind the Vertex AI SDK to the project and region defined in the previous cell.
vertexai.init(
    location=REGION,
    project=PROJECT_ID,
)

### 1. Stuffing (Single Shot Prompting)

In [None]:
# Join the first seven documents purely to gauge how much text they contain.
# The loop variable is named `doc` so it cannot be confused with the
# `Path as p` alias imported at the top of the notebook.
context = "\n".join(str(doc.page_content) for doc in data[:7])

# The label reports words, so count whitespace-separated words;
# `len(context)` would report the number of characters instead.
print("The total words in the context: ", len(context.split()))

# Question asked of the chain (typo "elsen" corrected to "else").
question = "Is it acceptable to format a cv for someone else?"


In [None]:
# Prompt for the "stuff" strategy: all input documents are placed into
# `{context}` and the model answers `{question}` in a single call.
# The stray "?" that used to follow `{context}` has been removed so nothing is
# appended to the document text.
prompt_template = """Answer the question as precise as possible using the provided context. If the answer is
                    not contained in the context, say "answer not available in context" \n\n
                    Context: \n {context}\n
                    Question: \n {question} \n
                    Answer:
                  """

prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)
stuff_chain = load_qa_chain(vertex_llm_text, chain_type="stuff", prompt=prompt)

# NOTE(review): the chain is fed `data[7:10]`, while the word-count cell above
# measures `data[:7]` -- confirm which slice is intended.
stuff_answer = stuff_chain(
    {"input_documents": data[7:10], "question": question}, return_only_outputs=True
)
pprint(stuff_answer)


### 2. MapReduce 

In [None]:
# Join the first seven documents purely to gauge how much text they contain.
# The loop variable is named `doc` so it cannot be confused with the
# `Path as p` alias imported at the top of the notebook.
context = "\n".join(str(doc.page_content) for doc in data[:7])

# The label reports words, so count whitespace-separated words;
# `len(context)` would report the number of characters instead.
print("The total words in the context: ", len(context.split()))

In [None]:
# Map step: this prompt is run against each input document, with that
# document's text substituted for `{context}`.
question_prompt_template = """
                    Answer the question as precise as possible using the provided context. \n\n
                    Context: \n {context} \n
                    Question: \n {question} \n
                    Answer:
                    """
question_prompt = PromptTemplate(
    template=question_prompt_template, input_variables=["context", "question"]
)

# Reduce step: the per-document answers are combined into one final answer.
# The combine prompt must use the placeholder name `summaries`.
# Fixes: the quotation mark around the fallback sentence is now closed, and the
# stray "?" that used to follow `{summaries}` has been removed.
combine_prompt_template = """Given the extracted content and the question, create a final answer.
If the answer is not contained in the context, say "answer not available in context" \n\n
Summaries: \n {summaries}\n
Question: \n {question} \n
Answer:
"""
combine_prompt = PromptTemplate(
    template=combine_prompt_template, input_variables=["summaries", "question"]
)

In [None]:
# Map-reduce QA chain: `question_prompt` is used for the per-document step and
# `combine_prompt` for the final combination. Intermediate steps are returned
# alongside the final answer.
map_reduce_chain = load_qa_chain(
    llm=vertex_llm_text,
    chain_type="map_reduce",
    question_prompt=question_prompt,
    combine_prompt=combine_prompt,
    return_intermediate_steps=True,
)

In [None]:
# Question asked of the chain (typo "elsen" corrected to "else").
question = "Is it acceptable to format a cv for someone else?"

# Run the map-reduce chain over the first 100 loaded documents.
map_reduce_outputs = map_reduce_chain({"input_documents": data[:100], "question": question})


In [None]:
# Display the raw chain result. The chain was built with
# `return_intermediate_steps=True`, so the intermediate steps are part of it.
map_reduce_outputs

### 3. Refine

In [None]:
# Refine step: the model receives the question, the answer produced so far
# (`existing_answer`) and more context (`context_str`), and may revise the answer.
# Fix: the quotation mark around the fallback sentence is now closed.
refine_prompt_template = """
    The original question is: \n {question} \n
    The provided answer is: \n {existing_answer}\n
    Refine the existing answer if needed with the following context: \n {context_str} \n
    Given the extracted content and the question, create a final answer.
    If the answer is not contained in the context, say "answer not available in context" \n\n
"""
refine_prompt = PromptTemplate(
    input_variables=["question", "existing_answer", "context_str"],
    template=refine_prompt_template,
)


# Initial step: produces the first answer from context alone, before any refinement.
initial_question_prompt_template = """
    Answer the question as precise as possible using the provided context only. \n\n
    Context: \n {context_str} \n
    Question: \n {question} \n
    Answer:
"""

initial_question_prompt = PromptTemplate(
    input_variables=["context_str", "question"],
    template=initial_question_prompt_template,
)

In [None]:
# Refine QA chain: `initial_question_prompt` produces the first answer and
# `refine_prompt` is used to revise it. Intermediate steps are returned
# alongside the final answer.
refine_chain = load_qa_chain(
    llm=vertex_llm_text,
    chain_type="refine",
    question_prompt=initial_question_prompt,
    refine_prompt=refine_prompt,
    return_intermediate_steps=True,
)

In [None]:
# Question asked of the chain (typo "elsen" corrected to "else").
question = "Is it acceptable to format a cv for someone else?"

# Run the refine chain over the first 20 loaded documents.
refine_outputs = refine_chain({"input_documents": data[:20], "question": question})


In [None]:
# Display the raw chain result. The chain was built with
# `return_intermediate_steps=True`, so the intermediate steps are part of it.
refine_outputs

### 4. Q&A with similarity search

In [None]:
# Concatenate every loaded document, separated by blank lines, then split the
# result into chunks (chunk_size=10000, no overlap) for embedding.
page_texts = [str(doc.page_content) for doc in data]
context_ss = "\n\n".join(page_texts)

text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=10000)
texts_ss = text_splitter.split_text(context_ss)


In [None]:
# Embed the chunks into a Chroma store, then expose it as a retriever.
chroma_store = Chroma.from_texts(texts=texts_ss, embedding=vertex_embeddings)
vector_index = chroma_store.as_retriever()


In [None]:
# Fetch the chunks the retriever considers relevant to `question`.
# NOTE: `question` here holds whatever an earlier cell assigned; it is only
# re-assigned in the cell that follows this one.
docs = vector_index.get_relevant_documents(question)


In [None]:
# Question asked of the chain (typo "elsen" corrected to "else").
# NOTE: `docs` was retrieved in the previous cell, using the value `question`
# held at that point.
question = "Is it acceptable to format a cv for someone else?"

# Run the map-reduce chain over the retrieved chunks only.
map_reduce_embeddings_outputs = map_reduce_chain(
    {"input_documents": docs, "question": question}
)


In [None]:
# Print only the final answer text from the chain result.
final_answer_text = map_reduce_embeddings_outputs["output_text"]
print(final_answer_text)


In [None]:
from vertexai.language_models import TextGenerationModel


In [None]:
import vertexai
from vertexai.language_models import TextGenerationModel

# Reuse the shared PROJECT_ID / REGION constants rather than repeating the
# literal project and region, so there is a single place to change them.
vertexai.init(project=PROJECT_ID, location=REGION)

# Generation settings passed as keyword arguments to `model.predict`.
parameters = {
    "candidate_count": 1,
    "max_output_tokens": 1024,
    "temperature": 0.2,
    "top_p": 0.8,
    "top_k": 40
}

# Minimal direct call to the model through the Vertex AI SDK (no LangChain).
model = TextGenerationModel.from_pretrained("text-bison")
response = model.predict(
    """Hi""",
    **parameters
)
print(f"Response from Model: {response.text}")

In [None]:
from google.cloud import aiplatform

# Reuse the shared PROJECT_ID / REGION constants rather than repeating the
# literal project and region, so there is a single place to change them.
aiplatform.init(project=PROJECT_ID, location=REGION)

In [None]:

# Report the installed versions of the libraries this notebook depends on.
from google.cloud import aiplatform
import vertexai
import langchain

for library_label, library_module in (
    ("Vertex AI SDK", aiplatform),
    ("LangChain", langchain),
):
    print(f"{library_label} version: {library_module.__version__}")