In [1]:
from ibm_watsonx_ai.foundation_models import Model
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams

import os
from dotenv import load_dotenv

load_dotenv()

api_key = os.getenv("API_KEY", None)
project_id = os.getenv("PROJECT_ID", None)

creds = {
    "url"    : "https://us-south.ml.cloud.ibm.com",
    "apikey" : api_key
}

params = {
    GenParams.DECODING_METHOD:"greedy",
    GenParams.MAX_NEW_TOKENS:500,
    GenParams.MIN_NEW_TOKENS:1,
    GenParams.TOP_K:50,
    GenParams.TOP_P:1,
    # GenParams.STOP_SEQUENCES:["<eof>"],
}

llm = Model("meta-llama/llama-2-70b-chat",creds, params,project_id)

In [2]:
import nest_asyncio

nest_asyncio.apply()

# from llama_index import (
#     SimpleDirectoryReader,
#     VectorStoreIndex,
#     ServiceContext
# )

import llama_index
from langchain.embeddings import HuggingFaceEmbeddings
# from llama_index.evaluation import DatasetGenerator

In [3]:
from ibm_watson_machine_learning.foundation_models.extensions.langchain import WatsonxLLM
# from langchain.embeddings import HuggingFaceEmbeddings
# from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.llms.langchain import LangChainLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

embedding_llm = HuggingFaceEmbedding(model_name='sentence-transformers/all-MiniLM-L6-v2')

# embeddings = LangchainEmbedding(
#     HuggingFaceEmbeddings(model_name="paraphrase-multilingual-MiniLM-L12-v2")
#     )
model = WatsonxLLM(model=llm)


llm = LangChainLLM(llm=model)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

chunk_size=1500
chunk_overlap = 200

loader = PyPDFLoader('Maintenance-Manual.pdf')
data = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size= chunk_size, chunk_overlap=chunk_overlap)
docs = text_splitter.split_documents(data)

In [5]:
import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")
for token in doc:
    print(token.text, token.pos_, token.dep_)

Apple PROPN nsubj
is AUX aux
looking VERB ROOT
at ADP prep
buying VERB pcomp
U.K. PROPN dobj
startup NOUN dep
for ADP prep
$ SYM quantmod
1 NUM compound
billion NUM pobj


In [19]:
from llama_index.core import Document
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex
from llama_index.core.llama_dataset.generator import RagDatasetGenerator
from llama_index.llms.langchain import LangChainLLM

eval_docs = [Document(text=docs[0].page_content)]

Settings.llm = llm
Settings.chunk_size = chunk_size
Settings.embed_model = embedding_llm

data_generator = RagDatasetGenerator.from_documents(eval_docs)
eval_questions = data_generator.generate_questions_from_nodes()

llm = LangChainLLM(llm=model)
vector_index = VectorStoreIndex.from_documents(eval_docs)

query_engine = vector_index.as_query_engine()

for question in eval_questions.examples:
    print(question.query)
    response = query_engine.query(question.query)
    response = query_engine.query("show the welcome statement")
    print(response)

Please generate 3 questions based on the provided context information.

(Note: The answer should be based on the context information provided)
