# Simple Gen AI App Using LangChain and Google Gen AI

In [2]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import WebBaseLoader
load_dotenv()
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
LANGSMITH_API_KEY = os.getenv('LANGCHAIN_API_KEY')

USER_AGENT environment variable not set, consider setting it to identify your requests.


## Data Loading from Website

In [4]:
loaders=WebBaseLoader('https://documentation.redwood.com/extensions/Content/AzureDataFactory/AzureDataFactory-2.0.0.1.htm')
docs = loaders.load()

In [21]:
# Chunk the docs with RecursiveCharacterTextSplitter
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=500)
docs_split = text_splitter.split_documents(docs)
docs_split

[Document(metadata={'source': 'https://documentation.redwood.com/extensions/Content/AzureDataFactory/AzureDataFactory-2.0.0.1.htm', 'title': 'Azure Data Factory Connector 2.0.0.1', 'language': 'en-us'}, page_content='Azure Data Factory Connector 2.0.0.1\n\nGet instant help with our new Documentation AI Assistant. Select the Redwood logo in the bottom right to begin.\r\n\t\t\n \n\n\n\n\n\n\n\n\n\nSkip To Main Content\n\n\n\nAccount\nSettings\nLogout\n\n\n\n\n\n\n\n\n\n\n\n\nplaceholder\n\n\n\n\nAccount\nSettings\nLogout\n\n\n\n\n\n\n\n\nFilter: \n\n\n\n\n\nAll Files\n\n\n\n\n\n\nSubmit Search\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nYou are here: \n\n\n\n\n\nAzure Data Factory Connector 2.0.0.1\nPrerequisites\n\nRunMyJobs 9.2.9 or later\nConnection Management Extension 1.0.0.3 or later. Note that the Connection Management Extension will be installed or updated automatically if necessary when you install this Extension.\nAzure Connections Extension (automatically installed by Cat

In [26]:
#convert to vector with Ollama embeddings
from langchain_community.embeddings import OllamaEmbeddings
embeddings =  (OllamaEmbeddings(model="gemma:2b"))

In [24]:
# store to vector database
from langchain_community.vectorstores import FAISS
vector_store = FAISS.from_documents(docs_split,embeddings)

In [27]:
# similiarity search
query = 'install the Azure Data Factory Connector'
results = vector_store.similarity_search(query)
results[0].page_content

'Resource Group Name\nFactory Name\n\n\n\nContents\n\n\n\nObject Type\nName\n\n\n\n\nFolder\n\nGLOBAL.Redwood.REDWOOD.DataFactory\n\n\nJob Definition\nREDWOOD.Redwood_DataFactory_ImportJobTemplate\n\n\nJob Definition\nREDWOOD.Redwood_DataFactory_ShowPipelines\n\n\nJob Definition\nREDWOOD.Redwood_DataFactory_RunPipeline\n\n\nJob Definition\nREDWOOD.Redwood_DataFactory_Template\n\n\nLibrary\nREDWOOD.DataFactory\n\n\nJob Server Service\nREDWOOD.ServiceForRedwood_DataFactory\n\n\n\nProcedures\nRunning Data Factory Processes\nThe Resource Group name is defined on the Azure Subscription.\nFinding Data Factory Pipelines\nTo retrieve the list of pipelines available for scheduling, go to the Redwood_DataFactory Folder, navigate to Folders > Redwood_DataFactory > DataFactory_ShowPipelines, and run it.\n\n\n\nSelect a Connection, the Resource Group Name and the Factory Name you want to list the pipelines from. You can filter the list by adding a Job Name filter.'

In [40]:
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", api_key=GOOGLE_API_KEY)

  from .autonotebook import tqdm as notebook_tqdm


In [41]:
# retrieval chain and document chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template(
    """
    Answer the question based on provided context below
    <context>
    {context}
    </context>

    """
)
document_chain = create_stuff_documents_chain(llm, prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the question based on provided context below\n    <context>\n    {context}\n    </context>\n\n    '), additional_kwargs={})])
| ChatGoogleGenerativeAI(model='models/gemini-1.5-flash', google_api_key=SecretStr('**********'), client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x000001B3CB327210>, default_metadata=())
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, config_factories=[])

In [42]:
from langchain_core.documents import Document
document_chain.invoke(
    {
        "input":"Setup Azure Data Factory Connector",
        "context":[Document(page_content="To install the Azure Data Factory Connector, click its tile in the Catalog, select the version you want, and then click Install <version number>. Create an Microsoft Entra (Azure AD) Connection. Note: The Job Server for the Connector must have the ServiceForRedwood_DataFactory Job Server service. To use the Connector, you must first create an app registration with a service principle in Azure Active Directory (see https://docs.microsoft.com/en-gb/azure/active-directory/develop/howto-create-service-principal-portal#register-an-application-with-azure-ad-and-create-a-service-principal). This client application must be assigned the Data Factory Contributor permission. Make note of the following settings from the Data Factory: Resource Group Name Factory Name")]
    }
)

'To install the Azure Data Factory Connector, locate its tile in the Catalog, choose a version, and click "Install <version number>".  You\'ll then need to create a Microsoft Entra (Azure AD) connection.  The Connector\'s Job Server requires the "ServiceForRedwood_DataFactory" Job Server service.  Before using the connector, register an application with a service principal in Azure Active Directory (instructions at https://docs.microsoft.com/en-gb/azure/active-directory/develop/howto-create-service-principal-portal#register-an-application-with-azure-ad-and-create-a-service-principal), assigning it the "Data Factory Contributor" permission. Finally, record the Resource Group Name and Factory Name from your Data Factory settings.'

In [43]:
# using retriver chain
from langchain.chains import create_retrieval_chain
retriver = vector_store.as_retriever()
retriver_chain = create_retrieval_chain(retriver,document_chain)

In [44]:
retriver_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001B37FC99050>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the question based on provided context below\n    <context>\n    {context}\n    </context>\n\n    '), additional_kwargs={})])
          

In [None]:
## get the answer from LLM
response = retriver_chain.invoke(
    {'input': 'How to install the Azure Data Factory Connector on Redwood My Job?'}
)

In [49]:
response['answer']

'Based on the provided text, to install the Azure Data Factory Connector, you must:\n\n1.  Click its tile in the Catalog.\n2.  Select the desired version (e.g., 2.0.0.1).\n3.  Click "Install <version number>".\n\nAfter installation, you need to create a Microsoft Entra (Azure AD) Connection.  The Job Server must also have the `ServiceForRedwood_DataFactory` Job Server service.'