In [1]:
!sudo apt-get install -y pciutils
!curl https://ollama.ai/install.sh | sh
!pip install ultimate-sitemap-parser langchain-community tiktoken langchain-openai langchainhub langchain langgraph langchain-text-splitters langchain-pinecone bs4 nest_asyncio tavily-python

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libpci3 pci.ids
The following NEW packages will be installed:
  libpci3 pci.ids pciutils
0 upgraded, 3 newly installed, 0 to remove and 45 not upgraded.
Need to get 343 kB of archives.
After this operation, 1,581 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 pci.ids all 0.0~2022.01.22-1 [251 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libpci3 amd64 1:3.7.0-6 [28.9 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/main amd64 pciutils amd64 1:3.7.0-6 [63.6 kB]
Fetched 343 kB in 1s (437 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 3.)
debconf: falling back to frontend: Readline
debconf: unable to initializ

**Setting up code to run ollama in the background**

In [2]:
import os
import threading
import subprocess
import requests
import json

def ollama(startup_timeout=30.0):
    """Start ``ollama serve`` in the background and wait until it is reachable.

    The server is configured through two environment variables that are set on
    the current process (and therefore inherited by the child and by later
    ``!ollama ...`` shell commands): ``OLLAMA_HOST`` and ``OLLAMA_ORIGINS``.

    NOTE(review): ``0.0.0.0`` together with ``OLLAMA_ORIGINS='*'`` exposes the
    server beyond localhost; acceptable for a throwaway Colab VM only.

    Args:
        startup_timeout: Maximum number of seconds to wait for the server to
            answer on port 11434 before giving up.

    Returns:
        The ``subprocess.Popen`` handle of the server process, so callers can
        inspect or terminate it later.

    Raises:
        RuntimeError: If the server process exits before becoming reachable, or
            if it does not answer within ``startup_timeout`` seconds.
    """
    import time
    import urllib.request

    os.environ['OLLAMA_HOST'] = '0.0.0.0:11434'
    os.environ['OLLAMA_ORIGINS'] = '*'
    server = subprocess.Popen(["ollama", "serve"])

    deadline = time.monotonic() + startup_timeout
    while True:
        try:
            # Any successful HTTP response means the server is accepting connections.
            with urllib.request.urlopen("http://127.0.0.1:11434/", timeout=2):
                return server
        except OSError:
            # URLError / connection refused: not listening yet.
            pass

        if server.poll() is not None:
            raise RuntimeError(
                f"'ollama serve' exited with code {server.returncode} before becoming reachable"
            )
        if time.monotonic() >= deadline:
            raise RuntimeError(
                f"'ollama serve' did not become reachable within {startup_timeout} seconds"
            )
        time.sleep(0.5)

**Start Ollama Service**

In [3]:
# Run the launcher in a helper thread, then wait for that thread to finish so
# the server process has at least been spawned before the following cells
# issue `ollama` commands against it.
ollama_thread = threading.Thread(target=ollama)
ollama_thread.start()
ollama_thread.join()

**Pulling Llama 3 as the main LLM and nomic-embed-text as the embedding model**

In [4]:
# Download the two models used later in this notebook: `llama3` for the
# ChatOllama chains and `nomic-embed-text` for the OllamaEmbeddings.
# These commands talk to the `ollama serve` process, so it must already be running.
!ollama pull llama3
!ollama pull nomic-embed-text

[?25lpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠧ [?25h[?25l[2K[1Gpulling manifest ⠇ [?25h[?25l[2K[1Gpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest 
pulling 6a0746a1ec1a...   0% ▕▏    0 B/4.7 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 6a0746a1ec1a...   0% ▕▏    0 B/4.7 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 6a0746a1ec1a...   0% ▕▏    0 B/4.7 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 6a0746a1ec1a...   0% ▕▏    0 B/4.7 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 6a0746a1ec1a...   0% ▕▏    0 B/4.7 GB               

**Checking the model list**

In [None]:
# List the locally available models to confirm both pulls completed.
!ollama list

NAME                   	ID          	SIZE  	MODIFIED       
nomic-embed-text:latest	0a109f422b47	274 MB	46 seconds ago	
llama3:latest          	365c0bd3c000	4.7 GB	47 seconds ago	


**Begin the main section of the program:**

In [5]:
import os
from langchain_pinecone import PineconeVectorStore
from langchain_community.embeddings import OllamaEmbeddings
from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.output_parsers import StrOutputParser
from langchain import hub
from langchain_community.tools.tavily_search import TavilySearchResults
# SECURITY: API keys must never be committed to the notebook. They are taken
# from the environment and only prompted for (without echo) when missing.
# Any key that was ever hardcoded in this cell must be treated as leaked and
# rotated in the Pinecone / Tavily dashboards.
from getpass import getpass

for _secret_name in ('PINECONE_API_KEY', 'TAVILY_API_KEY'):
    if not os.environ.get(_secret_name):
        os.environ[_secret_name] = getpass(f"{_secret_name}: ")

**Building out the functions as nodes:**

In [6]:
def fetch_vectorstore(index_name="snowflake-docs-rag", embedding_model="nomic-embed-text"):
    """Open a Pinecone index as a vector store backed by Ollama embeddings.

    Args:
        index_name: Name of the Pinecone index to open. Defaults to the index
            this notebook was written against.
        embedding_model: Name of the Ollama model used to embed queries.
            NOTE(review): presumably this has to match the model that was used
            to populate the index — confirm before changing it.

    Returns:
        A ``PineconeVectorStore`` bound to ``index_name``.
    """
    embeddings = OllamaEmbeddings(model=embedding_model)
    return PineconeVectorStore(index_name=index_name, embedding=embeddings)



def format_docs(docs):
    """Join the ``page_content`` of every document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string; empty when ``docs`` is empty.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

# Grader node

def retrieval_grader():
    """Build the chain that grades a retrieved document against a question.

    The chain pipes a grading prompt into a ``llama3`` ChatOllama model
    (JSON output format, temperature 0) and parses the reply with
    ``JsonOutputParser``. The prompt takes the variables ``question`` and
    ``document`` and asks for a JSON object with a single ``score`` key
    holding 'yes' or 'no'.

    Returns:
        The composed runnable ``prompt | llm | JsonOutputParser()``.
    """
    json_llm = ChatOllama(model="llama3", format="json", temperature=0)

    grading_template = """You are a grader assessing relevance of a retrieved document to a user question. \n
    Here is the retrieved document: \n\n {document} \n\n
    Here is the user question: {question} \n
    If the document contains keywords related to the user question, grade it as relevant. \n
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \n
    Provide the binary score as a JSON with a single key 'score' and no premable or explanation."""

    grading_prompt = PromptTemplate(
        template=grading_template,
        input_variables=["question", "document"],
    )

    # Distinct local name: the original reused the function's own name here.
    return grading_prompt | json_llm | JsonOutputParser()

# Generator node
def rag_generator():
    """Build the answer-generation chain.

    Pulls the ``rlm/rag-prompt`` prompt from the LangChain hub, pipes it into
    a ``llama3`` ChatOllama model at temperature 0 and converts the reply to a
    plain string with ``StrOutputParser``.

    Returns:
        The composed runnable ``prompt | llm | StrOutputParser()``.
    """
    rag_prompt = hub.pull("rlm/rag-prompt")
    answer_llm = ChatOllama(model="llama3", temperature=0)
    return rag_prompt | answer_llm | StrOutputParser()


# Rewriter node
def question_rewrite():
    """Build the chain that rewrites a question for vector-store retrieval.

    The chain pipes a rewrite prompt into a ``llama3`` ChatOllama model at
    temperature 0 and returns the reply as a plain string.

    NOTE(review): ``input_variables`` declares "generation" as well as
    "question", but the template text only references ``{question}``.

    Returns:
        The composed runnable ``prompt | llm | StrOutputParser()``.
    """
    rewrite_llm = ChatOllama(model="llama3", temperature=0)

    rewrite_template = """You a question re-writer that converts an input question to a better version that is optimized \n
     for vectorstore retrieval. Look at the initial and formulate an improved question. \n
     Here is the initial question: \n\n {question}. Improved question with no preamble: \n """

    rewrite_prompt = PromptTemplate(
        template=rewrite_template,
        input_variables=["generation", "question"],
    )

    return rewrite_prompt | rewrite_llm | StrOutputParser()















**Building a graph with these nodes:**

In [None]:
from typing import Any, List

from typing_extensions import Dict, TypedDict


class GraphState(TypedDict):
    """
    State passed between the nodes of the graph.

    The state has a single field, ``keys``, a dictionary that is expected to
    carry the following entries:
        question: question
        generation: LLM generation
        web_search: whether to add search
        documents: list of documents
    """

    # `Any` (from typing) is the "unconstrained value" annotation; the builtin
    # function `any` is not a type and was silently accepted by mistake.
    keys: Dict[str, Any]

In [8]:
def main(question="FOR LOOP"):
    """Run one retrieve-then-generate pass and print the generated answer.

    Documents are retrieved from the Pinecone vector store for ``question``,
    concatenated with ``format_docs`` and passed as context to the RAG chain.
    The grader, question re-writer and web-search tool are constructed or
    sketched here but are not wired into the flow yet.

    Args:
        question: The user question to retrieve documents for and answer.

    Returns:
        The answer string produced by the RAG chain.
    """
    vectorstore = fetch_vectorstore()
    retriever = vectorstore.as_retriever()

    # Retrieval grader: built but not applied to the retrieved documents yet.
    grader = retrieval_grader()

    docs = retriever.invoke(question)
    print("Retrieved documents: ", len(docs))
    #doc_txt = docs[1].page_content
    #print(grader.invoke({"question": question, "document": doc_txt}))

    formatted_docs = format_docs(docs)
    rag = rag_generator()
    generation = rag.invoke({"context": formatted_docs, "question": question})
    print(generation)

    #question_rewriter = question_rewrite()
    #improved_question = question_rewriter.invoke({"question": question, "generation": generation})
    #print(improved_question)

    # The tool's result-count field is `max_results`; `k` is not a field of
    # TavilySearchResults, so `k=3` did not limit the results to three.
    web_search_tool = TavilySearchResults(max_results=3)

    return generation


if __name__ == "__main__":
    main()


ValueError: Error raised by inference endpoint: HTTPConnectionPool(host='localhost', port=11434): Max retries exceeded with url: /api/embeddings (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x78f00ed4f6d0>: Failed to establish a new connection: [Errno 111] Connection refused'))

**Streamlit Integration**


In [None]:
! pip install streamlit -q
!wget -q -O - ipv4.icanhazip.com

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m51.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m28.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m93.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.0/83.0 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25h34.80.58.154


In [None]:
%%writefile app.py
import streamlit as st
st.write('# Streamlit calculator')
number1= st.number_input('number 1')
number2 = st.number_input('number 2')
num3 = number1+number2
st.write('# Answer is ',num3)

Writing app.py


In [None]:
# Start the Streamlit app in the background (`&`) and, in the same shell,
# run localtunnel against port 8501 so the app gets a public URL.
!streamlit run app.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.80.58.154:8501[0m
[0m
[K[?25hnpx: installed 22 in 3.169s
your url is: https://fuzzy-apples-take.loca.lt
[34m  Stopping...[0m
^C
