# RAG - 6. LLM(ollama)

> 1) API방식으로 사용하는 huggingface model
> 2) 목적 : GPU서버에 설치하고, 다른 client가 접속해서 API로 사용, ollama의 기능을 배우자.
> 3) 확장해보기 : 다양한 model을 설치
> 4) 선행작업 : HF_TOKEN 등록후 획득



```
!pip install ollama langchain beautifulsoup4 chromadb
 ollama pull llama3
 ollama pull nomic-embed-text
```


> 다음 2개의 함수를 비교해보자,  
> 1) from langchain_ollama import ChatOllama  --> host지정이 가능  
>
> 2) import ollama  
> from langchain_ollama import OllamaEmbeddings


In [1]:
from dotenv import load_dotenv
load_dotenv("/home/mhkwon/.env")

import os

#HF_TOKEN = "get your token in http://hf.co/settings/tokens"
HF_TOKEN = os.getenv('HF_TOKEN')
print(HF_TOKEN)

#from huggingface_hub import login
#hf_token = login(token=HF_TOKEN, add_to_git_credential=True)

# 에러가 나면, linux에서 다음 명령어를 실행
# git config --global credential.helper store

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


In [2]:
# 문서(url)을 읽어서, 쪼개는 부분

# 다음 url은 각자 변경해서 테트를 해본다.
website = 'https://www.marktechpost.com/2024/04/21/coconut-a-high-quality-large-scale-dataset-for-next-gen-segmentation-models/'


import bs4
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader

# Load the data
loader = WebBaseLoader(
    web_paths=(f"{website}",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("td-post-content tagdiv-type", "td-post-header", "td-post-title")
        )
    ),
)
docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)



USER_AGENT environment variable not set, consider setting it to identify your requests.


In [5]:
len(splits)

8

In [4]:
splits[1]

Document(metadata={'source': 'https://www.marktechpost.com/2024/04/21/coconut-a-high-quality-large-scale-dataset-for-next-gen-segmentation-models/'}, page_content='RedditVoteFlipShareTweet0 Shares\nComputer vision has advanced significantly in recent decades, thanks in large part to comprehensive benchmark datasets like COCO. However, nearly a decade after its introduction, COCO’s suitability as a benchmark for modern AI models is being questioned. Its annotations may contain biases and nuances reflecting the early stages of computer vision research. With model performance plateauing on COCO, there are concerns about overfitting to the dataset’s specific characteristics, potentially limiting real-world applicability.')

In [35]:
#!pip install langchain_ollama

MODEL_NAME = "llama3"

from langchain_ollama import OllamaEmbeddings
#from langchain_community.embeddings import OllamaEmbeddings #OLD

from langchain_community.vectorstores import Chroma
import ollama


# Create Ollama embeddings and vector store
embeddings = OllamaEmbeddings(model=MODEL_NAME)
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

# Call Ollama Llama3 model
def ollama_llm(question, context):
    formatted_prompt = f"Question: {question}\n\nContext: {context}"
    response = ollama.chat(model=MODEL_NAME, 
                           messages=[{'role': 'user', 'content': formatted_prompt}])
    return response['message']['content']

# RAG Setup
retriever = vectorstore.as_retriever()

def combine_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


def rag_chain(question):
    retrieved_docs = retriever.invoke(question)
    formatted_context = combine_docs(retrieved_docs)
    
    return ollama_llm(question, formatted_context)


In [36]:

# Use the RAG App
Question = "What is COCONut?"
result = rag_chain(f"{Question}")

print(f"Question : {Question}")
print(f"Response : {result}")

Question : What is COCONut?
Response : COCONut is a novel, large-scale universal segmentation dataset proposed by researchers to modernize COCO segmentation. It features human-verified mask labels for 383K images and uses an innovative assisted-manual annotation pipeline that leverages neural networks to augment human annotators.


In [37]:

# Use the RAG App
Question = "독도는 어디에 있죠?"
result = rag_chain(f"{Question}")

print(f"Question : {Question}")
print(f"Response : {result}")

Question : 독도는 어디에 있죠?
Response : The question is "독도는 어디에 있죠?" which means "Where is Dokdo?"

However, based on the provided text, there seems to be no mention of Dokdo or any geographical location related to it. The text only talks about a novel dataset for COCO segmentation called COCONut and its features.

If you meant something else, please clarify! 😊


In [39]:

ollama.show(model=MODEL_NAME)


{'license': 'META LLAMA 3 COMMUNITY LICENSE AGREEMENT\n\nMeta Llama 3 Version Release Date: April 18, 2024\n“Agreement” means the terms and conditions for use, reproduction, distribution and modification of the Llama Materials set forth herein.\n\n“Documentation” means the specifications, manuals and documentation accompanying Meta Llama 3 distributed by Meta at https://llama.meta.com/get-started/.\n\n“Licensee” or “you” means you, or your employer or any other person or entity (if you are entering into this Agreement on such person or entity’s behalf), of the age required under applicable laws, rules or regulations to provide legal consent and that has legal authority to bind your employer or such other person or entity if you are entering in this Agreement on their behalf.\n\n“Meta Llama 3” means the foundational large language models and software and algorithms, including machine-learning model code, trained model weights, inference-enabling code, training-enabling code, fine-tuning

In [41]:

ollama.show(model='mistral:7b')['details']

#ollama.show(model='mistral:7b')['template']
#ollama.show(model='mistral:7b')['model_info']
#ollama.show(model='mistral:7b')['modified_at']


{'parent_model': '',
 'format': 'gguf',
 'family': 'llama',
 'families': ['llama'],
 'parameter_size': '7.2B',
 'quantization_level': 'Q4_0'}

In [38]:
ollama.list()

{'models': [{'name': 'llama3.2:latest',
   'model': 'llama3.2:latest',
   'modified_at': '2024-11-22T23:20:20.179528418+09:00',
   'size': 2019393189,
   'digest': 'a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72',
   'details': {'parent_model': '',
    'format': 'gguf',
    'family': 'llama',
    'families': ['llama'],
    'parameter_size': '3.2B',
    'quantization_level': 'Q4_K_M'}},
  {'name': 'mistral:7b',
   'model': 'mistral:7b',
   'modified_at': '2024-11-18T23:22:35.311006724+09:00',
   'size': 4113301824,
   'digest': 'f974a74358d62a017b37c6f424fcdf2744ca02926c4f952513ddf474b2fa5091',
   'details': {'parent_model': '',
    'format': 'gguf',
    'family': 'llama',
    'families': ['llama'],
    'parameter_size': '7.2B',
    'quantization_level': 'Q4_0'}},
  {'name': 'mxbai-embed-large:latest',
   'model': 'mxbai-embed-large:latest',
   'modified_at': '2024-11-18T23:21:03.45707274+09:00',
   'size': 669615493,
   'digest': '468836162de7f81e041c43663fedbbba921dc

In [23]:
!curl http://localhost:11434/api/chat -d '{\
    "model": "llama3.2",\
    "messages": [\
        { "role": "user", "content": "why is the sky blue?" }\
    ]\
}'

{"model":"llama3.2","created_at":"2024-11-26T14:17:06.155816439Z","message":{"role":"assistant","content":"The"},"done":false}
{"model":"llama3.2","created_at":"2024-11-26T14:17:06.161813713Z","message":{"role":"assistant","content":" sky"},"done":false}
{"model":"llama3.2","created_at":"2024-11-26T14:17:06.169415843Z","message":{"role":"assistant","content":" appears"},"done":false}
{"model":"llama3.2","created_at":"2024-11-26T14:17:06.175362903Z","message":{"role":"assistant","content":" blue"},"done":false}
{"model":"llama3.2","created_at":"2024-11-26T14:17:06.181296699Z","message":{"role":"assistant","content":" because"},"done":false}
{"model":"llama3.2","created_at":"2024-11-26T14:17:06.187216939Z","message":{"role":"assistant","content":" of"},"done":false}
{"model":"llama3.2","created_at":"2024-11-26T14:17:06.19319106Z","message":{"role":"assistant","content":" a"},"done":false}
{"model":"llama3.2","created_at":"2024-11-26T14:17:06.199146687Z","message":{"role":"assistant","con

In [32]:
!curl http://localhost:11434/api/generate -d '{\
    "model": "llama3.2",\
    "prompt": "Why is the sky blue?"\
}'

{"model":"llama3.2","created_at":"2024-11-26T14:22:15.12840112Z","response":"The","done":false}
{"model":"llama3.2","created_at":"2024-11-26T14:22:15.134466728Z","response":" sky","done":false}
{"model":"llama3.2","created_at":"2024-11-26T14:22:15.142060255Z","response":" appears","done":false}
{"model":"llama3.2","created_at":"2024-11-26T14:22:15.148019691Z","response":" blue","done":false}
{"model":"llama3.2","created_at":"2024-11-26T14:22:15.153947235Z","response":" because","done":false}
{"model":"llama3.2","created_at":"2024-11-26T14:22:15.159886994Z","response":" of","done":false}
{"model":"llama3.2","created_at":"2024-11-26T14:22:15.165844104Z","response":" a","done":false}
{"model":"llama3.2","created_at":"2024-11-26T14:22:15.171823097Z","response":" phenomenon","done":false}
{"model":"llama3.2","created_at":"2024-11-26T14:22:15.177544841Z","response":" called","done":false}
{"model":"llama3.2","created_at":"2024-11-26T14:22:15.183249383Z","response":" scattering","done":false}

In [55]:
# langchain 함수 사용

from langchain_ollama import ChatOllama

llm = ChatOllama(
    base_url="localhost",  # host 주소
    model="llama3.2",
    temperature=0.7,
    top_k=5,
    # other params...
)

from langchain_core.messages import AIMessage

messages = [
    (
        "system",
        "You are a helpful assistant that translates English to French. Translate the user sentence.",
    ),
    ("human", "I love programming."),
]
ai_msg = llm.invoke(messages)
ai_msg

AIMessage(content='Je aime le programmation.', additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2024-11-26T14:50:04.84077206Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 87149265, 'load_duration': 19757971, 'prompt_eval_count': 45, 'prompt_eval_duration': 28354000, 'eval_count': 7, 'eval_duration': 36818000}, id='run-dda13be6-96f8-4b69-93ba-3c7661d35ebe-0', usage_metadata={'input_tokens': 45, 'output_tokens': 7, 'total_tokens': 52})

In [53]:
print(ai_msg.content)

J'aime le programmement.

(Note: "le programmement" is the masculine form of the noun, while "le programme" would be used if referring to a specific program. The feminine form would be "le programmation")


In [56]:
# prompt

from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant that translates {input_language} to {output_language}.",
        ),
        ("human", "{input}"),
    ]
)

chain = prompt | llm
chain.invoke(
    {
        "input_language": "English",
        "output_language": "German",
        "input": "I love programming.",
    }
)


AIMessage(content='Das Programmieren ist für dich ein großes Hobby! (That programming is your big hobby!)\n\nAlternatively:\n\nIch liebe das Programmieren sehr. (I love programming very much.)\n\nOr, if you want to sound more casual:\n\nProgrammieren macht mir Spaß! (Programming is fun for me!)', additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2024-11-26T14:50:36.096542128Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 405822706, 'load_duration': 19509550, 'prompt_eval_count': 40, 'prompt_eval_duration': 26789000, 'eval_count': 61, 'eval_duration': 357120000}, id='run-cbd7fe28-d814-4b63-9bc2-edd4f2e82d4b-0', usage_metadata={'input_tokens': 40, 'output_tokens': 61, 'total_tokens': 101})

In [57]:
# tool calling


from typing import List

from langchain_core.tools import tool
from langchain_ollama import ChatOllama


@tool
def validate_user(user_id: int, addresses: List[str]) -> bool:
    """Validate user using historical addresses.

    Args:
        user_id (int): the user ID.
        addresses (List[str]): Previous addresses as a list of strings.
    """
    return True


llm = ChatOllama(
    base_url="localhost",  # host 주소
    model="llama3.2",
    temperature=0.7,
    top_k=5,
).bind_tools([validate_user])

result = llm.invoke(
    "Could you validate user 123? They previously lived at "
    "123 Fake St in Boston MA and 234 Pretend Boulevard in "
    "Houston TX."
)
result.tool_calls

[{'name': 'validate_user',
  'args': {'addresses': '["123 Fake St", "234 Pretend Boulevard"]',
   'user_id': '123'},
  'id': '34ecc536-cbff-431b-9afc-d4c325185ea8',
  'type': 'tool_call'}]