# LLM Models

> Helper functions for creating LLM chat clients and text embeddings (Azure OpenAI and Ollama)

In [None]:
#| default_exp policy/helper/llm_functions

In [None]:
#| export
from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
from app.configs.settings import settings
from langchain_ollama import ChatOllama, OllamaEmbeddings

In [None]:
#| export
def init_azure_openai_llm():
    """Build an `AzureChatOpenAI` chat client from the project settings.

    The API version, deployment id, API key and endpoint are all read from
    `settings.common_secrets`. The client is created with `temperature=0`,
    `max_tokens=4096`, `timeout=None` and `max_retries=2`.
    """
    secrets = settings.common_secrets
    return AzureChatOpenAI(
        azure_endpoint=secrets.azure_openai_endpoint,
        azure_deployment=secrets.azure_openai_deployment_id,
        openai_api_version=secrets.azure_openai_api_version,
        api_key=secrets.azure_openai_api_key,
        temperature=0,
        max_tokens=4096,
        timeout=None,
        max_retries=2,
    )

In [None]:
# Smoke check: build the Azure OpenAI chat client and ask it a trivial question.
# The trailing expression is the cell output (the text content of the reply).
# NOTE(review): running this cell sends a request through the configured Azure
# endpoint, so it needs valid settings and network access.
llm = init_azure_openai_llm()
llm.invoke("what is 4+5").content

'4 + 5 equals 9.'

In [None]:
#| export
def init_ollama_llm(
    base_url="http://135.232.123.7:11434",
    model="llama3.3:70b-instruct-q8_0",
):
    """Build a `ChatOllama` chat client.

    Args:
        base_url: Address of the Ollama server to talk to. The default is the
            server this helper was previously hard-wired to.
        model: Ollama model tag to use. The default is the tag this helper was
            previously hard-wired to.

    Returns:
        The `ChatOllama` instance created for `model` at `base_url`.
    """
    return ChatOllama(base_url=base_url, model=model)


In [None]:
# Smoke check: build the Ollama chat client and ask it a trivial question.
# The trailing expression is the cell output (the text content of the reply).
# NOTE(review): running this cell invokes the model on the Ollama server that
# `init_ollama_llm` points at, so that server must be reachable.
llm = init_ollama_llm()
llm.invoke("what is 4+5").content

'4 + 5 = 9'

In [None]:
#| export
# TODO: rename this function to init_llm; it selects between the 'Ollama' and 'Azure' backends.
def init_azure_llm(type = 'Ollama'):
    """Return a chat model for the backend named by `type`.

    `'Ollama'` (the default) delegates to `init_ollama_llm()` and `'Azure'`
    delegates to `init_azure_openai_llm()`. The comparison is exact, so any
    other value (including different casing) yields `None`.
    """
    llm = None
    if type == 'Ollama':
        llm = init_ollama_llm()
    elif type == 'Azure':
        llm = init_azure_openai_llm()
    return llm

In [None]:
# Smoke check of the dispatcher: with no argument it uses its default
# backend ('Ollama'). The trailing expression is the cell output.
llm = init_azure_llm()
llm.invoke("what is 4+5").content

'4 + 5 = 9'

In [None]:
#| export 
def get_ollama_embedding(
    array,
    base_url="http://135.232.123.7:11434",
    model="llama3.3:70b-instruct-q8_0",
):
    """Embed a list of strings with a model served by Ollama.

    Args:
        array: List of strings to embed.
        base_url: Address of the Ollama server. The default is the server this
            helper was previously hard-wired to.
        model: Ollama model tag used to produce the embeddings. The default is
            the tag this helper was previously hard-wired to.

    Returns:
        The result of `OllamaEmbeddings.embed_documents(array)`.
    """
    embeddings = OllamaEmbeddings(base_url=base_url, model=model)
    # Embed the whole list in a single call.
    return embeddings.embed_documents(array)

In [None]:
# Embed two short strings with the Ollama backend and summarise the result:
# how many vectors came back, their dimensionality, and the first five
# components of each vector.
# NOTE(review): the saved output of this cell shows an all-zero second vector;
# confirm the configured model produces usable embeddings.
sample_data = ['Text 1', 'Text 2']
results = get_ollama_embedding(sample_data)
print("No of vectors",len(results)," of ",len(results[0]),'size')
print("Vector 1",results[0][:5])
print("Vector 2",results[1][:5])  # label fixed: this line prints the second vector

No of vectors 2  of  8192 size
Vector 1 [0.00090003916, -0.008852661, -0.0074942918, 0.002944415, 0.009286398]
Vector 1 [0.0, 0.0, 0.0, 0.0, 0.0]


In [None]:
#| export
def get_azure_openai_embedding(array):  # list of strings to embed
    """Embed a list of strings with the Azure OpenAI `text-embedding-3-large` deployment.

    The API key and endpoint are read from `settings.common_secrets`.
    Returns the result of `AzureOpenAIEmbeddings.embed_documents(array)`.
    """
    secrets = settings.common_secrets
    embedder = AzureOpenAIEmbeddings(
        azure_endpoint=secrets.azure_openai_endpoint,
        api_key=secrets.azure_openai_api_key,
        azure_deployment="text-embedding-3-large",
    )
    # The whole list is embedded in a single `embed_documents` call.
    return embedder.embed_documents(array)

In [None]:
# Embed two short strings with the Azure OpenAI backend and summarise the
# result: how many vectors came back, their dimensionality, and the first
# five components of each vector.
sample_data = ['Text 1', 'Text 2']
results = get_azure_openai_embedding(sample_data)
print("No of vectors",len(results)," of ",len(results[0]),'size')
print("Vector 1",results[0][:5])
print("Vector 2",results[1][:5])  # label fixed: this line prints the second vector

No of vectors 2  of  3072 size
Vector 1 [0.016161445528268814, 0.0029149053152650595, -0.0036840096581727266, -0.00524016423150897, 0.026518717408180237]
Vector 1 [0.029930606484413147, 0.020368576049804688, -0.016199447214603424, -0.011128045618534088, 0.03714879974722862]


Applying cosine similarity

In [None]:
#| eval: false
# Compare the embeddings of two paraphrased sentences.
from sklearn.metrics.pairwise import cosine_distances
sample_data = [
    'Information stored on user endpoint devices should be protected through proper security configurations and device management policies.', 
    'Ensure that information stored on user devices is protected with security configurations and policies.']
results = get_azure_openai_embedding(sample_data)
# Pairwise cosine distances between the embedding vectors.
cosine_dist = cosine_distances(results)
# Cosine similarity is 1 - cosine distance; the off-diagonal entries of the
# displayed matrix are the similarity between the two sentences.
1 - cosine_dist

array([[1.      , 0.794463],
       [0.794463, 1.      ]])

In [None]:
#| hide
import unittest
from unittest.mock import patch

@patch('langchain_openai.AzureOpenAIEmbeddings.embed_documents')
def test_get_openai_embedding(mock_embed_documents):
    "Check that `get_azure_openai_embedding` passes its input to `embed_documents` and returns that call's result"
    texts = ['Text 1', 'Text 2']
    fake_vectors = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]

    # Replace the embedding call with a canned answer.
    mock_embed_documents.return_value = fake_vectors

    result = get_azure_openai_embedding(texts)
    print(result)

    # The helper must hand back exactly what `embed_documents` produced ...
    assert result == [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
    # ... and must have called it exactly once with the input list.
    mock_embed_documents.assert_called_once_with(texts)

# Run the test immediately when the cell executes; a failed assertion raises here.
test_get_openai_embedding()


[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]


In [None]:
#| export
def get_openai_embedding(array,type = 'Ollama'):#Either 'Azure' or 'Ollama'
    """Embed `array` with the backend named by `type`.

    `'Ollama'` (the default) delegates to `get_ollama_embedding(array)` and
    `'Azure'` delegates to `get_azure_openai_embedding(array)`. The comparison
    is exact, so any other value yields `None`.
    """
    embedding = None
    if type == 'Ollama':
        embedding = get_ollama_embedding(array)
    elif type == 'Azure':
        embedding = get_azure_openai_embedding(array)
    return embedding

In [None]:
# Embed two short strings through the dispatcher (default backend: 'Ollama')
# and summarise the result: how many vectors came back, their dimensionality,
# and the first five components of each vector.
sample_data = ['Text 1', 'Text 2']
results = get_openai_embedding(sample_data)
print("No of vectors",len(results)," of ",len(results[0]),'size')
print("Vector 1",results[0][:5])
print("Vector 2",results[1][:5])  # label fixed: this line prints the second vector

No of vectors 2  of  8192 size
Vector 1 [0.0073995544, -0.07278099, -0.06161334, 0.024207123, 0.076346904]
Vector 1 [0.009282724, 0.00527284, -0.0050344, -0.002213817, 0.013588943]


In [None]:
#| hide
# Run nbdev's export step for the project's notebooks.
import nbdev; nbdev.nbdev_export()