## Example of querying the Ollama Inference server with Mistral-7b, Langchain and a custom Prompt

### Set the inference server URL (replace with your own address)

In [None]:
# Base URL of the Ollama server; it is passed as `base_url` when the LLM is created below.
inference_server_url = "http://ollama.ic-shared-llm.svc.cluster.local:11434"

## Preparation

In [None]:
!pip install -q langchain==0.1.14

In [None]:
# Imports
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import Ollama
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.prompts import PromptTemplate

#### Create the LLM instance

In [None]:
# LLM client for the "mistral" model served at `inference_server_url`.
# A StreamingStdOutCallbackHandler is attached as the only callback.
llm = Ollama(
    # Where to send requests and how to surface the output
    model="mistral",
    base_url=inference_server_url,
    callbacks=[StreamingStdOutCallbackHandler()],
    # Generation / sampling settings
    num_predict=512,
    temperature=0.01,
    top_p=0.92,
    repeat_penalty=1.03,
)

#### Create the Prompt

In [None]:
# Prompt text: a system message followed by the running conversation.
# `{history}` and `{input}` are the two placeholders declared as input
# variables of the PromptTemplate built right after the string.
template = """<s>[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always be as helpful as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<</SYS>>

Current conversation:
{history}
Human: {input}
AI:
[/INST]
"""
PROMPT = PromptTemplate(
    template=template,
    input_variables=["history", "input"],
)

#### And finally some memory for the conversation

In [None]:
# Conversation memory object; it is handed to the chains created below.
memory = ConversationBufferMemory()

## First example, fully verbose mode

#### Create the Chain using the different components

In [None]:
# verbose=True is deliberate: it lets you see the prompt and the history
# coming from the buffer memory on each call.
conversation = ConversationChain(
    llm=llm,
    memory=memory,
    prompt=PROMPT,
    verbose=True,
)

#### Let's talk...

In [None]:
# Opening question of the first example.
# The trailing semicolon keeps the notebook from echoing predict()'s return value
# (NOTE(review): presumably because the streaming callback already prints the reply - confirm).
first_input = "Describe Paris in 100 words or less."
conversation.predict(input=first_input);

In [None]:
# This is a follow-up question without context to showcase the conversation memory:
# no city is named here, so a sensible answer has to rely on the history
# kept from the previous call.
second_input = "Is there a river?"
conversation.predict(input=second_input);

## Second example, no verbosity

In [None]:
# Verbose mode is set to False this time, so the prompt and the history from
# the buffer memory are not printed. The same `memory` object as in the
# first example is reused.
conversation2 = ConversationChain(llm=llm,
                                 prompt=PROMPT,
                                 verbose=False,
                                 memory=memory
                                )

In [None]:
# Let's clear the previous conversation first: conversation2 was built with the
# same `memory` object as the first chain, so the earlier exchange would
# otherwise still be part of its history.
memory.clear()

In [None]:
# Opening question of the second example (this rebinds `first_input`).
# The trailing semicolon keeps the notebook from echoing predict()'s return value.
first_input = "Can you describe London in 100 words?"
conversation2.predict(input=first_input);

In [None]:
# This is a follow-up question without context to showcase the conversation memory:
# no city is named here, so a sensible answer has to rely on the history
# kept from the previous call.
second_input = "Is there a river?"
conversation2.predict(input=second_input);