In [2]:
"""LLM."""

import streamlit as st
from llama_cpp import Llama
from pyspark.sql import functions as F

from webapp.Hello import data, full_df


MODEL_PATH = "../../models/Phi-3-mini-4k-instruct-q4.gguf"

# Load the GGUF model once at module level so every call to the helpers below
# reuses the same instance instead of reloading the weights.
MODEL = Llama(
    # Number of layers to offload to the GPU when GPU acceleration is
    # available. Set to 0 if the system has no GPU acceleration.
    n_gpu_layers=64,
    # Number of CPU threads to use; tune to the host system and the
    # resulting performance.
    n_threads=8,
    # Maximum sequence length; longer sequences need much more resources.
    n_ctx=4096,
    model_path=MODEL_PATH,
)

# Prompt template sent to the model. It has two `str.format` placeholders:
#   {context} - retrieved document text, wrapped in a ``` fence
#   {task}    - the user's question / instruction
PROMPT = """
Context: ```
{context}
```
Given the context inside ``` solve the following task: {task}.
If the context is not enough, try to solve the task with the
knowledge you have. But inform the user that the context is not
enough to solve the task.
"""


def get_most_similar_docs(text: str, num_docs: int) -> tuple[list[str], str]:
    """Retrieve the documents most similar to ``text`` and build a context string.

    Args:
        text: Query text to match against the corpus.
        num_docs: Number of documents to request from the similarity search.

    Returns:
        A ``(ids, context)`` tuple. ``ids`` is whatever
        ``get_most_similar_documents`` returns (presumably a list of string
        document ids - TODO confirm), and ``context`` is the ``full_text`` of
        the rows of ``full_df`` whose ``_id`` is in ``ids``, joined with
        newlines.
    """
    # Kept as a function-local import, as in the original code.
    from rag.processor.most_similar_docs import get_most_similar_documents

    ids = get_most_similar_documents(
        text=text,
        data=data,
        num_docs=num_docs,
    )
    rows = full_df.filter(F.col("_id").isin(ids)).select("full_text").collect()
    # NOTE(review): rows come back in DataFrame order, which is not
    # necessarily the similarity ranking of ``ids`` - confirm if order matters.
    context = "\n".join(row["full_text"] for row in rows)
    return ids, context


def qa(text: str, num_docs: int = 3) -> tuple[list[str], dict]:
    """Answer a question with the model, using retrieved documents as context.

    Args:
        text: The user's question or task; used both as the retrieval query
            and as the ``{task}`` slot of ``PROMPT``.
        num_docs: Number of documents to retrieve for the context.

    Returns:
        A ``(ids, output)`` tuple: the ids from ``get_most_similar_docs`` and
        the raw completion response returned by ``MODEL``. The generated text
        is at ``output["choices"][0]["text"]``.
    """
    ids, context = get_most_similar_docs(text=text, num_docs=num_docs)
    prompt = PROMPT.format(context=context, task=text)
    output = MODEL(
        # Chat markers wrapping the user turn and opening the assistant turn.
        f"<|user|>\n{prompt}<|end|>\n<|assistant|>",
        max_tokens=256,  # Generate up to 256 tokens
        stop=["<|end|>"],  # Stop once the model closes its turn
        echo=False,  # Do not echo the prompt back in the output
    )
    return ids, output

llama_model_loader: loaded meta data with 24 key-value pairs and 195 tensors from ../../models/Phi-3-mini-4k-instruct-q4.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = phi3
llama_model_loader: - kv   1:                               general.name str              = Phi3
llama_model_loader: - kv   2:                        phi3.context_length u32              = 4096
llama_model_loader: - kv   3:                      phi3.embedding_length u32              = 3072
llama_model_loader: - kv   4:                   phi3.feed_forward_length u32              = 8192
llama_model_loader: - kv   5:                           phi3.block_count u32              = 32
llama_model_loader: - kv   6:                  phi3.attention.head_count u32              = 32
llama_model_loader: - kv   7:               phi3.attention.head_count_kv u

In [3]:
# Run one sample question through the pipeline. `qa` returns the retrieved
# document ids and the raw model response, which later cells index as
# answer["choices"][0]["text"].
ids, answer = qa("What is phytoplankton?")

  from .autonotebook import tqdm as notebook_tqdm
INFO:rag.processor.most_similar_docs:Processing query...
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: mps
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
Batches: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.85it/s]
INFO:rag.processor.most_similar_docs:Calculating cosine similarity...
INFO:rag.processor.most_similar_docs:Number of obs: 2402                        
INFO:rag.processor.most_similar_docs:Getting most similar documents...
INFO:rag.processor.most_similar_docs:Most similar documents:                    
INFO:rag.processor.most_similar_docs:['0704.0322', '0704.0114', '0704.0310']

llama_print_timings:        load time =    8632.89 ms
llama_print_timings:      sample time =      21.82 ms /   256 runs   (

In [10]:
def stream(output):
    """Yield the first choice's text from ``output`` piece by piece.

    The text at ``output["choices"][0]["text"]`` is split on single spaces, so
    consecutive spaces produce empty strings and ``" ".join(stream(output))``
    rebuilds the original text.
    """
    completion_text = output["choices"][0]["text"]
    yield from completion_text.split(" ")

In [12]:
" ".join(stream(answer))

' Phytoplankton are a diverse collection of microscopic organisms found in aquatic environments, both saline and freshwater. They are autotrophs, meaning they produce their own food through photosynthesis by converting sunlight, carbon dioxide, and water into glucose and oxygen. This makes phytoplankton the primary producers in aquatic ecosystems, forming a fundamental base for most marine and freshwater food webs. They are crucial not only because they contribute significantly to the global oxygen production but also play a pivotal role in carbon cycling by absorbing CO2 from the atmosphere.\n\nGiven the context provided, phytoplankton are not directly mentioned. However, based on general knowledge outside of this specific text, we can understand that phytoplankton were likely discussed within the broader framework of their role in reaction-diffusion models and how these organisms might exhibit various behaviors (including regular, chaotic behavior, and spatiotemporal patterns) under 

In [9]:
# Show the generated answer one space-separated piece per line.
print("\n".join(answer["choices"][0]["text"].split(" ")))


Phytoplankton
are
a
diverse
collection
of
microscopic
organisms
found
in
aquatic
environments,
both
saline
and
freshwater.
They
are
autotrophs,
meaning
they
produce
their
own
food
through
photosynthesis
by
converting
sunlight,
carbon
dioxide,
and
water
into
glucose
and
oxygen.
This
makes
phytoplankton
the
primary
producers
in
aquatic
ecosystems,
forming
a
fundamental
base
for
most
marine
and
freshwater
food
webs.
They
are
crucial
not
only
because
they
contribute
significantly
to
the
global
oxygen
production
but
also
play
a
pivotal
role
in
carbon
cycling
by
absorbing
CO2
from
the
atmosphere.

Given
the
context
provided,
phytoplankton
are
not
directly
mentioned.
However,
based
on
general
knowledge
outside
of
this
specific
text,
we
can
understand
that
phytoplankton
were
likely
discussed
within
the
broader
framework
of
their
role
in
reaction-diffusion
models
and
how
these
organisms
might
exhibit
various
behaviors
(including
regular,
chaotic
behavior,
and
spatiotemporal
patterns)
under
dif