In [11]:
import os
import json
from datetime import datetime
from groq import Groq

In [3]:
from dotenv import load_dotenv
load_dotenv(override=True)

True

In [4]:
# Groq API client; the key is read from the GROQ_API_KEY environment variable.
groq = Groq(api_key=os.environ.get("GROQ_API_KEY"))

In [18]:
# JSON-schema-style description of one "memory" record the model is asked to emit.
# Both names are bound to the same dict object.
my_schema = memory_schema = {
  "type": "object",
  "properties": {
    "date": {
      "type": "string",
      # Matches the "%Y-%m-%d %H:%M:%S" timestamp that is given to the model as the current time.
      "description": "The current date (YYYY-MM-DD HH:MM:SS format)"
    },
    "me": {
      "type": "array",
      "items": {"type": "string"},
      "description": "My name"
    },
    "people": {
      "type": "array",
      "items": {"type": "string"},
      "description": "List of people involved in the event (optional)"
    },
    "feeling": {
      "type": "string",
      "description": "The main character's feeling during the event"
    },
    "short_description": {
      "type": "string",
      "description": "A brief description of the event"
    },
    "weather": {
      "type": "string",
      "description": "Current weather conditions (e.g., sunny, rainy, cloudy)"
    },
    "location": {
      "type": "string",
      "description": "Location name (e.g., city, town)"
    },
    "insight": {
      "type": "string",
      "description": "Additional details or insights about the event"
    },
    "memorable_because": {
      "type": "string",
      "description": "The reason why the event is memorable"
    }
  }
}


In [20]:
# Values interpolated into the system prompt: current timestamp, who is speaking, and where they live.
now = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
persona, home_location = "Ros", "Milan"

In [21]:
# Ask the model to turn a free-text diary entry into a JSON record following `my_schema`.
# The schema is serialized with json.dumps so the prompt contains valid JSON (double-quoted keys)
# rather than the Python repr of the dict.
chat_completion = groq.chat.completions.create(
    messages=[
        {
            "role": "system",
            "content": (
                "You are helpful memory recorder.\n"
                f"Write outputs in JSON in schema: {json.dumps(my_schema)}.\n"
                f"Current time is {now}.\n"
                f"I am {persona} living in {home_location} and events may take place in more specific places inside the home location or outside it, so record precisely.\n"
            ),
        },
        {
            "role": "user",
            "content": "Today was sunny day and then rained, I went to city to have a dinner with friends and I ate the best Sushi I have ever tested in restaurant called Sushita Cafe, where my friend Paco is a chef.",
        }
    ],
    model="llama3-70b-8192",
    # Request a JSON object as the response format.
    response_format={"type": "json_object"},
)

In [23]:
# Display the message text of the first choice in the response.
chat_completion.choices[0].message.content

'{\n  "date": "2024-05-16 17:33:19",\n  "me": ["Ros"],\n  "people": ["Paco"],\n  "feeling": "",\n  "short_description": "Dinner with friends at Sushita Cafe",\n  "weather": "Sunny and later rainy",\n  "location": "Sushita Cafe, Milan",\n  "insight": "Tasted the best Sushi ever",\n  "memorable_because": "Delicious food and great company"\n}'

## LangChain

In [12]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

In [25]:
# LangChain chat model backed by Groq, created with temperature 0.
chat = ChatGroq(temperature=0, model_name="Llama3-8b-8192")

In [26]:
# Two-message prompt: a fixed system instruction plus a human turn filled from the `text` variable.
system, human = "You are a helpful assistant.", "{text}"
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", human),
    ]
)

# Pipe the prompt into the chat model and run it once with a sample question.
chain = prompt | chat
chain.invoke({"text": "Explain the importance of low latency LLMs."})

AIMessage(content="Large Language Models (LLMs) have revolutionized the field of natural language processing (NLP) by enabling applications such as language translation, text summarization, and chatbots. However, traditional LLMs often suffer from high latency, which can be a significant limitation in many real-world applications. Low latency LLMs, on the other hand, offer several advantages that make them crucial for various use cases. Here are some reasons why low latency LLMs are important:\n\n1. **Real-time processing**: Low latency LLMs enable real-time processing of user input, which is essential for applications like chatbots, virtual assistants, and live language translation. This allows for a more seamless and responsive user experience.\n2. **Improved user engagement**: Fast response times can significantly improve user engagement and satisfaction. Low latency LLMs can quickly respond to user queries, reducing the likelihood of user frustration and abandonment.\n3. **Enhanced

### With streaming

see: https://python.langchain.com/v0.1/docs/integrations/chat/groq/

In [28]:
# Stream the answer and print each chunk's content as it arrives, without inserting newlines.
for chunk in chain.stream({"text": "Explain the importance of low latency LLMs."}):
    print(chunk.content, end="", flush=True)

Large Language Models (LLMs) have revolutionized the field of natural language processing (NLP) by enabling applications such as language translation, text summarization, and chatbots. However, traditional LLMs often suffer from high latency, which can be a significant limitation in many real-world applications. Low latency LLMs, on the other hand, offer several advantages that make them crucial for various use cases. Here are some reasons why low latency LLMs are important:

1. **Real-time processing**: Low latency LLMs enable real-time processing of user input, which is essential for applications like chatbots, virtual assistants, and live language translation. This allows for a more seamless and responsive user experience.
2. **Improved user engagement**: Fast response times can significantly improve user engagement and satisfaction. Low latency LLMs can process user input quickly, reducing the likelihood of user frustration and abandonment.
3. **Enhanced decision-making**: In appli

With RAG 

In [22]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
import sys
import os
sys.path.append("..")
from utils import database_managers, embedding
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain.chains import LLMChain
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain.schema import SystemMessage
from dotenv import load_dotenv
load_dotenv(override=True)

True

In [4]:
# Import the module under an alias so this cell can be re-run: the previous form rebound the
# name `embedding` from the module to the embedder object, so a second run would look up
# `EmbeddingFunction` on the embedder instead of on the module.
from utils import embedding as embedding_module

# The name `embedding` is kept because later cells pass it as `embedding=embedding`.
embedding = embedding_module.EmbeddingFunction('fast-bgeEmbedding').embedder

Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 4993.22it/s]


In [5]:
# Collection name passed to QDrantDBManager as `collection_name`.
COLLECTION_NAME = "web-places"

In [6]:
# Build the Qdrant manager for COLLECTION_NAME; the server URL comes from the QDRANT_URL
# environment variable.
# NOTE(review): vector_size=768 was marked "??" by the author — confirm it matches the
# dimensionality produced by the 'fast-bgeEmbedding' embedder.
qdrantClient = database_managers.QDrantDBManager(
    url=os.getenv('QDRANT_URL'),
    port=6333,
    collection_name=COLLECTION_NAME,
    vector_size=768,
    embedding=embedding,
    record_manager_url=r"sqlite:///record_manager_cache.sql",
)
# Second name for the same manager object.
vectore_store = qdrantClient
# The manager's `vector_store` attribute, which later provides the retriever.
vectore_store_client = vectore_store.vector_store

Collection web-places already exists!


  warn_deprecated(


In [7]:
# Retriever over the vector store; rag_chain calls retriever.invoke(question) on it.
retriever = vectore_store_client.as_retriever()

In [17]:
from langchain.memory import ConversationBufferMemory
# Conversation buffer stored under the "chat_history" key.
# NOTE(review): `memory` is not referenced by any other cell visible here — confirm it is still needed.
memory = ConversationBufferMemory(memory_key="chat_history")

Il codice qui sotto serve a **recuperare i documenti tramite un retriever e passarli a un [prompt template](https://python.langchain.com/v0.1/docs/modules/model_io/prompts/quick_start/)**.

In [20]:
# RAG Setup
def llmama3_llm(question, context):
    """Stream a Groq-hosted Llama 3 answer to ``question`` grounded on ``context``.

    The question and the context are handed to the prompt template as variables
    instead of being pre-formatted into the template string. Curly braces inside
    the question or the retrieved text are therefore treated as literal
    characters rather than as template placeholders.

    Args:
        question: The user's search request.
        context: Text of the retrieved documents the answer should be based on.

    Returns:
        The iterator produced by ``chain.stream``; callers read ``.content``
        from each yielded chunk.
    """
    chat = ChatGroq(temperature=0, model_name="Llama3-8b-8192")
    system = """
        Sei un assistente nella ricerca di ristoranti, locali, eventi. Date le informazioni di contesto restituisci una lista di ristoranti, locali, eventi richiesti,
        che soddisfano i requisiti di ricerca.
        Per ogni elemento indica:
          + nome del ristorante, locale, evento
          + indirizzo
          + descrizione
        """

    # Placeholders are filled at invocation time, so user/document text never becomes template syntax.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system),
            ("user", "Domanda: {question}\n\nContesto: {context}"),
        ]
    )

    chain = prompt | chat
    return chain.stream({"question": question, "context": context})
    
def combine_docs(docs):
    """Join the ``page_content`` of every document in ``docs``, separated by a blank line."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)
def rag_chain(question):
    """Retrieve documents for ``question`` and stream the model's answer based on them.

    Returns whatever ``llmama3_llm`` returns for the question and the combined
    text of the retrieved documents.
    """
    context = combine_docs(retriever.invoke(question))
    return llmama3_llm(question, context)

In [21]:
# Run the RAG pipeline on a sample question and print the streamed chunks as they arrive.
for chunk in rag_chain("Dove posso fare un pranzo montanaro?"):
    print(chunk.content, end="", flush=True)

Ecco la lista di ristoranti, locali e eventi che soddisfano i requisiti di ricerca:

1. RIFUGIO IL PASTORE
	* Indirizzo: Località Alpe Pile, 13021 Alagna Valsesia (VC)
	* Descrizione: Ristorante montanaro con vista sulla natura, offre specialità come polenta e tartare di fassona. Prezzo: circa 20-25 euro.
2. HOSTARIA VIOLA
	* Indirizzo: Via Giuseppe Verdi 32, Castiglione delle Stiviere (MN)
	* Descrizione: Osteria con menu tradizionale mantovana, offre piatti come tortelli di zucca e riso alla pilota. Prezzo: circa 40 euro a testa.
3. ENO AGRITURISMO GALLINA GIACINTO
	* Indirizzo: Loc. Maurizio 6, Santo Stefano Belbo (CN)
	* Descrizione: Agriturismo con menu fisso che si snoda tra 8 portate, offre specialità come ravioli al plin e capunet. Prezzo: circa 37€ inclusa mezza bottiglia di vino.
4. AGRITURISMO LOLOC
	* Indirizzo: (non specificato)
	* Descrizione: Agriturismo che offre pizzoccheri e tagliere di formaggi. (Nota: non è stato specificato l'indirizzo, quindi non è possibile 

References
1. [RAG with Llama3 and Langchain](https://medium.com/@nookalabadrinath/rag-and-its-application-using-llama3-lang-chain-and-chroma-db-ec61e905b9a5)
2. [Llama 3 via Groq](https://tmmtt.medium.com/llama-3-via-groq-api-9d4e5cef3640)
