In [1]:
!pip install groq

Collecting groq
  Downloading groq-0.20.0-py3-none-any.whl.metadata (15 kB)
Downloading groq-0.20.0-py3-none-any.whl (124 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m124.9/124.9 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq
Successfully installed groq-0.20.0


In [2]:
!pip install pinecone

Collecting pinecone
  Downloading pinecone-6.0.2-py3-none-any.whl.metadata (9.0 kB)
Collecting pinecone-plugin-interface<0.0.8,>=0.0.7 (from pinecone)
  Downloading pinecone_plugin_interface-0.0.7-py3-none-any.whl.metadata (1.2 kB)
Downloading pinecone-6.0.2-py3-none-any.whl (421 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m421.9/421.9 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pinecone_plugin_interface-0.0.7-py3-none-any.whl (6.2 kB)
Installing collected packages: pinecone-plugin-interface, pinecone
Successfully installed pinecone-6.0.2 pinecone-plugin-interface-0.0.7


In [4]:
import os
import pinecone
import openai
from sentence_transformers import SentenceTransformer
from groq import Groq
import torch

# Credentials are read from the environment only. The API keys that were
# previously embedded in this cell have been exposed to anyone with access to
# the notebook and should be revoked and reissued.
def _require_env(name):
    """Return the value of environment variable `name`; raise RuntimeError if it is unset or empty."""
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name} is not set. "
            "Set it before starting the kernel instead of hardcoding the key."
        )
    return value


# Initialize Pinecone
pc = pinecone.Pinecone(api_key=_require_env("PINECONE_API_KEY"))
index_name = "agribot"
index = pc.Index(index_name)

# Initialize Groq API
groq_api_key = _require_env("GROQ_API_KEY")
groq_client = Groq(api_key=groq_api_key)

# Check for GPU availability
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

# Declare model at the global scope so it's accessible to all functions
model = None

def _load_embedding_model(model_name, sample_text):
    """Load `model_name` on `device`, encode `sample_text` once, and return (model, vector length)."""
    candidate = SentenceTransformer(model_name, device=device)
    return candidate, len(candidate.encode(sample_text))


def initialize_model():
    """Load a sentence-embedding model into the module-level `model`.

    Tries "all-MiniLM-L6-v2" first; if loading or the probe encode raises,
    falls back to "distilbert-base-nli-mean-tokens". The global `model` is
    only assigned once a candidate has both loaded and encoded a sample
    sentence, so a failed attempt never leaves an unverified model behind.

    Returns:
        bool: True if a model was loaded and verified, False if both
        attempts failed (the global `model` is left unchanged in that case).
    """
    global model

    # Defined before either attempt so the backup path can always use it.
    sample_text = "This is a test sentence."

    try:
        loaded, embedding_dim = _load_embedding_model("all-MiniLM-L6-v2", sample_text)
        model = loaded
        print("Successfully loaded embedding model: all-MiniLM-L6-v2")
        # The recorded run of this notebook printed 384 for this model.
        print(f"Embedding dimension: {embedding_dim}")
        return True
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        print("Attempting to load backup model...")

    try:
        # NOTE(review): the backup model presumably produces vectors of a
        # different length than the primary one; if the Pinecone index was
        # built with the primary model, queries embedded with the backup
        # would not match the index dimension — confirm before relying on it.
        loaded, embedding_dim = _load_embedding_model("distilbert-base-nli-mean-tokens", sample_text)
        model = loaded
        print("Successfully loaded backup model: distilbert-base-nli-mean-tokens")
        print(f"Embedding dimension: {embedding_dim}")
        return True
    except Exception as e2:
        print(f"Error loading backup model: {str(e2)}")
        return False

def get_relevant_chunks(query, top_k=3):
    """Fetch the text of the Pinecone matches most similar to `query`.

    The query is embedded with the module-level `model` (initialized on
    demand via `initialize_model()`), then `index.query` is called with that
    vector. For each match the text is read from the first of the metadata
    keys "text_sample", "text", "content", "chunk" that holds a non-empty
    value. Matches with no usable text are skipped rather than contributing
    a placeholder string that would later be presented to the LLM as context.

    Args:
        query: The user's question, passed to `model.encode`.
        top_k: Number of matches requested from the index.

    Returns:
        list[str] | None: The extracted texts in match order, or None when
        the model cannot be initialized, the search returns no match with
        usable text, or any error occurs during retrieval (the error is
        printed, not raised).
    """
    global model

    if model is None:
        print("Model not initialized. Initializing now...")
        if not initialize_model():
            return None

    text_keys = ("text_sample", "text", "content", "chunk")

    try:
        # Get embedding for the query
        query_embedding = model.encode(query).tolist()
        print(f"Query embedding dimension: {len(query_embedding)}")

        # Search in Pinecone
        search_results = index.query(vector=query_embedding, top_k=top_k, include_metadata=True)
        matches = search_results["matches"]
        print(f"Found {len(matches)} matching documents")

        chunks = []
        for match in matches:
            # A match stored without metadata must not abort the whole lookup.
            metadata = match["metadata"] or {}
            text = next((metadata[key] for key in text_keys if metadata.get(key)), None)
            if text is None:
                continue
            # str() keeps the caller's "\n\n".join(...) safe for non-string metadata values.
            chunks.append(str(text))

        # An empty list and "nothing found" are the same thing to the caller.
        return chunks or None

    except Exception as e:
        print(f"Error during retrieval: {str(e)}")
        return None

def generate_answer_with_groq(query):
    """Answer `query` with Groq, grounding the prompt in retrieved chunks when available.

    Calls `get_relevant_chunks(query)`. If it returns a non-empty result, the
    chunks are joined with blank lines and embedded in the prompt as context;
    otherwise the question is sent on its own. Any exception raised by the
    Groq call is printed and turned into an apology string instead of being
    propagated.

    Returns:
        str: The content of the first completion choice, or an error message.
    """
    retrieved = get_relevant_chunks(query)

    if not retrieved:
        # Retrieval produced nothing usable: let the model answer unaided.
        prompt = f"""
        Answer the following agricultural question to the best of your ability:

        {query}
        Answer:
        """
    else:
        # Separate the retrieved chunks with a blank line inside the prompt.
        context = "\n\n".join(retrieved)
        prompt = f"""
        Use the following agricultural context to answer the question:

        {context}

        Question: {query}
        Answer:
        """

    chat_messages = [
        {"role": "system", "content": "You are an AI assistant specialized in agriculture."},
        {"role": "user", "content": prompt},
    ]

    try:
        completion = groq_client.chat.completions.create(
            model="llama3-70b-8192",
            messages=chat_messages,
        )
        return completion.choices[0].message.content
    except Exception as err:
        print(f"Error generating answer with Groq: {str(err)}")
        return f"Sorry, I couldn't generate an answer due to an error: {str(err)}"

if __name__ == "__main__":
    # Interactive driver: load the embedding model once, then answer queries
    # typed by the user until they enter 'exit' or interrupt the loop.
    print("Agricultural Knowledge Bot initializing...")

    # Initialize the model at startup
    if initialize_model():
        print("Model initialization successful.")
    else:
        print("WARNING: Failed to initialize embedding model. The system may not work correctly.")

    print("\nAgricultural Knowledge Bot ready.")
    # The embedding size depends on which model loaded (initialize_model prints
    # it), so the banner does not state a fixed dimension.
    print("This bot uses a RAG system with sentence-transformer embeddings to retrieve relevant agricultural information.")
    print("Type 'exit' to quit the program.")

    while True:
        # Reading input sits outside the broad handler below: if stdin is
        # closed or unavailable, the loop must end rather than retry forever.
        try:
            query = input("\nEnter your agricultural query: ").strip()
        except (KeyboardInterrupt, EOFError):
            print("\nExiting program...")
            break

        if not query:
            # Nothing to ask; prompt again instead of sending an empty query.
            continue
        if query.lower() == 'exit':
            break

        try:
            print("Processing your query...")
            answer = generate_answer_with_groq(query)
            print(f"\nAI Response:\n{answer}\n")
        except KeyboardInterrupt:
            print("\nExiting program...")
            break
        except Exception as e:
            print(f"An error occurred: {str(e)}")

Using device: cpu
Agricultural Knowledge Bot initializing...
Successfully loaded embedding model: all-MiniLM-L6-v2
Embedding dimension: 384
Model initialization successful.

Agricultural Knowledge Bot ready.
This bot uses a RAG system with 768-dimensional embeddings to retrieve relevant agricultural information.
Type 'exit' to quit the program.

Enter your agricultural query: what is witchweed?
Processing your query...
Query embedding dimension: 384
Found 3 matching documents

AI Response:
I'm happy to help!

Witchweed (Striga spp.) is a parasitic weed that is native to Africa and Asia. It is a major pest in many crops, including corn, sorghum, sugarcane, and others. Witchweed attaches itself to the roots of host plants and derives its nutrients and water from them, causing significant yield loss and reducing crop quality. It is considered one of the most destructive weeds in the world, especially in sub-Saharan Africa where it can reduce crop yields by up to 80%.


Exiting program...
