This is an example of a RAG (retrieval-augmented generation) search engine powered by OpenAI and Pinecone.

In [None]:
# Import necessary libraries
import os
import json
from tqdm import tqdm
from dotenv import load_dotenv
from openai import OpenAI
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec

# Load environment variables (API keys, namespace) before any client is built
load_dotenv()

# Set up Pinecone API key and initialize.
# Fail fast with an explicit message rather than letting the client fail
# later on a missing key.
pinecone_api_key = os.getenv("PINECONE_API_KEY")
if not pinecone_api_key:
    raise ValueError("PINECONE_API_KEY is not set in the .env file")

pc = Pinecone(api_key=pinecone_api_key)

# Shared OpenAI client used by get_embeddings() and get_chat_completion().
# NOTE(review): constructed without arguments, so credentials presumably come
# from the environment (e.g. OPENAI_API_KEY) — confirm.
client = OpenAI()

# Connect to the index.
# `namespace` is None when PINECONE_NAMESPACE is unset; query_pinecone()
# forwards it unchanged to index.query().
index_name = 'idea-index'
namespace = os.getenv("PINECONE_NAMESPACE")
index = pc.Index(index_name)

# Define functions
def print_matches(pinecone_results):
    """Print retrieved matches as a ``Score - [Code] - Title`` table.

    Args:
        pinecone_results: Mapping with an optional ``"matches"`` list. Each
            match must carry a ``"score"`` and may carry a ``"metadata"``
            mapping holding ``"code"`` and ``"title"``.

    Prints a "No matches found." notice when ``"matches"`` is missing,
    ``None``, or empty. Missing or ``None`` metadata falls back to the
    placeholders ``N/A`` / ``No Title Available``.
    """
    matches = pinecone_results.get("matches") or []
    if not matches:
        print("\nNo matches found.")
        return

    print(f"\n{'Score':<10} - [Code] - Title")
    print("-" * 50)
    # Deliberately no progress bar: the loop only prints, and a bar written
    # to stderr ends up interleaved between the header and the table rows.
    for match in matches:
        score = round(match["score"], 4)  # Limit score to 4 decimal places
        # `or {}` also covers an explicit None, which .get(key, {}) would not
        metadata = match.get("metadata") or {}
        code = metadata.get("code", "N/A")
        title = metadata.get("title", "No Title Available")
        print(f"{score:<10} - [{code}] - {title}")

def get_embeddings(text, model="text-embedding-3-large"):
    """Return the embedding vector OpenAI produces for ``text``.

    Only the first item of the API response is used, so ``text`` is
    expected to be a single input rather than a batch.
    """
    embedding_response = client.embeddings.create(input=text, model=model)
    first_item = embedding_response.data[0]
    return first_item.embedding

def get_chat_completion(messages, model, response_format=None):
    """Run an OpenAI chat completion and return the first choice's content.

    ``response_format`` is forwarded to the API only when it is truthy, so
    callers that do not need structured output can simply omit it.
    """
    request = dict(model=model, messages=messages)
    if response_format:
        request.update(response_format=response_format)

    first_choice = client.chat.completions.create(**request).choices[0]
    return first_choice.message.content

def build_cosine_query(query_text):
    """Rewrite a user question into text suited to a cosine-similarity search.

    The model is primed with a few question/rewrite examples and asked to
    answer as a JSON object of the form ``{"result": "<rewritten query>"}``.

    Args:
        query_text: The user's question as typed.

    Returns:
        The rewritten query string. Falls back to ``query_text`` unchanged
        when the model returns no content or an empty rewrite, so the search
        can still run on the user's own wording.

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON.
    """

    messages = [
        {"role": "developer", "content": "You are a helpful innovation assistant. Format my question in a way that would get the best results from a vector search of ideas in my database. The ideas have been embedded with their Title, Description, and Comments combined and lemmatized before uploading. I am going to take the output of this and give it to Pinecone to perform a cosine similarity search."},
        {"role": "user", "content": "I want to find ideas related to renewable energy sources."},
        {"role": "assistant", "content": "Looking for innovative ideas, projects, or discussions related to renewable energy sources (e.g. solar, wind, hydroelectric, geothermal, biomass), clean energy technologies, sustainable power generation, reduced carbon footprint, zero-emission strategies, and other eco-friendly solutions."},
        {"role": "user", "content": "What are some innovative ideas for increasing revenue?"},
        {"role": "assistant", "content": "Seeking creative and novel concepts, strategies, or initiatives to boost income, generate profits, enhance financial performance, drive sales growth, improve monetization, optimize revenue streams, or maximize earnings."},
        {"role": "user", "content": "How can I improve customer satisfaction in my business?"},
        {"role": "assistant", "content": "Exploring methods, techniques, approaches, or practices to enhance customer experience, increase client happiness, elevate service quality, improve customer relations, foster client loyalty, or deliver exceptional customer service."},
        {"role": "user", "content": "What are some ideas for reducing waste in manufacturing processes?"},
        {"role": "assistant", "content": "Investigating innovative solutions, strategies, technologies, or methodologies to minimize waste production, reduce resource consumption, optimize material efficiency, enhance sustainability, or implement eco-friendly practices in manufacturing operations."},
        {"role": "user", "content": query_text}
    ]

    response_format = {
        "type": "json_schema",
        "json_schema": {
            "name": "query_schema",
            # strict + required make the API enforce that "result" is present;
            # without them the key is optional and the lookup below can fail.
            "strict": True,
            "schema": {
                "type": "object",
                "properties": {
                    "result": {
                        "description": "The reformatted query string to use for cosine similarity search on the vector database.",
                        "type": "string"
                    }
                },
                "required": ["result"],
                "additionalProperties": False
            }
        }
    }

    response = get_chat_completion(messages, model="gpt-4o", response_format=response_format)

    # The message content can be empty (e.g. when the model declines to
    # answer); json.loads(None) would raise an unhelpful TypeError.
    if not response:
        return query_text

    # Parse the JSON response
    response_json = json.loads(response)

    # An empty rewrite would embed to a meaningless vector, so prefer the
    # user's original text in that case.
    return response_json.get("result") or query_text

def query_pinecone(query_text, index, top_k=10):
    """Embed ``query_text`` and return the ``top_k`` nearest matches as a dict.

    The search runs in the module-level ``namespace`` and asks Pinecone to
    include each match's metadata. The raw response object is converted
    with its ``to_dict()`` method before being returned.
    """
    search_kwargs = {
        "vector": get_embeddings(query_text),
        "top_k": top_k,
        "include_metadata": True,
        "namespace": namespace,
    }
    return index.query(**search_kwargs).to_dict()

def generate_response(query_text, response_dict):
    """Generate a narrative answer to the user's query from retrieved ideas.

    The retrieved matches are serialized to JSON and embedded in the
    developer prompt together with the initiative and company context; the
    user's original question is sent as the user message. The model is asked
    to reply as a JSON object of the form ``{"response": "<answer>"}``.

    Args:
        query_text: The user's original question.
        response_dict: JSON-serializable search results (as returned by
            ``query_pinecone``).

    Returns:
        The model's answer text.

    Raises:
        ValueError: If the model returns no content, or content that is not
            valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
    """

    initiative_title = "Brightidea Product Requests"
    initiative_desc = "One location for all internal enhancement and new feature requests for our product."
    company = "Brightidea is a leading idea and innovation management software designed to help organizations collect, evaluate, and implement employee ideas, fostering a culture of innovation and collaboration."

    messages = [
        {
            "role": "developer",
            "content": (
                f"You are a helpful innovation assistant tasked with analyzing and summarizing ideas for the initiative {initiative_title}, {initiative_desc}. {company} \n"
                f"{json.dumps(response_dict, indent=2)}\n"  # Serialize response to JSON format
                "### Instructions:\n"
                "- Focus your response on addressing the user's query clearly and comprehensively.\n"
                "- Create a concise narrative summarizing the most relevant ideas, ensuring the response is easy to read and avoids simply listing items.\n"
                "- If no ideas are directly relevant, offer general guidance or propose methods to refine the query.\n\n"
                "### Example Format:\n"
                "# Summary\n"
                "Brief overview of the key themes and notable ideas. Highlight the most promising suggestions.\n"
            )
        },
        {
            "role": "user",
            "content": query_text
        }

    ]

    response_format = {
        "type": "json_schema",
        "json_schema": {
            "name": "response_schema",
            # strict + required make the API enforce that "response" is
            # present; without them the key is optional.
            "strict": True,
            "schema": {
                "type": "object",
                "properties": {
                    "response": {
                        "description": "The response to the user's query based on the retrieved ideas.",
                        "type": "string"
                    }
                },
                "required": ["response"],
                "additionalProperties": False
            }
        }
    }

    raw_reply = get_chat_completion(messages, model="gpt-4o", response_format=response_format)

    # The message content can be empty (e.g. when the model declines to
    # answer); surface that clearly instead of json.loads(None)'s TypeError.
    if not raw_reply:
        raise ValueError("The model returned no content for the response request")

    # Parse the JSON response and pull out the answer text
    reply_json = json.loads(raw_reply)

    return reply_json["response"]

# Interactive Cells
# Query Example
# Pipeline: read the user's question, rewrite it for vector search, retrieve
# the closest ideas from Pinecone, print them, then summarize them.
query_text = input("\nEnter a query to search the ideaspace: ")

print(f"\nUser Query: {query_text}\n")

# Rewrite the raw question into a search-oriented query
cosine_query = build_cosine_query(query_text)
print(f"\nCosine Query: {cosine_query}\n")

# Search with the rewritten query and show the ranked matches
pinecone_results = query_pinecone(cosine_query, index)
print_matches(pinecone_results)

# The summary is generated from the ORIGINAL question plus the retrieved matches
response = generate_response(query_text, pinecone_results)

print(f"BI BOT:\n{response}\n")



User Query: show me the ideas for integrating chatgpt


Cosine Query: Searching for creative implementations, projects, or strategies for integrating ChatGPT or similar AI technologies into applications, systems, platforms, enhancing communication, automating customer support, providing virtual assistance, enriching user interaction, or driving business efficiency through AI-driven solutions.


Score      - [Code] - Title
--------------------------------------------------


Processing Matches: 100%|██████████| 10/10 [00:00<00:00, 55553.70it/s]

0.5426     - [D18104] - Chat Bot Chatbot - add to the platform 
0.5378     - [D25732] - Use AI Text generator for Brightidea content development
0.4992     - [D26102] - Add the AI Writer from Memo to the Idea Submission from Description 
0.4876     - [D26508] - Smarter Platform
0.4661     - [D26586] - Intelligent Assistant for BI Admins and Users
0.4472     - [D26553] - Generative AI to add value  on a submitted idea
0.4432     - [D26747] - Leverage AI for activity-level comment and idea description sentiment and analysis.
0.4419     - [D19469] - Brightidea "Inspiration Engine" for Employee users
0.4348     - [D16730] - Text Analytics
0.434      - [D18586] - Suggested or auto tagging via image and text analysis





BI BOT:
# Summary
Several ideas focus on integrating AI and chatbot capabilities, including utilizing ChatGPT and similar technologies to enhance conversation, content generation, and idea submission processes on the Brightidea platform.

**All Ideas:**

- **D18104:** Suggests adding chatbot capabilities to the Brightidea platform to assist with search and FAQs. This can be customized for end users by admins or with BI's technical services assistance.
  
- **D26586:** Proposes an Intelligent Assistant for BI Admins and Users utilizing Retrieval-Augmented Generation, offering context-aware support based on Knowledge Base Articles to improve interaction and operational efficiency.
  
- **D26102:** Involves using AI Writer from Memo in the Description field of the Idea Submission form. This would leverage AI tools like ChatGPT to assist users with articulating and clarifying their ideas.
  
- **D26553:** Covers the use of generative AI to enhance idea submissions by automatically gatherin