# Installing Necessary Libraries

In [1]:
!pip install openai pinecone-client flask flask-restful

Collecting openai
  Downloading openai-1.52.1-py3-none-any.whl.metadata (24 kB)
Collecting pinecone-client
  Downloading pinecone_client-5.0.1-py3-none-any.whl.metadata (19 kB)
Collecting flask-restful
  Downloading Flask_RESTful-0.3.10-py2.py3-none-any.whl.metadata (1.0 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Collecting pinecone-plugin-inference<2.0.0,>=1.0.3 (from pinecone-client)
  Downloading pinecone_plugin_inference-1.1.0-py3-none-any.whl.metadata (2.2 kB)
Collecting pinecone-plugin-interface<0.0.8,>=0.0.7 (from pinecone-client)
  Downloading pinecone_plugin_interface-0.0.7-py3-none-any.whl.metadata (1.2 kB)
Collecting aniso8601>=0.82 (from flask-restful)
  Downloading aniso8601-9.0.1-py2.py3-none-any.whl.metadata (23 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->o

In [3]:
pip install pinecone



# Setting Up Pinecone and OpenAI
I utilized my personal API key for execution; however, I will be removing it during submission due to confidentiality concerns. Thank you for your understanding.

In [10]:
import getpass
import os

import openai
from pinecone import Pinecone, ServerlessSpec

# Credentials come from the environment (or an interactive prompt) so that no
# key is ever stored in the notebook itself.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY") or getpass.getpass("Pinecone API key: ")

# Initialize OpenAI
openai.api_key = OPENAI_API_KEY

# Initialize Pinecone
pc = Pinecone(api_key=PINECONE_API_KEY)
index_name = "rag-qa-bot"

# Must match the vector size of the embedding model used for queries
# (text-embedding-ada-002 produces 1536-dimensional vectors).
EMBEDDING_DIMENSION = 1536

# Create the index only when it is missing, so re-running this cell does not
# fail because the index already exists.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=EMBEDDING_DIMENSION,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

# Handle used to query the index.
index = pc.Index(index_name)


def delete_pinecone_index():
    """Delete the Pinecone index named by ``index_name``."""
    pc.delete_index(index_name)

# Setting Up RAG Functionality

In [11]:
def retrieve_augmented_response(query):
    """Answer ``query`` with retrieval-augmented generation.

    The query is embedded with OpenAI, the five nearest vectors are fetched
    from the Pinecone index, their ``text`` metadata is joined into a context
    block, and a completion model is asked to answer from that context.

    Args:
        query: The user's question as a string.

    Returns:
        The generated answer with surrounding whitespace stripped.

    Raises:
        Any exception raised by the OpenAI or Pinecone clients (for example
        ``openai.RateLimitError``) propagates to the caller.
    """
    # Embed the query using OpenAI embeddings. The openai>=1.0 client returns
    # objects, so fields are read as attributes rather than dict keys.
    embedding_response = openai.embeddings.create(input=query, model="text-embedding-ada-002")
    query_embedding = embedding_response.data[0].embedding

    # Retrieve relevant context from Pinecone. The handle is obtained here so
    # the function only depends on `pc` and `index_name` from the setup cell.
    index = pc.Index(index_name)
    results = index.query(vector=query_embedding, top_k=5, include_metadata=True)

    # Combine the retrieved context for better response
    context = "\n".join(match['metadata']['text'] for match in results['matches'])

    # Use an OpenAI completion model to generate the response.
    # `openai.Completion` was removed in openai>=1.0 and text-davinci-003 has
    # been retired; gpt-3.5-turbo-instruct accepts the same prompt format.
    gpt_response = openai.completions.create(
        model="gpt-3.5-turbo-instruct",
        prompt=f"Answer the question based on the following context:\n\nContext: {context}\n\nQuestion: {query}\n\nAnswer:",
        max_tokens=150
    )

    return gpt_response.choices[0].text.strip()


# Setting Up Flask Server in a Thread
The Flask server will be set up to run in a separate thread, allowing other cells to be executed while the server is running.

In [12]:
from flask import Flask, request, jsonify
from flask_restful import Api, Resource
import threading

app = Flask(__name__)
api = Api(app)


# Define the QA endpoint
class QABot(Resource):
    """REST resource that answers questions posted to ``/qa``."""

    def post(self):
        """Handle ``POST /qa``.

        Expects a JSON object with a ``query`` field.

        Returns:
            A JSON response ``{"answer": ...}`` on success, or
            ``{"error": "No query provided"}`` with status 400 when the body
            is not a JSON object or has no non-empty ``query``.
        """
        data = request.get_json()
        # A body of `null`, a list, or a bare string has no `.get`; treat it
        # the same as a missing query.
        query = data.get("query") if isinstance(data, dict) else None
        if query:
            response = retrieve_augmented_response(query)
            return jsonify({"answer": response})

        # Flask-RESTful tries to JSON-serialise the first element of a
        # (body, status) tuple, which fails for a Response object. Set the
        # status on the Response itself and return it alone.
        error_response = jsonify({"error": "No query provided"})
        error_response.status_code = 400
        return error_response


api.add_resource(QABot, '/qa')

# Function to start Flask server in a thread
def run_flask():
    """Run the Flask development server on port 5000.

    ``app.run`` blocks until the server stops, so this is meant to be used as
    a thread target.
    """
    app.run(port=5000, threaded=True)


# Start the Flask server in a thread. If a thread started by an earlier run of
# this cell is still alive, reuse it: starting a second server on the same
# port fails with "Address already in use".
_existing_flask_thread = globals().get("flask_thread")
if _existing_flask_thread is None or not _existing_flask_thread.is_alive():
    flask_thread = threading.Thread(target=run_flask)
    flask_thread.start()


 * Serving Flask app '__main__'
 * Debug mode: off


Address already in use
Port 5000 is in use by another program. Either identify and stop that program, or start the server with a different port.


# Stop Flask Server Functionality

In [13]:
import os
import signal

def stop_flask_server():
    """Stop the Flask server by sending SIGTERM to the current process.

    The signal targets the process returned by ``os.getpid()``, i.e. the one
    executing this call, so everything running in it is asked to terminate,
    not just the server thread.
    """
    current_pid = os.getpid()
    os.kill(current_pid, signal.SIGTERM)

# Testing with Curl

In [14]:
# Send a POST request with a JSON body containing a "query" field to the /qa
# endpoint on 127.0.0.1:5000.
!curl -X POST http://127.0.0.1:5000/qa -H "Content-Type: application/json" -d '{"query":"What is your business about?"}'

ERROR:__main__:Exception on /qa [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/flask/app.py", line 1823, in full_dispatch_request
    rv = self.dispatch_request()
  File "/usr/local/lib/python3.10/dist-packages/flask/app.py", line 1799, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)
  File "/usr/local/lib/python3.10/dist-packages/flask_restful/__init__.py", line 489, in wrapper
    resp = resource(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/flask/views.py", line 107, in view
    return current_app.ensure_sync(self.dispatch_request)(**kwargs)
  File "/usr/local/lib/python3.10/dist-packages/flask_restful/__init__.py", line 604, in dispatch_request
    resp = meth(*args, **kwargs)
  File "<ipython-input-3-4b7388de4959>", line 14, in post
    response = retrieve_augmented_response(query)
  File "<ipython-input-11-2cc6389e9c95>", line 3, in retrieve_augmented_response
    respon

{"message": "Internal Server Error"}


# Temporary Workaround


*   The error I received, `openai.RateLimitError`, indicates that I have exceeded my current quota for the OpenAI API.
*   I've temporarily modified the response function to return a static message instead of generating embeddings, but in theory, the code should work.





In [15]:
def retrieve_augmented_response(query):
    """Temporary stand-in that ignores ``query`` and returns a fixed string.

    Used for testing the endpoint without calling any external service.
    """
    placeholder_answer = "This is a static response for testing purposes."
    return placeholder_answer

# Final Test

In [16]:
# Repeat the POST request to /qa with the same JSON "query" body.
!curl -X POST http://127.0.0.1:5000/qa -H "Content-Type: application/json" -d '{"query":"What is your business about?"}'

INFO:werkzeug:127.0.0.1 - - [23/Oct/2024 13:49:55] "POST /qa HTTP/1.1" 200 -


{"answer":"This is a static response for testing purposes."}


# Terminating

In [None]:
# Delete the Pinecone index first: stop_flask_server() sends SIGTERM to the
# current process, so statements placed after it may never run.
delete_pinecone_index()
stop_flask_server()