<a href="https://colab.research.google.com/github/nithin-k-mundrathi/LLM-practice/blob/main/RAG/RAG_C_6/3_RAG_Generative_Ai_PineCone.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [23]:
%%capture
!pip install pinecone-client==5.0.1

In [4]:
f = open("pinecone.txt", "r")
PINECONE_API_KEY=f.readline()
f.close()

## The Pinecone index

In [5]:
import os
from pinecone import Pinecone, ServerlessSpec

# initialize connection to pinecone (get API key at app.pinecone.io)
api_key = os.environ.get('PINECONE_API_KEY') or 'PINECONE_API_KEY'

from pinecone import Pinecone, ServerlessSpec
pc = Pinecone(api_key=PINECONE_API_KEY)

In [6]:
from pinecone import ServerlessSpec

index_name = 'bank-index-50000'
cloud = os.environ.get('PINECONE_CLOUD') or 'aws'
region = os.environ.get('PINECONE_REGION') or 'us-east-1'

spec = ServerlessSpec(cloud=cloud, region=region)

In [7]:
import time
import pinecone
# check if index already exists (it shouldn't if this is first time)
if index_name not in pc.list_indexes().names():
    # if does not exist, create index
    pc.create_index(
        index_name,
        dimension=384,  # dimension of the embedding model
        metric='cosine',
        spec=spec
    )
    # wait for index to be initialized
    time.sleep(1)

# connect to index
index = pc.Index(index_name)
# view index stats
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 301}},
 'total_vector_count': 301}

## RAG with GPT-4o
Query the dataset

In [24]:
%%capture
from sentence_transformers import SentenceTransformer
model_sentence = SentenceTransformer('all-MiniLm-L6-v2')

In [11]:
def get_embedding(text, model=model_sentence):
    text = text.replace("\n", " ")
    embedding = model.encode(text,ignore_errors=True)
    return embedding

In [14]:

import time
start_time = time.time()
query_text = "Customer Hill CreditScore 608Age 41 Tenure 1Balance 83807.86NumOfProducts 1HasCrCard 0IsActiveMember 1EstimatedSalary 112542.58 Exited 0Complain 1Satisfaction Score 3Card Type DIAMONDPoint Earned 456"
query_embedding = get_embedding(query_text,model=model_sentence)

In [15]:
# Perform the query using the embedding
query_results = index.query(
    vector=query_embedding.tolist(),
    include_metadata=True,
    top_k=1
)
# Print the query results along with metadata
print("Query Results:")
for match in query_results['matches']:
    print(f"ID: {match['id']}, Score: {match['score']}")
    if 'metadata' in match and 'text' in match['metadata']:
        print(f"Text: {match['metadata']['text']}")
    else:
        print("No metadata available.")

response_time = time.time() - start_time              # Measure response time
print(f"Querying response time: {response_time:.2f} seconds")  # Print response time

Query Results:
ID: 2, Score: 0.714985251
Text: RowNumber: 2 CustomerId: 15647311 Surname: Hill CreditScore: 608 Geography: Spain Gender: Female Age: 41 Tenure: 1 Balance: 83807.86 NumOfProducts: 1 HasCrCard: 0 IsActiveMember: 1 EstimatedSalary: 112542.58 Exited: 0 Complain: 1 Satisfaction Score: 3 Card Type: DIAMOND Point Earned: 456
Querying response time: 1.10 seconds


## Extract Relevant Texts

In [16]:
relevant_texts = [match['metadata']['text'] for match in query_results['matches'] if 'metadata' in match and 'text' in match['metadata']]

# Join all items in the list into a single string separated by a specific delimiter (e.g., a newline or space)
combined_text = '\n'.join(relevant_texts)  # Using newline as a separator for readability
print(combined_text)

RowNumber: 2 CustomerId: 15647311 Surname: Hill CreditScore: 608 Geography: Spain Gender: Female Age: 41 Tenure: 1 Balance: 83807.86 NumOfProducts: 1 HasCrCard: 0 IsActiveMember: 1 EstimatedSalary: 112542.58 Exited: 0 Complain: 1 Satisfaction Score: 3 Card Type: DIAMOND Point Earned: 456


## Augemented Prompts

In [17]:
# Combine texts into a single string, separated by new lines
combined_context = "\n".join(relevant_texts)
#prompt
query_prompt="I have this customer bank record with interesting information on age, credit score and more and similar customers. What could I suggest to keep them in my bank in an email with an url to get new advantages based on the fields for each Customer ID:"
itext=query_prompt+ query_text+combined_context
# Augmented input
print("Prompt for the Generative AI model:", itext)

Prompt for the Generative AI model: I have this customer bank record with interesting information on age, credit score and more and similar customers. What could I suggest to keep them in my bank in an email with an url to get new advantages based on the fields for each Customer ID:Customer Hill CreditScore 608Age 41 Tenure 1Balance 83807.86NumOfProducts 1HasCrCard 0IsActiveMember 1EstimatedSalary 112542.58 Exited 0Complain 1Satisfaction Score 3Card Type DIAMONDPoint Earned 456RowNumber: 2 CustomerId: 15647311 Surname: Hill CreditScore: 608 Geography: Spain Gender: Female Age: 41 Tenure: 1 Balance: 83807.86 NumOfProducts: 1 HasCrCard: 0 IsActiveMember: 1 EstimatedSalary: 112542.58 Exited: 0 Complain: 1 Satisfaction Score: 3 Card Type: DIAMOND Point Earned: 456


## Augmented generation

In [19]:
f = open("open-aikey.txt", "r")
API_KEY=f.readline()
f.close()

In [20]:
#The OpenAI Key
import os
import openai
os.environ['OPENAI_API_KEY'] =API_KEY
openai.api_key = os.getenv("OPENAI_API_KEY")

In [22]:
import openai
import time

# Initialize the OpenAI client
client = openai.OpenAI()

gpt_model = "gpt-4o-mini"

import time
start_time = time.time()  # Start timing before the request

response = client.chat.completions.create(
  model=gpt_model,
  messages=[
    {
      "role": "system",
      "content": "You are the community manager can write engaging email based on the text you have. Do not use a surname but simply Dear Valued Customer instead."
    },
    {
      "role": "user",
      "content": itext
    }
  ],
  temperature=0,
  max_tokens=300,
  top_p=1,
  frequency_penalty=0,
  presence_penalty=0
)
print(response.choices[0].message.content)

response_time = time.time() - start_time              # Measure response time
print(f"Querying response time: {response_time:.2f} seconds")  # Print response time

Subject: Unlock Exclusive Benefits Tailored Just for You!

Dear Valued Customer,

We hope this message finds you well! At our bank, we are committed to providing you with the best possible experience and ensuring that you have access to exclusive benefits that suit your needs.

We noticed that you have been with us for a year and have a Diamond Card, which comes with its own set of advantages. However, we believe thereâ€™s even more we can offer you to enhance your banking experience. 

Here are a few personalized suggestions based on your profile:

1. **Enhanced Credit Options**: With your current credit score, you may qualify for better credit products that can help you manage your finances more effectively.

2. **Loyalty Rewards**: As a valued customer, you can earn additional points on your Diamond Card. The more you use it, the more rewards you can accumulate!

3. **Financial Planning Services**: Our financial advisors are here to help you make the most of your estimated salary an