In [1]:
from datasets import load_dataset
import pandas as pd

# Load the dataset
dataset = load_dataset("Abirate/english_quotes")

# Convert to DataFrame
df = pd.DataFrame(dataset['train'])
df.head()

Unnamed: 0,quote,author,tags
0,“Be yourself; everyone else is already taken.”,Oscar Wilde,"[be-yourself, gilbert-perreira, honesty, inspi..."
1,"“I'm selfish, impatient and a little insecure....",Marilyn Monroe,"[best, life, love, mistakes, out-of-control, t..."
2,“Two things are infinite: the universe and hum...,Albert Einstein,"[human-nature, humor, infinity, philosophy, sc..."
3,"“So many books, so little time.”",Frank Zappa,"[books, humor]"
4,“A room without books is like a body without a...,Marcus Tullius Cicero,"[books, simile, soul]"


In [2]:
# Drop rows with missing values
df.dropna(inplace=True)

# Lowercase text for uniformity
df['quote'] = df['quote'].str.lower()
df['author'] = df['author'].str.lower()
df['tags'] = df['tags'].apply(lambda tags: [tag.lower() for tag in tags])


In [3]:
from sentence_transformers import InputExample
import random

train_examples = []

for index, row in df.iterrows():
    quote = row['quote']
    author = row['author']
    tags = ', '.join(row['tags'])

    synthetic_query = f"Quotes about {tags} by {author}"

    # Add label 1.0 for positive pair
    train_examples.append(InputExample(texts=[synthetic_query, quote], label=1.0))

In [None]:
from sentence_transformers import SentenceTransformer, losses, models
from torch.utils.data import DataLoader
#Loading the pre trained model

model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')




In [None]:
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)
train_loss = losses.CosineSimilarityLoss(model=model)

# Train the model
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    epochs=4,     # Use 3+ for better results if time/compute allows
    warmup_steps=10
)


  0%|          | 0/628 [00:00<?, ?it/s]

{'loss': 0.0, 'grad_norm': 6.0963273426750675e-05, 'learning_rate': 4.039087947882737e-06, 'epoch': 3.18}
{'train_runtime': 745.9271, 'train_samples_per_second': 13.449, 'train_steps_per_second': 0.842, 'train_loss': 2.8536834636892957e-05, 'epoch': 4.0}


In [7]:
model.save("fine_tuned_quote_model")

In [8]:
import numpy as np
import faiss

# Load your fine-tuned model
model = SentenceTransformer("fine_tuned_quote_model")

# Get list of all quotes
quotes = df['quote'].tolist()

# Generate embeddings
quote_embeddings = model.encode(quotes, show_progress_bar=True)

# Convert to float32 (FAISS requirement)
quote_embeddings = np.array(quote_embeddings).astype("float32")

Batches:   0%|          | 0/79 [00:00<?, ?it/s]

In [9]:
# Create FAISS index
index = faiss.IndexFlatL2(quote_embeddings.shape[1])
index.add(quote_embeddings)

# Save it to reuse later
faiss.write_index(index, "quotes_index.faiss")


In [39]:
df.to_csv("your_cleaned_quotes.csv", index=False)

In [10]:
def search_quotes(query, top_k=5):
    query_embedding = model.encode([query])
    query_embedding = np.array(query_embedding).astype("float32")
    D, I = index.search(query_embedding, top_k)

    results = []
    for i in I[0]:
        quote_data = {
            "quote": df.iloc[i]['quote'],
            "author": df.iloc[i]['author'],
            "tags": df.iloc[i]['tags']
        }
        results.append(quote_data)
    return results


In [11]:
results = search_quotes("Quotes about insanity attributed to Einstein")
for r in results:
    print(f"\nQuote: {r['quote']}\nAuthor: {r['author']}\nTags: {r['tags']}")



Quote: “sanity and happiness are an impossible combination.”
Author: mark twain
Tags: ['happiness', 'sanity']

Quote: “angry people are not always wise.”
Author: jane austen,
Tags: ['anger', 'jane-austen', 'wisdom']

Quote: “even strength must bow to wisdom sometimes.”
Author: rick riordan,
Tags: ['inspirational', 'wisdom']

Quote: “no good deed goes unpunished.”
Author: oscar wilde
Tags: ['humor']

Quote: “better a cruel truth than a comfortable delusion.”
Author: edward abbey
Tags: ['truth']


In [44]:
from dotenv import load_dotenv
import os
load_dotenv()
gen_api_key = os.getenv("GOOGLE_API_KEY")

In [None]:
import google.generativeai as genai

genai.configure(api_key=gen_api_key)

model_gemini = genai.GenerativeModel("gemini-1.5-flash-latest")

## This function will be using the gemini model and prompt engineering was done according to the use case so that quotes can be generated

In [13]:
def generate_structured_response_gemini(query, retrieved_quotes):
    context = "\n".join([f"{r['quote']} — {r['author']} (Tags: {', '.join(r['tags'])})" for r in retrieved_quotes])

    prompt = f"""
You are a smart assistant helping with quotes. Use the following quotes to answer the query.

Query: "{query}"

Quotes:
{context}

Return a structured JSON with the following fields:
- quotes: list of best matching quotes
- authors: list of authors
- tags: list of relevant tags
- summary: a short 2-line summary
"""

    response = model_gemini.generate_content(prompt)
    return response.text


In [14]:
results = search_quotes("Quotes about insanity attributed to Einstein")
results

[{'quote': '“sanity and happiness are an impossible combination.”',
  'author': 'mark twain',
  'tags': ['happiness', 'sanity']},
 {'quote': '“angry people are not always wise.”',
  'author': 'jane austen,',
  'tags': ['anger', 'jane-austen', 'wisdom']},
 {'quote': '“even strength must bow to wisdom sometimes.”',
  'author': 'rick riordan,',
  'tags': ['inspirational', 'wisdom']},
 {'quote': '“no good deed goes unpunished.”',
  'author': 'oscar wilde',
  'tags': ['humor']},
 {'quote': '“better a cruel truth than a comfortable delusion.”',
  'author': 'edward abbey',
  'tags': ['truth']}]

In [15]:
results = search_quotes("Quotes about insanity attributed to mark twain")
print(generate_structured_response_gemini("Quotes about insanity attributed to mark twain", results))

```json
{
  "quotes": [
    "sanity and happiness are an impossible combination."
  ],
  "authors": [
    "Mark Twain"
  ],
  "tags": [
    "happiness",
    "sanity"
  ],
  "summary": [
    "Mark Twain's quote on sanity and happiness highlights an ironic juxtaposition.",
    "It suggests that true happiness might exist outside conventional notions of sanity."
  ]
}
```



In [17]:
results = search_quotes("Motivational quotes tagged accomplishment")
print(generate_structured_response_gemini("Motivational quotes tagged accomplishment", results))

```json
{
  "quotes": [
    "success is stumbling from failure to failure with no loss of enthusiasm.",
    "pain is temporary. quitting lasts forever."
  ],
  "authors": [
    "winston s. churchill",
    "lance armstrong sally jenkins"
  ],
  "tags": [
    "failure",
    "persistence",
    "success",
    "cancer",
    "inspiration",
    "inspirational",
    "pain",
    "quitting"
  ],
  "summary": "These quotes highlight the importance of perseverance in achieving success.  Overcoming failures and pushing through pain are key to accomplishing goals."
}
```



In [18]:
results = search_quotes("All Oscar Wilde quotes with humor")
print(generate_structured_response_gemini("All Oscar Wilde quotes with humor", results))

```json
{
  "quotes": [],
  "authors": [],
  "tags": [],
  "summary": "There are no humorous quotes by Oscar Wilde among the provided quotes.  The provided dataset does not contain any quotes fitting this query."
}
```



In [24]:
from datasets import Dataset

examples = [
    {
        "user_input": "quotes about insanity attributed to Einstein",
        "contexts": [
            "Insanity is doing the same thing over and over again and expecting different results."
        ],
        "ground_truth": "Insanity is doing the same thing over and over again and expecting different results.",
        "response": "Einstein once said, 'Insanity is doing the same thing over and over again and expecting different results.'"
    },
    {
        "user_input": "motivational quotes tagged accomplishment",
        "contexts": [
            "Success is not final, failure is not fatal: It is the courage to continue that counts.",
            "What you get by achieving your goals is not as important as what you become by achieving your goals."
        ],
        "ground_truth": "Motivational quotes that talk about perseverance and growth through accomplishments.",
        "response": "Quotes about accomplishment: 'Success is not final...', 'What you become is more important...'"
    }
]

ds = Dataset.from_list(examples)


In [22]:
from dotenv import load_dotenv
import os

load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

In [25]:
from ragas.metrics import context_precision
from ragas import evaluate

# Make sure the dataset has: user_input, response, contexts
result = evaluate(
    dataset=ds,
    metrics=[context_precision]
)

print(result)


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Exception raised in Job[1]: RateLimitError(Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}})
Exception raised in Job[0]: RateLimitError(Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}})


{'context_precision': nan}


In [28]:
import phoenix as px
px.launch_app()

  next(self.gen)
  next(self.gen)


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://arize.com/docs/phoenix


<phoenix.session.session.ThreadSession at 0x1c1c272dea0>

In [32]:
from openinference.instrumentation.langchain import LangChainInstrumentor
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.trace import set_tracer_provider

# Set up the OpenTelemetry Tracer
provider = TracerProvider()
exporter = OTLPSpanExporter(endpoint="http://localhost:4318/v1/traces")  # default Phoenix endpoint
processor = BatchSpanProcessor(exporter)
provider.add_span_processor(processor)
set_tracer_provider(provider)

# Instrument LangChain with OpenInference
LangChainInstrumentor().instrument(tracer_provider=provider)


Exception while exporting Span.
Traceback (most recent call last):
  File "C:\Users\ARYAN SURI\AppData\Roaming\Python\Python310\site-packages\urllib3\connection.py", line 198, in _new_conn
    sock = connection.create_connection(
  File "C:\Users\ARYAN SURI\AppData\Roaming\Python\Python310\site-packages\urllib3\util\connection.py", line 85, in create_connection
    raise err
  File "C:\Users\ARYAN SURI\AppData\Roaming\Python\Python310\site-packages\urllib3\util\connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [WinError 10061] No connection could be made because the target machine actively refused it

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\ARYAN SURI\AppData\Roaming\Python\Python310\site-packages\urllib3\connectionpool.py", line 793, in urlopen
    response = self._make_request(
  File "C:\Users\ARYAN SURI\AppData\Roaming\Python\Python310\site-packages\urllib3\con

In [None]:
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.chains import LLMChain

prompt = PromptTemplate.from_template("What is the capital of {country}?")
llm = OpenAI(temperature=0)
chain = LLMChain(llm=llm, prompt=prompt)

response = chain.run("France")
print(response) 


  llm = OpenAI(temperature=0)
  chain = LLMChain(llm=llm, prompt=prompt)
  response = chain.run("France")


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [43]:
from ragas.evaluation import evaluate
from ragas.metrics import faithfulness

results = evaluate(
    ds,
    metrics=[faithfulness]
)

print(result)


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Exception raised in Job[0]: RateLimitError(Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}})
Exception raised in Job[1]: TimeoutError()


{'context_precision': nan}
