In [1]:
import requests
from dotenv import load_dotenv
import os
import sys
import os
# Put the parent directory first on the import path so that `src.*` modules
# can be imported from this notebook.
sys.path.insert(0, os.path.abspath('..'))

# Load variables from the .env file one directory up, then read the two API
# keys used below (each is None if the variable is not set).
load_dotenv('../.env')
rapidapi_key = os.environ.get('X-RapidAPI-Key')
open_ai_key = os.environ.get('OPENAI')


In [None]:
# Fetch earthquake records from the EveryEarthquake API (hosted on RapidAPI).
url = "https://everyearthquake.p.rapidapi.com/earthquakesByDate"

# Query parameters sent with the request; all values are passed as strings.
querystring = {
    "startDate": "2021-01-01",
    "endDate": "2023-12-10",
    "start": "1",
    "count": "100",
    "type": "earthquake",
    "latitude": "33.962523",
    "longitude": "-118.3706975",
    "radius": "1000",
    "units": "miles",
    "magnitude": "3",
    "intensity": "1",
}

headers = {
    "X-RapidAPI-Key": rapidapi_key,
    "X-RapidAPI-Host": "everyearthquake.p.rapidapi.com",
}

# Without a timeout, requests can block indefinitely on an unresponsive server.
response = requests.get(url, headers=headers, params=querystring, timeout=30)
# Fail here with a clear HTTPError on a 4xx/5xx response (e.g. bad or missing
# API key) instead of a confusing KeyError when the payload is parsed later.
response.raise_for_status()


In [None]:
import json

# Keep only the payload stored under the 'data' key of the API response and
# cache it on disk, so later cells can reload it from 'earthquakes.json'.
data = response.json()['data']
with open('earthquakes.json', mode='w') as cache_file:
    json.dump(data, cache_file)

In [None]:
import json

# Reload the cached payload from disk (open() defaults to read mode).
with open('earthquakes.json') as cache_file:
    data_r = json.load(cache_file)

In [None]:
import pandas as pd

# Flatten the reloaded JSON records into a DataFrame; record_path is left at
# its default of None.
df = pd.json_normalize(data_r)

In [None]:
from src.jsonconverter import JsonToDocument

# Run the project's JSON converter directly (outside any pipeline) on the
# cached file and pull the resulting documents out of its output dict.
# NOTE(review): content_field="title" presumably makes each record's title the
# document content; confirm against src/jsonconverter.
converter = JsonToDocument(
    content_field="title",
)
results = converter.run(
    sources=["earthquakes.json"],
)
documents = results["documents"]

## Build Indexing pipeline

In [2]:
from src.jsonconverter import JsonToDocument

from haystack import Pipeline
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.writers import DocumentWriter


# Components of the indexing pipeline: an in-memory store, a writer that
# targets that store, and the project's JSON-to-Document converter.
document_store = InMemoryDocumentStore()
writer = DocumentWriter(document_store=document_store)
converter = JsonToDocument(
    content_field="title",
    flatten_field=None,
    one_doc_per_row=True,
)

# Register both components and feed the converter's output into the writer.
indexing_pipeline = Pipeline()
indexing_pipeline.add_component(name="converter", instance=converter)
indexing_pipeline.add_component(name="writer", instance=writer)

indexing_pipeline.connect("converter", "writer")


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Index the cached JSON file; the pipeline's output dict is displayed as the
# cell result.
indexing_pipeline.run(
    data={
        "converter": {"sources": ["earthquakes.json"]},
    }
)

Converting JSON files to Documents: 100%|██████████| 1/1 [00:00<00:00, 144.53it/s]


{'writer': {'documents_written': 100}}

In [None]:
# Inspect the store's contents: called with no filters, so this presumably
# returns every stored document (confirm against the Haystack docs).
document_store.filter_documents()

### Question answering pipeline

In [4]:
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.generators import GPTGenerator
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.document_stores import InMemoryDocumentStore

# BM25 retriever over the documents indexed above, and the OpenAI-backed
# generator that produces the final answer.
retriever = InMemoryBM25Retriever(document_store=document_store)
llm = GPTGenerator(api_key=open_ai_key)

# Jinja template for the LLM prompt: the content of every retrieved document
# is listed under CONTEXT, followed by the user's question.
prompt_template = """\
Use the following context to answer the user's question. If the context provided doesn't answer the question - please respond with: "I don't know".

### CONTEXT
{% for doc in documents %}
  {{ doc.content }}
{% endfor %}

### USER QUESTION
{{query}}


"""
prompt_builder = PromptBuilder(template=prompt_template)

In [5]:
# Assemble the RAG pipeline: retriever -> prompt builder -> LLM -> answer builder.
rag_pipeline = Pipeline()
rag_pipeline.add_component("retriever", retriever)
rag_pipeline.add_component("prompt_builder", prompt_builder)
rag_pipeline.add_component("llm", llm)
rag_pipeline.add_component("answer_builder", AnswerBuilder())

# Wire the components together, in this order. The retriever output goes both
# into the prompt and into the answer builder, so each answer carries the
# documents it was generated from.
for sender, receiver in (
    ("retriever", "prompt_builder.documents"),
    ("prompt_builder", "llm"),
    ("llm.replies", "answer_builder.replies"),
    ("llm.metadata", "answer_builder.metadata"),
    ("retriever", "answer_builder.documents"),
):
    rag_pipeline.connect(sender, receiver)

In [6]:
question = "Where did the earthquake happen?"
# The retriever, prompt builder and answer builder each receive the same
# question under their "query" input.
result = rag_pipeline.run(
    {
        component: {"query": question}
        for component in ("retriever", "prompt_builder", "answer_builder")
    }
)
                     


Ranking by BM25...: 100%|██████████| 100/100 [00:00<00:00, 149051.31 docs/s]


In [8]:
question = "Where did earthquakes of a magnitude of 5 or higher happen?"
# The retriever, prompt builder and answer builder each receive the same
# question under their "query" input.
result = rag_pipeline.run(
    {
        component: {"query": question}
        for component in ("retriever", "prompt_builder", "answer_builder")
    }
)
      

Ranking by BM25...: 100%|██████████| 100/100 [00:00<00:00, 296626.87 docs/s]


In [9]:
# Display the full pipeline output for the last question: the generated answer
# together with its metadata and the retrieved documents.
result

{'answer_builder': {'answers': [GeneratedAnswer(data='Based on the information provided, there are no earthquakes of a magnitude of 5 or higher mentioned in the context.', query='Where did earthquakes of a magnitude of 5 or higher happen?', metadata={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 232, 'completion_tokens': 23, 'total_tokens': 255}}, documents=[Document(id=ci40616032, content: 'M 3.3 - 2 km E of Fullerton, CA', meta: {'magnitude': '3.33', 'type': 'earthquake', 'title': 'M 3.3 - 2 km E of Fullerton, CA', 'date': '2023-12-05T04:09:58', 'time': '1701749398340', 'updated': '1701835576787', 'url': 'https://earthquake.usgs.gov/earthquakes/eventpage/ci40616032', 'detailUrl': 'https://earthquake.usgs.gov/earthquakes/feed/v1.0/detail/ci40616032.geojson', 'felt': '3236', 'cdi': '4', 'mmi': '4', 'alert': '', 'status': 'reviewed', 'tsunami': '0', 'sig': '531', 'net': 'ci', 'code': '40616032', 'ids': ',ci40616032,us7000lgbg,', 'sources'

## Compare against the dataframe

In [None]:
# Display the DataFrame built from the cached records so the generated answer
# can be cross-checked against the underlying data.
df