In [1]:
import requests
from dotenv import load_dotenv
import os
import sys
import os
# Put the parent directory first on the import path so that the `src`
# package imported later in this notebook can be resolved.
parent_dir = os.path.abspath("..")
sys.path.insert(0, parent_dir)

# Load variables from the .env file one directory up, then read the two API
# keys from the environment. Either key is None when the variable is not set.
load_dotenv("../.env")
rapidapi_key = os.environ.get("X-RapidAPI-Key")
open_ai_key = os.environ.get("OPENAI")


In [None]:
# Fetch earthquakes from the EveryEarthquake API (hosted on RapidAPI) for a
# date range and a radius around a fixed latitude/longitude.
url = "https://everyearthquake.p.rapidapi.com/earthquakesByDate"

# Every parameter is sent as a string in the URL query string.
querystring = {
    "startDate": "2021-01-01",
    "endDate": "2023-12-10",
    "start": "1",
    "count": "100",
    "type": "earthquake",
    "latitude": "33.962523",
    "longitude": "-118.3706975",
    "radius": "1000",
    "units": "miles",
    "magnitude": "3",
    "intensity": "1",
}

# The API key comes from the environment (loaded in the setup cell); it is
# never hard-coded in the notebook.
headers = {
    "X-RapidAPI-Key": rapidapi_key,
    "X-RapidAPI-Host": "everyearthquake.p.rapidapi.com",
}

# requests applies no timeout by default, so an unresponsive server would
# block the kernel indefinitely; bound the wait explicitly.
response = requests.get(url, headers=headers, params=querystring, timeout=30)

# Fail right here with a clear HTTPError on a 4xx/5xx status (for example a
# rejected API key) instead of a confusing error later when the body is parsed.
response.raise_for_status()


In [None]:
import pandas as pd

# Flatten the "data" payload of the API response into a DataFrame for
# inspection (presumably a list of earthquake records; verify against the API).
df = pd.json_normalize(response.json()["data"])

In [None]:
import json

# Persist the "data" payload of the API response to earthquakes.json so the
# converter cells can read it from disk.
data = response.json()["data"]
with open("earthquakes.json", mode="w") as out_file:
    out_file.write(json.dumps(data))

In [None]:
from src.jsonconverter import JsonToDocument

# Run the converter on its own (outside any pipeline) to look at the
# documents it produces from the saved JSON file.
json_sources = ["earthquakes.json"]
converter = JsonToDocument(content_field="title")
results = converter.run(sources=json_sources)
documents = results["documents"]

## Build the indexing pipeline

In [2]:
from src.jsonconverter import JsonToDocument

from haystack import Pipeline
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
from haystack.components.writers import DocumentWriter

# Components, listed in the order the data flows through them:
# JSON file -> documents -> embedded documents -> in-memory store.
document_store = InMemoryDocumentStore()
converter = JsonToDocument(
    content_field="title",
    flatten_field=None,
    one_doc_per_row=True,
)
embedder = SentenceTransformersDocumentEmbedder()
writer = DocumentWriter(document_store=document_store)

# Register the components under the names used when wiring and running.
indexing_pipeline = Pipeline()
indexing_pipeline.add_component(instance=converter, name="converter")
indexing_pipeline.add_component(instance=embedder, name="embedder")
indexing_pipeline.add_component(instance=writer, name="writer")

# Only component names are given here; the specific input/output sockets are
# left for the pipeline to match up.
indexing_pipeline.connect("converter", "embedder")
indexing_pipeline.connect("embedder", "writer")

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Index the saved JSON file. The returned dict is left as the cell's last
# expression so the notebook displays it.
indexing_pipeline.run(
    data={
        "converter": {"sources": ["earthquakes.json"]},
    }
)

Converting JSON files to Documents: 100%|██████████| 1/1 [00:00<00:00,  2.03it/s]
Batches: 100%|██████████| 4/4 [00:05<00:00,  1.30s/it]


{'writer': {'documents_written': 100}}

In [None]:
# Look at the column names of the dataframe attached to the first stored
# document (called without filters, filter_documents() is asked for everything).
first_document = document_store.filter_documents()[0]
first_document.dataframe.columns

### Question-answering pipeline

In [4]:

from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.retrievers import InMemoryEmbeddingRetriever
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.generators import GPTGenerator

# --- Query-side components -------------------------------------------------
# The query is embedded, the two best-matching documents (top_k=2) are pulled
# from the document store filled by the indexing pipeline, and the rendered
# prompt is sent to the OpenAI-backed generator.
query_embedder = SentenceTransformersTextEmbedder()
retriever = InMemoryEmbeddingRetriever(document_store=document_store, top_k=2)
llm = GPTGenerator(api_key=open_ai_key)

# --- Prompt ------------------------------------------------------------------
# Jinja template: for each retrieved document, list every dataframe column
# with its values when a non-empty dataframe is attached; otherwise fall back
# to the document's plain content. The user's question is appended at the end.
prompt_template = """\
Use the following context to answer the user's question. If the context provided doesn't answer the question - please respond with: "I don't know".

### CONTEXT
{% for doc in documents %}
  {% if doc.dataframe is not none and not doc.dataframe.empty %}
    {% for column in doc.dataframe.columns %}
      Column: {{ column }}
      Values: 
      {% for value in doc.dataframe[column] %}
        {{ value }}
      {% endfor %}
    {% endfor %}
  {% else %}
    {{ doc.content }}
  {% endif %}
{% endfor %}

### USER QUESTION
{{query}}


"""
prompt_builder = PromptBuilder(template=prompt_template)

In [5]:
# Assemble the question-answering pipeline:
# embed the query -> retrieve documents -> build the prompt -> generate.
pipeline = Pipeline()

# Name -> instance, in registration order (dicts keep insertion order).
qa_components = {
    "query_embedder": query_embedder,
    "retriever": retriever,
    "prompt_builder": prompt_builder,
    "llm": llm,
}
for component_name, component in qa_components.items():
    pipeline.add_component(instance=component, name=component_name)

# The first two connections name their sockets explicitly; the last one gives
# only component names and lets the pipeline match the sockets.
pipeline.connect("query_embedder.embedding", "retriever.query_embedding")
pipeline.connect("retriever.documents", "prompt_builder.documents")
pipeline.connect("prompt_builder", "llm")

In [6]:
# The same question text goes to two places: the embedder (to drive
# retrieval) and the prompt builder (to be shown to the LLM).
query = "Where did the earthquake happen?"
pipeline_inputs = {
    "query_embedder": {"text": query},
    "prompt_builder": {"query": query},
}
result = pipeline.run(data=pipeline_inputs)

# Print only the first reply from the generator.
first_reply = result["llm"]["replies"][0]
print(first_reply)

Batches: 100%|██████████| 1/1 [00:00<00:00,  3.47it/s]


The earthquake mentioned in the context happened 11 km east of Ocotillo, CA.


In [7]:
# The same question text goes to two places: the embedder (to drive
# retrieval) and the prompt builder (to be shown to the LLM).
query = "What were the magnitudes of the earthquakes? Provide the date,location, and magnitude."
pipeline_inputs = {
    "query_embedder": {"text": query},
    "prompt_builder": {"query": query},
}
result = pipeline.run(data=pipeline_inputs)

# Print only the first reply from the generator.
first_reply = result["llm"]["replies"][0]
print(first_reply)

Batches: 100%|██████████| 1/1 [00:00<00:00,  3.22it/s]


The magnitudes of the earthquakes are as follows:

1. Date: 2023-10-16, Location: 28 km W of Humboldt Hill, CA, Magnitude: 4.02.

2. Date: 2023-12-01, Location: 11 km E of Ocotillo, CA, Magnitude: 4.77.
