In [1]:
import requests
from dotenv import load_dotenv
import os
import sys
import os
# Put the parent directory at the front of the import path so that packages
# living there (the later cells import from `src`) can be resolved.
parent_dir = os.path.abspath('..')
sys.path.insert(0, parent_dir)
import pandas as pd

# Read the .env file one level up, then fetch both API keys from the process
# environment. Either value is None when its variable is not set.
load_dotenv('../.env')
rapidapi_key = os.environ.get('X-RapidAPI-Key')
open_ai_key = os.environ.get('OPENAI')


In [2]:
# Request earthquake records from the everyearthquake RapidAPI endpoint.
url = "https://everyearthquake.p.rapidapi.com/earthquakesByDate"

# Query parameters, all sent as strings. Key order is kept as originally
# written so the generated URL is unchanged.
querystring = {
    "startDate": "2021-01-01",
    "endDate": "2023-12-10",
    "start": "1",
    "count": "100",
    "type": "earthquake",
    "latitude": "33.962523",
    "longitude": "-118.3706975",
    "radius": "1000",
    "units": "miles",
    "magnitude": "3",
    "intensity": "1",
}

# RapidAPI authentication headers; the key comes from the environment, never
# from a literal in the notebook.
headers = {
    "X-RapidAPI-Key": rapidapi_key,
    "X-RapidAPI-Host": "everyearthquake.p.rapidapi.com",
}

# requests waits forever by default; bound the wait so a stalled connection
# cannot hang the kernel.
response = requests.get(url, headers=headers, params=querystring, timeout=30)

# Surface HTTP errors (bad key, quota, server failure) here with their status
# code instead of letting the next cell fail on a missing 'data' key.
response.raise_for_status()


In [3]:
import re 
import json
# Take the list stored under 'data' in the API response and spell out the
# standalone "M" token in every title before persisting the records.
standalone_m = re.compile(r"\bM\b")
data = response.json()['data']
for quake in data:
    quake['title'] = standalone_m.sub("Earthquake with Magnitude", quake['title'])

# Persist the rewritten records; later cells read this file back.
with open('earthquakes.json', 'w') as out_file:
    json.dump(data, out_file)

In [None]:
import json
import pandas as pd



In [None]:
from src.jsonconverter import JsonToDocument

# Run the JSON converter on its own (outside any pipeline), using each
# record's "title" as the document content.
json_sources = ["earthquakes.json"]
converter = JsonToDocument(content_field="title")
results = converter.run(sources=json_sources)
documents = results["documents"]

## Build Indexing pipeline

In [4]:
from src.jsonconverter import JsonToDocument

from haystack import Pipeline
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.writers import DocumentWriter


# Components: an in-memory store, a JSON converter that uses "title" as the
# document content, and a writer bound to that store.
document_store = InMemoryDocumentStore()
converter = JsonToDocument(
    content_field="title",
    flatten_field=None,
    one_doc_per_row=True,
)
writer = DocumentWriter(document_store=document_store)

# Register both components under the names used when wiring and running.
indexing_pipeline = Pipeline()
for component_name, component in (("converter", converter), ("writer", writer)):
    indexing_pipeline.add_component(component_name, component)

# Feed the converter's output into the writer.
indexing_pipeline.connect("converter", "writer")


  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Index the saved earthquake file; the run result is left as the cell's last
# expression so it is displayed.
indexing_inputs = {"converter": {"sources": ["earthquakes.json"]}}
indexing_pipeline.run(data=indexing_inputs)

Converting JSON files to Documents: 100%|██████████| 1/1 [00:00<00:00, 103.15it/s]


{'writer': {'documents_written': 100}}

In [25]:
# Candidate exact-match filter on magnitude '3.7' (a string, not a number).
# It is only defined here and never applied; the intended consumer looks like
# document_store.filter_documents() — TODO confirm the key syntax it expects.
magnitude_condition = {"$eq": '3.7'}
filters_exact_match = {"meta['magnitude']": magnitude_condition}

### Question answering pipeline

In [6]:
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.generators import GPTGenerator
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.document_stores import InMemoryDocumentStore

# Jinja-style template: every retrieved document's content is listed, then
# the user's question, leaving the answer for the model to fill in.
prompt_template = """\
Given these documents, answer the question.\nDocuments:
    {% for doc in documents %}
        {{ doc.content }}
    {% endfor %}

    \nQuestion: {{question}}
    \nAnswer:


"""

# BM25 retrieval over the store populated by the indexing pipeline.
retriever = InMemoryBM25Retriever(document_store=document_store)

# Prompt renderer for the template above.
prompt_builder = PromptBuilder(prompt_template)

# GPT-4 generator authenticated with the key loaded from the environment.
llm = GPTGenerator(api_key=open_ai_key, model_name='gpt-4')

In [7]:
# Assemble the question-answering pipeline from its four stages.
rag_pipeline = Pipeline()
rag_stages = (
    ("retriever", retriever),
    ("prompt_builder", prompt_builder),
    ("llm", llm),
    ("answer_builder", AnswerBuilder()),
)
for stage_name, stage in rag_stages:
    rag_pipeline.add_component(instance=stage, name=stage_name)

# Retrieved documents go into the prompt, the rendered prompt goes to the LLM.
rag_pipeline.connect("retriever", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "llm")

# The answer builder receives the LLM's replies and metadata, plus the
# retriever's documents.
rag_pipeline.connect("llm.replies", "answer_builder.replies")
rag_pipeline.connect("llm.metadata", "answer_builder.metadata")
rag_pipeline.connect("retriever", "answer_builder.documents")

In [8]:
question = "Where did the earthquake happen?"

# The same question text is handed to the retriever (as the search query),
# the prompt builder, and the answer builder.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
    "answer_builder": {"query": question},
}
result = rag_pipeline.run(rag_inputs)
                     


Ranking by BM25...: 100%|██████████| 100/100 [00:00<00:00, 179858.66 docs/s]


In [13]:
# Display the text of the first answer produced by the answer builder.
first_answer = result['answer_builder']['answers'][0]
first_answer.data

'The earthquakes happened near the coast of Oregon, 7km NW of The Geysers, CA, Washington, Utah, Nevada, and western Texas.'

In [14]:
question = "Identify all entries containing information on earthquakes with a magnitude equal to 4 or higher? Provide the location and magnitude"

# Re-run the pipeline with the new question supplied to all three consumers.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
    "answer_builder": {"query": question},
}
result = rag_pipeline.run(rag_inputs)

Ranking by BM25...: 100%|██████████| 100/100 [00:00<00:00, 128699.11 docs/s]


In [18]:
# Break the first answer's text on newlines so each entry shows separately.
answer_text = result['answer_builder']['answers'][0].data
answer_text.split('\n')

['Earthquake with Magnitude 4.0 - 7 km NW of The Geysers, CA',
 '',
 'Earthquake with Magnitude 4.3 - Washington',
 '',
 'Earthquake with Magnitude 4.3 - Nevada',
 '',
 'Earthquake with Magnitude 4.0 - western Texas',
 '',
 'Earthquake with Magnitude 4.3 - western Texas']

In [16]:
import pandas as pd
# Read the saved records back from disk and tabulate them so the LLM's
# answers can be checked against the underlying data.
with open('earthquakes.json', 'r') as saved_file:
    data_r = json.load(saved_file)

df = pd.DataFrame(data=data_r)

In [21]:
# Rows whose magnitude equals the string "4" exactly.
# NOTE(review): the preceding question asks for magnitude 4 *or higher*; this
# comparison would not match values such as "4.3" — confirm that is intended.
is_magnitude_four = df['magnitude'] == "4"
df[is_magnitude_four]

Unnamed: 0,id,magnitude,type,title,date,time,updated,url,detailUrl,felt,...,location,continent,country,subnational,city,locality,postcode,what3words,timezone,locationDetails
4,tx2023xtoa,4,earthquake,Earthquake with Magnitude 4.0 - western Texas,2023-12-04T23:59:09,1701734349290,1701804532439,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/earthquakes/feed/v...,6,...,western Texas,North America,United States of America (the),Texas,Van Horn,Culberson County,,hasty.misty.private,-360,"[{'id': '9', 'wikidataId': 'Q49', 'name': 'Nor..."
26,tx2023vqus,4,earthquake,Earthquake with Magnitude 4.0 - 20 km N of Sta...,2023-11-05T00:49:52,1699145392013,1699216610576,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/earthquakes/feed/v...,10,...,"Stanton, Texas",North America,United States of America (the),Texas,Barstow-Pyote,Monahans,79777.0,interview.lanky.fried,-300,"[{'id': '89284', 'wikidataId': '', 'name': '48..."
77,us7000kr5e,4,earthquake,Earthquake with Magnitude 4.0 - 17 km SSW of W...,2023-08-26T08:38:44,1693039124726,1693064910286,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/earthquakes/feed/v...,20,...,"Weston, Colorado",North America,United States of America (the),New Mexico,Raton,Raton,87740.0,dealings.grove.candlelit,-360,"[{'id': '89689', 'wikidataId': '', 'name': '35..."
80,tx2023qlls,4,earthquake,Earthquake with Magnitude 4.0 - 57 km S of Whi...,2023-08-22T22:58:00,1692745080335,1692806049708,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/earthquakes/feed/v...,3,...,"Whites City, New Mexico",North America,United States of America (the),Texas,Van Horn,Culberson County,,storybook.bailouts.fertile,-300,"[{'id': '9', 'wikidataId': 'Q49', 'name': 'Nor..."
81,us7000kq7r,4,earthquake,Earthquake with Magnitude 4.0 - 59 km NW of To...,2023-08-22T22:58:00,1692745080231,1692746034517,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/earthquakes/feed/v...,0,...,"Toyah, Texas",North America,United States of America (the),Texas,Van Horn,Culberson County,,picnic.publications.rehearsed,-300,"[{'id': '9', 'wikidataId': 'Q49', 'name': 'Nor..."
