# Step 1: convert from json to document

In [1]:
from langchain_community.document_loaders import JSONLoader

import json
from pathlib import Path
from pprint import pprint


file_path='firecrawl_output.json'
data = json.loads(Path(file_path).read_text())

In [2]:
pprint(data)

{'completed': 333,
 'creditsUsed': 333,
 'data': [{'markdown': '\n'
                       '\n'
                       '![The Bottom '
                       'Line](https://www.thebottomline.org.uk/wp-content/uploads/2016/02/tbl2-logo-on-line-in-the-rain.jpg)\n'
                       '\n'
                       'KETASED\n'
                       '\n'
                       '[29 October '
                       '2014](https://www.thebottomline.org.uk/summaries/icm/ketased/ '
                       '"6:00 pm")\n'
                       ' [Duncan '
                       'Chambler](https://www.thebottomline.org.uk/author/duncanchambler/ '
                       '"View all posts by Duncan Chambler") [Emergency '
                       'Medicine](https://www.thebottomline.org.uk/category/summaries/em/)\n'
                       ', [Intensive Care '
                       'Medicine](https://www.thebottomline.org.uk/category/summaries/icm/)\n'
                       ', [Peri-operative '
    

In [3]:
loader = JSONLoader(
    file_path='firecrawl_output.json',
    jq_schema='.data[].markdown',
    text_content=False)

data = loader.load()

In [4]:
data



# Step 2: split and chunk text

In [5]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import MarkdownTextSplitter

text_splitter = MarkdownTextSplitter(chunk_size=10000, chunk_overlap=2000)
splits = text_splitter.split_documents(data)
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

vectorstore = FAISS.from_documents(documents=splits, embedding=embedding_model)


  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from tqdm.autonotebook import tqdm, trange


In [11]:
splits

 Document(metadata={'source': '/Users/lawt/Python/personal/scrape/firecrawl_output.json', 'seq_num': 4}, page_content='#### Metadata\n\nSummary author: [Segun Olusanya](https://twitter.com/iceman_ex)\n  \nSummary date: 19th May 2017  \nPeer-review editor: [Adrian Wong](https://twitter.com/avkwong)\n\n[Featured](https://www.thebottomline.org.uk/clinical-topics/featured/)\n[Microbiology](https://www.thebottomline.org.uk/clinical-topics/microbiology/)\n\n[![](https://www.thebottomline.org.uk/wp-content/uploads/2024/07/Bling-490x280.jpeg)](https://www.thebottomline.org.uk/summaries/icm/bling-iii/)\n \n\n[![](https://www.thebottomline.org.uk/wp-content/uploads/2024/07/Copy-of-OXYGEN-ICU-twitter-9-490x280.png)](https://www.thebottomline.org.uk/summaries/icm/preoxi-trial-noninvasive-ventilation-for-preoxygenation/)\n \n\n[![](https://www.thebottomline.org.uk/wp-content/uploads/2024/05/Enrich-490x280.jpg)](https://www.thebottomline.org.uk/summaries/icm/enrich/)\n \n\n### One comment\n\n*   Pin

# test with openai

In [6]:
import openai
import openai.cli
import os
# OpenAI API Key
api_key = os.environ.get("OPENAI_API_KEY")


In [10]:
import openai
from langchain.vectorstores import FAISS
from openai import OpenAI
# Set your OpenAI API key
openai.api_key = api_key

# Assuming your vectorstore is already set up
# Let's query FAISS to retrieve relevant documents based on the user's input
def retrieve_relevant_docs(query, vectorstore, k=10):
    # Vectorstore returns the most similar documents based on the query
    docs = vectorstore.similarity_search(query, k=k)
    # Concatenate the content of the retrieved documents
    context = "\n\n".join([doc.page_content for doc in docs])
    return context

client = OpenAI(api_key=api_key)

SYSTEM_PROMPT = '''
You are an expert medical evidence evaluator with a background in synthesizing research from peer-reviewed medical articles. Your task is to critically evaluate summaries of medical studies, synthesize the key findings, and provide accurate, evidence-based answers to the user’s queries based on these summaries. You are provided with a data set of articles about temperature management. You may only refer to the information provided in the summaries to answer the user's questions, specifically about temperature management. Do not answer questions about any other topic.
When engaging with the user, adhere to the following guidelines:

	1.	Focus on Evidence: Base all your responses solely on the summaries of medical articles provided. If the provided articles cannot be summarized to address the user's question, tell the user and do not respond further.
	2.	Structure: Begin your response with an overall answer to the users' question. Follow up with with synthesis of the evidence. Provide clear, concise responses. Use medical and scientific terminology when necessary; your users are medical professionals. 
	3.	Synthesis: When multiple articles are pertinent to the topic, synthesize the information to form a comprehensive response. Compare and contrast findings, identifying consensus and areas of disagreement. Report summaries in chronological order of when the study was published, to put the evidence into temporal context. Be brief - most studies can be summarized in two sentences.
	4.	Transparency:
	•	Always refer to the summarized articles as your source, providing titles and key publication details (e.g., authors, journal name, year) as given in the summaries.
	•	If the user requests further details, indicate that the source information is based on summarized data, not original articles.
	5.	Limitations of Evidence: Be transparent about the quality and depth of the summarized evidence. If the summaries indicate limitations (e.g., incomplete data, small sample sizes), clearly communicate this to the user. Avoid overstating the strength of evidence from summaries. 
	6.	Ethical Guidelines: Never offer medical advice or diagnoses. Your role is strictly to summarize and synthesize evidence from the summaries provided, not to replace professional healthcare guidance.
	7.	Responsibility: Ensure that any conclusions or recommendations you provide are clearly grounded in the information from the summaries. Avoid speculation unless prompted by the user, and always indicate if the response is based on incomplete or limited summaries.
	8.  Brevity: Keep your responses concise and to the point. Where multiple articles are summarized, emphasize the purpose and key findings. Remember to be brief.

Your goal is to assist users in understanding the current state of medical evidence as reflected in article summaries, helping them digest, compare and contrast similarities and differences current evidence trends.
'''


# Function to get a chat completion from OpenAI using ChatCompletion.create()
def get_chat_completion(query, context, model="gpt-4o", temperature=1.2, max_tokens=1000):
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": query},
            {"role": "assistant", "content": f"Here is the relevant context: {context}"}
        ],
        temperature=temperature,
        max_tokens=max_tokens
    )
    return response.choices[0].message.content

# Example usage:
# Assuming `vectorstore` is your FAISS object loaded with documents

user_query = "What are the major trials in TTM, and what did they find?"
context = retrieve_relevant_docs(user_query, vectorstore)

# Generate a chat completion using the context from FAISS
response = get_chat_completion(user_query, context)
print(response)

The major trials in Targeted Temperature Management (TTM) primarily involve three key studies: the TTM1 trial, the TTM2 trial, and the HYPERION trial. These studies investigated the effectiveness of different temperature management strategies in patients post-cardiac arrest.

### 1. **TTM1 Trial (Target Temperature Management at 33°C Versus 36°C after Cardiac Arrest):**
- **Publication Details:** Nielsen et al., New England Journal of Medicine, 2013.
- **Key Findings:**
  - **Design:** Randomized controlled trial comparing TTM at 33°C versus 36°C in 939 patients who had out-of-hospital cardiac arrest (OHCA).
  - **Outcomes:** There were no significant differences in all-cause mortality at 6 months or the rate of poor neurological outcomes between the 33°C and 36°C groups. Mortality was around 50% in both groups, and poor neurological outcomes were around 50%.
  - **Conclusion:** TTM at 33°C did not confer a survival advantage over TTM at 36°C, suggesting that maintaining a controlled t