# Populate the AI knowledge base



In [None]:
%pip install -qU pinecone-client langchain_community cohere PyPDF2

In [2]:
import pandas as pd
import cohere
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from pinecone import Pinecone, ServerlessSpec
import os
import getpass
from langchain.schema import Document
from langchain.embeddings import CohereEmbeddings
from langchain_community.vectorstores import Pinecone as Pinecone_Langchain
import random

In [3]:
index = 'dispatch-ai'
os.environ['PINECONE_API_KEY'] = pinecone_secret_key = getpass.getpass('Enter Pinecone secret key:')
cohere_secret_key = getpass.getpass('Enter Cohere secret key:')

In [4]:
pc = Pinecone(api_key=pinecone_secret_key)
if index not in pc.list_indexes().names():
  pc.create_index(
    name = index,
    dimension = 4096,
    metric = 'cosine',
    spec=ServerlessSpec(
      cloud="aws",
      region="us-west-2"
      )
  )
pc.describe_index(index)

PineconeConfigurationError: You haven't specified an Api-Key.

In [None]:
# Each document will have a size of 500 or less
character_text_splitter = CharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap  = 200,
    length_function = len,
    is_separator_regex=False,
)
recursive_text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap  = 200,
    length_function = len,
    is_separator_regex=False,
)

In [None]:
import re

# A function to transform the Google sheet url to a downloadable CSV url
def convert_google_sheet_url(url):
    """

    Args:
    - url (str): The Google Sheets URL to convert.

    Returns:
    - str: The converted URL pointing to a downloadable CSV.
    """

    # Regular expression pattern to match Google Sheets URL
    pattern = r'https://docs\.google\.com/spreadsheets/d/([a-zA-Z0-9-_]+)(/edit#gid=(\d+)|/edit.*)?'
    # Replacement function to convert the URL
    replacement = lambda m: f'https://docs.google.com/spreadsheets/d/{m.group(1)}/export?' + (f'gid={m.group(3)}&' if m.group(3) else '') + 'format=csv'
    new_url = re.sub(pattern, replacement, url)
    return new_url

In [None]:
# Fill the vector database with curated articles about different emergency types
sheet_url = 'https://docs.google.com/spreadsheets/d/1smCE64VgagNaTDAMeGqSRX61z70orrkd10kzvDjRf0c/edit?usp=sharing'
df = pd.read_csv(convert_google_sheet_url(sheet_url))
df.head()

Unnamed: 0,Type of emergency,Answer,Reference,Link
0,Cardiac Arrest,Cardiac arrest is when the heart stops pumping...,MSD manual,https://www.msdmanuals.com/home/heart-and-bloo...
1,Cardiac Arrest,First-Aid Treatment for Cardiac Arrest\nThe cr...,MSD manual,https://www.msdmanuals.com/home/heart-and-bloo...
2,Cardiac Arrest,The crucial links in the chain of survival inc...,MSD manual,https://www.msdmanuals.com/home/heart-and-bloo...
3,Cardiac Arrest,Automated External Defibrillator: Jump-Startin...,MSD manual,https://www.msdmanuals.com/home/heart-and-bloo...
4,Cardiac Arrest,Skill in CPR is best obtained through a traini...,MSD manual,https://www.msdmanuals.com/home/heart-and-bloo...


In [None]:
documents = []
for _, row in df.iterrows():
    chuncks = character_text_splitter.split_text(f"{row['Answer']}")
    for chunck in chuncks:
      documents.append(Document(
          page_content = chunck,
          metadata = {"Reference": f"{row['Reference']}", "Link": f"{row['Link']}"}))



In [None]:
embeddings = CohereEmbeddings(cohere_api_key= cohere_secret_key, user_agent='dispatch-ai')
vector_store = Pinecone_Langchain.from_documents(documents, embeddings, index_name=index)

In [None]:
retriever = vector_store.as_retriever()
matched_docs = retriever.get_relevant_documents('What is Cardiac Arrest?')
for i, d in enumerate(matched_docs):
    print(f"\n## Document {i}\n")
    print(d.page_content)
    print(d.metadata)


## Document 0

Cardiac arrest is when the heart stops pumping blood and oxygen to the brain and other organs and tissues. Sometimes a person can be revived after cardiac arrest, particularly if treatment is started immediately. However, the more time that passes without oxygen-containing blood being pumped to the brain, the less likely it is that the person can be revived, and, if revived, the more likely it is that the person will have brain damage.
{'Link': 'https://www.msdmanuals.com/home/heart-and-blood-vessel-disorders/cardiac-arrest-and-cpr/cardiac-arrest-and-cpr#Standard-CPR_v79738929', 'Reference': 'MSD manual'}

## Document 1

Cardiac arrest can be caused by anything that makes the heart stop beating. One common cause, especially in adults, is an abnormal heart rhythm (arrhythmia). Another possible cause is stopping breathing, such as when a person drowns or has a severe lung infection or severe asthma attack.

A person in cardiac arrest lies motionless and does not respond t

In [None]:
# Adding some useful books to the knowledge base
doc_reader = PdfReader('/content/Emergency Call Management - People at risk.pdf')

In [None]:
raw_text = ''
for _, page in enumerate(doc_reader.pages):
    text = page.extract_text()
    if text:
        raw_text += text

In [None]:
chunks = recursive_text_splitter.split_text(raw_text)

In [None]:
docs = []
for chunk in chunks:
  docs.append(Document(
        page_content=chunk,
        metadata={"Reference": "Emergency call management: People at risk", "Link": "https://nfcc.org.uk/wp-content/uploads/2023/11/EmergencycallmanagementpeopleatriskConsultationdoc.pdf"},
  ))

In [None]:
embeddings = CohereEmbeddings(cohere_api_key= cohere_secret_key, user_agent='dispatch-ai')
vector_store = Pinecone_Langchain.from_documents(documents, embeddings, index_name=index)

In [None]:
doc_reader = PdfReader('/content/Emergency Medical Dispatch Procedure.pdf')

In [None]:
raw_text = ''
for _, page in enumerate(doc_reader.pages):
    text = page.extract_text()
    if text:
        raw_text += text

In [None]:
docs = []
for chunk in chunks:
  docs.append(Document(
        page_content=chunk,
        metadata={"Reference": "Emergency Medical Dispatch Procedure", "Link": "http://ma911.org/_wp/wp-content/uploads/2015/06/EMD-Procedures-March-08.pdf"},
  ))

In [None]:
embeddings = CohereEmbeddings(cohere_api_key= cohere_secret_key, user_agent='dispatch-ai')
vector_store = Pinecone_Langchain.from_documents(documents, embeddings, index_name=index)

In [None]:
# Fill the vector database with simulations
sheet_url = 'https://docs.google.com/spreadsheets/d/1J8gwsPwp8gc2cue-S1eu9I_8pcTQ2oezXnE6RZWGhMg/edit?usp=sharing'
df = pd.read_csv(convert_google_sheet_url(sheet_url))
df.head()

Unnamed: 0,Type of emergency,scenario,Title,protocol steps,conversation,reference,link
0,Cardiac Arrest,A family member finds an elderly person uncon...,Elderly Cardiac Arrest Rescue,Protocol steps:\nCheck Responsiveness: Shake t...,"Dispatcher: ""911, what's your emergency?""\nCal...",Emergency Medical Dispatch Guide Cards,https://ia903008.us.archive.org/view_archive.p...
1,Cardiac Arrest,"A person collapses at a shopping mall, and a b...",Cardiac Arrest at Shopping Mall,Check Responsiveness: Ensure the person is unr...,"Dispatcher: ""911, what's your emergency?""\nCal...",Emergency Medical Dispatch Guide Cards,https://ia903008.us.archive.org/view_archive.p...
2,Cardiac Arrest,"A gym member collapses during a workout, and t...",Gym Member Cardiac Arrest,Check Responsiveness: Assess if the person is ...,"Dispatcher: ""911, what’s your emergency?""\nCal...",Emergency Medical Dispatch Guide Cards,https://ia903008.us.archive.org/view_archive.p...
3,Cardiac Arrest,"A student collapses in the classroom, and the...",Cardiac Arrest: Student Collapse in Classroom,Check Responsiveness: Verify if the student is...,"Dispatcher: ""911, what’s your emergency?""\nCal...",Emergency Medical Dispatch Guide Cards,https://ia903008.us.archive.org/view_archive.p...
4,Choking,An adult is experiencing a partial airway obst...,Partial Airway Obstruction: Distressed Adult C...,Confirm that the person is choking.\nEncourage...,"Dispatcher: ""911, what is your emergency?""\nCa...",Emergency Medical Dispatch Guide Cards,https://ia903008.us.archive.org/view_archive.p...


In [None]:
documents = []
for _, row in df.iterrows():
    scenario = f"""Scenario: {row['scenario']}
                  Protocol: {row['protocol steps']}
                  conversation: {row['conversation']}"""
    documents.append(Document(
          page_content = scenario,
          metadata = {"Emergency Type": f"{row['Type of emergency']}", "Title": f"{row['Title']}", "Type": "Scenario", "Reference": f"{row['reference']}", "Link": f"{row['link']}"}))

In [None]:
embeddings = CohereEmbeddings(cohere_api_key= cohere_secret_key, user_agent='dispatch-ai')
vector_store = Pinecone_Langchain.from_documents(documents, embeddings, index_name=index)

In [None]:
# Add protocols for the Q&A feature
documents = []
for _, row in df.iterrows():
    chuncks = character_text_splitter.split_text(f"""This is a protocol for this scenario: {row['scenario']}
                                                 The steps: {row['protocol steps']}""")
    for chunck in chuncks:
      documents.append(Document(
          page_content = chunck,
          metadata = {"Reference": f"{row['reference']}", "Link": f"{row['link']}"}))

In [None]:
embeddings = CohereEmbeddings(cohere_api_key= cohere_secret_key, user_agent='dispatch-ai')
vector_store = Pinecone_Langchain.from_documents(documents, embeddings, index_name=index)

  warn_deprecated(
