In [2]:
%pip install -q -U google-generativeai

Note: you may need to restart the kernel to use updated packages.


In [1]:
import pathlib
import textwrap

import google.generativeai as genai

from IPython.display import display
from IPython.display import Markdown

import pandas as pd

def to_markdown(text):
  text = text.replace('•', '  *')
  return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from vertexai.preview.generative_models import GenerativeModel
from google.cloud import aiplatform
import vertexai

In [3]:
from prompt_utils import GENERATE_CONFIRMATIONAL_QUESTION, GENERATE_FACTOID_QUESTION, GENERATE_LIST_QUESTION, GENERATE_CASUAL_QUESTION, GENERATE_HYPOTHETICAL_QUESTION
import random

Run this in the local environment to set up google auth keys
```
gcloud auth login
gcloud config set project ${YOUR_GCP_PROJECT}
gcloud services enable aiplatform.googleapis.com
gcloud auth application-default login
```

In [10]:
"""
    Vertex API - gemini pro configuration
"""
model_name = 'gemini-1.0-pro'
project_id = 'inlpt-gen-ai-416111'
location = 'us-central1'
generation_config = {
    'max_output_tokens': 2048,
    'temperature': 0.2,
    'top_p': 0.85,
    'top_k': 40
}

In [5]:
vertexai.init(project=project_id, location=location)
model = GenerativeModel(model_name, generation_config=generation_config)

In [6]:
def load_doc() -> pd.DataFrame:
    """
    Load the extracted outputs 
    @param files: list of files to load
    @return: string of all docs concatenated
    """

    reader = pd.read_csv("./extracted_outputs.csv",)
    reader.fillna('', inplace=True)
    reader.columns = ["id", "doi", "authors","title", "abstract"]
    return reader

In [7]:

def generate_eval_set(full_data ,number_of_sets , question_types):
    """
        Based on the type of question, generate the question and the context that is
        being used to generate the question. 
    """
    random_integer = random.randint(1, 100)
    df_sample = full_data.sample(n=number_of_sets, random_state=random_integer) 
    evalSet = []
    for index, row in df_sample.iterrows():
        context = row['abstract']
        prompt = ""
        for q in question_types:
            createdSet = {}
            createdSet["context"] = context
            if(q=="CONFIRMATIONAL"):
                createdSet["type"] = "CONFIRMATIONAL"
                prompt = GENERATE_CONFIRMATIONAL_QUESTION.format(context=context)
            elif(q=="FACTOID"):
                createdSet["type"] = "FACTOID"
                prompt = GENERATE_FACTOID_QUESTION.format(context=context)
            elif(q=="LIST"):
                createdSet["type"] = "LIST"
                prompt = GENERATE_LIST_QUESTION.format(context=context)
            elif(q=="CASUAL"):
                createdSet["type"] = "CASUAL"
                prompt = GENERATE_CASUAL_QUESTION.format(context=context)
            elif(q=="HYPOTHETICAL"):
                createdSet["type"] = "HYPOTHETICAL"
                prompt = GENERATE_HYPOTHETICAL_QUESTION.format(context=context)
            response = model.generate_content([prompt])
            print(response)
            print(createdSet)
            createdSet["question"] = response.text
            evalSet.append(createdSet)
    return evalSet

        

In [13]:
pubmed_dataFrame = load_doc()
number_of_sets = 25
question_types = ["CONFIRMATIONAL", "FACTOID", "CASUAL"]
#question_types = ["CONFIRMATIONAL", "FACTOID", "LIST", "CASUAL", "HYPOTHETICAL"]

In [15]:
next_eval_set =  generate_eval_set(pubmed_dataFrame,number_of_sets,question_types )

candidates {
  content {
    role: "model"
    parts {
      text: "QUESTION: Did the first subject develop open set word recognition after the initial AI-assisted fitting approach?\nANSWER: No"
    }
  }
  finish_reason: STOP
  safety_ratings {
    category: HARM_CATEGORY_HATE_SPEECH
    probability: NEGLIGIBLE
  }
  safety_ratings {
    category: HARM_CATEGORY_DANGEROUS_CONTENT
    probability: NEGLIGIBLE
  }
  safety_ratings {
    category: HARM_CATEGORY_HARASSMENT
    probability: NEGLIGIBLE
  }
  safety_ratings {
    category: HARM_CATEGORY_SEXUALLY_EXPLICIT
    probability: NEGLIGIBLE
  }
}
usage_metadata {
  prompt_token_count: 331
  candidates_token_count: 24
  total_token_count: 355
}

{'context': "Objective: To assess whether CI programming by means of a software application using artificial intelligence (AI), FOX®, may improve cochlear implant (CI) performance. Patients: Two adult CI recipients who had mixed auditory results with their manual fitting were selected for an AI-

In [10]:
import json

In [81]:
with open('next_evalSet.json', 'w') as fp:
    json.dump(next_eval_set, fp)

In [70]:
def split_question_and_answer(input_string):
    # Check if 'ANSWER' is in the string
    if 'ANSWER' in input_string:
        split_keyword = 'ANSWER'
    # Check if 'CONTEXT' is in the string as an alternative to 'ANSWER'
    elif 'CONTEXT' in input_string:
        split_keyword = 'CONTEXT'

    # Split the string into question and answer (or context) parts
    parts = input_string.split(split_keyword)

    question = parts[0].replace('QUESTION', '').strip().lstrip(':').strip()
    answer_or_context = parts[1].strip().lstrip(':').strip()

    return question, answer_or_context

In [71]:
for e in next_eval_set:
    print(e["question"])
    question, answer_or_context = split_question_and_answer(e["question"])
    e["extractedQuestion"] = question
    e["extractedContext"] = answer_or_context

QUESTION: Did the first subject develop open set word recognition after the initial AI-assisted fitting approach?
ANSWER: No
**QUESTION :** When did the second subject develop good open set word recognition?

**CONTEXT :** The second subject, after 9 months of manual fitting, had developed good open set word recognition, but his scores remained poor at soft and loud presentation levels.
**CONTEXT :** The first subject hadn't developed open set word recognition even after 17 months CI experience and 19 manual fitting sessions.

**QUESTION :** Why did the first subject not develop open set word recognition after 17 months of CI experience and 19 manual fitting sessions?
QUESTION: Does the text suggest that ML algorithms are superior to traditional multivariate statistical models in donor selection for allogeneic hematopoietic stem cell transplantation?
ANSWER: Yes
**QUESTION :** Which type of ML model has been used to accurately identify skin segments affected with chronic GVHD?
**CONTEX

In [72]:
next_eval_set

[{'context': "Objective: To assess whether CI programming by means of a software application using artificial intelligence (AI), FOX®, may improve cochlear implant (CI) performance. Patients: Two adult CI recipients who had mixed auditory results with their manual fitting were selected for an AI-assisted fitting. Even after 17 months CI experience and 19 manual fitting sessions, the first subject hadn't developed open set word recognition. The second subject, after 9 months of manual fitting, had developed good open set word recognition, but his scores remained poor at soft and loud presentation levels. Main outcome measure(s): Cochlear implant fitting parameters, pure tone thresholds, bisyllabic word recognition, phonemic discrimination scores and loudness scaling curves. Results: For subject 1, a first approach trying to optimize the home maps by means of AI-proposed adaptations was not successful whereas a second approach based on the use of Automaps (an AI approach based on univers

In [46]:
%pip install sentence_transformers -q
%pip install langchain -q
%pip install langchain-openai -q
%pip install google-cloud-aiplatform -q
%pip install opensearch-py -q


Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [47]:
import os

import pandas as pd
import torch
from tqdm import tqdm
import numpy as np

from opensearchpy import OpenSearch, helpers
from langchain.text_splitter import RecursiveCharacterTextSplitter

from sentence_transformers import SentenceTransformer

In [48]:
client = OpenSearch(
   hosts=["https://admin:2NCbjLJWWzIFw@ec2-34-207-194-37.compute-1.amazonaws.com:9200/"],
    http_compress=True,
    use_ssl=True,
    verify_certs=False,
    ssl_assert_hostname=False,
    ssl_show_warn=False,
)

In [61]:
INDEX_NAME = "inlpt-without-title-chunking"

In [50]:
model_miniLM = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

In [51]:
from vertexai.preview.generative_models import GenerativeModel
from google.cloud import aiplatform
import vertexai
# Initialise client
vertexai.init(project=project_id, location=location)
model = GenerativeModel(model_name, generation_config=generation_config)

model_name = 'gemini-1.0-pro'
project_id = 'my-first-project' # TODO: fill this
location = 'us-central1'
generation_config = {
    'max_output_tokens': 2048,
    'temperature': 0.2,
    'top_p': 0.85,
    'top_k': 40
}

# Prompt template
prompt = '''You are an expert on life sciences and biomedical topics.

Research Information:
{context}

- Use only the GIVEN research information above and answer the user question. Try to justify your answer using the research information and support with examples (attribute authors if present) from the context.
- Respond with the answer.
- If you cannot find the answer to the user question, ask user to rephrase or provide more context.

User question:
{question}

Response:
'''

In [66]:
def getResponse(question):
  # Create search query
  query = {
      "size": 5,
      "query": {"knn": {"embedding": {"vector": model_miniLM.encode(question), "k": 10}}},
      "_source": False,
      "fields": ["id","doi","authors", "text"],
  }
  results = client.search(body=query, index=INDEX_NAME)

  results = results['hits']['hits']

  context = ""
  for row in results:
    value = row['fields']['text'][0]
    context += value + "\n" + "- - - - - "*10 + "\n"
  
  #print(context)
  response = model.generate_content([prompt.format(
    context = context,
    question = question
  )])

  return response.text

In [69]:
getResponse("Does the text suggest that ML algorithms are superior to traditional multivariate statistical models in donor selection for allogeneic hematopoietic stem cell transplantation?")

'The text does not explicitly state whether ML algorithms are superior to traditional multivariate statistical models in donor selection for allogeneic hematopoietic stem cell transplantation. However, it does suggest that ML algorithms have the potential to be more accurate and comprehensive than traditional methods in guiding clinicians in choosing the optimal mobilization treatment for patients undergoing hematopoietic stem cell transplantation. The text also mentions that ML-based scoring models may be the basis for the development of "intelligent" mobilization algorithms. These statements suggest that ML algorithms have the potential to improve donor selection for allogeneic hematopoietic stem cell transplantation, but further research is needed to confirm their superiority over traditional methods.'

In [74]:
count = 0
for e in next_eval_set:
    print(count)
    count = count + 1
    e["generatedAnswer"] = getResponse(e["extractedQuestion"])

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74


In [75]:
next_eval_set

[{'context': "Objective: To assess whether CI programming by means of a software application using artificial intelligence (AI), FOX®, may improve cochlear implant (CI) performance. Patients: Two adult CI recipients who had mixed auditory results with their manual fitting were selected for an AI-assisted fitting. Even after 17 months CI experience and 19 manual fitting sessions, the first subject hadn't developed open set word recognition. The second subject, after 9 months of manual fitting, had developed good open set word recognition, but his scores remained poor at soft and loud presentation levels. Main outcome measure(s): Cochlear implant fitting parameters, pure tone thresholds, bisyllabic word recognition, phonemic discrimination scores and loudness scaling curves. Results: For subject 1, a first approach trying to optimize the home maps by means of AI-proposed adaptations was not successful whereas a second approach based on the use of Automaps (an AI approach based on univers

In [2]:
import json
with open("next_evalSet.json", 'r') as file:
    data_dict = json.load(file)

Evaluation of Model

In [3]:
actual_answers = []
generated_answers = []
for e in data_dict:
    actual_answers.append(e["extractedContext"])
    generated_answers.append(e["generatedAnswer"])

In [None]:
%pip install evaluate -q
%pip install rouge_score -q
%pip install bert_score -q


In [1]:
import evaluate

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
bleu = evaluate.load('bleu')
rouge = evaluate.load('rouge')
bertscore = evaluate.load("bertscore")


In [7]:

bleu_simple = bleu.compute(predictions=actual_answers, references=generated_answers)

print("Simple system:")
print(bleu_simple)


Simple system:
{'bleu': 0.042571035662597075, 'precisions': [0.22369065100342633, 0.12245934959349593, 0.10479666319082377, 0.0936830835117773], 'brevity_penalty': 0.3324314565328395, 'length_ratio': 0.4758909853249476, 'translation_length': 2043, 'reference_length': 4293}


In [8]:

rouge_simple = rouge.compute(predictions=actual_answers, references=generated_answers)

print("Simple system:")
print(rouge_simple)


Simple system:
{'rouge1': 0.1430428777208464, 'rouge2': 0.07445845869183862, 'rougeL': 0.12889294541204935, 'rougeLsum': 0.1307237120162058}


In [9]:
import numpy as np
### your code ###
bertscore_simple = bertscore.compute(predictions=actual_answers, references=generated_answers, lang="en")
bertscore_simple_averaged={}
bertscore_compressor_averaged={}
for key in bertscore_simple.keys():
  if key!='hashcode':
    bertscore_simple_averaged[key]=np.mean(bertscore_simple[key])

### your code ###
print("Simple system:")
print(bertscore_simple_averaged)

Simple system:
{'precision': 0.8417519887288412, 'recall': 0.8290904760360718, 'f1': 0.8344328316052755}
