# ARTIDIS NLP Challenge A

In [58]:
import sys

# Check Python version. knowledge-graph-maker library requires >= 3.11.0 version.
if sys.version_info >= (3, 11):
    print(f"Python version is {sys.version}. You're running Python 3.11.0 or above.")
else:
    raise RuntimeError(f"Python 3.11.0 or above is required. You're running {sys.version}.")

Python version is 3.11.0 | packaged by conda-forge | (main, Oct 25 2022, 06:12:32) [MSC v.1929 64 bit (AMD64)]. You're running Python 3.11.0 or above.


In [6]:
!pip install sentence-transformers
!pip install tiktoken
!pip install knowledge-graph-maker
!pip install pyvis
!pip install pandas
!pip install matplotlib



In [60]:
import os
import re
import shutil
import subprocess
import datetime
import requests
import zipfile
import wget
import logging
import pkg_resources

import pandas as pd
import tiktoken
import torch
from sentence_transformers import SentenceTransformer, util
from IPython.core.interactiveshell import InteractiveShell
from dataclasses import dataclass
from pyvis.network import Network
import matplotlib.pyplot as plt
from knowledge_graph_maker import GraphMaker, Ontology, GroqClient, OpenAIClient, Document

InteractiveShell.ast_node_interactivity = "all"

# Adjust pandas options to display full content in the DataFrame
pd.set_option('display.max_colwidth', 20)
pd.set_option('display.max_rows', None)

# Initialize logger
logger = logging.getLogger(__name__)

# Save the key as an environment variable. Its free and gives enough token limit for this assignment. You can use any of the two API keys.
#os.environ['GROQ_API_KEY'] = "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"
os.environ['GROQ_API_KEY'] = "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"


  import pkg_resources


In [61]:
# Just for future reference.
def get_version(package_name):
    try:
        version = pkg_resources.get_distribution(package_name).version
        logger.info(f"{package_name} version: {version}")
    except pkg_resources.DistributionNotFound:
        logger.error(f"{package_name} is not installed.")

# List of packages to check
packages = [
    "sentence-transformers",
    "tiktoken",
    "knowledge-graph-maker",
    "pyvis",
    "pandas",
    "matplotlib"
]

# Check versions for each package
for package in packages:
    get_version(package)

INFO:__main__:sentence-transformers version: 3.1.1
INFO:__main__:tiktoken version: 0.7.0
INFO:__main__:knowledge-graph-maker version: 0.2.1
INFO:__main__:pyvis version: 0.3.2
INFO:__main__:pandas version: 2.2.3
INFO:__main__:matplotlib version: 3.9.2


# Utility class and methods

In [56]:
# This cell contains all utility classes and functions for the assignment!

class InputParams:
    def __init__(self):
        self.emb_model = 'paraphrase-MiniLM-L6-v2'  # Default model name
        self.url        = 'https://www.dropbox.com/scl/fi/sb5v4fmadwwp2e3czlsb0/MedicalNewsToday.zip?rlkey=aep3jn98vk49f2bwtmhsvqzlt&st=plg438k5&dl=0'
        self.query_cancer = "Usage of biomarkers in diagnosis of cancer."
        self.keywords_cancer = [
            'biomarker', 'biomarkers', 'tissue', 'biopsy'
        ]
        self.mandatory_keywords_cancer = [
            'cancer', 'biomarker'
        ]

        self.query_ml = "Usage of machine learning or artificial intelligence in health care."
        self.keywords_ml = [
            'machine learning', ' AI ', ' ML ', '(ai)', '(ml)',
            'automation', 'nlp', 'computer vision',
            'artificial intelligence', 'deep learning',
            'text processing', 'processing of text',
            'lab report', 'patient experience'
        ]
        self.mandatory_keywords_ml = [
            # Add any mandatory keywords for ML here
        ]

        # Groq models
        self.gen_model = "mixtral-8x7b-32768"
        # self.groq_model = "llama3-8b-8192"
        # self.groq_model = "llama3-70b-8192"
        # self.groq_model = "gemma-7b-it"

        # OpenAI models
        #self.gen_model = "gpt-3.5-turbo"

        # Set the LLM Client (Groq or OpenAI)
        self.gen_client = GroqClient(model=self.gen_model, temperature=0.0, top_p=0.95)

class FileOps:
    @staticmethod
    def download_and_extract_medical_news(input_params: InputParams):
        """Download and extract the Medical News Today zip file."""
        try:
            # Check if wget is available
            if shutil.which("wget") is not None:
                subprocess.run(['wget', input_params.url], check=True)

            else:
                # Use curl to download the file
                subprocess.run(['curl', '-L', '-o', 'MedicalNewsToday.zip', input_params.url], check=True)

            # Check if the file was downloaded
            if os.path.exists('MedicalNewsToday.zip'):
                # Unzip the file
                with zipfile.ZipFile('MedicalNewsToday.zip', 'r') as zip_ref:
                    zip_ref.extractall('./')
                logger.info("Download and extraction completed successfully.")
            else:
                logger.warning("No zip file found.")

        except subprocess.CalledProcessError as e:
            logger.error(f"Error during file download: {e}")
        except zipfile.BadZipFile:
            logger.error("Error: The downloaded file is not a valid zip file.")
        except Exception as e:
            logger.error(f"An unexpected error occurred: {e}")

class InputOps:
    def __init__(self, input_params: InputParams):
        self.input_params = input_params
        logger.info("Initialized InputOps with input parameters.")

    def create_input_dataframe_from_articles(self):
        """Create a DataFrame from articles in the './articles/' folder."""
        articles_folder = './articles/'
        data = []

        # Initialize the tokenizer (using GPT-2 encoding)
        tokenizer = tiktoken.get_encoding("gpt2")
        logger.info("Initialized GPT-2 tokenizer.")

        # Loop through each file in the articles folder
        for file_name in os.listdir(articles_folder):
            if file_name.endswith('.txt'):
                #logger.info(f"Processing file: {file_name}")
                # Full path to the text file
                file_path = os.path.join(articles_folder, file_name)
                try:
                    # Read the content of the file
                    with open(file_path, 'r', encoding='utf-8') as file:
                        content = file.read()
                        # Count tokens using tiktoken
                        token_count = len(tokenizer.encode(content))
                        # Append the file name (without path), content, and token count to the data list
                        data.append((file_name, content, token_count))
                        #logger.info(f"File {file_name} processed with {token_count} tokens.")
                except Exception as e:
                    logger.error(f"Error reading file {file_name}: {e}")

        # Create a pandas DataFrame from the data list
        df = pd.DataFrame(data, columns=['file_name', 'content', 'token_count'])

        # Display the DataFrame
        logger.info("Displaying the first few rows of the DataFrame.")
        display(df.head())

        # Gives an idea of the approximate number of tokens in an article.
        logger.info("Calculating token statistics.")
        display(df.sort_values('token_count', ascending=False).agg({'token_count': ['max', 'mean', 'std', 'min']}))

        return df



class ProcOps:
    def __init__(self, input_params: InputParams):
        self.input_params = input_params
        logger.info("Initialized ProcOps with input parameters.")

    def generate_doc_embeddings(self, df: pd.DataFrame):
        """Generate embeddings for documents in a DataFrame."""
        if 'content' not in df:
            logger.error("DataFrame must contain a 'content' column with documents.")
            raise ValueError("DataFrame must contain a 'content' column with documents.")
        
        logger.info("Loading the pre-trained model.")
        # Load the pre-trained model
        self.model = SentenceTransformer(self.input_params.emb_model)

        logger.info("Extracting document content and computing embeddings.")
        # Extract document content and compute embeddings
        docs = df['content'].tolist()
        embeddings = self.model.encode(docs, convert_to_tensor=True)
        logger.info("Document embeddings generated successfully.")
        return embeddings

    def find_keywords(self, text, keywords):
        #logger.info("Finding keywords in the provided text.")
        matched_keywords = []
        for keyword in keywords:
            if keyword in [" ML ", " AI "]:  # Case-sensitive match for "ml"
                if re.search(rf'\b{re.escape(keyword)}\b', text):
                    matched_keywords.append(keyword)
            else:  # Case-insensitive match for other keywords
                if re.search(rf'\b{re.escape(keyword)}\b', text, re.IGNORECASE):
                    matched_keywords.append(keyword)
        logger.debug(f"Matched keywords: {matched_keywords}")
        return list(set(matched_keywords))

    def process_df(self, df, doc_embeddings, query, keywords, mandatory_keywords):
        logger.info("Processing DataFrame with document embeddings.")
        # Encode the query using the model
        query_embedding = self.model.encode(query, convert_to_tensor=True)

        logger.info("Calculating cosine similarity between the query and documents.")
        # Calculate cosine similarity between the query and the documents
        similarities = util.pytorch_cos_sim(query_embedding, doc_embeddings)

        # Update the DataFrame with similarity scores and keyword matches
        df['similarity_score'] = similarities[0].tolist()
        df['matched_keywords'] = df['content'].apply(lambda text: self.find_keywords(text, keywords))
        df['unique_keyword_count'] = df['matched_keywords'].apply(len)

        logger.info("Counting occurrences of the word 'cancer' in the articles.")
        # Count the number of times the word 'cancer' is used in each article
        df['cancer_count'] = df['content'].str.lower().str.count('cancer')

        # Calculate average cancer keyword density
        df['avg_cancer_keyword'] = (df['cancer_count'] * 100 / df['token_count']).round(1)

        logger.info("Checking for mandatory keywords in each document.")
        # Check if all mandatory keywords are present in the document
        df['mandatory_keywords_present'] = df['content'].apply(lambda text: all(
            keyword.lower() in text.lower() for keyword in mandatory_keywords
        ))

        logger.info("Sorting the DataFrame by similarity score.")
        # Sort the DataFrame by similarity score
        df = df.sort_values(by='similarity_score', ascending=False).reset_index(drop=True)

        logger.info("Counting keyword occurrences across documents.")
        # Create a dictionary to track keyword occurrences across documents
        keyword_doc_count = {keyword: 0 for keyword in keywords}

        # Count how many documents each keyword appears in
        for index, row in df.iterrows():
            for keyword in row['matched_keywords']:
                keyword_doc_count[keyword] += 1

        # Create a DataFrame from the keyword_doc_count dictionary
        keyword_df = pd.DataFrame(list(keyword_doc_count.items()), columns=['keyword', 'document_count'])

        logger.info("Processing completed successfully.")
        return df, keyword_df

class KGOps:
    def __init__(self, ontology, website_name):
        self.ontology = ontology
        self.website_name = website_name
        logger.info("Initialized KGOps with ontology and website name.")

    def generate_color_map(self, ontology_labels):
        logger.info("Generating color map for ontology labels.")
        cmap = plt.get_cmap('tab10')  # Colormap for distinct colors
        colors = [cmap(i) for i in range(len(ontology_labels))]
        hex_colors = ['#%02x%02x%02x' % (int(r*255), int(g*255), int(b*255)) for r, g, b, _ in colors]
        logger.info("Color map generated successfully.")
        return {label: hex_color for label, hex_color in zip(ontology_labels, hex_colors)}

    def extract_labels_from_ontology(self):
        logger.info("Extracting labels from ontology.")
        labels = [list(label.keys())[0] for label in self.ontology.labels]
        logger.debug(f"Extracted labels: {labels}")
        return labels

    def plot_knowledge_graph(self, edges, notebook=False):
        logger.info("Plotting the knowledge graph.")
        # Extract labels from the ontology
        ontology_labels = self.extract_labels_from_ontology()

        net = Network(notebook=notebook)

        # Generate color map for ontology labels
        label_color_map = self.generate_color_map(ontology_labels)

        # Set a default color for non-ontology labels
        default_color = '#CCCCCC'

        # Add nodes and edges to the network
        for edge in edges:
            # Assign color based on the ontology label
            color_node_1 = label_color_map.get(edge.node_1.label, default_color)
            color_node_2 = label_color_map.get(edge.node_2.label, default_color)

            # Add nodes with color
            net.add_node(edge.node_1.name, label=edge.node_1.name, title=edge.node_1.label, color=color_node_1)
            net.add_node(edge.node_2.name, label=edge.node_2.name, title=edge.node_2.label, color=color_node_2)

            # Add edge with the relationship as the title
            net.add_edge(edge.node_1.name, edge.node_2.name, title=edge.relationship)

        # Create a legend for ontology labels
        legend_html = '<div style="position: absolute; bottom: 10px; left: 10px; background: white; padding: 10px; border: 1px solid black; z-index: 1000;">'
        legend_html += '<strong>Legend:</strong><br>'
        for label, color in label_color_map.items():
            legend_html += f'<div style="color: {color};">● {label}</div>'
        legend_html += '</div>'

        logger.info(f"Showing network graph and saving as {self.website_name}.")
        net.show(self.website_name)

        # Append the legend to the HTML file with UTF-8 encoding
        with open(self.website_name, "a", encoding="utf-8") as f:
            f.write(legend_html)
            logger.info("Legend appended to the HTML file.")


# Data ingestion

In [41]:
# Initialize the input parameters
input_params = InputParams()

In [14]:
# Download and extract files
FileOps.download_and_extract_medical_news(input_params)

INFO:__main__:Download and extraction completed successfully.


In [57]:
# Initialize InputOps
input_ops = InputOps(input_params)

# Create the DataFrame of articles
df_input = input_ops.create_input_dataframe_from_articles()

INFO:__main__:Initialized InputOps with input parameters.
INFO:__main__:Initialized GPT-2 tokenizer.
INFO:__main__:Displaying the first few rows of the DataFrame.


Unnamed: 0,file_name,content,token_count
0,1.txt,"A new study, pub...",520
1,10.txt,A woman consider...,1199
2,100.txt,Ventolin HFA is ...,9205
3,1000.txt,Candida auris is...,1261
4,1001.txt,New research sug...,892


INFO:__main__:Calculating token statistics.


Unnamed: 0,token_count
max,23317.0
mean,1485.38914
std,1330.357188
min,191.0


# Step 1 as defined in the document sent with the email (Skim articles to keep only relevant articles for the two Themes).

In [18]:
# Initialize ProcOps
proc_ops = ProcOps(input_params)

# Generate document embeddings of each article
doc_embeddings = proc_ops.generate_doc_embeddings(df_input)

INFO:__main__:Initialized ProcOps with input parameters.
INFO:__main__:Loading the pre-trained model.
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: paraphrase-MiniLM-L6-v2


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.73k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

INFO:__main__:Extracting document content and computing embeddings.


Batches:   0%|          | 0/63 [00:00<?, ?it/s]

INFO:__main__:Document embeddings generated successfully.


In [None]:
# Update input df for each of the two themes described in the document sent with the email. (For example, df_cancer contains the 4 metrics described in the document for theme 1 while df_keyword_cancer gives the number of articles having keywords.)
df_cancer, df_keyword_cancer = proc_ops.process_df(df_input, doc_embeddings, input_params.query_cancer, input_params.keywords_cancer, input_params.mandatory_keywords_cancer)
df_ml, df_keyword_ml = proc_ops.process_df(df_input, doc_embeddings, input_params.query_ml, input_params.keywords_ml, input_params.mandatory_keywords_ml)

INFO:__main__:Processing DataFrame with document embeddings.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:__main__:Calculating cosine similarity between the query and documents.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:__main__:Calculating cosine similarity between the query and documents.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding keywords in the provided text.
INFO:__main__:Finding 

In [63]:
df_keyword_cancer
df_cancer_skim = df_cancer.loc[#(df_cancer['cancer_count']>0)&
                               #(df_cancer['similarity_score']>0.00)|
                               #((df_cancer['unique_keyword_count']>0)&(df_cancer['avg_cancer_keyword']>3.0))&
                               (df_cancer['mandatory_keywords_present']==True)
                               ].reset_index(drop=True)

df_cancer_skim

#Input for knowledge graph of theme 1
df_cancer_list = df_cancer_skim['content'].tolist()

Unnamed: 0,keyword,document_count
0,biomarker,12
1,biomarkers,23
2,tissue,500
3,biopsy,108


Unnamed: 0,file_name,content,token_count,similarity_score,matched_keywords,unique_keyword_count,cancer_count,avg_cancer_keyword,mandatory_keywords_present
0,780.txt,New research use...,884,0.530929,[biomarker],1,9,1.0,True
1,1136.txt,Some types of ca...,1032,0.504091,[biomarkers],1,20,1.9,True
2,199.txt,It turns out som...,856,0.351578,[biomarkers],1,21,2.5,True
3,1246.txt,Imagine a future...,1865,0.348968,[biomarkers],1,14,0.8,True
4,1839.txt,Researchers have...,738,0.330005,[biomarker],1,19,2.6,True
5,1341.txt,A new pilot stud...,941,0.325977,"[biomarker, biop...",3,14,1.5,True
6,1221.txt,"Nilotinib, a dru...",1423,0.306921,"[tissue, biomark...",2,1,0.1,True
7,772.txt,Scientists have ...,1009,0.270634,[biomarker],1,10,1.0,True
8,75.txt,The results of s...,848,0.230521,[biomarkers],1,1,0.1,True
9,1456.txt,A new study sugg...,633,0.128483,[biomarkers],1,2,0.3,True


In [64]:
df_keyword_ml
df_ml_skim = df_ml.loc[#(df_ml['cancer_count']>0)&
                       #(df_ml['similarity_score']>0.00)|
                       (df_ml['unique_keyword_count']>0)
                       #&
                       #(df_ml['avg_cancer_keyword']>3.0))&
                       #(df_ml['mandatory_keywords_present']==True)
                       ].reset_index(drop=True)

df_ml_skim

#Input for knowledge graph of theme 1
df_ml_list = df_ml_skim['content'].tolist()

Unnamed: 0,keyword,document_count
0,machine learning,11
1,AI,6
2,ML,0
3,(ai),0
4,(ml),0
5,automation,0
6,nlp,0
7,computer vision,1
8,artificial intel...,12
9,deep learning,2


Unnamed: 0,file_name,content,token_count,similarity_score,matched_keywords,unique_keyword_count,cancer_count,avg_cancer_keyword,mandatory_keywords_present
0,1307.txt,New research tha...,813,0.670913,"[deep learning, ...",4,1,0.1,True
1,893.txt,The first system...,918,0.60995,"[deep learning, ...",3,1,0.1,True
2,767.txt,Researchers have...,806,0.48637,[artificial inte...,2,0,0.0,True
3,1245.txt,"For some, the wo...",1115,0.412014,[machine learning],1,0,0.0,True
4,888.txt,New research sho...,872,0.375085,[artificial inte...,2,0,0.0,True
5,1537.txt,People’s languag...,1284,0.370376,[machine learnin...,2,0,0.0,True
6,982.txt,Scientists have ...,1124,0.358907,[machine learnin...,3,0,0.0,True
7,908.txt,Effective commun...,1428,0.339175,[patient experie...,1,0,0.0,True
8,615.txt,A new study has ...,814,0.286948,[machine learnin...,2,16,2.0,True
9,1248.txt,Scientists have ...,1071,0.237351,[machine learning],1,1,0.1,True


# Step 2 as defined in the document sent with the email (Make Knowledge Graph for the two Themes)

In [53]:
ontology_cancer = Ontology(
    labels=[
        {"Cancer Type": "Include only specific type of cancer, for example, lymphocytic leukemia, breast cancer, etc. Do not include other diseases like diabetes, etc which are not cancer."},
        {"Cancer": "Include when cancer is addressed in general without specification of any specific type of cancer such as breast cancer, leukemia, etc."},
        {"Biomarker for Cancer Diagnosis": "Include only when using biomarkers for cancer diagnosis, otherwise do not include it"},
        {"Tissue used for cancer diagnosis": "Include only when using any tissue in cancer diagnosis. If the tissue is not involved in cancer diagnosis, do NOT include it. Tissue should be related to humans."},
        {"Tissue affected by cancer": "Include only when human tissue is affected during cancer, otherwise do not include it"},
        {"Cancer Diagnosis Method": "Include only when a particular method is used for cancer diagnosis, otherwise do not include it"},
        {"Cancer Diagnosis Device/Tool": "Include only when a device is used for cancer diagnosis, otherwise do not include it"},
        {"Cancer Diagnosis Technology": "Include only when a particular technology is used for cancer diagnosis, otherwise do not include it"},
        {"Drugs": "Include only when using drugs for cancer diagnosis, otherwise do not include it"},
        {"New Research of Cancer Diagnosis Method": "Include only when a new method or research is used/developed for cancer diagnosis. Do not include methods which not used in cancer diagnosis."}
    ],
    relationships=[
        "Relation between any pair of Entities present in the text",
        ],
)

In [54]:
current_time = str(datetime.datetime.now())

graph_maker_cancer = GraphMaker(ontology=ontology_cancer, llm_client=input_params.gen_client, verbose=False)

docs_cancer = map(
    lambda t: Document(text=t, metadata={'generated_at': current_time}),
    df_cancer_list
)

graph_cancer = graph_maker_cancer.from_documents(
    list(docs_cancer),
    delay_s_between=15 ## delay_s_between because otherwise groq api maxes out pretty fast.
    )

print("Total number of Edges", len(graph_cancer))

kg_cancer = KGOps(ontology_cancer, "knowledge_graph_cancer_biomarkers.html")
kg_cancer.plot_knowledge_graph(graph_cancer, notebook=True)

[92m[39m
[92m▶︎ GRAPH MAKER LOG - 2024-09-23 07:54:07 - INFO [39m
[92mDocument: 1[39m
[92m[39m
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:54:07 - INFO [39m
[34mUsing Ontology:
labels=[{'Cancer Type': 'Include only specific type of cancer, for example, lymphocytic leukemia, breast cancer, etc. Do not include other diseases like diabetes, etc which are not cancer.'}, {'Cancer': 'Include when cancer is addressed in general without specification of any specific type of cancer such as breast cancer, leukemia, etc.'}, {'Biomarker for Cancer Diagnosis': 'Include only when using biomarkers for cancer diagnosis, otherwise do not include it'}, {'Tissue used for cancer diagnosis': 'Include only when using any tissue in cancer diagnosis. If the tissue is not involved in cancer diagnosis, do NOT include it. Tissue should be related to humans.'}, {'Tissue affected by cancer': 'Include only when human tissue is affected during cancer, otherwise do not include it'}, {'Cancer Diag

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:54:08 - INFO [39m
[34mLLM Response:
[
 {"node_1": {"label": "Cancer Diagnosis Method", "name": "new research using nanosensors to detect protein-to-protein interactions"},
 "node_2": {"label": "Biomarker for Cancer Diagnosis", "name": "protein-to-protein interactions"},
 "relationship": "The new research uses nanosensors to detect protein-to-protein interactions which are being used as a biomarker for cancer diagnosis."},

{"node_1": {"label": "Cancer Diagnosis Method", "name": "new research using nanosensors to detect protein-to-protein interactions"},
 "node_2": {"label": "Cancer Type", "name": "lymphocytic leukemia"},
 "relationship": "The new research using nanosensors to detect protein-to-protein interactions is particularly helpful for detecting lymphocytic leukemia, a form of cancer that starts in the bone marrow and spreads int

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:54:25 - INFO [39m
[34mLLM Response:
[
 {"node_1": {"label": "Cancer Type", "name": "ovarian cancer"},
 "node_2": {"label": "Cancer Diagnosis Device/Tool", "name": "3-D-nanopatterned microfluidic chip"},
 "relationship": "The 3-D-nanopatterned microfluidic chip is a device used for diagnosing ovarian cancer."},

 {"node_1": {"label": "Cancer Diagnosis Device/Tool", "name": "3-D-nanopatterned microfluidic chip"},
 "node_2": {"label": "Tissue used for cancer diagnosis", "name": "plasma"},
 "relationship": "The 3-D-nanopatterned microfluidic chip uses plasma, a component of blood, for diagnosing cancer."},

 {"node_1": {"label": "Cancer", "name": "notoriously hard to detect cancers"},
 "node_2": {"label": "Cancer Type", "name": "ovarian cancer"},
 "relationship": "Ovarian cancer is an example of a hard-to-detect cancer."},

 {"node_1": {"l

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:54:45 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Drugs", "name": "approved, safe drugs"},
"node_2": {"label": "Cancer", "name": "anticancer properties"},
"relationship": "Some approved, safe drugs have unexpected anticancer properties."},

{"node_1": {"label": "Drug", "name": "a drug developed to treat one condition"},
"node_2": {"label": "Cancer", "name": "value in treating another"},
"relationship": "Occasionally, researchers find that a drug developed to treat one condition has unexpected value in treating another."},

{"node_1": {"label": "Drug", "name": "pain reliever aspirin"},
"node_2": {"label": "Cancer", "name": "prevents cardiovascular disease"},
"relationship": "A well-known case in point is the pain reliever aspirin, which turns out to be of use in preventing cardiovascular disease."},

{"node_1": {"label": "Rese

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 29.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:55:34 - INFO [39m
[34mLLM Response:
[
   {
       "node_1": {
           "label": "Cancer",
           "name": "cancer"
       },
       "node_2": {
           "label": "Biomarker for Cancer Diagnosis",
           "name": "genetic mutations that drive specific cancers, biomarkers that predict disease risk or how well a person will respond to a particular treatment"
       },
       "relationship": "Biomarkers, such as genetic mutations that drive specific cancers and biomarkers that predict disease risk or how well a person will respond to a particular treatment, are used in cancer diagnosis."
   },
   {
       "node_1": {
           "l

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:55:51 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Cancer", "name": "cancer"},
"node_2": {"label": "Drugs", "name": "cancer drugs"},
"relationship": "The text mentions that cancer becomes resistant to certain cancer drugs."},

{"node_1": {"label": "Cancer", "name": "cancer"},
"node_2": {"label": "Cancer Diagnosis Method", "name": "chemotherapy"},
"relationship": "The text mentions that cancers will often respond to chemotherapy drugs initially."},

{"node_1": {"label": "Cancer", "name": "cancer"},
"node_2": {"label": "Cancer Diagnosis Device/Tool", "name": "treatments"},
"relationship": "The text mentions that treatments are used for cancer."},

{"node_1": {"label": "Cancer", "name": "cancer"},
"node_2": {"label": "Biomarker for Cancer Diagnosis", "name": "macropinocytosis"},
"relationship": "The text describes macropinocytosi

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:56:08 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Cancer", "name": "Prostate cancer"},
 "node_2": {"label": "Cancer Diagnosis Method", "name": "Digital rectal exams"},
 "relationship": "Prostate cancer is commonly diagnosed using digital rectal exams."},
{"node_1": {"label": "Cancer", "name": "Prostate cancer"},
 "node_2": {"label": "Biomarker for Cancer Diagnosis", "name": "PSA (prostate-specific antigen)"},
 "relationship": "PSA is a biomarker used for the diagnosis of prostate cancer."},
{"node_1": {"label": "Biomarker for Cancer Diagnosis", "name": "PSA (prostate-specific antigen)"},
 "node_2": {"label": "Cancer Diagnosis Device/Tool", "name": "Blood tests"},
 "relationship": "PSA blood tests are used to measure the levels of PSA, a biomarker for prostate cancer diagnosis."},
{"node_1": {"label": "Cancer", "name": "Prosta

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:56:28 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Drugs", "name": "Nilotinib"},
"node_2": {"label": "Cancer Type", "name": "Leukemia"},
"relationship": "Nilotinib is a drug that regulators have approved for the treatment of leukemia."},

{"node_1": {"label": "Drugs", "name": "Nilotinib"},
"node_2": {"label": "Parkinson's disease", "name": "Parkinson's disease"},
"relationship": "Nilotinib has shown promise in a clinical trial of people with Parkinson's disease."},

{"node_1": {"label": "Clinical Trial", "name": "Clinical trial"},
"node_2": {"label": "Nilotinib", "name": "Nilotinib"},
"relationship": "The clinical trial assessed the safety, tolerability, and behavior of nilotinib in people with Parkinson's disease."},

{"node_1": {"label": "Clinical Trial", "name": "Clinical trial"},
"node_2": {"label": "Biomarker for Cancer D

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:56:47 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Cancer Type", "name": "acute myeloid leukemia (AML)"},
"node_2": {"label": "Cancer", "name": "AML"},
"relationship": "AML is a specific type of cancer."},

{"node_1": {"label": "Cancer Type", "name": "acute myeloid leukemia (AML)"},
"node_2": {"label": "Tissue affected by cancer", "name": "bone marrow"},
"relationship": "AML starts in bone marrow."},

{"node_1": {"label": "Cancer Type", "name": "acute myeloid leukemia (AML)"},
"node_2": {"label": "Tissue affected by cancer", "name": "bloodstream"},
"relationship": "AML soon spreads into the bloodstream."},

{"node_1": {"label": "Cancer Type", "name": "acute myeloid leukemia (AML)"},
"node_2": {"label": "Tissue affected by cancer", "name": "liver, spleen, lymph system, testicles, brain, and spinal cord"},
"relationship": "In so

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:57:03 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Cancer", "name": "N/A"},
"node_2": {"label": "New Research of Cancer Diagnosis Method", "name": "New research on e-cigarettes' health effects"},
"relationship": "The new research is a study on the health effects of e-cigarettes, but it is not specifically about cancer diagnosis methods."},

{"node_1": {"label": "Cancer", "name": "N/A"},
"node_2": {"label": "Drugs", "name": "Nicotine"},
"relationship": "The new research exposes mice to e-cigarettes with and without nicotine, indicating that nicotine is a drug being studied."},

{"node_1": {"label": "Cancer", "name": "N/A"},
"node_2": {"label": "Device/Tool for Cancer Diagnosis", "name": "Electronic Nicotine Delivery Systems (ENDS), e-cigarettes"},
"relationship": "The new research uses Electronic Nicotine Delivery Systems (ENDS

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:57:23 - INFO [39m
[34mLLM Response:
[
 {
 "node_1": {"label": "Cancer", "name": "cancer"},
 "node_2": {"label": "New Research of Cancer Diagnosis Method", "name": "new research of cancer diagnosis method"},
 "relationship": "The text mentions a new study about the potential risks of a gluten-free diet, which is relevant to cancer diagnosis and treatment, as it is a diet followed by people with celiac disease, an autoimmune condition where gluten intake leads to intestinal damage. However, the text does not provide specific details about the new research being conducted on cancer diagnosis methods."
 },
 {
 "node_1": {"label": "Gluten-free diet", "name": "gluten-free diet"},
 "node_2": {"label": "Tissue used for cancer diagnosis", "name": "tissue used for cancer diagnosis"},
 "relationship": "The text mentions that a gluten-free diet ex

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 54.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:58:35 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Cancer", "name": "Cancer" },
"node_2": {"label": "Cancer Diagnosis Method", "name": "Immune reaction triggered by the body" },
"relationship": "Cancer diagnosis can be done by observing the immune reaction triggered by the body against cancer."},
{"node_1": {"label": "Cancer", "name": "Cancer" },
"node_2": {"label": "Tissue affected by cancer", "name": "Tissue affected by cancer" },
"relationship": "Cancer is a condition where the body's own cells or tissues are perceived as harmful, leading to an immune reaction."},
{"node_1": {"label": "Biomarker for Cancer Diagnosis", "name"

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 14.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:59:06 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Cancer Diagnosis Device/Tool", "name": "smart toilet"},
"node_2": {"label": "Cancer Type", "name": "prostate cancer"},
"relationship": "The smart toilet has an algorithm that can detect abnormal urine flow rate, stream time, and volume which could be useful for flagging prostate problems in men."},

{"node_1": {"label": "Cancer Diagnosis Method", "name": "urinalysis strips"},
"node_2": {"label": "Biomarker for Cancer Diagnosis", "name": "biomarkers"},

{"node_1": {"label": "Cancer Diagnosis Device/Tool", "name": "smart toilet"},
"node_2": {"label": "Tissue affected by cancer", 

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 16.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:59:42 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Cancer", "name": "cancer"
}
,"node_2": {"label": "Biomarker for Cancer Diagnosis", "name": "chlorogenic acids"
}
,"relationship": "Chlorogenic acids, a group of antioxidant compounds found in green coffee bean extract, are believed to be responsible for potential health benefits such as improving blood pressure and cholesterol levels, which may be associated with cancer prevention or treatment."
},
{"node_1": {"label": "Cancer", "name": "cancer"
}
,"node_2": {"label": "Biomarker for Cancer Diagnosis", "name": "caffeine"
}
,"relationship": "Caffeine, found in green coffee bean e

Total number of Edges 216
knowledge_graph_cancer_biomarkers.html


In [48]:
ontology_ml = Ontology(
    labels=[
        {"Cancer Type": "Specific type of cancer, for example, leukemia, breast cancer, etc. Do not include any other disease except cancer."},
        {"Cancer": "Include only when cancer is addressed in general without specifying any particular type of cancer such as breast cancer, leukemia, etc. Do not include any other disease except cancer."},
        {"Technology/Tool which uses ML": "Include only when a technology uses machine learning, otherwise do not include it."},
        {"Machine Learning method used": "Include only when machine learning method is used to solve a problem or identify a disease, otherwise do not include it."},
        {"Healthcare problem solved by ML": "Include only when a problem is solved by machine learning. Do not add a problem if it is not solved by machine learning, otherwise do not include it."},
        {"Patient problem solved by ML": "Include only when a patient problem which is solved by machine learning like delays in getting the report, time taken was longer previously, quick diagnosis, etc"},
        {"Cancer diagnosed by ML": "Include only when a cancer is diagnosed by machine learning, otherwise do not include it."},
        {"Disease diagnosed by ML": "Include only when a disease other than cancer is diagnosed by machine learning, otherwise do not include it."},
    ],
    relationships=[
        "Relation between any pair of Entities",
        ],
)

In [49]:
current_time = str(datetime.datetime.now())

graph_maker_ml = GraphMaker(ontology=ontology_ml, llm_client=input_params.gen_client, verbose=False)

docs_ml = map(
    lambda t: Document(text=t, metadata={'generated_at': current_time}),
    df_ml_list
)

graph_ml = graph_maker_ml.from_documents(
    list(docs_ml),
    delay_s_between=15 ## delay_s_between because otherwise groq api maxes out pretty fast.
    )

print("Total number of Edges", len(graph_ml))

kg_ml = KGOps(ontology_ml, "knowledge_graph_healthcare_ml.html")
kg_ml.plot_knowledge_graph(graph_ml, notebook=True)

[92m[39m
[92m▶︎ GRAPH MAKER LOG - 2024-09-23 07:38:51 - INFO [39m
[92mDocument: 1[39m
[92m[39m
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:38:51 - INFO [39m
[34mUsing Ontology:
labels=[{'Cancer Type': 'Specific type of cancer, for example, leukemia, breast cancer, etc. Do not include any other disease except cancer.'}, {'Cancer': 'Include only when cancer is addressed in general without specifying any particular type of cancer such as breast cancer, leukemia, etc. Do not include any other disease except cancer.'}, {'Technology/Tool which uses ML': 'Include only when a technology uses machine learning, otherwise do not include it.'}, {'Machine Learning method used': 'Include only when machine learning method is used to solve a problem or identify a disease, otherwise do not include it.'}, {'Healthcare problem solved by ML': 'Include only when a problem is solved by machine learning. Do not add a problem if it is not solved by machine learning, otherwise do not inclu

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:38:53 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Machine Learning method used", "name": "deep learning algorithms"},
"node_2": {"label": "Healthcare problem solved by ML", "name": "predict the onset of Alzheimer’s disease"},
"relationship": "The input text mentions that deep learning algorithms can accurately predict the onset of Alzheimer’s disease as early as 6 years in advance."},

{"node_1": {"label": "Machine Learning method used", "name": "random forest, deep learning"},
"node_2": {"label": "Healthcare problem solved by ML", "name": "predict premature death due to chronic disease"},
"relationship": "The input text states that researchers developed a system of learning algorithms using two models called 'random forest' and 'deep learning' to predict the risk of premature death due to chronic disease."},

{"node_1": {"la

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:39:09 - INFO [39m
[34mLLM Response:
[
   {
       "node_1": {
           "label": "Disease diagnosed by ML",
           "name": "disease"
       },
       "node_2": {
           "label": "Machine Learning method used",
           "name": "deep learning algorithms"
       },
       "relationship": "The text mentions that deep learning algorithms were used to diagnose a disease."
   },
   {
       "node_1": {
           "label": "Healthcare problem solved by ML",
           "name": "diagnostic effectiveness"
       },
       "node_2": {
           "label": "Machine Learning method used",
           "name": "deep learning algorithms"
       },
       "relationship": "The text states that deep learning algorithms were used to examine the diagnostic effectiveness of AI."
   },
   {
       "node_1": {
           "label": "Machine Learning me

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:39:26 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Healthcare problem solved by ML", "name": "Mental health issue screening"},
"node_2": {"label": "Technology/Tool which uses ML", "name": "Interactive voice recognition technology"},
"relationship": "The technology uses machine learning to screen for changes in mental health by monitoring the mental health of participants for up to 14 months using a specially designed application that the participants were able to access by calling a toll-free number."},

{"node_1": {"label": "Technology/Tool which uses ML", "name": "Interactive voice recognition technology"},
"node_2": {"label": "Machine Learning method used", "name": "Keyword and voice pattern analysis"},
"relationship": "The technology uses machine learning to identify and analyze keywords and voice patterns for each person 

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:39:43 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Healthcare problem solved by ML", "name": "Diagnosis of mental and physical conditions"},
"node_2": {"label": "Technology/Tool which uses ML", "name": "Smart speakers"},
"relationship": "Smart speakers may soon be able to diagnose our mental and physical conditions using machine learning."},

{"node_1": {"label": "Healthcare problem solved by ML", "name": "Understanding a person's emotional state"},
"node_2": {"label": "Technology/Tool which uses ML", "name": "Smart speakers"},
"relationship": "Smart speakers can potentially predict a person's emotional response by analyzing skin conductance and pupillary dilation."},

{"node_1": {"label": "Healthcare problem solved by ML", "name": "Tracking cognitive processes"},
"node_2": {"label": "Machine Learning method used", "name": "Pu

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 1.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:40:00 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Disease diagnosed by ML", "name": "Hypoglycemia"},
"node_2": {"label": "Machine Learning method used", "name": "AI system"},
"relationship": "The AI system is used to automatically detect hypoglycemia."},

{"node_1": {"label": "Healthcare problem solved by ML", "name": "Difficulty in measuring glucose levels"},
"node_2": {"label": "Patient problem solved by ML", "name": "Inconvenience in measuring glucose levels"},
"relationship": "The AI system is designed to improve treatments for specific health issues related to measuring glucose levels, making it easier for patients."},

{"

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 15.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:40:32 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Healthcare problem solved by ML", "name": "Prediction of future risk of developing psychosis"},
"node_2": {"label": "Machine Learning method used", "name": "Machine learning technique"},
"relationship": "A machine-learning technique was used to analyze language in a group of at-risk young people and predict which individuals would go on to develop psychosis with an accuracy of 93%."},

{"node_1": {"label": "Machine Learning method used", "name": "Machine learning technique"},
"node_2": {"label": "Technology/Tool which uses ML", "name": "Machine learning system"},
"relationship"

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 13.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:41:03 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Healthcare problem solved by ML", "name": "Depression medication response prediction"},
"node_2": {"label": "Machine Learning method used", "name": "Artificial Intelligence (AI)"},
"relationship": "The text mentions the use of AI in predicting how well a person with depression will respond to medication."},

{"node_1": {"label": "Healthcare problem solved by ML", "name": "Depression medication response prediction"},
"node_2": {"label": "Patient problem solved by ML", "name": "Delays in getting the report, time taken was longer previously, quick diagnosis, etc."},
"relationship"

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 17.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:41:37 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Healthcare problem solved by ML", "name": "Lack of patient knowledge about chronic disease complications"},
"node_2": {"label": "Patient problem solved by ML", "name": "Patient misconceptions"},
"relationship": "The lack of patient knowledge about chronic disease complications can lead to patient misconceptions, which can be addressed by effectively communicating the right information at the appropriate time using machine learning."},

{"node_1": {"label": "Healthcare problem solved by ML", "name": "Lack of patient knowledge about chronic disease complications"},
"node_2": {"la

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 10.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:42:04 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Cancer", "name": "brain cancer"},
"node_2": {"label": "Healthcare problem solved by ML", "name": "early detection of brain cancer"},
"relationship": "The new blood test accurately detects brain cancer, helping in early detection and diagnosis."},

{"node_1": {"label": "Patient problem solved by ML", "name": "delays in getting the report"},
"node_2": {"label": "Healthcare problem solved by ML", "name": "early detection of brain cancer"},
"relationship": "The new blood test reduces delays in getting the report, enabling quicker diagnosis of brain cancer."},

{"node_1": {"label": 

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 12.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:42:33 - INFO [39m
[34mLLM Response:
[
   {
       "node_1": {
           "label": "Healthcare problem solved by ML",
           "name": "obesity"
       },
       "node_2": {
           "label": "Technology/Tool which uses ML",
           "name": "SlipBuddy"
       },
       "relationship": "SlipBuddy is a technology that uses machine learning to solve the problem of obesity by providing personalized interventions to prevent overeating."
   },
   {
       "node_1": {
           "label": "Machine Learning method used",
           "name": "machine learning algorithms"
       },
       "node_2": {
           "label": "Machine Learning meth

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 6.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:42:55 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Healthcare problem solved by ML", "name": "Assessing empathy"},
"node_2": {"label": "Technology/Tool which uses ML", "name": "Machine learning algorithms"},
"relationship": "The text mentions the use of machine learning algorithms to predict a person's empathetic disposition, even when not directly engaged in a task involving empathy, in order to assess empathy."},
{"node_1": {"label": "Healthcare problem solved by ML", "name": "Assessing empathy"},
"node_2": {"label": "Patient problem solved by ML", "name": "Improving empathy in patients with autism spectrum disorder"},
"relati

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 6.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:43:19 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Healthcare problem solved by ML", "name": "Noninvasive, high-fidelity interface to control a robotic arm"},
"node_2": {"label": "Technology/Tool which uses ML", "name": "Noninvasive brain-computer interface"},
"relationship": "The noninvasive, high-fidelity interface to control a robotic arm is a healthcare problem solved by machine learning, and it is achieved using a noninvasive brain-computer interface, a technology that uses machine learning."},

{"node_1": {"label": "Machine Learning method used", "name": "Specialized sensing and machine learning techniques"},
"node_2": {"l

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 11.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:43:48 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Disease diagnosed by ML", "name": "fibromyalgia"},
 "node_2": {"label": "Machine Learning method used", "name": "artificial intelligence"},
 "relationship": "Artificial intelligence was used to diagnose fibromyalgia and identify its link to gut bacteria."},

{"node_1": {"label": "Patient problem solved by ML", "name": "delays in getting the report"},
 "node_2": {"label": "Healthcare problem solved by ML", "name": "fibromyalgia diagnosis time"},
 "relationship": "The use of artificial intelligence in diagnosing fibromyalgia could significantly shorten the time it typically takes

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 19.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:44:25 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Cancer Type", "name": "Melanoma"},
"node_2": {"label": "Healthcare problem solved by ML", "name": "Early detection of melanoma"},
"relationship": "The text mentions that early detection of melanoma can lead to a 99% 5-year survival rate, implying that machine learning could be used to improve the detection rates."},

{"node_1": {"label": "Cancer Type", "name": "Melanoma"},
"node_2": {"label": "Patient problem solved by ML", "name": "Delays in getting the report"},
"relationship": "The text mentions the development of a blood test for melanoma that could potentially reduce delay

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 14.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:44:56 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Cancer Type", "name": "Lung cancer"},
"node_2": {"label": "Healthcare problem solved by ML", "name": "Best, most targeted therapy for lung cancer patients"},
"relationship": "Lung cancer is a specific type of cancer that requires the best and most targeted therapy for patients. A new AI model has been developed to help healthcare professionals choose the best type of treatment for each individual with lung cancer."},
{"node_1": {"label": "Cancer Type", "name": "Lung cancer"},
"node_2": {"label": "Healthcare problem solved by ML", "name": "Determining the benefit of immunotherap

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 9.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:45:22 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Healthcare problem solved by ML", "name": "Diagnosis of chronic fatigue syndrome (ME/CFS)"},
 "node_2": {"label": "Technology/Tool which uses ML", "name": "Nanoelectronic assay using machine learning"},
 "relationship": "The nanoelectronic assay uses machine learning to accurately diagnose chronic fatigue syndrome (ME/CFS) by measuring the reaction of immune cells and blood plasma to stress."},

{"node_1": {"label": "Machine Learning method used", "name": "Nanoelectronic assay using machine learning"},
 "node_2": {"label": "Healthcare problem solved by ML", "name": "Diagnosis of

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 20.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:45:59 - INFO [39m
[34mLLM Response:
[
   {
       "node_1": {
           "label": "Healthcare problem solved by ML",
           "name": "Stroke"
       },
       "node_2": {
           "label": "Machine Learning method used",
           "name": "Medisafe"
       },
       "relationship": "Medisafe is a pill reminder app that uses machine learning to ensure that the user never misses a dose of their medication or mistakenly doubles up due to not tracking their medications correctly."
   },
   {
       "node_1": {
           "label": "Healthcare problem solved by ML",
           "name": "Stress"
       },
       "node_2": {
           "la

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 13.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:46:29 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Cancer", "name": "Cancer" },
"node_2": {"label": "Healthcare problem solved by ML", "name": "Mapping neural connections in squid brain" },
"relationship": "Modern technology, specifically MRI, was used to map the neural connections in the squid brain, which can be considered as a healthcare problem solved by machine learning."},

{"node_1": {"label": "Technology/Tool which uses ML", "name": "MRI" },
"node_2": {"label": "Healthcare problem solved by ML", "name": "Mapping neural connections in squid brain" },
"relationship": "MRI, a technology that uses machine learning, was empl

Using Model:  mixtral-8x7b-32768


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 3.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
[34m[39m
[34m▶︎ GRAPH MAKER VERBOSE - 2024-09-23 07:46:54 - INFO [39m
[34mLLM Response:
[
{"node_1": {"label": "Healthcare problem solved by ML", "name": "varicose veins"},
"node_2": {"label": "Machine Learning method used", "name": "genome-wide association study, machine learning, and statistical methods"},
"relationship": "The genetic study used machine learning and statistical methods, including a genome-wide association study, to examine the genetic data of individuals and identify risk factors for varicose veins."},

{"node_1": {"label": "Healthcare problem solved by ML", "name": "varicose veins"},
"node_2": {"label": "Patient problem solved by ML", "name": "determine the risk of dev

Total number of Edges 194
knowledge_graph_healthcare_ml.html


INFO:__main__:Legend appended to the HTML file.
