In [1]:
!pip install transformers rdflib spacy
!python -m spacy download en_core_web_sm

Collecting rdflib
  Downloading rdflib-7.0.0-py3-none-any.whl.metadata (11 kB)
Collecting isodate<0.7.0,>=0.6.0 (from rdflib)
  Downloading isodate-0.6.1-py2.py3-none-any.whl.metadata (9.6 kB)
Downloading rdflib-7.0.0-py3-none-any.whl (531 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m531.9/531.9 kB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: isodate, rdflib
Successfully installed isodate-0.6.1 rdflib-7.0.0
Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m29.1 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You 

In [2]:
from transformers import pipeline
from rdflib import Graph, URIRef, Literal, Namespace
import spacy

In [3]:
''' We will use Huggingface’s pre-trained distilBERT model to extract relevant entities
(e.g.,"blood pressure", "cholesterol level", "stroke risk") from medical reports or articles.
 Load pre-trained Named Entity Recognition (NER) model '''
nlp_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
# Example medical report text related to stroke risk
medical_text = "John has a blood pressure of 160/100 mmHg and his cholesterol level is 250 mg/dL, which puts him at high risk for stroke."
# Extract entities from the medical report
entities = nlp_pipeline(medical_text)
print("Extracted Entities:")
for entity in entities:
    print(entity)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/998 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/60.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


Extracted Entities:
{'entity': 'I-PER', 'score': 0.9982315, 'index': 1, 'word': 'John', 'start': 0, 'end': 4}


In [6]:
''' In this step, we map the extracted entities (e.g., "blood pressure", "cholesterol", "strokerisk")
to health-related ontologies such as SNOMED CT (Systematized Nomenclature of
Medicine) and LOINC (Logical Observation Identifiers Names and Codes) using rdflib. '''
# Initialize RDF graph and define namespaces
g = Graph()
snomed = Namespace("http://snomed.info/id/")
loinc = Namespace("http://loinc.org/")
ex = Namespace("http://example.org/")
# Example of mapping extracted entities to SNOMED CT and LOINC ontologies
john = URIRef(ex.John)
g.add((john, snomed['hasCondition'], Literal("High blood pressure")))
g.add((john, loinc['BloodPressure'], Literal("160/100 mmHg")))
g.add((john, loinc['CholesterolLevel'], Literal("250 mg/dL")))
g.add((john, snomed['atRiskFor'], Literal("Stroke")))
# Serialize the graph in Turtle format
print("Knowledge Graph in Turtle format:")
print(g.serialize(format='turtle'))

Knowledge Graph in Turtle format:
@prefix ns1: <http://snomed.info/id/> .
@prefix ns2: <http://loinc.org/> .

<http://example.org/John> ns2:BloodPressure "160/100 mmHg" ;
    ns2:CholesterolLevel "250 mg/dL" ;
    ns1:atRiskFor "Stroke" ;
    ns1:hasCondition "High blood pressure" .




In [8]:
''' Once the knowledge graph is created, use SPARQL to query it for insights, such as
identifying patients at high risk for stroke based on their blood pressure and cholesterol
levels. '''
# Define a SPARQL query to retrieve patients with high blood pressure and high cholesterol
query = """
SELECT ?person ?bloodPressure ?cholesterol WHERE {
?person <http://loinc.org/BloodPressure> ?bloodPressure .
?person <http://loinc.org/CholesterolLevel> ?cholesterol .
FILTER (?bloodPressure > "140/90 mmHg" && ?cholesterol > "200 mg/dL")
}
"""
# Execute the query and print results
print("Query Results:")
results = g.query(query)
for row in results:
    print(f"Person: {row.person}, Blood Pressure: {row.bloodPressure}, Cholesterol Level: {row.cholesterol}")

Query Results:
Person: http://example.org/John, Blood Pressure: 160/100 mmHg, Cholesterol Level: 250 mg/dL


In [9]:
''' In this example, we use health data (e.g., blood pressure, cholesterol levels) to predict
stroke risk. We’ll use a logistic regression model to make predictions. '''
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
# Example patient data (blood pressure and cholesterol levels)
patient_data = pd.DataFrame({
'BloodPressure': [140, 160, 130, 180], # Systolic blood pressure in mmHg
'Cholesterol': [220, 250, 180, 260] # Cholesterol levels in mg/dL
})
# Labels for stroke risk (1 = high risk, 0 = low risk) - dummy data
stroke_risk_labels = [0, 1, 0, 1]
# Train a logistic regression model to predict stroke risk
model = LogisticRegression()
model.fit(patient_data[['BloodPressure', 'Cholesterol']], stroke_risk_labels)
# Predict stroke risk for a new patient
new_patient_data = pd.DataFrame({'BloodPressure': [160], 'Cholesterol': [240]})
predicted_risk = model.predict(new_patient_data)[0]
print(f"Predicted Stroke Risk: {'High' if predicted_risk == 1 else 'Low'}")

Predicted Stroke Risk: High


In [12]:
'''Now, we use SWRL (Semantic Web Rule Language) to generate stroke prevention
recommendations. For example, recommend lifestyle changes like exercise or
medication if the patient’s blood pressure and cholesterol levels exceed safe limits.'''
<swrl:rule>
    <swrl:body>
        <swrl:atom swrl:predicate="hasCondition" swrl:arg1="?person" swrl:arg2="Highblood pressure"/>
        <swrl:atom swrl:predicate="CholesterolLevel" swrl:arg1="?person" swrl:arg2=">200mg/dL"/>
    </swrl:body>
    <swrl:head>
        <swrl:atom swrl:predicate="recommendLifestyleChange" swrl:arg1="?person"/>
   </swrl:head>
</swrl:rule>

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 11)

In [13]:
!pip install owlready2

Collecting owlready2
  Downloading owlready2-0.47.tar.gz (27.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.3/27.3 MB[0m [31m44.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: owlready2
  Building wheel for owlready2 (pyproject.toml) ... [?25l[?25hdone
  Created wheel for owlready2: filename=owlready2-0.47-cp310-cp310-linux_x86_64.whl size=24075216 sha256=10645c2e5dd704c3ab7d2ea768c20690885896275eb5a1797389f7c5ebd54066
  Stored in directory: /root/.cache/pip/wheels/27/3e/ba/4171c4b10bba9fe1774fbf8fcf794de889e636ce64ad83a533
Successfully built owlready2
Installing collected packages: owlready2
Successfully installed owlready2-0.47


In [19]:
from owlready2 import *

# Create a new ontology
onto = get_ontology("http://example.org/stroke_prevention.owl")

# Define some classes and properties
with onto:
    class Person(Thing): pass
    class hasCondition(ObjectProperty): pass
    class hasCholesterolLevel(DataProperty, FunctionalProperty): pass
    class recommendLifestyleChange(ObjectProperty): pass

    # Conditions
    class HighBloodPressure(Thing): pass
    class StrokeRisk(Thing): pass

# Creating individuals
john = onto.Person("John")
john.hasCondition.append(onto.HighBloodPressure())
john.hasCholesterolLevel = 250  # High cholesterol level

# Define a SWRL-like rule using Python
def stroke_prevention_rule(person):
    if onto.HighBloodPressure in person.hasCondition and person.hasCholesterolLevel > 200:
        print(f"Recommending lifestyle changes for {person.name} due to high blood pressure and cholesterol.")
    else:
        print(f"No lifestyle change recommendation needed for {person.name}.")

# Applying the rule to John
stroke_prevention_rule(john)

# Save the ontology to a file
onto.save(file="stroke_prevention.owl", format="rdfxml")


No lifestyle change recommendation needed for John.


In [15]:
''' Using the patient’s health data and the rules defined with SWRL, we can generate
personalized recommendations for stroke prevention. '''
# Set a recommendation based on blood pressure and cholesterol levels
blood_pressure = 160
cholesterol = 240
if blood_pressure > 140 and cholesterol > 200:
  recommendation = "Recommend lifestyle changes such as exercise and medication."
else:
  recommendation = "Continue with current lifestyle."
print(f"Stroke Prevention Recommendation: {recommendation}")

Stroke Prevention Recommendation: Recommend lifestyle changes such as exercise and medication.
