In [3]:
import re
import string

import pandas as pd
import spacy
from spacy.lang.en.stop_words import STOP_WORDS

# Load spaCy's small English pipeline once; the text-processing functions
# in the cells below call this shared `nlp` object.
nlp = spacy.load("en_core_web_sm")

# Load the dataset into a DataFrame. The path is relative, so it is resolved
# against the notebook's current working directory.
df = pd.read_csv('dataset.csv')  # Replace with your dataset file path

# Preprocessing function
def preprocess_text(text):
    """Lemmatize ``text`` and drop stop words, punctuation and whitespace.

    Args:
        text: Raw text to clean. Non-string values (for example the NaN that
            pandas uses for a missing cell) and blank strings are treated as
            empty input.

    Returns:
        str: The lemmas of the remaining tokens joined by single spaces, or
        ``""`` when there is nothing to process.
    """
    # Missing cells reach .apply() as float NaN, which spaCy cannot parse.
    if not isinstance(text, str) or not text.strip():
        return ""

    doc = nlp(text)
    tokens = [
        token.lemma_
        for token in doc
        # Token.is_stop ignores case, so a capitalised "The" is dropped too;
        # is_punct also covers multi-character and non-ASCII punctuation.
        if not (token.is_stop or token.is_punct or token.is_space)
        # Also drop symbol-only tokens ("$", "+") that is_punct does not cover.
        # A plain `token.text in string.punctuation` would be a substring test.
        and not all(char in string.punctuation for char in token.text)
    ]
    return " ".join(tokens)

# Apply preprocessing to every row of the dataset and store the cleaned text
# in a new 'processed_text' column.
df['processed_text'] = df['Title'].apply(preprocess_text)  # Assumes the dataset has a 'Title' column

In [5]:
def extract_medical_terms(text, terms=("diabetes", "hypertension", "fever")):
    """Find occurrences of known medical terms in ``text``.

    The stock ``en_core_web_sm`` pipeline only emits general-purpose entity
    labels (PERSON, ORG, GPE, ...), so filtering entities by a disease name
    never matches. Tokens are therefore also compared against the vocabulary
    directly, case-insensitively, by surface form and by lemma.

    Args:
        text: Text to scan. Non-string or blank input yields no matches.
        terms: Vocabulary of medical terms to look for. Defaults to the three
            terms this notebook works with.

    Returns:
        list[tuple[str, str]]: ``(matched text, term)`` pairs. Entities whose
        label is itself one of ``terms`` come first, followed by token-level
        matches in document order.
    """
    if not isinstance(text, str) or not text.strip():
        return []

    # Map the lower-cased form back to the caller's spelling of each term.
    vocabulary = {term.lower(): term for term in terms}
    doc = nlp(text)

    medical_terms = []
    claimed = set()  # token indices already reported through an entity

    # Keep the original behaviour for pipelines that do emit these labels.
    for ent in doc.ents:
        if ent.label_ in terms:
            medical_terms.append((ent.text, ent.label_))
            claimed.update(range(ent.start, ent.end))

    for token in doc:
        if token.i in claimed:
            continue
        # Try the surface form first, then the lemma (e.g. "fevers" -> "fever").
        for key in (token.lower_, token.lemma_.lower()):
            if key in vocabulary:
                medical_terms.append((token.text, vocabulary[key]))
                break

    return medical_terms

# Extract terms from the already-preprocessed text of every row and store the
# resulting lists in a new 'medical_terms' column.
# NOTE(review): 'processed_text' has stop words and punctuation removed, which
# may hurt entity recognition; consider running on the raw column -- confirm.
df['medical_terms'] = df['processed_text'].apply(extract_medical_terms)

In [6]:
def generate_boolean_query(user_input):
    """Build a conjunctive Boolean query from comma-separated search terms.

    Args:
        user_input: Comma-separated terms, e.g. ``"diabetes, fever"``.
            Whitespace around each term is stripped and blank entries (from
            doubled or trailing commas) are ignored.

    Returns:
        str: The terms joined with ``" AND "``, or ``""`` when the input
        contains no non-blank term.
    """
    if not user_input:
        return ""

    terms = [term.strip() for term in user_input.split(',')]
    # Skip blanks so "a,,b" or "a," cannot produce a dangling AND operator.
    return " AND ".join(term for term in terms if term)

# Demo: turn a comma-separated term list into an AND query and show it
user_input = "diabetes, hypertension, fever"
boolean_query = generate_boolean_query(user_input)
print(f"Generated Boolean Query: {boolean_query}")

Generated Boolean Query: diabetes AND hypertension AND fever


In [7]:
def refine_query(original_query):
    """Broaden a Boolean query by turning every AND operator into OR.

    Only a standalone, upper-case ``AND`` is rewritten, so terms that merely
    contain those letters (e.g. "BRAND") are left intact.

    Args:
        original_query: Boolean query string using ``AND`` operators.

    Returns:
        str: The query with each ``AND`` operator replaced by ``OR``.
    """
    # Placeholder for query refinement logic (e.g., expansion or rewriting)
    # \b anchors the match to whole words; a plain str.replace would also
    # rewrite the "AND" inside longer terms.
    refined_query = re.sub(r"\bAND\b", "OR", original_query)
    return refined_query

# Demo: relax the conjunctive query into a disjunctive one and show it
refined_query = refine_query(boolean_query)
print(f"Refined Query: {refined_query}")

Refined Query: diabetes OR hypertension OR fever
