In [5]:
pip install sentence_transformers



In [7]:
import numpy as np
from sentence_transformers import SentenceTransformer, util

# Load the pre-trained SBERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Step 1: Divide dialogue into utterances and sentences
def split_dialogue(dialogue):
    """Split a dialogue transcript into utterances and sentences.

    Each non-blank line of ``dialogue`` is one utterance. Every utterance is
    further split into sentences on the literal separator ``'. '``, so the
    period that ended a non-final sentence is not kept.

    Surrounding whitespace is stripped from every unit and blank units are
    dropped, so an indented triple-quoted transcript does not produce empty
    or padded strings.

    Args:
        dialogue: The transcript text, one utterance per line.

    Returns:
        A ``(utterances, sentences)`` tuple of lists of strings.
    """
    utterances = []
    for line in dialogue.split('\n'):
        line = line.strip()
        # Blank lines (e.g. the first and last line of a triple-quoted
        # string) carry no content and must not become units.
        if line:
            utterances.append(line)

    sentences = []
    for utterance in utterances:
        for piece in utterance.split('. '):
            piece = piece.strip()
            if piece:
                sentences.append(piece)
    return utterances, sentences

# Step 2: Encode the units
def encode_units(units):
    """Embed text units with the module-level SBERT ``model``.

    Args:
        units: The texts to embed; handed unchanged to ``model.encode``.

    Returns:
        The result of ``model.encode(units, convert_to_tensor=True)``.
    """
    return model.encode(units, convert_to_tensor=True)

# Step 3: Calculate similarity scores
def calculate_similarity_scores(query_embeddings, sentence_embeddings, threshold):
    """Return, for each sentence, its best cosine similarity to any query.

    Args:
        query_embeddings: Embeddings of the queries, as accepted by
            ``util.pytorch_cos_sim``.
        sentence_embeddings: Embeddings of the sentences, as accepted by
            ``util.pytorch_cos_sim``.
        threshold: Unused here; kept so existing call sites keep working.
            Thresholding is done separately by the caller.

    Returns:
        A NumPy array holding the maximum taken along axis 0 of the
        query-by-sentence similarity matrix, i.e. one score per sentence.
    """
    similarity_scores = util.pytorch_cos_sim(query_embeddings, sentence_embeddings)
    # Tensor.numpy() is only valid for CPU tensors that do not require grad;
    # the embeddings may live on a GPU, so move the result to host first.
    similarity_matrix = similarity_scores.detach().cpu().numpy()
    return np.max(similarity_matrix, axis=0)

# Step 4: Filter the units
def filter_units_by_threshold(units, scores, threshold):
    """Keep the units whose paired score is at least ``threshold``.

    Units and scores are paired positionally with ``zip``, so pairing stops
    at the shorter of the two inputs.

    Args:
        units: The candidate units.
        scores: One score per unit, in the same order.
        threshold: Inclusive lower bound a score must reach.

    Returns:
        A list of the retained units, in their original order.
    """
    kept = []
    for candidate, value in zip(units, scores):
        if value >= threshold:
            kept.append(candidate)
    return kept

# Step 5: Strict matching method
def strict_matching(sentences, query_set):
    """Select the sentences that contain at least one keyword verbatim.

    Matching is a plain, case-sensitive substring test (``keyword in
    sentence``).

    Args:
        sentences: The sentences to scan.
        query_set: The keywords to look for.

    Returns:
        A list of the matching sentences, in their original order.
    """
    def mentions_keyword(text):
        # Stop at the first keyword found, like any() would.
        for keyword in query_set:
            if keyword in text:
                return True
        return False

    return list(filter(mentions_keyword, sentences))

# Example
# Demo: run both filtering strategies on a short doctor/patient transcript
# and print the sentences each one selects.
if __name__ == "__main__":
    # Sample transcript, one utterance per line. The literal begins with a
    # newline and every line is indented; it is passed to split_dialogue as-is.
    dialogue = """
    Doctor: Good morning. What brings you in today?
    Patient: I have been having a severe headache for the past three days.
    Doctor: Have you taken any medication for it?
    Patient: Yes, I took some ibuprofen but it didn't help much.
    Doctor: Are there any other symptoms besides the headache?
    Patient: I've also been feeling nauseous and dizzy.
    """

    # Keywords used both as embedding queries and as strict-match substrings.
    query_set = ["headache", "medication", "symptoms", "nauseous", "dizzy"]
    print("Query set: ", query_set)

    # Split the transcript into units; only `sentences` is used below.
    utterances, sentences = split_dialogue(dialogue)

    # Embed the queries and the sentences with the same model.
    query_embeddings = encode_units(query_set)
    sentence_embeddings = encode_units(sentences)

    # Score each sentence against the queries (one score per sentence).
    # `threshold` is the cutoff applied by filter_units_by_threshold below.
    threshold = 0.5
    scores = calculate_similarity_scores(query_embeddings, sentence_embeddings, threshold)

    # Keep the sentences whose score is at least `threshold`.
    filtered_sentences = filter_units_by_threshold(sentences, scores, threshold)

    # Independently, keep the sentences that contain a keyword as a substring.
    strict_matched_sentences = strict_matching(sentences, query_set)

    # Print both selections, one sentence per line.
    print("Filtered Sentences by Threshold:")
    for sentence in filtered_sentences:
        print(sentence)

    print("\nStrict Matched Sentences:")
    for sentence in strict_matched_sentences:
        print(sentence)


Query set:  ['headache', 'medication', 'symptoms', 'nauseous', 'dizzy']
Filtered Sentences by Threshold:
    Patient: I have been having a severe headache for the past three days.
    Doctor: Are there any other symptoms besides the headache?
    Patient: I've also been feeling nauseous and dizzy.

Strict Matched Sentences:
    Patient: I have been having a severe headache for the past three days.
    Doctor: Have you taken any medication for it?
    Doctor: Are there any other symptoms besides the headache?
    Patient: I've also been feeling nauseous and dizzy.
