In [1]:
from openai import OpenAI

# dotenv
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
system_prompt = """Please take the below text and make it easier to read on a screen. But, the text itself must remain exactly the same with no changes - only the formatting (paragraph breaks, use of bullet points etc) should be added, if and only if appropriate to improve the reading experience. i.e. don’t go overboard in using them. And please don’t add any new subheadings. There must be no changes to the text itself – only the formatting."""

user_prompt = """Primary care context
People may develop many different skin lesions throughout their lifetimes, and commonly present to primary care to seek reassurance that they are not cancerous or infective. Most benign skin lesions, such as seborrheic keratoses or dermatofibromas are diagnosed based on clinical appearance (morphology, distribution) and history. The increased access to camera phones and technology-enabled communication platforms (such as Accurx) mean that photos can be shared between patients and clinicians and be used in discussions/referrals to secondary care colleagues. Speak to your practice team about how skin lesions are triaged. Do the community pharmacists have a role in assessment? Do any of the GPs at your practice use a dermatoscope? Whilst on placement, look at as many skin lesions as you can, and practice describing them to your tutor and colleagues. Think about the features that might be suggestive of skin cancer or an underlying malignancy or infection such as HIV and speak to your tutor about the different referral options. Some surgeries offer minor surgery such as excisions, punch biopsies and cryotherapy- ask about thresholds for treatment and the role of Exceptional Funding agreements. Consider how to manage expectations for patients who want removal/treatment of lesions such as skin tags or molluscum contagiosum.
Secondary care context
Skin cancer is the most common type of cancer in the UK and can present as a new or changing skin lesion i.e. an area that is different from surrounding skin such as a new lump, spot, ulcer or mole that the patient hasn't noticed before or a non-healing sore or mark in the skin ( including moles) that has changed size, shape, colour or texture. These lesions may sometimes be described as itchy, painful or bleeding.
Patients may present concerned about a skin lesion or the lesion maybe picked up incidentally by a healthcare professional during a general examination. Think about how you would approach a patient with a skin lesion; taking an accurate history , performing a thorough examination and applying clinical reasoning skills will help guide your management plan. Whilst most skin lesions are benign it is important to recognise an atypical or concerning lesion and refer promptly via a 2 week wait , the Non-pigmented, solitary lesion pathway or Tele-dermatology so that the patient can be assessed by a Dermatologist and the lesion examined or photographed with dermoscopy.
Benign skin tumours are very common, student doctors should be aware of the following benign lesions: seborrheic keratosis, dermatofibroma, neurofibroma, epidermoid and pilar cysts, keratoacanthoma, haemangioma, and viral wart. Can you describe the features of pre-malignant skin lesions such as actinic keratosis (solar keratosis) and intra-epithelial carcinoma (Bowen's disease)? Non-melanoma skin cancers are by far the commonest form of skin cancer, Basal cell carcinomas account for almost 75%, whilst SCCs are less common, they have the potential to metastasise and therefore must be referred on a cancer pathway. Consider how you tell the difference and discuss the treatment modalities for pre-malignant versus malignant skin cancers, are you familiar with surgical techniques such as skin biopsy and excision and can you explain to patients the risks and benefits of surgery? You should be able to describe different types of benign melanocytic naevi and recognise the features of malignant melanoma including the different sub-types. It is important to be aware of the differential diagnosis of benign and malignant pigmented lesions and the red flags which should prompt referral. You should be able to discuss the pathological features of a skin cancers, the MDT approach and management. Finally, are you able to demonstrate knowledge of the risk factors of skin cancers but also recognise that skin cancer can affect patients with all skin types as early recognition and referral improves patient outcomes."""


client = OpenAI()
completion = client.chat.completions.create(
    model="gpt-4o-mini",
    temperature=0.0,
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
)

In [3]:
output = completion.choices[0].message.content

In [5]:
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

In [18]:
def tokenize(text: str) -> list[str]:
    """
    Tokenize text into words. This is a simple space-based tokenization.
    You might want to use a more sophisticated tokenizer based on your needs.
    """
    return text.split()

def calculate_bleu_score(original: str, reformatted: str, weights: tuple[float, ...] = (0.25, 0.25, 0.25, 0.25)) -> float:
    """
    Calculate BLEU score between original and reformatted text.
    
    Args:
        original (str): The original text
        reformatted (str): The reformatted text
        weights (tuple): Weights for different n-grams (default: equal weights for 1-4 grams)
    
    Returns:
        float: BLEU score between 0 and 1
    """
    # Tokenize both texts
    reference = tokenize(original)
    candidate = tokenize(reformatted)
    
    # Create list of references (BLEU expects a list of references)
    references = [reference]
    
    # Use NLTK's sentence_bleu with smoothing
    smoothing = SmoothingFunction().method1  # Using method1 smoothing
    
    try:
        score = sentence_bleu(
            references,
            candidate,
            weights=weights,
            smoothing_function=smoothing
        )
        return score
    except Exception as e:
        print(f"Error calculating BLEU score: {e}")
        return 0.0

def analyze_differences(original: str, reformatted: str, threshold: float = 0.950) -> dict:
    """
    Analyze the differences between original and reformatted text.
    
    Args:
        original (str): The original text
        reformatted (str): The reformatted text
        threshold (float): BLEU score threshold for acceptance
    
    Returns:
        dict: Analysis results including BLEU score and validation status
    """
    # Calculate BLEU scores with different n-gram weights
    unigram_score = calculate_bleu_score(original, reformatted, weights=(1.0, 0.0, 0.0, 0.0))
    bigram_score = calculate_bleu_score(original, reformatted, weights=(0.0, 1.0, 0.0, 0.0))
    trigram_score = calculate_bleu_score(original, reformatted, weights=(0.0, 0.0, 1.0, 0.0))
    fourgram_score = calculate_bleu_score(original, reformatted, weights=(0.0, 0.0, 0.0, 1.0))
    combined_score = calculate_bleu_score(original, reformatted)  # Using default weights
    
    # Compare token counts
    original_tokens = tokenize(original)
    reformatted_tokens = tokenize(reformatted)
    
    return {
        "bleu_scores": {
            "unigram": unigram_score,
            "bigram": bigram_score,
            "trigram": trigram_score,
            "fourgram": fourgram_score,
            "combined": combined_score
        },
        "token_counts": {
            "original": len(original_tokens),
            "reformatted": len(reformatted_tokens),
            "difference": len(reformatted_tokens) - len(original_tokens)
        },
        "passes_threshold": combined_score >= threshold,
        "needs_review": combined_score < threshold
    }

In [19]:
results = analyze_differences(user_prompt_strip, output_strip)

In [20]:
print("\nAnalysis Results:")
print(f"BLEU Scores:")
for gram_type, score in results["bleu_scores"].items():
    print(f"  {gram_type}: {score:.4f}")


Analysis Results:
BLEU Scores:
  unigram: 0.9951
  bigram: 0.9903
  trigram: 0.9854
  fourgram: 0.9805
  combined: 0.9878


In [21]:
print("\nToken Analysis:")
print(f"  Original count: {results['token_counts']['original']}")
print(f"  Reformatted count: {results['token_counts']['reformatted']}")
print(f"  Difference: {results['token_counts']['difference']}")

print(f"\nPasses threshold: {results['passes_threshold']}")
print(f"Needs manual review: {results['needs_review']}")


Token Analysis:
  Original count: 617
  Reformatted count: 616
  Difference: -1

Passes threshold: True
Needs manual review: False


In [9]:
def clean_text(text: str) -> str:
    # Convert to lowercase
    text = text.lower()
    
    # First, protect number-word hyphens by replacing them with a special marker
    import re
    text = re.sub(r'(\d)-([a-z])', r'\1§\2', text)
    
    # Remove special chars and newlines (keep letters, numbers, spaces)
    text = re.sub(r'[^a-z0-9\s§]', '', text)
    
    # Replace multiple spaces with single space
    text = re.sub(r'\s+', ' ', text)
    
    # Restore protected hyphens
    text = text.replace('§', '-')
    
    # Remove leading/trailing whitespace
    text = text.strip()
    
    return text

In [10]:
user_prompt_strip = clean_text(user_prompt)
output_strip = clean_text(output)

In [93]:
from rouge_score import rouge_scorer
scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)

In [94]:
scores = scorer.score(user_prompt, output)

In [95]:
scores

{'rouge1': Score(precision=0.9968253968253968, recall=0.9952456418383518, fmeasure=0.9960348929421095),
 'rougeL': Score(precision=0.9968253968253968, recall=0.9952456418383518, fmeasure=0.9960348929421095)}

In [96]:
user_words = user_prompt_strip.split()
output_words = output_strip.split()

print(len(user_words))
print(len(output_words))

617
615


In [80]:
set(user_words) - set(output_words)

{'2', 'maybe', 'week'}

In [23]:
print(output_strip)

primary care context people may develop many different skin lesions throughout their lifetimes and commonly present to primary care to seek reassurance that they are not cancerous or infective most benign skin lesions such as seborrheic keratoses or dermatofibromas are diagnosed based on clinical appearance morphology distribution and history the increased access to camera phones and technologyenabled communication platforms such as accurx mean that photos can be shared between patients and clinicians and be used in discussionsreferrals to secondary care colleagues speak to your practice team about how skin lesions are triaged do the community pharmacists have a role in assessment do any of the gps at your practice use a dermatoscope whilst on placement look at as many skin lesions as you can and practice describing them to your tutor and colleagues think about the features that might be suggestive of skin cancer or an underlying malignancy or infection such as hiv and speak to your tu

In [15]:
# strip all newlines, extra space, punctuation, and make lower case.
user_prompt_strip = ' '.join(user_prompt.split())
user_prompt_strip = user_prompt_strip.replace("- ", "")

# strip all newlines, extra spaces and bullet points from output
output_strip = ' '.join(output.split())
output_strip = output_strip.replace("- ", "")

In [16]:
len(user_prompt_strip)

3991

In [17]:
len(output_strip)

3987

In [13]:
print(user_prompt_strip)

Primary care context People may develop many different skin lesions throughout their lifetimes, and commonly present to primary care to seek reassurance that they are not cancerous or infective. Most benign skin lesions, such as seborrheic keratoses or dermatofibromas are diagnosed based on clinical appearance (morphology, distribution) and history. The increased access to camera phones and technology-enabled communication platforms (such as Accurx) mean that photos can be shared between patients and clinicians and be used in discussions/referrals to secondary care colleagues. Speak to your practice team about how skin lesions are triaged. Do the community pharmacists have a role in assessment? Do any of the GPs at your practice use a dermatoscope? Whilst on placement, look at as many skin lesions as you can, and practice describing them to your tutor and colleagues. Think about the features that might be suggestive of skin cancer or an underlying malignancy or infection such as HIV an

In [14]:
print(output_strip)

Primary care context People may develop many different skin lesions throughout their lifetimes, and commonly present to primary care to seek reassurance that they are not cancerous or infective. Most benign skin lesions, such as seborrheic keratoses or dermatofibromas, are diagnosed based on clinical appearance (morphology, distribution) and history. The increased access to camera phones and technologyenabled communication platforms (such as Accurx) mean that photos can be shared between patients and clinicians and be used in discussions/referrals to secondary care colleagues.  Speak to your practice team about how skin lesions are triaged.  Do the community pharmacists have a role in assessment?  Do any of the GPs at your practice use a dermatoscope? Whilst on placement, look at as many skin lesions as you can, and practice describing them to your tutor and colleagues. Think about the features that might be suggestive of skin cancer or an underlying malignancy or infection such as HIV

In [81]:
from rouge_score import rouge_scorer
import html
from IPython.display import HTML
from typing import List, Tuple, Set

def find_ngram_positions(text: str, n: int) -> List[Tuple[str, int]]:
    """Find all n-grams and their starting positions in the text."""
    words = text.split()
    ngrams = []
    for i in range(len(words) - n + 1):
        ngram = " ".join(words[i:i + n])
        ngrams.append((ngram, i))
    return ngrams

def get_matching_ngrams(reference: str, candidate: str, n: int) -> Set[str]:
    """Find all matching n-grams between reference and candidate texts."""
    ref_ngrams = set(ngram for ngram, _ in find_ngram_positions(reference, n))
    cand_ngrams = set(ngram for ngram, _ in find_ngram_positions(candidate, n))
    return ref_ngrams.intersection(cand_ngrams)

def highlight_matches(text: str, matches: Set[str], n: int) -> str:
    """Highlight matching n-grams in the text using HTML spans."""
    words = text.split()
    highlighted = words.copy()
    
    # Find positions of matching n-grams
    ngram_positions = find_ngram_positions(text, n)
    matching_positions = []
    
    for ngram, pos in ngram_positions:
        if ngram in matches:
            matching_positions.extend(range(pos, pos + n))
    
    # Apply highlighting
    for i in range(len(words)):
        if i in matching_positions:
            highlighted[i] = f'<span style="background-color: yellow">{html.escape(words[i])}</span>'
        else:
            highlighted[i] = html.escape(words[i])
    
    return " ".join(highlighted)

def visualize_rouge_matches(reference: str, candidate: str, n: int = 1):
    """
    Visualize matching n-grams between reference and candidate texts.
    
    Args:
        reference: Reference text
        candidate: Candidate text to compare against reference
        n: Size of n-grams to compare (default: 1 for unigrams)
    
    Returns:
        IPython.display.HTML object with highlighted matching sequences
    """
    # Find matching n-grams
    matches = get_matching_ngrams(reference, candidate, n)
    
    # Highlight matches in both texts
    highlighted_ref = highlight_matches(reference, matches, n)
    highlighted_cand = highlight_matches(candidate, matches, n)
    
    # Calculate Rouge score
    scorer = rouge_scorer.RougeScorer([f'rouge{n}'], use_stemmer=True)
    scores = scorer.score(reference, candidate)
    rouge_score = scores[f'rouge{n}'].fmeasure
    
    # Create HTML output
    html_output = f"""
    <div style="font-family: monospace; white-space: pre-wrap;">
        <h3>Rouge-{n} Score: {rouge_score:.3f}</h3>
        <div style="margin: 10px 0;">
            <strong>Reference:</strong><br>
            {highlighted_ref}
        </div>
        <div style="margin: 10px 0;">
            <strong>Candidate:</strong><br>
            {highlighted_cand}
        </div>
    </div>
    """
    
    return HTML(html_output)

In [98]:
print(output)

Primary care context

People may develop many different skin lesions throughout their lifetimes, and commonly present to primary care to seek reassurance that they are not cancerous or infective. Most benign skin lesions, such as seborrheic keratoses or dermatofibromas, are diagnosed based on clinical appearance (morphology, distribution) and history. 

The increased access to camera phones and technology-enabled communication platforms (such as Accurx) mean that photos can be shared between patients and clinicians and be used in discussions/referrals to secondary care colleagues. 

- Speak to your practice team about how skin lesions are triaged. 
- Do the community pharmacists have a role in assessment? 
- Do any of the GPs at your practice use a dermatoscope? 

Whilst on placement, look at as many skin lesions as you can, and practice describing them to your tutor and colleagues. Think about the features that might be suggestive of skin cancer or an underlying malignancy or infectio

In [102]:
reference = "The quick brown fox jumps over the lazy dog"
candidate = "The brown fox jumps over the lazy dog"

# Visualize unigram matches
display(visualize_rouge_matches(user_prompt_strip, output_strip, n=1))


In [104]:
168*20/60

56.0

In [105]:
3373/4


843.25

In [111]:
168*(((843.25/1000000)*0.15) + ((843.25/1000000)*0.6))

0.1062495

In [108]:
(843.25/1000000)*0.6

0.00050595