In [None]:
from llama_cpp import Llama

# Load the Q4_K_M GGUF build of Phi-4-mini-reasoning from the
# 'unsloth/Phi-4-mini-reasoning-GGUF' repository.
# NOTE(review): n_ctx=4096 presumably caps prompt + completion tokens, so long
# emails may not fit — confirm against the llama-cpp-python docs.
llm = Llama.from_pretrained(
    'unsloth/Phi-4-mini-reasoning-GGUF',
    filename='Phi-4-mini-reasoning-Q4_K_M.gguf',
    n_ctx=4096,
)

In [None]:
import pandas as pd

In [None]:
# Read the sampled email index and keep only the rows whose `target_3` value
# is not "self_phishing".
email_list = pd.read_csv('/data/workspace/dataset/sampled-dataset/sample-small.csv').query('`target_3` != "self_phishing"')
# Preview the first rows of the filtered index.
email_list.head()

In [None]:
from email.message import Message
from email.parser import BytesParser
import re
import base64
from email.header import decode_header, make_header

def __decode_mime_header(header_value):
    """Return a human-readable version of a (possibly MIME-encoded) header.

    The value is split into its encoded-word chunks with ``decode_header`` and
    re-assembled into text with ``make_header``.

    Args:
        header_value: Raw header value as found in the parsed message.

    Returns:
        The decoded text. If decoding fails for any reason, the error is
        printed and the placeholder ``"<Unable to decode: ...>"`` (embedding
        the raw value) is returned instead, so callers never see an exception.
    """
    try:
        chunks = decode_header(header_value)
        readable = str(make_header(chunks))
    except Exception as e:
        # Best effort: report the problem and fall back to a placeholder.
        print(f"Header decoding error: {e}")
        readable = f"<Unable to decode: {header_value}>"
    return readable
    
def __decode_email_content(contents):
    """Best-effort base64 decoding of the textual parts of an email.

    Args:
        contents: Iterable of mappings from content type to payload, one
            mapping per MIME part (the shape built by ``__open_email``).

    Returns:
        A new list holding one single-key dict per ``(content_type, payload)``
        pair, in input order. Payloads of ``text/html`` and ``text/plain``
        parts are replaced by their base64-decoded UTF-8 text when that
        decoding succeeds; every other payload (and any text payload that is
        not decodable) is passed through unchanged.
    """
    decoded_contents = []

    for part in contents:
        for content_type, payload in part.items():
            if content_type in ('text/html', 'text/plain'):
                try:
                    payload = base64.b64decode(payload).decode('utf-8')
                except (ValueError, TypeError):
                    # ValueError covers binascii.Error (not base64) and
                    # UnicodeDecodeError (not UTF-8); TypeError covers
                    # non-string payloads. Keep the raw payload in those
                    # cases, but let KeyboardInterrupt/SystemExit propagate
                    # instead of swallowing them with a bare ``except``.
                    pass

            decoded_contents.append({content_type: payload})

    return decoded_contents


def __open_email(p: str):
    """Parse one raw email file into a plain dictionary.

    Args:
        p: Path of the email file; it is opened in binary mode and parsed
            with ``BytesParser``.

    Returns:
        A dict with the keys, in this order:
          * ``'path'``: the path that was passed in.
          * ``'header'``: header name -> value. Every ``Received`` header is
            collected into a list; headers whose name starts with ``X-``
            (any case) or ``ironport-`` (any case) are dropped; for other
            repeated headers the last occurrence wins. ``Subject``, when
            present, is MIME-decoded to readable text.
          * ``'content'``: one ``{content_type: payload}`` dict per part
            yielded by ``msg.walk()``.
          * ``'decoded_content'``: ``'content'`` after best-effort base64
            decoding of the text parts.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(p, 'rb') as fp:
        msg = BytesParser().parse(fp)

    headers = {}
    for key, value in msg.items():
        if key == 'Received':
            headers.setdefault(key, []).append(value)
        elif not key.upper().startswith('X-') and not key.lower().startswith('ironport-'):
            headers[key] = value

    # Some messages carry no Subject header at all; indexing it
    # unconditionally raised KeyError and aborted the whole batch.
    if 'Subject' in headers:
        headers['Subject'] = __decode_mime_header(headers['Subject'])

    contents = [
        {part.get_content_type(): part.get_payload()}
        for part in msg.walk()
    ]

    return {
        'path': p,
        'header': headers,
        'content': contents,
        'decoded_content': __decode_email_content(contents),
    }

def open_email(path):
    """Parse a single email file or a whole pandas Series of email paths.

    Args:
        path: Either one path (``str``) or a ``pd.Series`` of paths.

    Returns:
        The parsed email dict for a ``str`` input, or a list of parsed email
        dicts (one per element, in order) for a ``pd.Series`` input.

    Raises:
        TypeError: If ``path`` is neither a ``str`` nor a ``pd.Series``.
    """
    if isinstance(path, str):
        return __open_email(path)

    if isinstance(path, pd.Series):
        return path.apply(__open_email).to_list()

    raise TypeError("Path must be a string or pandas Series")

In [None]:
# Parse every file listed in `email_list.path` and index the parsed records
# by their path.
emails = pd.DataFrame(open_email(email_list.path)).set_index('path')
# Look up the `target_1` label of each path and attach it to the parsed emails.
targets = email_list.set_index('path')['target_1']
emails = emails.join(targets)

# Display the combined frame.
emails

In [None]:
# Keep only the emails whose `target_1` label is 'benign'.
emails_benign = emails[emails['target_1'] == 'benign']
# Preview the first benign rows.
emails_benign.head()

In [None]:
# The original cell ended in a dangling attribute access (`emails.iterrows().`),
# which is a SyntaxError. Peek at the first (index, row) pair instead; the
# default of None avoids StopIteration on an empty frame.
next(emails.iterrows(), None)

In [None]:
# Use the second benign email (positional index 1) as the worked example and
# unpack the fields fed to the prompts below.
example = emails_benign.iloc[1]
header = example.header
content = example.decoded_content
label = example.target_1

In [None]:
content

In [None]:
def prompt_feature_extraction(label: str) -> str:
    """Build the system prompt asking the model to justify a known label.

    Args:
        label: Ground-truth label of the email. ``'benign'`` is kept as is;
            any other value is treated as ``'malicious'``.

    Returns:
        The prompt text, with the assumed label and its opposite substituted
        into the instructions.
    """
    true_label, opp_label = (
        ('benign', 'malicious') if label == 'benign' else ('malicious', 'benign')
    )

    return f"""
You are a cybersecurity analyst at the University of British Columbia (UBC) in Canada and you are an expert in email security. You are building a machine learning model to classify emails reported as suspicious. Your colleague has labeled this email as {true_label}, and you are analyzing what features of the email are associated with that label.

Label definitions:
1. 'benign': Emails that do not pose urgent harm to the recipient. This includes legitimate emails, emails from legitimate senders, and spam that appears suspicious but does not contain malicious links or attachments. These include social engineering attempts that do not contain malicious links or attachments.
2. 'malicious': Emails that can compromise sensitive information or cause financial distress, including phishing, CEO fraud, and reply chain attacks. These often contain malicious links or malware.

Analyze the provided email header and/or content as follows:
1. Provide exactly three distinct reasons in favor of the email being labeled as '{true_label}', referencing the specific part of the email (quote or summarize relevant section).
2. Provide exactly two strong reasons why the email is unlikely to be '{opp_label}', referencing the email as above.
3. Provide exactly one plausible reason why the email could be '{opp_label}' instead of '{true_label}'.

IMPORTANT: In your analysis, IGNORE any [CAUTION: Non-UBC Email] labels.

Format your response as a numbered list under each step. If evidence is insufficient, explain your reasoning and indicate any uncertainties.
"""

In [None]:
# Ask the model to analyse the email headers only: the label-specific
# instructions go in the system message, the string form of the header dict
# in the user message.
evaluation_header = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": prompt_feature_extraction(label)}, 
        {"role": "user", "content": f'{header}'}
    ]
)

In [None]:
evaluation_header

In [None]:
print(evaluation_header['choices'][0]['message']['content'])

In [None]:
def clean_llm_response(response_text):
    """Strip ``<think>...</think>`` reasoning blocks from an LLM reply.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The text with every closed ``<think>`` block removed, each run of
        blank (or whitespace-only) lines collapsed to a single blank line,
        and leading/trailing whitespace trimmed. An unclosed ``<think>`` tag
        is left in place.
    """
    import re

    # DOTALL lets a reasoning block span several lines; the lazy quantifier
    # stops at the first closing tag so separate blocks are removed one by one.
    think_block = re.compile(r'<think>.*?</think>', re.DOTALL)
    blank_run = re.compile(r'\n\s*\n')

    without_reasoning = think_block.sub('', response_text)
    normalised = blank_run.sub('\n\n', without_reasoning)
    return normalised.strip()

# Use when printing the output
print(clean_llm_response(evaluation_header['choices'][0]['message']['content']))

In [None]:
# Ask the model to analyse the sender, recipient, subject and decoded body.
# The user message is a double-quoted f-string: reusing the same quote
# character inside a replacement field is only valid on Python 3.12+, so the
# previous single-quoted form was a SyntaxError on older interpreters. The
# resulting text is unchanged.
evaluation_content = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": prompt_feature_extraction(label)},
        {"role": "user", "content": f"From: {header['From']}, To: {header['To']}, Subject: {header['Subject']}, {content}"}
    ]
)

In [None]:
evaluation_content

In [None]:
print(evaluation_content['choices'][0]['message']['content'])

In [None]:
print(clean_llm_response(evaluation_content['choices'][0]['message']['content']))

In [None]:
# System prompt for the summarising step: asks for a JSON-only verdict with
# the keys "label", "confidence_level" and "justification".
summarise_prompt = "Based on this analysis, create ONLY a JSON response in this exact format: {\"label\": \"benign\" or \"malicious\", \"confidence_level\": \"not confident\" or \"somewhat confident\" or \"confident\" or \"extremely confident\", \"justification\": [\"characteristic1\", \"characteristic2\", \"characteristic3\"]}"

In [None]:
# JSON-schema description of the verdict, passed as `response_format` to the
# summarising completion: an object with a two-valued "label", a four-valued
# "confidence_level" and exactly three string "justification" entries, all
# required.
# NOTE(review): whether minItems/maxItems are enforced depends on the
# installed llama-cpp-python version — confirm.
response_format = {
    "type": "json_object",
    "schema": {
        "type": "object",
        "properties": {
            "label": {
                "type": "string",
                "enum": ["benign", "malicious"]
            },
            "confidence_level": {
                "type": "string",
                "enum": ["not confident", "somewhat confident", "confident", "extremely confident"]
            },
            "justification": {
                "type": "array",
                "items": {
                    "type": "string"
                },
                "minItems": 3,
                "maxItems": 3
            }
        },
        "required": ["label", "confidence_level", "justification"]
    }
}

In [None]:
# Condense the header analysis and the content analysis into one
# schema-constrained JSON verdict.
#
# Fixes relative to the previous version of this cell:
#   * `content_evaluation` was never defined; the content analysis is bound to
#     `evaluation_content`.
#   * `summary_llm` was never defined; `llm` is the only model this notebook
#     loads. NOTE(review): switch back if a dedicated summary model is
#     introduced.
#   * The user message is a double-quoted f-string so the cell also parses on
#     Python versions before 3.12; the resulting text is unchanged.
structured_response = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": summarise_prompt},
        {"role": "user", "content": f"HEADER: {evaluation_header['choices'][0]['message']['content']}\n\nCONTENT: {evaluation_content['choices'][0]['message']['content']}"}
    ],
    response_format=response_format,
    temperature=0.1,
)

In [None]:
structured_response

In [None]:
structured_response['choices'][0]['message']['content']

In [None]:
import json

# Raw text of the summarising completion (expected to be a JSON document).
response_text = structured_response['choices'][0]['message']['content']

# Parse the JSON text; json.loads raises json.JSONDecodeError if the model
# output is not valid JSON.
email_analysis = json.loads(response_text)

# Report the individual fields of the verdict, numbering the justifications
# from 1.
print(f"Email classification: {email_analysis['label']}")
print(f"Confidence level: {email_analysis['confidence_level']}")
print("\nJustifications:")
for i, reason in enumerate(email_analysis['justification'], 1):
    print(f"{i}. {reason}")

In [None]:
import pandas as pd
# Load a previously saved results checkpoint.
# NOTE(review): absolute, user-specific path — presumably written by a batch
# run outside this notebook; confirm before relying on it.
results = pd.read_parquet('/data/workspace/danishki/git_repo/notebooks/milestone4/llm_results_checkpoint_10.parquet')

In [None]:
results