In [1]:
# #Version 1
# import requests

# def check_word_relation(word1, word2, language="en"):
#     """
#     Checks if there exists > 0 relations between two words using ConceptNet API.

#     Args:
#         word1 (str): The first word.
#         word2 (str): The second word.
#         language (str): The language code (default is 'en' for English).

#     Returns:
#         int: 1 if there is at least one relation, 0 otherwise.
#     """
#     # Format API URL
#     url = f"http://api.conceptnet.io/related/c/{language}/{word1}?filter=/c/{language}/{word2}"

#     try:
#         # Make the request
#         response = requests.get(url)
#         response.raise_for_status()  # Raise an exception for HTTP errors

#         # Parse JSON
#         data = response.json()

#         # Check if there are any related terms
#         if data.get("related", []):  # 'related' key contains relations
#             return 1  # Relation exists
#         else:
#             return 0  # No relation exists
#     except Exception as e:
#         print(f"Error querying ConceptNet: {e}")
#         return 0

# # Example usage:
# result = check_word_relation("surgery", "fruit")
# print(f"Relation exists: {result}")  # Output will be 1 if relation exists, 0 otherwise.

In [2]:
# #Version 2
# import requests

# def check_trustworthy_relation(word1, word2, language="en"):
#     """
#     Checks if two words have a trustworthy relationship in ConceptNet, focusing on phishing detection.

#     Args:
#         word1 (str): The first word.
#         word2 (str): The second word.
#         language (str): The language code (default is 'en' for English).

#     Returns:
#         int: 1 if a trustworthy relation exists, 0 otherwise.
#     """
#     # Normalize the input words
#     word1 = word1.lower().strip()
#     word2 = word2.lower().strip()

#     # Construct the API URL
#     url = f"http://api.conceptnet.io/query?start=/c/{language}/{word1}&end=/c/{language}/{word2}"

#     try:
#         # Query the API
#         response = requests.get(url)
#         response.raise_for_status()

#         # Parse the JSON response
#         data = response.json()
#         edges = data.get("edges", [])

#         # Define relations to consider as trustworthy
#         trusted_relations = {"Synonym", "IsA", "PartOf", "UsedFor"}

#         # Check for trustworthy relations
#         for edge in edges:
#             if edge.get("rel", {}).get("label") in trusted_relations:
#                 # Optionally check weight for stronger relations
#                 if edge.get("weight", 0) > 1.0:
#                     return 1  # Trustworthy relation found

#         return 0  # No trustworthy relation found
#     except Exception as e:
#         print(f"Error querying ConceptNet: {e}")
#         return 0

# # Example usage:
# result = check_trustworthy_relation("manager", "boss")
# print(f"Trustworthy relation exists: {result}")  # Should output 1 for meaningful trust relations

In [None]:
# def get_trust_score(sentence):
#     """
#     Main function to evaluate the trustworthiness of a sentence.

#     Args:
#         sentence (str): The input sentence to evaluate.

#     Returns:
#         float: Trust score (0 to 1), indicating the trustworthiness of the sentence.
#     """
#     try:
#         # Step 1: Process the sentence into word pairs
#         word_pairs = process_sentence(sentence)

#         # Step 2: Evaluate each pair using the ConceptNet-based method
#         if not word_pairs:
#             return 0.0  # No meaningful pairs, return lowest trust score

#         trustworthy_count = sum(
#             check_trustworthy_relation(word1, word2) for word1, word2 in word_pairs
#         )

#         # Step 3: Calculate the trust score
#         total_pairs = len(word_pairs)
#         trust_score = trustworthy_count / total_pairs if total_pairs > 0 else 0.0

#         return trust_score
#     except Exception as e:
#         print(f"Error during trust score evaluation: {e}")
#         return 0.0

# # Example usage:
# sentence = "Hi team, please find attached the minutes from our last meeting. Let me know if you have any questions."
# trust_score = get_trust_score(sentence)
# print(f"Trust Score: {trust_score:.2f}")


In [6]:
# def evaluate_sentence_trust(sentence):
#     """
#     Evaluates the trustworthiness of a sentence using ConceptNet relations.

#     Args:
#         sentence (str): The input sentence to evaluate.

#     Returns:
#         float: Trust score (ratio of trustworthy pairs to total pairs).
#     """
#     # Step 1: Process the sentence into word pairs
#     # word_pairs = process_sentence(sentence)

#     # Step 2: Check each pair for trustworthy relations
#     trustworthy_count = 0
#     for word1, word2 in word_pairs:
#         if check_trustworthy_relation(word1, word2) == 1:
#             trustworthy_count += 1

#     # Step 3: Calculate the trust score
#     total_pairs = len(word_pairs)
#     trust_score = trustworthy_count / total_pairs if total_pairs > 0 else 0

#     return trust_score

# # Example usage:
# trust_score = evaluate_sentence_trust(sentence)
# print(f"Trust Score: {trust_score:.2f}")  # Output is a normalized trust score (0 to 1)


KeyboardInterrupt: 

In [3]:
import spacy
from itertools import combinations

# Load SpaCy English model (ensure SpaCy is installed with `pip install spacy`)
nlp = spacy.load("en_core_web_sm")

def process_sentence(sentence):
    """
    Processes a sentence to extract meaningful word pairs for trustworthiness evaluation.

    Args:
        sentence (str): The input sentence to process.

    Returns:
        list: A list of tuples containing pairs of words for ConceptNet trustworthiness checks.
    """
    # Step 1: Tokenize and analyze sentence
    doc = nlp(sentence)

    # Step 2: Extract relevant keywords (nouns, verbs, or adjectives)
    keywords = [
        token.text.lower()
        for token in doc
        if token.pos_ in {"NOUN", "VERB", "ADJ"} and not token.is_stop
    ]

    # Step 3: Generate all meaningful word pairs (combinations)
    word_pairs = list(combinations(keywords, 2))

    return word_pairs

# Example usage:
sentence = "Congratulations! You have been selected to receive a $1000 gift card. Click here to claim your reward."
word_pairs = process_sentence(sentence)
print(f"Word pairs: {word_pairs}")


Word pairs: [('congratulations', 'selected'), ('congratulations', 'receive'), ('congratulations', 'gift'), ('congratulations', 'card'), ('congratulations', 'click'), ('congratulations', 'claim'), ('congratulations', 'reward'), ('selected', 'receive'), ('selected', 'gift'), ('selected', 'card'), ('selected', 'click'), ('selected', 'claim'), ('selected', 'reward'), ('receive', 'gift'), ('receive', 'card'), ('receive', 'click'), ('receive', 'claim'), ('receive', 'reward'), ('gift', 'card'), ('gift', 'click'), ('gift', 'claim'), ('gift', 'reward'), ('card', 'click'), ('card', 'claim'), ('card', 'reward'), ('click', 'claim'), ('click', 'reward'), ('claim', 'reward')]


In [5]:
# Version 3
import requests
def check_trustworthy_relation(word1, word2, language="en"):
    """
    Checks if two words have a trustworthy relationship in ConceptNet with prioritized trust criteria.

    Args:
        word1 (str): The first word.
        word2 (str): The second word.
        language (str): The language code (default is 'en').

    Returns:
        int: 1 if a trustworthy relation exists, 0 otherwise.
    """
    word1 = word1.lower().strip()
    word2 = word2.lower().strip()

    url = f"http://api.conceptnet.io/query?start=/c/{language}/{word1}&end=/c/{language}/{word2}"

    try:
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()
        edges = data.get("edges", [])

        # Relation priorities
        high_priority = {"IsA", "PartOf", "UsedFor", "AtLocation",
                         "MotivatedByGoal", "HasProperty", "Synonym"}
        medium_priority = {"CapableOf", "Causes", "ReceivesAction", "MadeOf"}
        low_priority = {"RelatedTo", "SimilarTo"}

        # Thresholds
        high_threshold = 0.5
        medium_threshold = 0.7
        low_threshold = 0.9

        for edge in edges:
            rel = edge.get("rel", {}).get("label")
            weight = edge.get("weight", 0)

            if rel in high_priority and weight >= high_threshold:
                return 1
            elif rel in medium_priority and weight >= medium_threshold:
                return 1
            elif rel in low_priority and weight >= low_threshold:
                return 1

        return 0
    except Exception as e:
        print(f"Error querying ConceptNet: {e}")
        return 0



In [18]:
def extract_important_words(sentence):
    """
    Extracts important words (entities, nouns, verbs, etc.) from a sentence to minimize unnecessary ConceptNet calls.

    Args:
        sentence (str): The input sentence.

    Returns:
        list: A list of important words extracted from the sentence.
    """
    doc = nlp(sentence)
    important_words = []

    for token in doc:
        # Select meaningful words: Proper nouns, nouns, verbs, and adjectives
        if token.pos_ in {"PROPN", "NOUN", "VERB", "ADJ"} and not token.is_stop:
            important_words.append(token.text.lower())

    # Deduplicate and return
    print(f"Extracted Important Words: {important_words}")
    return list(set(important_words))

    # Write a method that uses Semantic Analysis to enrich sentence context and provide a trust score of if the message is phishing or not


In [19]:
from itertools import combinations

def create_pairs(words):
    """
    Creates word pairs from the extracted important words.

    Args:
        words (list): A list of important words.

    Returns:
        list: A list of word pairs.
    """
    pairs = list(combinations(words, 2))
    # Limit pair generation to small groups
    print(f"Generated Word Pairs: {pairs}")
    return pairs


In [15]:
print(check_trustworthy_relation("dog", "pet"))

1


In [11]:
# def get_trust_score(sentence):
#     """
#     Evaluates the trustworthiness of a sentence using prioritized word pairs.

#     Args:
#         sentence (str): The input sentence.

#     Returns:
#         float: Trust score (0 to 1), indicating the trustworthiness of the sentence.
#     """
#     try:
#         # Step 1: Extract important words
#         important_words = extract_important_words(sentence)

#         # Step 2: Generate meaningful pairs
#         word_pairs = create_pairs(important_words)

#         if not word_pairs:
#             return 0.0  # No meaningful pairs

#         # Step 3: Evaluate pairs with relaxed trust criteria
#         trustworthy_count = sum(
#             check_trustworthy_relation(word1, word2) for word1, word2 in word_pairs
#         )

#         # Step 4: Compute trust score
#         total_pairs = len(word_pairs)
#         trust_score = trustworthy_count / total_pairs if total_pairs > 0 else 0.0

#         return trust_score
#     except Exception as e:
#         print(f"Error during trust score evaluation: {e}")
#         return 0.0


In [12]:
import concurrent.futures

def threaded_check_relation(word_pairs):
    """
    Processes word pairs in parallel to check their relations in ConceptNet.

    Args:
        word_pairs (list): List of word pairs to check.

    Returns:
        list: A list of results (1 or 0) for each pair.
    """
    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = list(executor.map(
            lambda pair: check_trustworthy_relation(pair[0], pair[1]),
            word_pairs
        ))
    return results


def get_trust_score(sentence):
    """
    Evaluates the trustworthiness of a sentence using threaded word pair checks.

    Args:
        sentence (str): The input sentence.

    Returns:
        float: Trust score (0 to 1), indicating the trustworthiness of the sentence.
    """
    try:
        # Step 1: Extract important words
        important_words = extract_important_words(sentence)

        # Step 2: Generate meaningful pairs
        word_pairs = create_pairs(important_words)

        if not word_pairs:
            return 0.0  # No meaningful pairs

        # Step 3: Evaluate pairs concurrently
        results = threaded_check_relation(word_pairs)

        # Step 4: Compute trust score
        trustworthy_count = sum(results)
        total_pairs = len(word_pairs)
        trust_score = trustworthy_count / total_pairs if total_pairs > 0 else 0.0

        return trustworthy_count
    except Exception as e:
        print(f"Error during trust score evaluation: {e}")
        return 0.0


In [17]:
# Example usage
sentence = "Nah I don't think he goes to usf, he lives around here though"
trust_score = get_trust_score(sentence)
print(f"Trust Score: {trust_score:.2f}")

Extracted Important Words: ['think', 'goes', 'usf', 'lives']
Generated Word Pairs: [('think', 'goes'), ('think', 'lives'), ('think', 'usf'), ('goes', 'lives'), ('goes', 'usf'), ('lives', 'usf')]
Trust Score: 0.00


In [44]:
# TESTING!!!!

import requests
import spacy
from collections import Counter

# Load the spaCy language model
nlp = spacy.load("en_core_web_sm")

def query_conceptnet(term):
    """
    Query ConceptNet API to get semantic relationships for a given term.
    """
    url = f"http://api.conceptnet.io/c/en/{term}?offset=0&limit=50"
    response = requests.get(url)
    if response.status_code == 200:
        edges = response.json().get('edges', [])
        relationships = []
        for edge in edges:
            if(edge['rel']['label'] != 'Synonym'):
                relationships.append((edge['rel']['label'], edge['end']['label']))

        # relationships = [(edge['rel']['label'], edge['end']['label']) for edge in edges]
        print(f"Relationships for '{term}': {relationships}")
        return relationships
    else:
        return []

# def query_relatedto_conceptnet(term):
#     """
#     Query ConceptNet API to get 'RelatedTo' relationships for a given term.
#     """
#     url = f"http://api.conceptnet.io/c/en/{term}?offset=0&limit=50"
#     response = requests.get(url)
#     if response.status_code == 200:
#         edges = response.json().get('edges', [])
#         # Filter for 'RelatedTo' relationships only
#         relatedto_relationships = [edge['end']['label'] for edge in edges if edge['rel']['label'] == 'RelatedTo']
#         return relatedto_relationships
#     else:
#         return []

def compute_trust_score(message):
    """
    Use Semantic Analysis with ConceptNet to enrich sentence context and compute a trust score.
    A higher score means the message is less likely to be phishing.
    """
    phishing_keywords = {
        "urgent": ["important", "immediate", "now"],
        "prize": ["lottery", "reward", "scam"],
        "account": ["suspended", "compromised", "deactivated"],
        "password": ["reset", "recover", "breach"],
    }

    # Preprocess the message
    doc = nlp(message)

    # Extract key terms (nouns, verbs, and named entities)
    key_terms = [token.text.lower() for token in doc if token.pos_ in {"PROPN","NOUN", "VERB", "ADJ"}]
    key_entities = [ent.text.lower() for ent in doc.ents]
    terms_to_check = list(set(key_terms + key_entities))

    # Query ConceptNet for relationships
    term_relationships = {}
    for term in terms_to_check:
        term_relationships[term] = query_conceptnet(term)

    # Analyze semantic context for phishing indicators
    phishing_count = 0
    total_terms = len(terms_to_check)

    for term, relationships in term_relationships.items():
        for relation, related_term in relationships:
            if term in phishing_keywords:
                # Check if related terms match phishing-related keywords
                if any(phish_term in related_term for phish_term in phishing_keywords[term]):
                    phishing_count += 1

    # Calculate trust score (normalized to 0-1 range)
    trust_score = 1 - (phishing_count / max(total_terms, 1))

    # Enrich context for explanation
    enriched_context = Counter([related for rel, related in sum(term_relationships.values(), [])])

    return trust_score, enriched_context

# Example Usage
message = "password"
trust_score, enriched_context = compute_trust_score(message)

print(f"Trust Score: {trust_score:.2f}")
print("Enriched Context:")
for term, count in enriched_context.most_common(10):
    print(f"- {term}: {count}")


Relationships for 'password': [('IsA', 'positive identification'), ('RelatedTo', 'password')]
Trust Score: 1.00
Enriched Context:
- positive identification: 1
- password: 1


In [45]:
import requests
import spacy

# Load the spaCy language model
nlp = spacy.load("en_core_web_sm")

def query_relatedto_conceptnet(term):
    """
    Query ConceptNet API to get 'RelatedTo' relationships for a given term.
    """
    url = f"http://api.conceptnet.io/c/en/{term}?offset=0&limit=50"
    response = requests.get(url)
    if response.status_code == 200:
        edges = response.json().get('edges', [])
        # Filter for 'RelatedTo' relationships only
        relatedto_relationships = [edge['end']['label'] for edge in edges if edge['rel']['label'] == 'RelatedTo']
        return relatedto_relationships
    else:
        return []

def compute_trust_score_relatedto(message):
    """
    Use 'RelatedTo' relationships from ConceptNet to compute a trust score for a message.
    A higher score means the message is less likely to be phishing.
    """
    phishing_keywords = {
        "urgent": ["important", "immediate", "now"],
        "prize": ["lottery", "reward", "scam"],
        "account": ["suspended", "compromised", "deactivated", 'bank', 'money', 'checking', 'savings', 'account'],
        "password": ["reset", "recover", "breach"],
    }

    # Preprocess the message
    doc = nlp(message)

    # Extract key terms (nouns, verbs, and named entities)
    key_terms = [token.text.lower() for token in doc if token.pos_ in {"NOUN", "VERB"}]
    key_entities = [ent.text.lower() for ent in doc.ents]
    terms_to_check = list(set(key_terms + key_entities))

    # Query ConceptNet for 'RelatedTo' relationships
    term_relationships = {}
    for term in terms_to_check:
        term_relationships[term] = query_relatedto_conceptnet(term)

    # Analyze semantic context for phishing indicators
    phishing_count = 0
    total_terms = len(terms_to_check)

    for term, related_terms in term_relationships.items():
        if term in phishing_keywords:
            # Check if related terms match phishing-related keywords
            if any(phish_term in related_terms for phish_term in phishing_keywords[term]):
                phishing_count += 1

    # Calculate trust score (normalized to 0-1 range)
    trust_score = 1 - (phishing_count / max(total_terms, 1))

    # Enrich context for explanation
    enriched_context = {term: related_terms for term, related_terms in term_relationships.items()}

    return trust_score, enriched_context

# Example Usage
message = "Your account has been suspended. Click here to reset your password immediately."
trust_score, enriched_context = compute_trust_score_relatedto(message)

print(f"Trust Score: {trust_score:.2f}")
print("Enriched Context:")
for term, related_terms in enriched_context.items():
    print(f"- {term}: {related_terms}")


Trust Score: 0.80
Enriched Context:
- suspended: ['suspended', 'suspended', 'suspended', 'suspended', 'suspended', 'suspended', 'suspended', 'suspended', 'suspended', 'suspended', 'suspended', 'suspended', 'suspended', 'interrupted', 'suspended', 'suspended', 'suspended', 'suspend', 'suspended', 'suspended', 'suspended', 'suspended', 'suspended', 'suspended', 'suspended', 'suspended', 'suspended', 'suspended', 'suspended', 'suspended', 'suspended', 'suspended']
- account: ['bank', 'money', 'checking', 'savings', 'account']
- reset: ['reset', 'computer', 'reset', 'harbour', 'reset', 'reset', 'reset', 'reset']
- click: ['click']
- password: ['password']
