In [1]:
from collections import defaultdict

class HindleRoothPPAttachment:
    """
    Count-based prepositional-phrase attachment model.

    The model is trained on (NP, VP, P) triples and decides, for a new
    triple, whether the preposition attaches to the noun phrase or the verb
    phrase by comparing how often each phrase co-occurred with that
    preposition in the training data.
    """

    def __init__(self):
        # Co-occurrence counts keyed by (phrase, preposition) pairs
        self.np_p_counts = defaultdict(int)
        self.vp_p_counts = defaultdict(int)
        # Total number of training triples seen for each preposition
        self.p_counts = defaultdict(int)

    def train(self, corpus):
        """
        Train the model using a parsed corpus.

        Counts are accumulated, so calling this method again adds to the
        existing statistics rather than replacing them.

        :param corpus: An iterable of (NP, VP, P) tuples
        """
        for (np, vp, p) in corpus:
            self.np_p_counts[(np, p)] += 1
            self.vp_p_counts[(vp, p)] += 1
            self.p_counts[p] += 1

    def calculate_probabilities(self, np, vp, p):
        """
        Calculate the relative frequencies of the NP and the VP for a preposition.

        Each value is count(phrase, P) / count(P), i.e. the share of the
        training triples containing P in which the given phrase appeared.

        :param np: Noun phrase
        :param vp: Verb phrase
        :param p: Preposition
        :return: (count(NP, P) / count(P), count(VP, P) / count(P)), or
                 (0, 0) when the preposition was never seen in training
        """
        # Use .get() instead of indexing: indexing a defaultdict inserts the
        # missing key, which would silently grow the model on every query.
        p_count = self.p_counts.get(p, 0)

        # Avoid division by zero
        if p_count == 0:
            return 0, 0

        p_np_p = self.np_p_counts.get((np, p), 0) / p_count
        p_vp_p = self.vp_p_counts.get((vp, p), 0) / p_count

        return p_np_p, p_vp_p

    def decide_attachment(self, np, vp, p):
        """
        Decide whether the preposition attaches to the NP or the VP.

        Ties, including the case of an unseen preposition where both
        scores are 0, resolve to 'VP'.

        :param np: Noun phrase
        :param vp: Verb phrase
        :param p: Preposition
        :return: 'NP' or 'VP' based on the attachment decision
        """
        p_np_p, p_vp_p = self.calculate_probabilities(np, vp, p)

        if p_np_p > p_vp_p:
            return 'NP'
        else:
            return 'VP'

# Demo: fit the model on a toy corpus and classify a single triple
if __name__ == "__main__":
    # Each entry is one (NP, VP, P) observation; extend with real parsed data
    corpus = [
        ("the man", "saw", "with"),
        ("the book", "is", "on"),
        ("the cat", "sat", "on"),
        ("the dog", "barked", "at"),
    ]

    # Build the counters from the toy corpus
    hr_model = HindleRoothPPAttachment()
    hr_model.train(corpus)

    # Triple whose attachment we want the model to predict
    np, vp, p = "the man", "saw", "with"

    attachment = hr_model.decide_attachment(np=np, vp=vp, p=p)
    print(f"The preposition '{p}' attaches to the '{attachment}'")

The preposition 'with' attaches to the 'VP'


In [3]:
import math

# Step 1: Ask the user for occurrence counts
def get_user_input():
    """
    Prompt for the verb, noun, preposition and their occurrence counts.

    The three words are stripped of surrounding whitespace; the four counts
    are parsed with int(), which raises ValueError on non-integer text.

    :return: (verb, noun, prep, verb_prep_count, verb_total_count,
              noun_prep_count, noun_total_count)
    """
    def _ask_text(prompt):
        # Read one line and drop leading/trailing whitespace
        return input(prompt).strip()

    def _ask_count(prompt):
        # Read one line and parse it as an integer
        return int(input(prompt))

    verb = _ask_text("Enter the verb: ")
    noun = _ask_text("Enter the noun: ")
    prep = _ask_text("Enter the preposition: ")

    # Verb statistics are requested before noun statistics
    verb_hits = _ask_count(f"Enter the occurrence of the preposition '{prep}' with the verb '{verb}': ")
    verb_total = _ask_count(f"Enter the total occurrences of the verb '{verb}': ")

    noun_hits = _ask_count(f"Enter the occurrence of the preposition '{prep}' with the noun '{noun}': ")
    noun_total = _ask_count(f"Enter the total occurrences of the noun '{noun}': ")

    return (verb, noun, prep, verb_hits, verb_total, noun_hits, noun_total)

# Step 2: Calculate Probabilities and λ(v, n, p)
def calculate_lambda(verb, noun, prep, verb_prep_count, verb_total_count, noun_prep_count, noun_total_count):
    """
    Compute the log-likelihood ratio λ(v, n, p) for PP attachment.

    λ = log2( P(VA_p = 1 | v) * P(NA_p = 0 | n) / P(NA_p = 1 | n) )

    where P(VA_p = 1 | v) = verb_prep_count / verb_total_count and
    P(NA_p = 1 | n) = noun_prep_count / noun_total_count.

    Inputs for which the ratio cannot be evaluated are reported through the
    error slot of the return value instead of raising.

    :param verb: Verb (not used in the computation)
    :param noun: Noun (not used in the computation)
    :param prep: Preposition (not used in the computation)
    :param verb_prep_count: Occurrences of the preposition with the verb
    :param verb_total_count: Total occurrences of the verb
    :param noun_prep_count: Occurrences of the preposition with the noun
    :param noun_total_count: Total occurrences of the noun
    :return: (lambda_value, None) on success, or (None, error_message)
             when λ is undefined for the given counts
    """
    # A zero total would make the probability estimates divide by zero
    if verb_total_count == 0 or noun_total_count == 0:
        return None, "Error: Total occurrence counts must be non-zero to estimate probabilities."

    # Calculate P(VA_p = 1 | v)
    P_VAp = verb_prep_count / verb_total_count

    # Calculate P(NA_p = 1 | n)
    P_NAp = noun_prep_count / noun_total_count

    # Calculate P(NA_p = 0 | n)
    P_NAp_0 = 1 - P_NAp

    # Handle case where probabilities might cause division by zero
    if P_NAp == 0:
        return None, "Error: Division by zero in log-ratio calculation due to insufficient data."

    likelihood_ratio = (P_VAp * P_NAp_0) / P_NAp

    # math.log2 raises ValueError for zero or negative arguments; this happens
    # when the verb never takes the preposition (P_VAp == 0) or the noun
    # always takes it (P_NAp_0 == 0).
    if likelihood_ratio <= 0:
        return None, "Error: Log-ratio is undefined because the likelihood ratio is not positive."

    # Calculate λ(v, n, p)
    lambda_value = math.log2(likelihood_ratio)

    return lambda_value, None

# Step 3: Determine the attachment
def determine_attachment(lambda_value):
    """
    Translate a λ score into a human-readable attachment verdict.

    :param lambda_value: Log-likelihood ratio λ(v, n, p)
    :return: The verb-attachment message when λ is strictly positive,
             otherwise the noun-attachment message
    """
    favours_verb = lambda_value > 0
    return "PP attaches with the Verb." if favours_verb else "PP attaches with the Noun."

# Step 4: Interactive User Input
def hindle_rooth_algorithm():
    """
    Run one interactive attachment decision from start to finish.

    Collects the words and counts via get_user_input(), scores them with
    calculate_lambda(), and prints either the reported error message or the
    λ value followed by the attachment verdict.
    """
    (verb, noun, prep,
     v_prep, v_total,
     n_prep, n_total) = get_user_input()

    lambda_value, error_message = calculate_lambda(
        verb, noun, prep, v_prep, v_total, n_prep, n_total
    )

    # Guard clause: a reported error replaces the normal output entirely
    if error_message:
        print(error_message)
        return

    result = determine_attachment(lambda_value)
    print(f"λ({verb}, {noun}, {prep}) = {lambda_value}")
    print(result)

# Run the interactive algorithm. Executing this cell prompts for the verb,
# noun, preposition and four counts via input(), then prints the result.
hindle_rooth_algorithm()

Enter the verb:  send
Enter the noun:  soldiers
Enter the preposition:  into
Enter the occurrence of the preposition 'into' with the verb 'send':  86
Enter the total occurrences of the verb 'send':  1742
Enter the occurrence of the preposition 'into' with the noun 'soldiers':  1
Enter the total occurrences of the noun 'soldiers':  1478


λ(send, soldiers, into) = 6.1881899568680225
PP attaches with the Verb.
