In [2]:
# Hindle-Rooth algorithm for prepositional-phrase (PP) attachment

from collections import defaultdict, Counter
import re

class HindleRoothAttachment:
    """Co-occurrence model pairing prepositions with the other words of a sentence.

    Training counts, for every preposition found in a sentence, how often each
    non-preposition word of that same sentence appears alongside it.
    Prediction scores every (preposition, word) pair of a new sentence by its
    relative frequency and returns the highest-scoring pair.

    Note that every non-preposition token is treated as a candidate "noun";
    no part-of-speech tagging is performed.

    Args:
        corpus: Iterable of sentence strings used for training.
        prepositions: Optional iterable of words to treat as prepositions.
            Defaults to ``DEFAULT_PREPOSITIONS``. Prepositions outside this
            list (e.g. "into") are treated as ordinary words.
    """

    # Prepositions recognised when the caller does not supply a list.
    DEFAULT_PREPOSITIONS = ('with', 'in', 'on', 'at', 'by', 'for')

    # A token is a run of word characters, optionally joined by apostrophes or
    # hyphens, so surrounding punctuation ("Afghanistan.") is dropped while
    # forms such as "soldier's" stay in one piece.
    _TOKEN_RE = re.compile(r"\w+(?:['’\-]\w+)*")

    def __init__(self, corpus, prepositions=None):
        self.corpus = corpus
        if prepositions is None:
            self.prepositions = list(self.DEFAULT_PREPOSITIONS)
        else:
            # Tokens are lower-cased, so the lookup list must be as well.
            self.prepositions = [prep.lower() for prep in prepositions]
        # preposition -> Counter of words seen in the same sentence
        self.preposition_count = defaultdict(Counter)
        # preposition -> total number of (preposition, word) pairs counted
        self.total_count = Counter()
        self._train()

    def _split_tokens(self, sentence):
        """Return ``(preps, nouns)``: the sentence's lower-cased tokens split
        into known prepositions and everything else, in sentence order."""
        words = self._TOKEN_RE.findall(sentence.lower())
        preps = [word for word in words if word in self.prepositions]
        nouns = [word for word in words if word not in self.prepositions]
        return preps, nouns

    def _train(self):
        """Accumulate preposition/word co-occurrence counts over the corpus."""
        for sentence in self.corpus:
            preps, nouns = self._split_tokens(sentence)

            # A sentence contributes nothing without both kinds of token.
            if not preps or not nouns:
                continue

            for prep in preps:
                for noun in nouns:
                    self.preposition_count[prep][noun] += 1
                    self.total_count[prep] += 1

    def predict_attachment(self, sentence):
        """Return the most likely ``(preposition, word)`` pair for ``sentence``.

        Returns:
            The pair with the highest relative frequency
            ``count(prep, word) / total(prep)``; the first such pair in
            sentence order wins ties. ``None`` when the sentence has no known
            preposition, no other word, or only prepositions that never
            occurred in training.
        """
        preps, nouns = self._split_tokens(sentence)

        if not preps or not nouns:
            return None

        # Calculate likelihood for each preposition-noun pair
        likelihoods = {}
        for prep in preps:
            count_p = self.total_count[prep]
            if count_p <= 0:
                continue
            # .get() avoids the defaultdict inserting an empty Counter for an
            # unseen preposition, which would mutate the trained model.
            noun_counts = self.preposition_count.get(prep, {})
            for noun in nouns:
                likelihoods[(prep, noun)] = noun_counts.get(noun, 0) / count_p

        # Find the best attachment based on the highest likelihood
        return max(likelihoods, key=likelihoods.get, default=None)

# Example usage: train on a one-sentence corpus, then query a short phrase.
# 'into' is not in the class's example preposition list, so the query
# contains no recognised preposition and the prediction is None.
corpus = ["Moscow sent more than 10000 soldiers into Afghanistan."]
hr = HindleRoothAttachment(corpus)

sentence = "sent soldiers into"
attachment = hr.predict_attachment(sentence)
print(f"Best attachment for the sentence '{sentence}': {attachment}")

        


Best attachment for the sentence 'sent soldiers into': None


In [3]:
# Hand-picked counts (for demonstration purposes): how often the PP
# "into Afghanistan" modifies each candidate head.
count_pp_with_afghanistan = {"soldiers": 200, "soldier's": 50}
total_count_pp = 250  # Total count of the PP "into Afghanistan"

# Pull out the count for each of the two possible attachments
count_soldiers, count_soldier_possessive = (
    count_pp_with_afghanistan["soldiers"],
    count_pp_with_afghanistan["soldier's"],
)

# Calculate probabilities
def calculate_probability(count_attachment, total_count_pp):
    """Return the relative frequency ``count_attachment / total_count_pp``.

    Args:
        count_attachment: Number of times the PP was seen with one attachment.
        total_count_pp: Total number of times the PP was seen.

    Returns:
        The ratio as a float, or ``0.0`` when ``total_count_pp`` is zero (no
        observations), instead of raising ``ZeroDivisionError``.
    """
    if total_count_pp == 0:
        return 0.0
    return count_attachment / total_count_pp

# Relative frequency of each candidate attachment
probability_soldiers = calculate_probability(count_soldiers, total_count_pp)
probability_soldier_possessive = calculate_probability(
    count_soldier_possessive, total_count_pp
)

# Report both probabilities
print(f"Probability of 'into Afghanistan' modifying 'soldiers': {probability_soldiers:.3f}")
print(f"Probability of 'into Afghanistan' modifying 'soldier's': {probability_soldier_possessive:.3f}")

# Report the winner; on a tie the possessive reading is reported
print(
    "The most likely attachment is 'into Afghanistan' modifying 'soldiers'."
    if probability_soldiers > probability_soldier_possessive
    else "The most likely attachment is 'into Afghanistan' modifying 'soldier's'."
)


Probability of 'into Afghanistan' modifying 'soldiers': 0.800
Probability of 'into Afghanistan' modifying 'soldier's': 0.200
The most likely attachment is 'into Afghanistan' modifying 'soldiers'.


In [9]:
import math

# Step 1: Ask the user for occurrence counts
def get_user_input():
    """Prompt for a verb, noun, preposition and their four occurrence counts.

    Returns:
        A 7-tuple ``(verb, noun, prep, verb_prep_count, verb_total_count,
        noun_prep_count, noun_total_count)``; the three words are stripped
        strings and the four counts are ints.

    Raises:
        ValueError: If a count answer cannot be parsed by ``int``.
    """
    def ask_word(prompt):
        return input(prompt).strip()

    def ask_count(prompt):
        return int(input(prompt))

    verb = ask_word("Enter the verb: ")
    noun = ask_word("Enter the noun: ")
    prep = ask_word("Enter the preposition: ")

    # Verb-side counts are asked before noun-side counts.
    verb_prep_count = ask_count(f"Enter the occurrence of the preposition '{prep}' with the verb '{verb}': ")
    verb_total_count = ask_count(f"Enter the total occurrences of the verb '{verb}': ")
    noun_prep_count = ask_count(f"Enter the occurrence of the preposition '{prep}' with the noun '{noun}': ")
    noun_total_count = ask_count(f"Enter the total occurrences of the noun '{noun}': ")

    return (
        verb,
        noun,
        prep,
        verb_prep_count,
        verb_total_count,
        noun_prep_count,
        noun_total_count,
    )

# Step 2: Calculate Probabilities and λ(v, n, p)
def calculate_lambda(verb, noun, prep, verb_prep_count, verb_total_count, noun_prep_count, noun_total_count):
    """Compute λ(v, n, p) = log2( P(VA_p=1|v) * P(NA_p=0|n) / P(NA_p=1|n) ).

    Args:
        verb, noun, prep: The words involved; not used in the arithmetic.
        verb_prep_count: Occurrences of the preposition with the verb.
        verb_total_count: Total occurrences of the verb.
        noun_prep_count: Occurrences of the preposition with the noun.
        noun_total_count: Total occurrences of the noun.

    Returns:
        ``(lambda_value, None)`` on success, or ``(None, error_message)`` when
        the counts do not yield a defined logarithm.
    """
    # A negative count is never valid. In particular, a negative
    # noun_prep_count would slip past the checks below and make math.log2
    # raise ValueError on a negative ratio.
    if min(verb_prep_count, verb_total_count, noun_prep_count, noun_total_count) < 0:
        return None, "Error: Occurrence counts must be non-negative."

    # Calculate P(VA_p = 1 | v)
    P_VAp = verb_prep_count / verb_total_count if verb_total_count > 0 else 0

    # Calculate P(NA_p = 1 | n)
    P_NAp = noun_prep_count / noun_total_count if noun_total_count > 0 else 0

    # Calculate P(NA_p = 0 | n)
    P_NAp_0 = 1 - P_NAp

    # P(NA_p = 1 | n) is the denominator of the ratio, so it must be non-zero
    if P_NAp == 0:
        return None, "Error: P(NA(p) = 1 | n) is zero, which causes invalid calculation."

    # Ensure that we do not take log2 of zero or a negative number
    # (the numerator goes negative when noun_prep_count > noun_total_count)
    numerator = P_VAp * P_NAp_0
    if numerator <= 0:
        return None, "Error: The numerator in the Lambda calculation is zero or negative."

    lambda_value = math.log2(numerator / P_NAp)

    return lambda_value, None

# Step 3: Determine the attachment
def determine_attachment(lambda_value):
    """Translate a λ score into an attachment verdict.

    A strictly positive λ selects the verb; zero or a negative λ selects the
    noun.
    """
    return "PP attaches with the Verb." if lambda_value > 0 else "PP attaches with the Noun."

# Step 4: Interactive User Input
def hindle_rooth_algorithm():
    """Run one interactive round: read the counts, compute λ, print the verdict.

    Prints the error message alone when λ cannot be computed; otherwise prints
    the λ value (four decimals) followed by the attachment decision.
    """
    user_values = get_user_input()
    verb, noun, prep = user_values[:3]

    lambda_value, error_message = calculate_lambda(*user_values)

    # Guard clause: nothing more to report when the calculation failed.
    if error_message:
        print(error_message)
        return

    result = determine_attachment(lambda_value)
    print(f"λ({verb}, {noun}, {prep}) = {lambda_value:.4f}")
    print(result)

# Run the interactive algorithm
hindle_rooth_algorithm()

Enter the verb:  end
Enter the noun:  venture
Enter the preposition:  with
Enter the occurrence of the preposition 'with' with the verb 'end':  607
Enter the total occurrences of the verb 'end':  5156
Enter the occurrence of the preposition 'with' with the noun 'venture':  155
Enter the total occurrences of the noun 'venture':  1442


λ(end, venture, with) = -0.0328
PP attaches with the Noun.


In [None]:
import math
from math import log2 as lg
# Collect the three words and their corpus counts from the user.
noun = input("enter the noun from the text")
verb = input("enter the verb from the text ")
prepo = input("enter the preposition from the text")
noun_count = int(input("entr the noun count"))
verb_count = int(input("enter the verb count"))
np_count = int(input("enter the noun preposition count"))
vp_count = int(input("enter vp_count"))

if noun_count <= 0 or verb_count <= 0:
    # Both totals are denominators below; zero would raise ZeroDivisionError.
    print("Error: the noun count and the verb count must both be positive.")
else:
    prob_vp = vp_count / verb_count    # P(preposition attaches | verb)
    prob_np = np_count / noun_count    # P(preposition attaches | noun)
    prob_np_not = 1 - prob_np          # P(preposition does not attach | noun)

    if prob_np <= 0 or prob_vp * prob_np_not <= 0:
        # prob_np is the denominator of the ratio, and log2 is only defined
        # for a strictly positive argument.
        print("Error: lambda is undefined for these counts (log2 needs a positive ratio).")
    else:
        lambdaa = lg((prob_vp * prob_np_not) / prob_np)
        print(lambdaa)
        # Positive lambda favours the verb; zero or negative favours the noun.
        if lambdaa > 0:
            print("PP with Verb")
        else:
            print("PP with Noun")

enter the noun from the text sai
enter the verb from the text  went
enter the preposition from the text in
entr the noun count 20
enter the verb count 10
enter the noun preposition count 14
