In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

# Ensure the NLTK data used by this cell is installed locally.
# word_tokenize needs the Punkt sentence models: older NLTK releases load the
# 'punkt' package, while recent releases (3.9+) load 'punkt_tab' instead and
# raise LookupError if it is missing, so both are fetched.
nltk.download('punkt')
nltk.download('punkt_tab')
# Word list returned by stopwords.words('english').
nltk.download('stopwords')

# Common discourse markers, grouped by the kind of relation they signal.
# Each key is a category name; each value lists that category's markers in
# lowercase. Some markers ('due to', 'provided that') span more than one word.
discourse_markers = dict(
    contrast=['but', 'however', 'although', 'yet', 'nevertheless'],
    cause=['because', 'since', 'due to', 'as'],
    result=['therefore', 'thus', 'consequently', 'hence'],
    addition=['and', 'also', 'furthermore', 'moreover'],
    condition=['if', 'unless', 'provided that'],
)

def categorize_discourse_markers(text):
    """Find the discourse markers in ``text`` and group them by category.

    The text is lowercased and tokenized with ``word_tokenize``; the token
    stream is then scanned left to right for every marker listed in the
    module-level ``discourse_markers`` table.

    Stop words are deliberately NOT filtered out: several listed markers
    ('but', 'and', 'because', 'as', 'if') are themselves English stop words,
    so filtering would make them undetectable. Multi-word markers such as
    'due to' are matched against consecutive tokens, longest match first.

    Args:
        text: The string to scan.

    Returns:
        A dict with one key per category in ``discourse_markers``; each value
        is the list of markers found for that category, in order of
        appearance (repeated occurrences are kept).
    """
    tokens = word_tokenize(text.lower())

    markers_found = {category: [] for category in discourse_markers}

    # Index every marker by its token sequence so that single- and multi-word
    # markers share one lookup: ('due', 'to') -> [('cause', 'due to')].
    lookup = {}
    for category, markers in discourse_markers.items():
        for marker in markers:
            key = tuple(marker.lower().split())
            if key:  # ignore empty marker strings
                lookup.setdefault(key, []).append((category, marker))

    longest = max((len(key) for key in lookup), default=0)

    position = 0
    while position < len(tokens):
        # Try the longest possible marker first so that 'provided that' wins
        # over any shorter marker starting at the same token.
        for size in range(min(longest, len(tokens) - position), 0, -1):
            key = tuple(tokens[position:position + size])
            if key in lookup:
                for category, marker in lookup[key]:
                    markers_found[category].append(marker)
                position += size  # consume the whole marker
                break
        else:
            position += 1

    return markers_found

# Demo: run the categorizer on a short passage and print the grouped markers.
text = (
    "We were late; therefore, we missed the opening. "
    "But we still enjoyed the show. "
    "Also, the food was great."
)
markers = categorize_discourse_markers(text)
print(markers)

{'contrast': [], 'cause': [], 'result': ['therefore'], 'addition': ['also'], 'condition': []}


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
import spacy

# Load spaCy's small English pipeline; `nlp` is the shared callable that the
# demo below uses to turn raw text into a parsed Doc.
nlp = spacy.load(name='en_core_web_sm')

def hobbs_algorithm(doc):
    """Print every pronoun in ``doc`` together with its resolved antecedent.

    Iterates over the tokens of ``doc`` and, for each one whose ``pos_`` is
    'PRON', prints the pronoun followed by either the text of the token
    returned by ``find_antecedent`` or a "no antecedent" message.

    Args:
        doc: An iterable of tokens exposing ``pos_`` and ``text``.

    Returns:
        None. Results are written to standard output.
    """
    # Locate the pronouns lazily so output order follows token order.
    pronouns = (tok for tok in doc if tok.pos_ == 'PRON')

    for pron in pronouns:
        print(f"Pronoun: {pron.text}")
        match = find_antecedent(pron)
        outcome = f"  Antecedent: {match.text}" if match else "  No antecedent found"
        print(outcome)

def find_antecedent(pronoun):
    """Pick an antecedent for ``pronoun`` with a simple dependency-tree search.

    The search has three phases:

    1. Climb from the pronoun through ``head`` links until reaching a token
       whose dependency label is 'ROOT', 'nsubj', 'dobj' or 'pobj'.
    2. Scan that token's left children for an 'nsubj'/'pobj'/'dobj' token.
    3. Failing that, scan the left children of each ancestor in turn.

    Tokens on the path from the pronoun up to the ancestor being examined are
    never returned: each of them contains the pronoun in its subtree (for
    example "keys" in "her keys"), so it cannot be what the pronoun refers to.

    Args:
        pronoun: A token exposing ``dep_``, ``head``, ``lefts`` and
            ``ancestors`` (e.g. a spaCy ``Token``).

    Returns:
        The first matching token, or ``None`` when no candidate is found.
    """
    candidate_deps = ('nsubj', 'pobj', 'dobj')
    stop_deps = ('ROOT',) + candidate_deps

    # Phase 1: climb to the nearest subject/object/root node, remembering the
    # nodes passed through so they can be excluded as candidates.
    current_node = pronoun
    path = [pronoun]
    while current_node.dep_ not in stop_deps:
        parent = current_node.head
        if parent == current_node:
            # A token that is its own head but is not labelled ROOT (e.g. the
            # text was not dependency-parsed): stop instead of looping forever.
            break
        current_node = parent
        path.append(current_node)

    def is_candidate(token):
        return token.dep_ in candidate_deps and token not in path

    # Phase 2: look among the left children of the node we stopped at.
    for child in current_node.lefts:
        if is_candidate(child):
            return child

    # Phase 3: walk up the ancestors, checking each one's left children.
    for ancestor in current_node.ancestors:
        for child in ancestor.lefts:
            if is_candidate(child):
                return child
        # The ancestor itself dominates the pronoun, so it is excluded when
        # it shows up as a child of the next ancestor.
        path.append(ancestor)

    return None

# Demo: resolve the pronouns in a two-sentence passage.
text = "Samantha found her keys. She was relieved."

# Remove every period and comma before parsing.
# NOTE(review): presumably this keeps the parser from splitting the passage
# into two sentences, so "She" can reach "Samantha" — confirm.
text = text.translate(str.maketrans("", "", ".,"))

doc = nlp(text)
hobbs_algorithm(doc)

Pronoun: her
  Antecedent: Samantha
Pronoun: She
  Antecedent: Samantha
