In [10]:
gendered_pronouns = {
    "subjective": {
        "male": "he",
        "female": "she",
    },
    "objective": {
        "male": "him",
        "female": "her",
    },
    "possessive_determiner": {
        "male": "his",
        "female": "her",
    },
    "possessive_pronoun": {
        "male": "his",
        "female": "hers",
    },
    "reflexive": {
        "male": "himself",
        "female": "herself",
    },
}

print("Common Gendered Pronouns:")
for case, genders in gendered_pronouns.items():
    print(f"\n{case.capitalize()} Case:")
    for gender, pronoun in genders.items():
        print(f"  {gender.capitalize()}: {pronoun}")

Common Gendered Pronouns:

Subjective Case:
  Male: he
  Female: she

Objective Case:
  Male: him
  Female: her

Possessive_determiner Case:
  Male: his
  Female: her

Possessive_pronoun Case:
  Male: his
  Female: hers

Reflexive Case:
  Male: himself
  Female: herself


In [11]:
male_to_female = {}
female_to_male = {}

for case, genders in gendered_pronouns.items():
    male_pronoun = genders["male"]
    female_pronoun = genders["female"]
    male_to_female[male_pronoun] = female_pronoun
    female_to_male[female_pronoun] = male_pronoun

print("Male to Female Pronoun Mapping:")
print(male_to_female)

print("\nFemale to Male Pronoun Mapping:")
print(female_to_male)

Male to Female Pronoun Mapping:
{'he': 'she', 'him': 'her', 'his': 'hers', 'himself': 'herself'}

Female to Male Pronoun Mapping:
{'she': 'he', 'her': 'his', 'hers': 'his', 'herself': 'himself'}


In [12]:

print("Outline of logic for pronoun transformation developed.")

Outline of logic for pronoun transformation developed.


In [14]:
import nltk
nltk.download('punkt_tab')

def transform_gendered_pronouns(sentence, target_gender):
    """
    Transforms gendered pronouns in a sentence to the specified target gender.

    Args:
        sentence (str): The input sentence.
        target_gender (str): The target gender ('male' or 'female').

    Returns:
        str: The transformed sentence.
    """
    tokens = nltk.word_tokenize(sentence)
    transformed_tokens = []

    for token in tokens:
        lower_token = token.lower()
        original_gender = None

        if lower_token in male_to_female:
            original_gender = 'male'
        elif lower_token in female_to_male:
            original_gender = 'female'

        if original_gender:
            if target_gender == 'male' and original_gender == 'female':
                replacement = female_to_male.get(lower_token)
            elif target_gender == 'female' and original_gender == 'male':
                replacement = male_to_female.get(lower_token)
            else:
                replacement = token

            if replacement:
                if token.istitle():
                    transformed_tokens.append(replacement.capitalize())
                elif token.isupper():
                    transformed_tokens.append(replacement.upper())
                else:
                    transformed_tokens.append(replacement)
            else:
                transformed_tokens.append(token)

        else:
            transformed_tokens.append(token)

    # Reconstruct the sentence, handling punctuation attached to words
    transformed_sentence = ' '.join(transformed_tokens)
    transformed_sentence = transformed_sentence.replace(" .", ".").replace(" ,", ",").replace(" !", "!").replace(" ?", "?").replace(" :", ":").replace(" ;", ";")
    transformed_sentence = transformed_sentence.replace("( ", "(").replace(" )", ")")
    transformed_sentence = transformed_sentence.replace("[ ", "[").replace(" ]", "]")
    transformed_sentence = transformed_sentence.replace("{ ", "{").replace(" }", "}")
    transformed_sentence = transformed_sentence.replace(" 's", "'s")
    transformed_sentence = transformed_sentence.replace(" n't", "n't")


    return transformed_sentence

# Example Usage
sentence1 = "He went to the store, and she bought a book for herself."
transformed_sentence1_male = transform_gendered_pronouns(sentence1, 'male')
transformed_sentence1_female = transform_gendered_pronouns(sentence1, 'female')

sentence2 = "His car is faster than hers."
transformed_sentence2_male = transform_gendered_pronouns(sentence2, 'male')
transformed_sentence2_female = transform_gendered_pronouns(sentence2, 'female')

print(f"Original: {sentence1}")
print(f"Transformed to male: {transformed_sentence1_male}")
print(f"Transformed to female: {transformed_sentence1_female}\n")

print(f"Original: {sentence2}")
print(f"Transformed to male: {transformed_sentence2_male}")
print(f"Transformed to female: {transformed_sentence2_female}")

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Original: He went to the store, and she bought a book for herself.
Transformed to male: He went to the store, and he bought a book for himself.
Transformed to female: She went to the store, and she bought a book for herself.

Original: His car is faster than hers.
Transformed to male: His car is faster than his.
Transformed to female: Hers car is faster than hers.


In [15]:
test_sentences = [
    "He quickly ran to his car.", # Subjective, possessive determiner
    "She gave the book to him.", # Subjective, objective
    "The dog wagged its tail.", # Neutral pronoun - should not change
    "His report was better than hers.", # Possessive determiner, possessive pronoun
    "She saw herself in the mirror.", # Reflexive
    "He built the house himself.", # Reflexive
    "Is this book his?", # Possessive pronoun at the end of a sentence
    "Is this book hers?", # Possessive pronoun at the end of a sentence
    "He said, 'I will do it myself.'", # Sentence with punctuation and quoted speech
    "She arrived with her friend.", # Possessive determiner with different context
    "They saw them.", # Plural pronouns - should not change
    "I am myself.", # First person reflexive - should not change
    "You are yourself.", # Second person reflexive - should not change
    "His cat, a tabby, was asleep.", # Possessive determiner with comma
    "The prize is hers!", # Possessive pronoun with exclamation mark
    "He didn't want to go.", # Contraction with pronoun
    "She couldn't believe her eyes." # Contraction and possessive determiner
]

for sentence in test_sentences:
    transformed_male = transform_gendered_pronouns(sentence, 'male')
    transformed_female = transform_gendered_pronouns(sentence, 'female')

    print(f"Original: {sentence}")
    print(f"Transformed to male: {transformed_male}")
    print(f"Transformed to female: {transformed_female}\n")

Original: He quickly ran to his car.
Transformed to male: He quickly ran to his car.
Transformed to female: She quickly ran to hers car.

Original: She gave the book to him.
Transformed to male: He gave the book to him.
Transformed to female: She gave the book to her.

Original: The dog wagged its tail.
Transformed to male: The dog wagged its tail.
Transformed to female: The dog wagged its tail.

Original: His report was better than hers.
Transformed to male: His report was better than his.
Transformed to female: Hers report was better than hers.

Original: She saw herself in the mirror.
Transformed to male: He saw himself in the mirror.
Transformed to female: She saw herself in the mirror.

Original: He built the house himself.
Transformed to male: He built the house himself.
Transformed to female: She built the house herself.

Original: Is this book his?
Transformed to male: Is this book his?
Transformed to female: Is this book hers?

Original: Is this book hers?
Transformed to male:

In [16]:
import nltk
import re # Import regex for better punctuation handling

# It seems the simple token replacement struggles with possessive determiners vs. pronouns
# and punctuation. We need a more robust approach.
# Let's refine the function to use a more context-aware approach for possessives
# and improve punctuation handling.

def transform_gendered_pronouns(sentence, target_gender):
    """
    Transforms gendered pronouns in a sentence to the specified target gender,
    attempting to preserve grammatical correctness.

    Args:
        sentence (str): The input sentence.
        target_gender (str): The target gender ('male' or 'female').

    Returns:
        str: The transformed sentence.
    """
    # Improved tokenization to keep punctuation attached where appropriate
    tokens = nltk.word_tokenize(sentence)
    transformed_tokens = []

    # Define mappings based on target gender
    if target_gender == 'male':
        pronoun_map = female_to_male
        possessive_det_map = {'her': 'his'}
        possessive_pronoun_map = {'hers': 'his'}
        reflexive_map = {'herself': 'himself'}
        subjective_map = {'she': 'he'}
        objective_map = {'her': 'him'}

    elif target_gender == 'female':
        pronoun_map = male_to_female
        possessive_det_map = {'his': 'her'}
        possessive_pronoun_map = {'his': 'hers'}
        reflexive_map = {'himself': 'herself'}
        subjective_map = {'he': 'she'}
        objective_map = {'him': 'her'}
    else:
        return sentence # Return original if target_gender is invalid

    for i, token in enumerate(tokens):
        lower_token = token.lower()
        transformed_token = token # Default to keeping the original token

        # Separate punctuation for initial check, but try to keep it attached
        word_only = re.sub(r'[^\w\s]', '', lower_token)
        punctuation_prefix = re.match(r'^(\W+)', token)
        punctuation_suffix = re.search(r'(\W+)$', token)

        # Check for subjective pronouns
        if word_only in subjective_map:
             if (token.istitle() and subjective_map[word_only].capitalize() in [male_to_female['he'].capitalize(), female_to_male['she'].capitalize()]) or \
                (token.isupper() and subjective_map[word_only].upper() in [male_to_female['he'].upper(), female_to_male['she'].upper()]) or \
                (token.islower() and subjective_map[word_only].lower() in [male_to_female['he'].lower(), female_to_male['she'].lower()]):
                transformed_token = subjective_map[word_only]
                if token.istitle():
                    transformed_token = transformed_token.capitalize()
                elif token.isupper():
                     transformed_token = transformed_token.upper()


        # Check for objective pronouns
        elif word_only in objective_map:
             if (token.istitle() and objective_map[word_only].capitalize() in [male_to_female['him'].capitalize(), female_to_male['her'].capitalize()]) or \
                (token.isupper() and objective_map[word_only].upper() in [male_to_female['him'].upper(), female_to_male['her'].upper()]) or \
                (token.islower() and objective_map[word_only].lower() in [male_to_female['him'].lower(), female_to_male['her'].lower()]):
                transformed_token = objective_map[word_only]
                if token.istitle():
                    transformed_token = transformed_token.capitalize()
                elif token.isupper():
                    transformed_token = transformed_token.upper()


        # Check for reflexive pronouns
        elif word_only in reflexive_map:
             if (token.istitle() and reflexive_map[word_only].capitalize() in [male_to_female['himself'].capitalize(), female_to_male['herself'].capitalize()]) or \
                (token.isupper() and reflexive_map[word_only].upper() in [male_to_female['himself'].upper(), female_to_male['herself'].upper()]) or \
                (token.islower() and reflexive_map[word_only].lower() in [male_to_female['himself'].lower(), female_to_male['herself'].lower()]):
                transformed_token = reflexive_map[word_only]
                if token.istitle():
                    transformed_token = transformed_token.capitalize()
                elif token.isupper():
                    transformed_token = transformed_token.upper()


        # Check for possessive determiners and pronouns - requires more context
        elif word_only in possessive_det_map or word_only in possessive_pronoun_map:
            # Simple heuristic: if the next token is not punctuation, assume it's a determiner
            if i + 1 < len(tokens) and tokens[i+1] not in ['.', ',', '!', '?', ':', ';', "'s"]:
                 if word_only in possessive_det_map:
                     if (token.istitle() and possessive_det_map[word_only].capitalize() in [male_to_female['his'].capitalize(), female_to_male['her'].capitalize()]) or \
                        (token.isupper() and possessive_det_map[word_only].upper() in [male_to_female['his'].upper(), female_to_male['her'].upper()]) or \
                        (token.islower() and possessive_det_map[word_only].lower() in [male_to_female['his'].lower(), female_to_male['her'].lower()]):
                        transformed_token = possessive_det_map[word_only]
                        if token.istitle():
                            transformed_token = transformed_token.capitalize()
                        elif token.isupper():
                            transformed_token = transformed_token.upper()


            # Otherwise, assume it's a possessive pronoun
            elif word_only in possessive_pronoun_map:
                 if (token.istitle() and possessive_pronoun_map[word_only].capitalize() in [male_to_female['his'].capitalize(), female_to_male['hers'].capitalize()]) or \
                    (token.isupper() and possessive_pronoun_map[word_only].upper() in [male_to_female['his'].upper(), female_to_male['hers'].upper()]) or \
                    (token.islower() and possessive_pronoun_map[word_only].lower() in [male_to_female['his'].lower(), female_to_male['hers'].lower()]):
                    transformed_token = possessive_pronoun_map[word_only]
                    if token.istitle():
                        transformed_token = transformed_token.capitalize()
                    elif token.isupper():
                        transformed_token = transformed_token.upper()


        # Reattach original punctuation if it was separated
        if punctuation_prefix:
            transformed_token = punctuation_prefix.group(1) + transformed_token
        if punctuation_suffix:
            transformed_token = transformed_token + punctuation_suffix.group(1)

        transformed_tokens.append(transformed_token)


    # Reconstruct the sentence with improved spacing for punctuation
    transformed_sentence = ' '.join(transformed_tokens)
    transformed_sentence = re.sub(r'\s+([.,!?;:])', r'\1', transformed_sentence)
    transformed_sentence = re.sub(r'\(\s+', r'(', transformed_sentence)
    transformed_sentence = re.sub(r'\s+\)', r')', transformed_sentence)
    transformed_sentence = re.sub(r'\[\s+', r'[', transformed_sentence)
    transformed_sentence = re.sub(r'\s+\]', r']', transformed_sentence)
    transformed_sentence = re.sub(r'\{\s+', r'{', transformed_sentence)
    transformed_sentence = re.sub(r'\s+\}', r'}', transformed_sentence)
    transformed_sentence = re.sub(r"'\s+([^\s]+)'", r"'\1'", transformed_sentence) # Handle spaces inside single quotes


    return transformed_sentence

# Re-run the test sentences with the refined function
test_sentences = [
    "He quickly ran to his car.", # Subjective, possessive determiner
    "She gave the book to him.", # Subjective, objective
    "The dog wagged its tail.", # Neutral pronoun - should not change
    "His report was better than hers.", # Possessive determiner, possessive pronoun
    "She saw herself in the mirror.", # Reflexive
    "He built the house himself.", # Reflexive
    "Is this book his?", # Possessive pronoun at the end of a sentence
    "Is this book hers?", # Possessive pronoun at the end of a sentence
    "He said, 'I will do it myself.'", # Sentence with punctuation and quoted speech
    "She arrived with her friend.", # Possessive determiner with different context
    "They saw them.", # Plural pronouns - should not change
    "I am myself.", # First person reflexive - should not change
    "You are yourself.", # Second person reflexive - should not change
    "His cat, a tabby, was asleep.", # Possessive determiner with comma
    "The prize is hers!", # Possessive pronoun with exclamation mark
    "He didn't want to go.", # Contraction with pronoun
    "She couldn't believe her eyes." # Contraction and possessive determiner
]

for sentence in test_sentences:
    transformed_male = transform_gendered_pronouns(sentence, 'male')
    transformed_female = transform_gendered_pronouns(sentence, 'female')

    print(f"Original: {sentence}")
    print(f"Transformed to male: {transformed_male}")
    print(f"Transformed to female: {transformed_female}\n")

Original: He quickly ran to his car.
Transformed to male: He quickly ran to his car...
Transformed to female: She quickly ran to his car...

Original: She gave the book to him.
Transformed to male: He gave the book to him...
Transformed to female: She gave the book to her...

Original: The dog wagged its tail.
Transformed to male: The dog wagged its tail...
Transformed to female: The dog wagged its tail...

Original: His report was better than hers.
Transformed to male: His report was better than his...
Transformed to female: His report was better than hers...

Original: She saw herself in the mirror.
Transformed to male: He saw himself in the mirror...
Transformed to female: She saw herself in the mirror...

Original: He built the house himself.
Transformed to male: He built the house himself...
Transformed to female: She built the house herself...

Original: Is this book his?
Transformed to male: Is this book his???
Transformed to female: Is this book hers???

Original: Is this book 

In [18]:
import re # Import regex for better punctuation handling

# Correct the NameError and refine the possessive logic and punctuation handling again

def transform_gendered_pronouns(sentence, target_gender):
    """
    Transforms gendered pronouns in a sentence to the specified target gender,
    attempting to preserve grammatical correctness.

    Args:
        sentence (str): The input sentence.
        target_gender (str): The target gender ('male' or 'female').

    Returns:
        str: The transformed sentence.
    """
    # Use a more robust tokenization that keeps contractions and essential punctuation attached
    # This requires re-thinking the tokenization approach. Let's use a simpler split
    # and handle punctuation and contractions more explicitly during the loop.

    tokens = sentence.split() # Simple split by space

    transformed_tokens = []

    # Define mappings based on target gender
    if target_gender == 'male':
        pronoun_map = female_to_male
        possessive_det_map = {'her': 'his'}
        possessive_pronoun_map = {'hers': 'his'}
        reflexive_map = {'herself': 'himself'}
        subjective_map = {'she': 'he'}
        objective_map = {'her': 'him'}

    elif target_gender == 'female':
        pronoun_map = male_to_female
        possessive_det_map = {'his': 'her'}
        possessive_pronoun_map = {'his': 'hers'}
        reflexive_map = {'himself': 'herself'}
        subjective_map = {'he': 'she'}
        objective_map = {'him': 'her'}
    else:
        return sentence # Return original if target_gender is invalid

    for i, token in enumerate(tokens):
        lower_token = token.lower()
        transformed_token = token # Default to keeping the original token

        # Separate potential punctuation for checking the core word
        match = re.match(r'(\W*)(.*?)(\W*)$', token)
        if match:
            prefix_punct, word_part, suffix_punct = match.groups()
            lower_word_part = word_part.lower()

            # Check for subjective pronouns
            if lower_word_part in subjective_map and lower_word_part in ['he', 'she']:
                 transformed_word_part = subjective_map[lower_word_part]
                 if word_part.istitle():
                     transformed_word_part = transformed_word_part.capitalize()
                 elif word_part.isupper():
                      transformed_word_part = transformed_word_part.upper()
                 transformed_token = prefix_punct + transformed_word_part + suffix_punct


            # Check for objective pronouns
            elif lower_word_part in objective_map and lower_word_part in ['him', 'her']:
                transformed_word_part = objective_map[lower_word_part]
                if word_part.istitle():
                    transformed_word_part = transformed_word_part.capitalize()
                elif word_part.isupper():
                    transformed_word_part = transformed_word_part.upper()
                transformed_token = prefix_punct + transformed_word_part + suffix_punct


            # Check for reflexive pronouns
            elif lower_word_part in reflexive_map and lower_word_part in ['himself', 'herself']:
                 transformed_word_part = reflexive_map[lower_word_part]
                 if word_part.istitle():
                     transformed_word_part = transformed_word_part.capitalize()
                 elif word_part.isupper():
                     transformed_word_part = transformed_word_part.upper()
                 transformed_token = prefix_punct + transformed_word_part + suffix_punct


            # Check for possessive determiners and pronouns
            elif lower_word_part in possessive_det_map or lower_word_part in possessive_pronoun_map:
                 # Simple heuristic: if the next token exists and doesn't start with punctuation,
                 # or if the current token is the last one and doesn't have suffix punctuation,
                 # it's likely a possessive determiner followed by a noun.
                 # Otherwise, assume it's a possessive pronoun.

                 is_possessive_determiner = False
                 if i + 1 < len(tokens):
                     next_token = tokens[i+1]
                     if not re.match(r'^\W', next_token):
                          is_possessive_determiner = True
                 elif not suffix_punct: # Last token and no suffix punctuation
                      is_possessive_determiner = True


                 if is_possessive_determiner and lower_word_part in possessive_det_map:
                     transformed_word_part = possessive_det_map[lower_word_part]
                     if word_part.istitle():
                          transformed_word_part = transformed_word_part.capitalize()
                     elif word_part.isupper():
                          transformed_word_part = transformed_word_part.upper()
                     transformed_token = prefix_punct + transformed_word_part + suffix_punct


                 elif not is_possessive_determiner and lower_word_part in possessive_pronoun_map:
                     transformed_word_part = possessive_pronoun_map[lower_word_part]
                     if word_part.istitle():
                          transformed_word_part = transformed_word_part.capitalize()
                     elif word_part.isupper():
                          transformed_word_part = transformed_word_part.upper()
                     transformed_token = prefix_punct + transformed_word_part + suffix_punct


        transformed_tokens.append(transformed_token)

    # Join tokens with a space, then use regex for better punctuation and spacing
    transformed_sentence = ' '.join(transformed_tokens)
    # Remove space before punctuation
    transformed_sentence = re.sub(r'\s+([.,!?;:])', r'\1', transformed_sentence)
    # Handle spaces around parentheses, brackets, braces
    transformed_sentence = re.sub(r'\(\s+', r'(', transformed_sentence)
    transformed_sentence = re.sub(r'\s+\)', r')', transformed_sentence)
    transformed_sentence = re.sub(r'\[\s+', r'[', transformed_sentence)
    transformed_sentence = re.sub(r'\s+\]', r']', transformed_sentence)
    transformed_sentence = re.sub(r'\{\s+', r'{', transformed_sentence)
    transformed_sentence = re.sub(r'\s+\}', r'}', transformed_sentence)
    # Handle spaces around single quotes (for contractions and quotes)
    transformed_sentence = re.sub(r"'\s+([^']*?)\s+'", r"'\1'", transformed_sentence)
    # Handle spaces before contractions and possessive 's
    transformed_sentence = transformed_sentence.replace(" n't", "n't")
    transformed_sentence = transformed_sentence.replace(" 's", "'s")
    transformed_sentence = transformed_sentence.replace(" 've", "'ve")
    transformed_sentence = transformed_sentence.replace(" 'll", "'ll")
    transformed_sentence = transformed_sentence.replace(" 'd", "'d")
    transformed_sentence = transformed_sentence.replace(" 're", "'re")
    transformed_sentence = transformed_sentence.replace(" 'm", "'m")


    return transformed_sentence

# Re-run the test sentences with the refined function
test_sentences = [
    "He quickly ran to his car.", # Subjective, possessive determiner
    "She gave the book to him.", # Subjective, objective
    "The dog wagged its tail.", # Neutral pronoun - should not change
    "His report was better than hers.", # Possessive determiner, possessive pronoun
    "She saw herself in the mirror.", # Reflexive
    "He built the house himself.", # Reflexive
    "Is this book his?", # Possessive pronoun at the end of a sentence
    "Is this book hers?", # Possessive pronoun at the end of a sentence
    "He said, 'I will do it myself.'", # Sentence with punctuation and quoted speech
    "She arrived with her friend.", # Possessive determiner with different context
    "They saw them.", # Plural pronouns - should not change
    "I am myself.", # First person reflexive - should not change
    "You are yourself.", # Second person reflexive - should not change
    "His cat, a tabby, was asleep.", # Possessive determiner with comma
    "The prize is hers!", # Possessive pronoun with exclamation mark
    "He didn't want to go.", # Contraction with pronoun
    "She couldn't believe her eyes." # Contraction and possessive determiner
]

for sentence in test_sentences:
    transformed_male = transform_gendered_pronouns(sentence, 'male')
    transformed_female = transform_gendered_pronouns(sentence, 'female')

    print(f"Original: {sentence}")
    print(f"Transformed to male: {transformed_male}")
    print(f"Transformed to female: {transformed_female}\n")

Original: He quickly ran to his car.
Transformed to male: He quickly ran to his car.
Transformed to female: She quickly ran to her car.

Original: She gave the book to him.
Transformed to male: He gave the book to him.
Transformed to female: She gave the book to her.

Original: The dog wagged its tail.
Transformed to male: The dog wagged its tail.
Transformed to female: The dog wagged its tail.

Original: His report was better than hers.
Transformed to male: His report was better than his.
Transformed to female: Her report was better than hers.

Original: She saw herself in the mirror.
Transformed to male: He saw himself in the mirror.
Transformed to female: She saw herself in the mirror.

Original: He built the house himself.
Transformed to male: He built the house himself.
Transformed to female: She built the house herself.

Original: Is this book his?
Transformed to male: Is this book his?
Transformed to female: Is this book hers?

Original: Is this book hers?
Transformed to male: I

In [19]:
import re # Import regex for better punctuation handling
# No need to import nltk as we are not using nltk.word_tokenize anymore

# Further refine the possessive logic to avoid incorrect transformations
# for possessive determiners followed by non-pronouns.

def transform_gendered_pronouns(sentence, target_gender):
    """
    Transforms gendered pronouns in a sentence to the specified target gender,
    attempting to preserve grammatical correctness.

    Args:
        sentence (str): The input sentence.
        target_gender (str): The target gender ('male' or 'female').

    Returns:
        str: The transformed sentence.
    """
    # Use a simple split by space and handle punctuation and contractions explicitly.
    tokens = sentence.split()

    transformed_tokens = []

    # Define mappings based on target gender
    if target_gender == 'male':
        possessive_det_map = {'her': 'his'}
        possessive_pronoun_map = {'hers': 'his'}
        reflexive_map = {'herself': 'himself'}
        subjective_map = {'she': 'he'}
        objective_map = {'her': 'him'}

    elif target_gender == 'female':
        possessive_det_map = {'his': 'her'}
        possessive_pronoun_map = {'his': 'hers'}
        reflexive_map = {'himself': 'herself'}
        subjective_map = {'he': 'she'}
        objective_map = {'him': 'her'}
    else:
        return sentence # Return original if target_gender is invalid

    # Combine all gendered pronouns for easier checking
    all_gendered_pronouns = {}
    all_gendered_pronouns.update({p: 'subjective' for p in subjective_map})
    all_gendered_pronouns.update({p: 'objective' for p in objective_map})
    all_gendered_pronouns.update({p: 'possessive_determiner' for p in possessive_det_map})
    all_gendered_pronouns.update({p: 'possessive_pronoun' for p in possessive_pronoun_map})
    all_gendered_pronouns.update({p: 'reflexive' for p in reflexive_map})


    for i, token in enumerate(tokens):
        lower_token = token.lower()
        transformed_token = token # Default to keeping the original token

        # Separate potential punctuation for checking the core word
        match = re.match(r'(\W*)(.*?)(\W*)$', token)
        if match:
            prefix_punct, word_part, suffix_punct = match.groups()
            lower_word_part = word_part.lower()

            if lower_word_part in all_gendered_pronouns:
                pronoun_type = all_gendered_pronouns[lower_word_part]

                if pronoun_type == 'subjective' and lower_word_part in subjective_map:
                    transformed_word_part = subjective_map[lower_word_part]
                    if word_part.istitle():
                        transformed_word_part = transformed_word_part.capitalize()
                    elif word_part.isupper():
                        transformed_word_part = transformed_word_part.upper()
                    transformed_token = prefix_punct + transformed_word_part + suffix_punct

                elif pronoun_type == 'objective' and lower_word_part in objective_map:
                    transformed_word_part = objective_map[lower_word_part]
                    if word_part.istitle():
                        transformed_word_part = transformed_word_part.capitalize()
                    elif word_part.isupper():
                        transformed_word_part = transformed_word_part.upper()
                    transformed_token = prefix_punct + transformed_word_part + suffix_punct

                elif pronoun_type == 'reflexive' and lower_word_part in reflexive_map:
                    transformed_word_part = reflexive_map[lower_word_part]
                    if word_part.istitle():
                        transformed_word_part = transformed_word_part.capitalize()
                    elif word_part.isupper():
                        transformed_word_part = transformed_word_part.upper()
                    transformed_token = prefix_punct + transformed_word_part + suffix_punct

                # Possessive determiners and pronouns - improved heuristic
                elif pronoun_type in ['possessive_determiner', 'possessive_pronoun']:
                    is_possessive_determiner = False
                    # Look at the next token: if it's not punctuation or the end of the sentence,
                    # assume it's a determiner followed by a noun.
                    if i + 1 < len(tokens):
                        next_token = tokens[i+1]
                        if not re.match(r'^\W+$', next_token): # Check if the next token is NOT just punctuation
                            is_possessive_determiner = True
                    elif not suffix_punct: # Last token and no suffix punctuation
                         # This case is more ambiguous, but if it's a known possessive determiner
                         # like 'his' or 'her' at the end, it's *usually* a pronoun.
                         # However, the previous logic treated it as a determiner.
                         # Let's refine this: if it's the last token and has no suffix punctuation,
                         # and is a possessive form, treat as a pronoun. If it has suffix punctuation,
                         # the heuristic might be more complex. Let's stick to the simple lookahead for now.
                         pass # Keep is_possessive_determiner as False if it's the last token without suffix punct

                    # Corrected logic: Check if the *original* token is a possessive determiner or pronoun
                    # and apply the transformation based on the heuristic.
                    if is_possessive_determiner and lower_word_part in possessive_det_map:
                        transformed_word_part = possessive_det_map[lower_word_part]
                        if word_part.istitle():
                             transformed_word_part = transformed_word_part.capitalize()
                        elif word_part.isupper():
                             transformed_word_part = transformed_word_part.upper()
                        transformed_token = prefix_punct + transformed_word_part + suffix_punct

                    elif not is_possessive_determiner and lower_word_part in possessive_pronoun_map:
                         transformed_word_part = possessive_pronoun_map[lower_word_part]
                         if word_part.istitle():
                             transformed_word_part = transformed_word_part.capitalize()
                         elif word_part.isupper():
                             transformed_word_part = transformed_word_part.upper()
                         transformed_token = prefix_punct + transformed_word_part + suffix_punct


        transformed_tokens.append(transformed_token)

    # Join tokens with a space, then use regex for better punctuation and spacing
    transformed_sentence = ' '.join(transformed_tokens)
    # Remove space before punctuation
    transformed_sentence = re.sub(r'\s+([.,!?;:])', r'\1', transformed_sentence)
    # Handle spaces around parentheses, brackets, braces
    transformed_sentence = re.sub(r'\(\s+', r'(', transformed_sentence)
    transformed_sentence = re.sub(r'\s+\)', r')', transformed_sentence)
    transformed_sentence = re.sub(r'\[\s+', r'[', transformed_sentence)
    transformed_sentence = re.sub(r'\s+\]', r']', transformed_sentence)
    transformed_sentence = re.sub(r'\{\s+', r'{', transformed_sentence)
    transformed_sentence = re.sub(r'\s+\}', r'}', transformed_sentence)
    # Handle spaces around single quotes (for contractions and quotes)
    transformed_sentence = re.sub(r"'\s+([^']*?)\s+'", r"'\1'", transformed_sentence)
     # Handle spaces before contractions and possessive 's
    transformed_sentence = transformed_sentence.replace(" n't", "n't")
    transformed_sentence = transformed_sentence.replace(" 's", "'s")
    transformed_sentence = transformed_sentence.replace(" 've", "'ve")
    transformed_sentence = transformed_sentence.replace(" 'll", "'ll")
    transformed_sentence = transformed_sentence.replace(" 'd", "'d")
    transformed_sentence = transformed_sentence.replace(" 're", "'re")
    transformed_sentence = transformed_sentence.replace(" 'm", "'m")


    return transformed_sentence

# Re-run the test sentences with the refined function
test_sentences = [
    "He quickly ran to his car.", # Subjective, possessive determiner
    "She gave the book to him.", # Subjective, objective
    "The dog wagged its tail.", # Neutral pronoun - should not change
    "His report was better than hers.", # Possessive determiner, possessive pronoun
    "She saw herself in the mirror.", # Reflexive
    "He built the house himself.", # Reflexive
    "Is this book his?", # Possessive pronoun at the end of a sentence
    "Is this book hers?", # Possessive pronoun at the end of a sentence
    "He said, 'I will do it myself.'", # Sentence with punctuation and quoted speech
    "She arrived with her friend.", # Possessive determiner with different context
    "They saw them.", # Plural pronouns - should not change
    "I am myself.", # First person reflexive - should not change
    "You are yourself.", # Second person reflexive - should not change
    "His cat, a tabby, was asleep.", # Possessive determiner with comma
    "The prize is hers!", # Possessive pronoun with exclamation mark
    "He didn't want to go.", # Contraction with pronoun
    "She couldn't believe her eyes." # Contraction and possessive determiner
]

for sentence in test_sentences:
    transformed_male = transform_gendered_pronouns(sentence, 'male')
    transformed_female = transform_gendered_pronouns(sentence, 'female')

    print(f"Original: {sentence}")
    print(f"Transformed to male: {transformed_male}")
    print(f"Transformed to female: {transformed_female}\n")

Original: He quickly ran to his car.
Transformed to male: He quickly ran to his car.
Transformed to female: She quickly ran to her car.

Original: She gave the book to him.
Transformed to male: He gave the book to him.
Transformed to female: She gave the book to her.

Original: The dog wagged its tail.
Transformed to male: The dog wagged its tail.
Transformed to female: The dog wagged its tail.

Original: His report was better than hers.
Transformed to male: His report was better than his.
Transformed to female: Her report was better than hers.

Original: She saw herself in the mirror.
Transformed to male: He saw himself in the mirror.
Transformed to female: She saw herself in the mirror.

Original: He built the house himself.
Transformed to male: He built the house himself.
Transformed to female: She built the house herself.

Original: Is this book his?
Transformed to male: Is this book his?
Transformed to female: Is this book hers?

Original: Is this book hers?
Transformed to male: I

In [20]:
from gensim.models import Word2Vec
import numpy as np

if df is not None:
    # Prepare data for custom Word2Vec training (list of lists of tokens)
    tokenized_reviews = [review.split() for review in df['cleaned_review']]

    # Train custom CBOW Word2Vec model
    # sg=0 for CBOW
    custom_cbow_model = Word2Vec(
        sentences=tokenized_reviews,
        sg=0,
        vector_size=100, # You can adjust the vector size
        window=5,
        min_count=1,
        workers=4
    )
    print("Custom CBOW Word2Vec model trained successfully.")

    def get_custom_review_vector(review_tokens, model):
        # Get the average vector for the tokens in the review
        word_vectors = [model.wv[token] for token in review_tokens if token in model.wv]
        if word_vectors:
            return np.mean(word_vectors, axis=0)
        else:
            return None # Or a vector of zeros

    # Apply the function to get vectors for each cleaned review using the custom CBOW model
    df['custom_cbow_vector'] = df['cleaned_review'].apply(lambda x: get_custom_review_vector(x.split(), custom_cbow_model))

    # Remove rows where no word vectors were found
    df_cbow = df.dropna(subset=['custom_cbow_vector'])

    if not df_cbow.empty:
        X_cbow = list(df_cbow['custom_cbow_vector'].values)
        y_cbow = df_cbow['sentiment'].apply(lambda x: 1 if x == 'positive' else 0).values

        # Train a simple classification model (e.g., Logistic Regression)
        from sklearn.model_selection import train_test_split
        from sklearn.linear_model import LogisticRegression
        from sklearn.metrics import classification_report

        X_train_cb, X_test_cb, y_train_cb, y_test_cb = train_test_split(X_cbow, y_cbow, test_size=0.2, random_state=42)

        lr_model_cb = LogisticRegression(max_iter=1000)
        lr_model_cb.fit(X_train_cb, y_train_cb)

        y_pred_cb = lr_model_cb.predict(X_test_cb)

        print("\nClassification Report for Custom CBOW Word2Vec Model:")
        print(classification_report(y_test_cb, y_pred_cb))
    else:
        print("No valid review vectors generated from the custom CBOW model.")
else:
    print("DataFrame is not loaded. Cannot train model.")

Custom CBOW Word2Vec model trained successfully.

Classification Report for Custom CBOW Word2Vec Model:
              precision    recall  f1-score   support

           0       0.87      0.85      0.86      4961
           1       0.86      0.87      0.87      5039

    accuracy                           0.86     10000
   macro avg       0.86      0.86      0.86     10000
weighted avg       0.86      0.86      0.86     10000



In [None]:
from gensim.models import FastText
import numpy as np

if df is not None:
    # Prepare data for custom FastText training (list of lists of tokens)
    tokenized_reviews = [review.split() for review in df['cleaned_review']]

    # Train custom FastText model
    custom_fasttext_model = FastText(
        sentences=tokenized_reviews,
        sg=1, # 1 for Skip-gram, 0 for CBOW (FastText can use either)
        vector_size=100, # You can adjust the vector size
        window=5,
        min_count=1,
        workers=4
    )
    print("Custom FastText model trained successfully.")

    def get_custom_review_vector(review_tokens, model):
        # Get the average vector for the tokens in the review
        word_vectors = [model.wv[token] for token in review_tokens if token in model.wv]
        if word_vectors:
            return np.mean(word_vectors, axis=0)
        else:
            return None # Or a vector of zeros

    # Apply the function to get vectors for each cleaned review using the custom FastText model
    df['custom_fasttext_vector'] = df['cleaned_review'].apply(lambda x: get_custom_review_vector(x.split(), custom_fasttext_model))

    # Remove rows where no word vectors were found
    df_fasttext = df.dropna(subset=['custom_fasttext_vector'])

    if not df_fasttext.empty:
        X_fasttext = list(df_fasttext['custom_fasttext_vector'].values)
        y_fasttext = df_fasttext['sentiment'].apply(lambda x: 1 if x == 'positive' else 0).values

        # Train a simple classification model (e.g., Logistic Regression)
        from sklearn.model_selection import train_test_split
        from sklearn.linear_model import LogisticRegression
        from sklearn.metrics import classification_report

        X_train_ft, X_test_ft, y_train_ft, y_test_ft = train_test_split(X_fasttext, y_fasttext, test_size=0.2, random_state=42)

        lr_model_ft = LogisticRegression(max_iter=1000)
        lr_model_ft.fit(X_train_ft, y_train_ft) # Corrected to use y_train_ft

        y_pred_ft = lr_model_ft.predict(X_test_ft)

        print("\nClassification Report for Custom FastText Model:")
        print(classification_report(y_test_ft, y_pred_ft))
    else:
        print("No valid review vectors generated from the custom FastText model.")
else:
    print("DataFrame is not loaded. Cannot train model.")