In [1]:
!pip install -U spacy
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 12.8/12.8 MB 83.7 MB/s eta 0:00:00
✔ Download and installation successful
You can now load the package via spacy.load('en_core_web_sm')
⚠ Restart to reload dependencies
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [13]:
# Gendered third-person singular forms, grouped by the gender they express.
_MALE_FORMS = frozenset({"he", "him", "his", "himself"})
_FEMALE_FORMS = frozenset({"she", "her", "hers", "herself"})

# Forms whose counterpart does not depend on the syntactic role of the token.
_UNAMBIGUOUS_SWAPS = {
    "him": "her",
    "himself": "herself",
    "herself": "himself",
    "hers": "his",
}

# Dependency labels under which "his"/"her" modify a following noun ("his book").
_POSSESSIVE_DEPS = frozenset({"poss", "det"})


def _swapped_form(tok):
    """Return the lowercase opposite-gender form for ``tok``, or None if it is not swappable."""
    lower = tok.lower_

    if lower in ("he", "she"):
        # Only swap when the parser sees a subject or the tagger sees a pronoun,
        # so homographs tagged as something else are left alone.
        if tok.dep_ == "nsubj" or tok.pos_ == "PRON":
            return "she" if lower == "he" else "he"
        return None

    # "her" is either a possessive determiner (-> "his") or an object (-> "him").
    if lower == "her":
        return "his" if tok.dep_ in _POSSESSIVE_DEPS else "him"

    # "his" is either a possessive determiner (-> "her") or stands alone (-> "hers").
    if lower == "his":
        return "her" if tok.dep_ in _POSSESSIVE_DEPS else "hers"

    # Everything else maps one-to-one whatever its dependency label is, which
    # also covers labels such as "dative" that a per-label table would miss.
    return _UNAMBIGUOUS_SWAPS.get(lower)


def _match_case(replacement, original):
    """Give ``replacement`` the same casing pattern (upper / capitalised / lower) as ``original``."""
    if original.isupper():
        return replacement.upper()
    if original[:1].isupper():
        return replacement.capitalize()
    return replacement


def pronoun_swap(text, target_gender):
    """Rewrite gendered third-person pronouns in ``text`` towards ``target_gender``.

    Relies on a module-level spaCy pipeline named ``nlp`` (not defined in this
    cell) for tokenisation, dependency labels and part-of-speech tags.

    Args:
        text: Sentence(s) to transform.
        target_gender: ``"female"`` converts male pronouns to female ones and
            ``"male"`` converts female pronouns to male ones (case and
            surrounding whitespace are ignored). Pronouns that already match
            the target are left untouched. Any other value swaps pronouns in
            both directions.

    Returns:
        The transformed text. Tokens are re-joined with the whitespace that
        followed them in the input, so contractions, quotes and punctuation
        keep their original spacing.
    """
    doc = nlp(text)

    # Decide which pronoun forms are eligible for conversion.
    target = str(target_gender).strip().lower()
    if target == "female":
        convertible = _MALE_FORMS
    elif target == "male":
        convertible = _FEMALE_FORMS
    else:
        # Unrecognised target: keep the historical swap-everything behaviour.
        convertible = _MALE_FORMS | _FEMALE_FORMS

    pieces = []
    for tok in doc:
        replacement = _swapped_form(tok) if tok.lower_ in convertible else None
        if replacement is None:
            rendered = tok.text
        else:
            rendered = _match_case(replacement, tok.text)
        # whitespace_ is the trailing space of the token in the source text (or ""),
        # which restores the input spacing exactly.
        pieces.append(rendered + tok.whitespace_)

    return "".join(pieces)


In [14]:
# Run the swapper over every row of `df`; each row supplies its own target gender.
df["transformed_output"] = df.apply(
    lambda rec: pronoun_swap(rec["input_text"], rec["target_gender"]),
    axis=1,
)

# A row counts as correct on an exact match after trimming outer whitespace.
predicted = df["transformed_output"].str.strip()
expected = df["expected_output"].str.strip()
df["correct"] = predicted == expected

report_columns = ["input_text", "expected_output", "transformed_output", "correct"]
print(df[report_columns])

# The mean of the boolean column is the fraction of rows that matched.
accuracy_pct = df["correct"].mean() * 100
print(f"\n✅ Accuracy: {accuracy_pct:.2f}%")


                             input_text                      expected_output  \
0            He is going to the market.          She is going to the market.   
1             His book is on the table.            Her book is on the table.   
2                  I saw him yesterday.                 I saw her yesterday.   
3                      He hurt himself.                    She hurt herself.   
4              I called him last night.             I called her last night.   
5                      That is his car.                     That is her car.   
6            He told me about his trip.          She told me about her trip.   
8    He blames himself for the mistake.  She blames herself for the mistake.   
9                He brought his laptop.              She brought her laptop.   
10                  He made it himself.                 She made it herself.   
11           I don’t like his attitude.           I don’t like her attitude.   
12               Tell him to come here. 