In [None]:
import re
from typing import List, Dict, Tuple, Optional
from dataclasses import dataclass

@dataclass
class PronounMapping:
    """Record describing a single pronoun form.

    Groups a pronoun's surface form with its grammatical role, gender and
    letter-casing category.

    NOTE(review): nothing in the visible part of this file instantiates this
    class; PronounTransformer keeps its mappings in plain dicts instead.
    """
    # The pronoun text itself.
    word: str
    # Grammatical role of the pronoun.
    pos_type: str  # subject, object, possessive_adj, possessive_pronoun, reflexive
    # Gender the pronoun refers to.
    gender: str    # male, female
    # Letter-casing category of the word.
    case_type: str # lower, upper, title

class PronounTransformer:
    """Rules-based system for transforming gendered pronouns.

    The transformer swaps English third-person singular pronouns (and the
    contractions built on ``he``/``she``) between their male and female
    forms while preserving the casing of every replaced word.

    Two surface forms are ambiguous and are resolved by looking at the text
    that follows them:

    * ``her`` is either an object (-> ``him``) or a possessive adjective
      (-> ``his``);
    * ``his`` is either a possessive adjective (-> ``her``) or a possessive
      pronoun (-> ``hers``).

    The disambiguation is purely lexical (no part-of-speech tagging), so
    unusual sentences can still be misclassified.
    """

    # Whitespace-delimited tokens; their spans give exact character offsets
    # even when the sentence has leading or repeated whitespace.
    _TOKEN_RE = re.compile(r'\S+')

    # Punctuation glued to a pronoun that ends the phrase it belongs to.
    _CLAUSE_PUNCTUATION = '.,!?;:'

    # Characters removed from both ends of the following word before lookup.
    _STRIP_CHARS = '.,!?;:\'"()[]'

    # Words that cannot start the noun phrase of a possessive "her"; when one
    # of them follows "her", the pronoun is treated as an object.
    _NON_NOUN_FOLLOWERS = frozenset({
        # conjunctions / prepositions
        'and', 'or', 'but', 'to', 'in', 'on', 'at', 'for', 'with', 'from',
        'into', 'onto', 'about', 'after', 'before', 'because', 'since',
        'until', 'while', 'than', 'though', 'although', 'during', 'without',
        'whether',
        # determiners
        'the', 'an', 'this', 'that', 'these', 'those', 'some', 'any',
        'every', 'each', 'another',
        # pronouns / question words
        'his', 'him', 'her', 'hers', 'she', 'he', 'they', 'them', 'their',
        'its', 'you', 'your', 'our', 'what', 'who', 'which', 'how', 'why',
        'when', 'where',
        # adverbs
        'yesterday', 'today', 'tomorrow', 'tonight', 'now', 'then', 'later',
        'soon', 'there', 'here', 'again', 'too', 'also', 'already', 'always',
        'never', 'often', 'away', 'not',
    })

    # Words that, directly after "his", show it stands alone as a possessive
    # pronoun ("his was damaged") instead of modifying a noun.
    _HIS_PRONOUN_FOLLOWERS = frozenset({
        'is', 'was', 'are', 'were', 'has', 'had', 'have', 'would', 'could',
        'should', 'does', 'did',
        'to', 'in', 'on', 'at', 'for', 'with', 'from', 'by',
        'but', 'because', 'when', 'while', 'if', 'than', 'as',
        'too', 'now', 'not', 'alone', 'instead',
    })

    def __init__(self):
        # Pronoun table: each entry names its role and gender and, where a
        # swap exists, the replacement under 'target_male'/'target_female'.
        self.pronouns = {
            # Subject pronouns
            'he': {'target_female': 'she', 'pos_type': 'subject', 'gender': 'male'},
            'she': {'target_male': 'he', 'pos_type': 'subject', 'gender': 'female'},

            # Object pronouns
            'him': {'target_female': 'her', 'pos_type': 'object', 'gender': 'male'},
            'her': {'target_male': 'him', 'pos_type': 'object', 'gender': 'female'},

            # Possessive adjectives (his book, her book)
            'his': {'target_female': 'her', 'pos_type': 'possessive_adj', 'gender': 'male'},

            # Possessive pronouns (the book is his, the book is hers)
            'hers': {'target_male': 'his', 'pos_type': 'possessive_pronoun', 'gender': 'female'},

            # Reflexive pronouns
            'himself': {'target_female': 'herself', 'pos_type': 'reflexive', 'gender': 'male'},
            'herself': {'target_male': 'himself', 'pos_type': 'reflexive', 'gender': 'female'},
        }

        # Contractions built on a subject pronoun.
        self.contractions = {
            "he's": {"male_to_female": "she's", "expansion": "he is/has"},
            "she's": {"female_to_male": "he's", "expansion": "she is/has"},
            "he'll": {"male_to_female": "she'll", "expansion": "he will"},
            "she'll": {"female_to_male": "he'll", "expansion": "she will"},
            "he'd": {"male_to_female": "she'd", "expansion": "he would/had"},
            "she'd": {"female_to_male": "he'd", "expansion": "she would/had"},
        }

        # Add the context-specific entries for the ambiguous forms.
        self._create_reverse_mappings()

        # Compiled pattern that finds every known pronoun and contraction.
        self.pronoun_pattern = self._create_pronoun_pattern()

    def _create_reverse_mappings(self):
        """Register context-specific entries for the ambiguous pronouns.

        Keys containing an underscore are internal markers: they are looked
        up after disambiguation and are never matched against text.
        """
        # 'her': object vs possessive adjective
        self.pronouns['her_obj'] = {'target_male': 'him', 'pos_type': 'object', 'gender': 'female'}
        self.pronouns['her_poss'] = {'target_male': 'his', 'pos_type': 'possessive_adj', 'gender': 'female'}
        # 'his': possessive adjective vs possessive pronoun
        self.pronouns['his_poss'] = {'target_female': 'her', 'pos_type': 'possessive_adj', 'gender': 'male'}
        self.pronouns['his_pron'] = {'target_female': 'hers', 'pos_type': 'possessive_pronoun', 'gender': 'male'}

    def _create_pronoun_pattern(self):
        """Build the case-insensitive regex matching pronouns and contractions.

        Returns:
            A compiled pattern whose group 1 is the matched word.
        """
        # Skip internal markers such as 'her_obj' / 'his_pron'.
        words = [w for w in (*self.pronouns, *self.contractions) if '_' not in w]
        # Regex alternation takes the first alternative that matches, not the
        # longest.  Because an apostrophe is a word boundary, listing "he"
        # before "he's" would always stop at "he" and the contraction would
        # never be recognised, so try the longest forms first.
        words.sort(key=len, reverse=True)
        pattern = r'\b(' + '|'.join(re.escape(w) for w in words) + r')\b'
        return re.compile(pattern, re.IGNORECASE)

    def _get_case_type(self, word: str) -> str:
        """Classify the casing of *word* as 'upper', 'title' or 'lower'.

        An empty string is reported as 'lower'.
        """
        if not word:
            return 'lower'
        if word.isupper():
            return 'upper'
        if word[0].isupper():
            return 'title'
        return 'lower'

    def _apply_case(self, word: str, case_type: str) -> str:
        """Return *word* re-cased according to *case_type*.

        Any value other than 'upper' or 'title' lower-cases the word.
        """
        if case_type == 'upper':
            return word.upper()
        if case_type == 'title':
            return word.capitalize()
        return word.lower()

    def _look_ahead(self, sentence: str, match_pos: int, length: int) -> Optional[Tuple[bool, str]]:
        """Inspect the text right after the pronoun starting at *match_pos*.

        Args:
            sentence: The full sentence.
            match_pos: Character offset of the pronoun within *sentence*.
            length: Number of characters in the pronoun.

        Returns:
            ``(clause_break, next_word)`` where *clause_break* tells whether
            clause punctuation is attached directly to the pronoun and
            *next_word* is the following token, lower-cased and stripped of
            surrounding punctuation ('' when there is none).  ``None`` when
            *match_pos* does not fall inside any token.
        """
        tokens = list(self._TOKEN_RE.finditer(sentence))
        for index, token in enumerate(tokens):
            if token.start() <= match_pos < token.end():
                trailing = sentence[match_pos + length:token.end()]
                clause_break = any(ch in self._CLAUSE_PUNCTUATION for ch in trailing)
                next_word = ''
                if index + 1 < len(tokens):
                    next_word = tokens[index + 1].group().lower().strip(self._STRIP_CHARS)
                return clause_break, next_word
        return None

    def _detect_her_context(self, sentence: str, match_pos: int) -> str:
        """Decide whether the 'her' at *match_pos* is an object or possessive.

        Args:
            sentence: The full sentence.
            match_pos: Character offset where 'her' starts.

        Returns:
            'her_poss' when a noun-like word follows, otherwise 'her_obj'
            (also the fallback when the position cannot be located).
        """
        context = self._look_ahead(sentence, match_pos, len('her'))
        if context is None:
            return 'her_obj'  # default to object

        clause_break, next_word = context
        # Punctuation right after "her" ends the phrase, so no noun follows.
        if clause_break:
            return 'her_obj'
        # Very short words are almost always function words ("to", "it", "a").
        if len(next_word) <= 2:
            return 'her_obj'
        # Determiners, pronouns, prepositions and adverbs cannot be the noun
        # that a possessive would modify ("give her the book").
        if next_word in self._NON_NOUN_FOLLOWERS:
            return 'her_obj'
        return 'her_poss'

    def _detect_his_context(self, sentence: str, match_pos: int) -> str:
        """Decide whether the 'his' at *match_pos* modifies a noun or stands alone.

        Args:
            sentence: The full sentence.
            match_pos: Character offset where 'his' starts.

        Returns:
            'his_pron' for a stand-alone possessive pronoun ("the book is
            his"), otherwise 'his_poss' (also the fallback when the position
            cannot be located).
        """
        context = self._look_ahead(sentence, match_pos, len('his'))
        if context is None:
            return 'his_poss'

        clause_break, next_word = context
        if clause_break or not next_word or next_word in self._HIS_PRONOUN_FOLLOWERS:
            return 'his_pron'
        return 'his_poss'

    def _resolve_context(self, pronoun_lower: str, sentence: str, start: int) -> Optional[str]:
        """Return the disambiguation marker for an ambiguous pronoun, else None."""
        if pronoun_lower == 'her':
            return self._detect_her_context(sentence, start)
        if pronoun_lower == 'his':
            return self._detect_his_context(sentence, start)
        return None

    def _transform_pronoun(self, pronoun: str, target_gender: str, context_info: Optional[str] = None) -> str:
        """Transform a single pronoun or contraction to the target gender.

        Args:
            pronoun: The word exactly as it appeared in the text.
            target_gender: 'male' or 'female'.
            context_info: Optional marker from the context detectors
                ('her_obj', 'her_poss', 'his_poss', 'his_pron').  Without it
                'her' is treated as an object and 'his' as a possessive
                adjective.

        Returns:
            The replacement with the original casing applied, or *pronoun*
            unchanged when it is unknown or already has the target gender.
        """
        original_case = self._get_case_type(pronoun)
        pronoun_lower = pronoun.lower()

        # Contractions carry their own direction-specific replacements.
        if pronoun_lower in self.contractions:
            contraction_info = self.contractions[pronoun_lower]
            if target_gender == 'female' and 'male_to_female' in contraction_info:
                transformed = contraction_info['male_to_female']
            elif target_gender == 'male' and 'female_to_male' in contraction_info:
                transformed = contraction_info['female_to_male']
            else:
                return pronoun  # No transformation needed
            return self._apply_case(transformed, original_case)

        # Ambiguous forms are looked up through their context-specific key.
        if pronoun_lower == 'her':
            pronoun_key = 'her_poss' if context_info == 'her_poss' else 'her_obj'
        elif pronoun_lower == 'his':
            pronoun_key = 'his_pron' if context_info == 'his_pron' else 'his_poss'
        else:
            pronoun_key = pronoun_lower

        pronoun_info = self.pronouns.get(pronoun_key)
        if pronoun_info is None:
            return pronoun  # Unknown pronoun, return as is

        transformed = pronoun_info.get(f'target_{target_gender}')
        if transformed is None:
            return pronoun  # No transformation available
        return self._apply_case(transformed, original_case)

    def transform_sentence(self, sentence: str, target_gender: str) -> str:
        """
        Transform all gendered pronouns in a sentence to the target gender

        Args:
            sentence: Input sentence containing pronouns
            target_gender: Target gender ('male' or 'female')

        Returns:
            Transformed sentence with pronouns changed to target gender

        Raises:
            ValueError: If target_gender is neither 'male' nor 'female'
        """
        if target_gender not in ['male', 'female']:
            raise ValueError("target_gender must be 'male' or 'female'")

        def _replace(match):
            matched_pronoun = match.group(1)
            # Context is always judged against the original sentence, so
            # earlier replacements cannot influence later decisions.
            context_info = self._resolve_context(matched_pronoun.lower(), sentence, match.start())
            return self._transform_pronoun(matched_pronoun, target_gender, context_info)

        return self.pronoun_pattern.sub(_replace, sentence)

    def analyze_sentence(self, sentence: str) -> List[Dict]:
        """
        Analyze pronouns in a sentence and return detailed information

        Args:
            sentence: Input sentence to analyze

        Returns:
            List of dictionaries, one per match in order of appearance.
            Every entry has 'pronoun', 'position' (start, end), 'gender',
            'type', 'context', 'can_transform_to_male' and
            'can_transform_to_female'; contraction entries (type
            'contraction') additionally carry 'expansion'.
        """
        analysis = []

        for match in self.pronoun_pattern.finditer(sentence):
            start, end = match.span()
            matched_pronoun = match.group(1)
            pronoun_lower = matched_pronoun.lower()

            # Ambiguous forms are reported under their context-specific key.
            context_info = self._resolve_context(pronoun_lower, sentence, start)
            pronoun_key = context_info if context_info is not None else pronoun_lower

            if pronoun_key in self.pronouns:
                pronoun_info = self.pronouns[pronoun_key]
                analysis.append({
                    'pronoun': matched_pronoun,
                    'position': (start, end),
                    'gender': pronoun_info['gender'],
                    'type': pronoun_info['pos_type'],
                    'context': context_info,
                    'can_transform_to_male': 'target_male' in pronoun_info,
                    'can_transform_to_female': 'target_female' in pronoun_info
                })
            elif pronoun_lower in self.contractions:
                contraction_info = self.contractions[pronoun_lower]
                analysis.append({
                    'pronoun': matched_pronoun,
                    'position': (start, end),
                    # A contraction that can become female is itself male.
                    'gender': 'male' if 'male_to_female' in contraction_info else 'female',
                    'type': 'contraction',
                    'context': None,
                    'expansion': contraction_info['expansion'],
                    'can_transform_to_male': 'female_to_male' in contraction_info,
                    'can_transform_to_female': 'male_to_female' in contraction_info
                })

        return analysis

    def batch_transform(self, sentences: List[str], target_gender: str) -> List[Dict]:
        """
        Transform multiple sentences

        Args:
            sentences: List of input sentences
            target_gender: Target gender for transformation

        Returns:
            List of dictionaries with original and transformed sentences.
            A sentence that fails is reported unchanged with an extra
            'error' key instead of aborting the whole batch.
        """
        results = []

        for i, sentence in enumerate(sentences):
            try:
                transformed = self.transform_sentence(sentence, target_gender)
                analysis = self.analyze_sentence(sentence)

                results.append({
                    'index': i,
                    'original': sentence,
                    'transformed': transformed,
                    'target_gender': target_gender,
                    'pronouns_found': len(analysis),
                    'analysis': analysis,
                    'changed': sentence != transformed
                })
            except Exception as e:
                # Deliberate best-effort: record the failure and keep going.
                results.append({
                    'index': i,
                    'original': sentence,
                    'transformed': sentence,
                    'target_gender': target_gender,
                    'pronouns_found': 0,
                    'analysis': [],
                    'changed': False,
                    'error': str(e)
                })

        return results

def demonstrate_transformer():
    """Print a walkthrough of the transformer on a fixed set of sentences.

    For every (sentence, target gender) pair the original text, the target,
    the transformed text and a per-pronoun breakdown are written to stdout.
    """
    engine = PronounTransformer()

    print("=== GENDERED PRONOUN TRANSFORMER DEMO ===")
    print()

    # Scenarios are grouped by theme and concatenated in presentation order.
    basic = [
        ("He gave her his book.", "female"),
        ("She told him about her plans.", "male"),
    ]
    reflexive = [
        ("He taught himself to code.", "female"),
        ("She prepared herself for the exam.", "male"),
    ]
    possessive = [
        ("The book is his, not hers.", "female"),
        ("That car is hers, this one is his.", "male"),
    ]
    mixed = [
        ("He said she would give him her notes.", "female"),
        ("She thinks he lost his keys again.", "male"),
    ]
    contracted = [
        ("He's going to meet her there.", "female"),
        ("She'll tell him when she's ready.", "male"),
    ]
    complex_sentences = [
        ("He told her that his friend would help her with her homework.", "male"),
        ("She gave him her book because his was damaged.", "female"),
    ]
    casing_and_lists = [
        ("He, she, and his friend went to her party.", "male"),
        ("HE GAVE HER HIS BOOK.", "female"),  # fully upper-case input
        ("He Gave Her His Book.", "female"),  # every word capitalised
    ]
    ambiguous_her = [
        ("I saw her yesterday.", "male"),      # object use
        ("I like her car.", "male"),           # possessive use
        ("Give her the book.", "male"),        # object use
        ("Her book is interesting.", "male"),  # possessive use
    ]
    scenarios = (
        basic + reflexive + possessive + mixed + contracted
        + complex_sentences + casing_and_lists + ambiguous_her
    )

    number = 0
    for text, gender in scenarios:
        number += 1
        print(f"Test {number}:")
        print(f"  Original: {text}")
        print(f"  Target gender: {gender}")

        converted = engine.transform_sentence(text, gender)
        print(f"  Transformed: {converted}")

        # Per-pronoun breakdown, only shown when something was found.
        report = engine.analyze_sentence(text)
        if report:
            print(f"  Pronouns found: {len(report)}")
            for entry in report:
                gender_label = entry.get('gender', 'N/A')
                print(f"    - '{entry['pronoun']}' ({gender_label} {entry['type']})")

        print()

def interactive_mode():
    """Read sentences from stdin in a loop and print their transformation.

    The loop ends when the user types 'quit', 'exit' or 'q'.  Blank input
    and an invalid target gender restart the prompt.
    """
    engine = PronounTransformer()
    stop_words = ['quit', 'exit', 'q']
    genders = ['male', 'female']

    print("=== INTERACTIVE PRONOUN TRANSFORMER ===")
    print("Enter sentences to transform (type 'quit' to exit)")
    print()

    running = True
    while running:
        text = input("Enter sentence: ").strip()

        if text.lower() in stop_words:
            running = False
        elif text:
            gender = input("Target gender (male/female): ").strip().lower()
            if gender in genders:
                try:
                    # Report what was detected before showing the result.
                    report = engine.analyze_sentence(text)
                    print(f"\nOriginal: {text}")
                    if report:
                        print("Pronouns found:")
                        for entry in report:
                            gender_label = entry.get('gender', 'N/A')
                            print(f"  - '{entry['pronoun']}' ({gender_label} {entry['type']})")

                    converted = engine.transform_sentence(text, gender)
                    print(f"Transformed: {converted}")

                    if converted == text:
                        print("(No changes made - no transformable pronouns found)")

                except Exception as e:
                    print(f"Error: {e}")

                print("-" * 50)
            else:
                print("Please enter 'male' or 'female'")

def test_edge_cases():
    """Print the transformer's output for a fixed list of tricky sentences.

    Each case shows the input, the target gender, the output and whether
    the text changed.  Nothing is asserted; results are only printed.
    """
    engine = PronounTransformer()

    print("=== EDGE CASE TESTING ===")
    print()

    # Keyed by sentence; insertion order is the presentation order.
    scenarios = {
        # nothing to transform
        "The cat sat on the mat.": "female",
        # pronouns present, but none gendered
        "It is raining and they are coming.": "male",
        # the same gender repeated several times
        "He said he would help him with his work.": "female",
        # quotes and punctuation around pronouns
        "He said, 'She gave him her book!'": "male",
        # digits and currency symbols
        "He earned $100, but she earned $200 for her work.": "male",
        # proper nouns mixed in
        "John said he would meet Mary, and she agreed to give him her notes.": "female",
        # two possessives in one sentence
        "His car and her bike were parked next to each other.": "female",
        # several contractions
        "She's sure he'll give her his support when she's ready.": "male",
    }

    for text, gender in scenarios.items():
        print(f"Input: {text}")
        print(f"Target: {gender}")
        converted = engine.transform_sentence(text, gender)
        print(f"Output: {converted}")
        verdict = 'No' if text == converted else 'Yes'
        print(f"Changed: {verdict}")
        print()

def process_csv_testcases(filename: str):
    """Process CSV file with pronoun transformation test cases.

    Each data row must provide the sentence in the first column and the
    target gender ('male' or 'female') in the second; further columns are
    ignored.  Rows that are too short, have an invalid gender or an empty
    sentence are reported and skipped.  Every processed row is printed and
    the collected results are written next to the input as
    ``<name>_results<ext>``.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one result dict per processed row ('row', 'original',
        'transformed', 'target_gender', 'pronouns_found', 'changed' and
        either 'analysis' or 'error').  When *filename* does not exist, a
        sample file 'sample_pronoun_testcases.csv' is created and processed
        instead.  Any other failure is printed and an empty list returned.
    """
    import csv
    import itertools
    import os

    transformer = PronounTransformer()
    valid_genders = ('male', 'female')
    header_keywords = ('input', 'sentence', 'text', 'target', 'gender')

    try:
        results = []

        # newline='' lets the csv module handle line endings itself, which
        # keeps quoted fields containing newlines intact.
        with open(filename, 'r', newline='', encoding='utf-8') as file:
            csv_reader = csv.reader(file)
            first_row = next(csv_reader, None)

            # Only the first row can be a header.  It counts as one when it
            # mentions a typical column name and its second cell is not a
            # usable gender, so a data sentence that merely contains a word
            # like "text" is never dropped.
            has_header = (
                first_row is not None
                and not (len(first_row) >= 2
                         and first_row[1].strip().lower() in valid_genders)
                and any(keyword in cell.lower()
                        for cell in first_row
                        for keyword in header_keywords)
            )

            if has_header:
                print(f"CSV Headers detected: {first_row}")
                rows = csv_reader
            elif first_row is None:
                rows = csv_reader
            else:
                # Not a header: put the row back in front of the rest.
                rows = itertools.chain([first_row], csv_reader)

            print(f"\n=== PROCESSING {filename} ===")
            print()

            for row_num, row in enumerate(rows, 1):
                if len(row) < 2:
                    print(f"Row {row_num}: Insufficient data - {row}")
                    continue

                sentence = row[0].strip()
                target_gender = row[1].strip().lower()

                if target_gender not in valid_genders:
                    print(f"Row {row_num}: Invalid target gender '{target_gender}' - skipping")
                    continue

                if not sentence:
                    print(f"Row {row_num}: Empty sentence - skipping")
                    continue

                try:
                    transformed = transformer.transform_sentence(sentence, target_gender)
                    analysis = transformer.analyze_sentence(sentence)

                    result = {
                        'row': row_num,
                        'original': sentence,
                        'transformed': transformed,
                        'target_gender': target_gender,
                        'pronouns_found': len(analysis),
                        'changed': sentence != transformed,
                        'analysis': analysis
                    }

                    results.append(result)

                    # Display result
                    print(f"Row {row_num}:")
                    print(f"  Input: {sentence}")
                    print(f"  Target: {target_gender}")
                    print(f"  Output: {transformed}")

                    if analysis:
                        print(f"  Pronouns: {', '.join([p['pronoun'] for p in analysis])}")

                    print(f"  Changed: {'Yes' if result['changed'] else 'No'}")
                    print()

                except Exception as e:
                    # Best-effort: keep the row in the report, unchanged.
                    print(f"Row {row_num}: Error processing - {e}")
                    results.append({
                        'row': row_num,
                        'original': sentence,
                        'transformed': sentence,
                        'target_gender': target_gender,
                        'pronouns_found': 0,
                        'changed': False,
                        'error': str(e)
                    })

        # Derive the output path from the extension rather than by string
        # replacement, so a name without a lower-case '.csv' can never
        # resolve to the input file and overwrite it.
        root, ext = os.path.splitext(filename)
        output_filename = f"{root}_results{ext or '.csv'}"
        with open(output_filename, 'w', newline='', encoding='utf-8') as outfile:
            fieldnames = ['row', 'original', 'transformed', 'target_gender', 'pronouns_found', 'changed', 'pronoun_types']
            writer = csv.DictWriter(outfile, fieldnames=fieldnames)
            writer.writeheader()

            for result in results:
                pronoun_types = ', '.join([p['type'] for p in result.get('analysis', [])])
                writer.writerow({
                    'row': result['row'],
                    'original': result['original'],
                    'transformed': result['transformed'],
                    'target_gender': result['target_gender'],
                    'pronouns_found': result['pronouns_found'],
                    'changed': result['changed'],
                    'pronoun_types': pronoun_types
                })

        print(f"Results saved to {output_filename}")
        print(f"Successfully processed {len(results)} test cases")

        # Show summary statistics
        changed_count = sum(1 for r in results if r['changed'])
        pronouns_total = sum(r['pronouns_found'] for r in results)
        # A file without valid rows must not fail on the average.
        average = pronouns_total / len(results) if results else 0.0

        print(f"\nSUMMARY:")
        print(f"  Total sentences: {len(results)}")
        print(f"  Sentences changed: {changed_count}")
        print(f"  Total pronouns found: {pronouns_total}")
        print(f"  Average pronouns per sentence: {average:.1f}")

        return results

    except FileNotFoundError:
        print(f"File '{filename}' not found.")
        print("Creating a sample test file...")

        # Create sample test file
        sample_testcases = [
            ['sentence', 'target_gender'],
            ['He gave her his book.', 'female'],
            ['She told him about her plans.', 'male'],
            ['He taught himself to code.', 'female'],
            ['She prepared herself for the exam.', 'male'],
            ['The book is his, not hers.', 'female'],
            ['He said she would give him her notes.', 'female'],
            ["He's going to meet her there.", 'female'],
            ['I saw her yesterday.', 'male'],
            ['I like her car.', 'male'],
            ['His car and her bike were parked together.', 'female'],
        ]

        with open('sample_pronoun_testcases.csv', 'w', newline='', encoding='utf-8') as sample_file:
            writer = csv.writer(sample_file)
            writer.writerows(sample_testcases)

        print("Sample file 'sample_pronoun_testcases.csv' created!")
        return process_csv_testcases('sample_pronoun_testcases.csv')

    except Exception as e:
        # Top-level boundary for the demo: report and return nothing.
        print(f"Error processing CSV: {e}")
        return []

def main():
    """Run the full demo: CSV processing, showcases, optional interactive mode."""
    rule = "=" * 50

    print("Gendered Pronoun Transformer")
    print(rule)
    print()

    # Step 1: the CSV test cases (the path is fixed by this script).
    print("Attempting to process pronoun_testcases.csv...")
    process_csv_testcases('/content/pronoun_testcases (1).csv')

    print(f"\n{rule}")
    print("ADDITIONAL DEMONSTRATIONS")
    print(rule)

    # Step 2: the built-in showcases, in order.
    for showcase in (demonstrate_transformer, test_edge_cases):
        showcase()

    # Step 3: interactive mode, only on request.
    answer = input("Would you like to try interactive mode? (y/n): ").strip().lower()
    if answer == 'y' or answer == 'yes':
        interactive_mode()

    print("\nDemo completed!")

# Run the full demo only when this module is executed directly, not when it
# is imported.
if __name__ == "__main__":
    main()

Gendered Pronoun Transformer

Attempting to process pronoun_testcases.csv...
CSV Headers detected: ['input_text', 'target_gender', 'expected_output']

=== PROCESSING /content/pronoun_testcases (1).csv ===

Row 1:
  Input: He is going to the market.
  Target: female
  Output: She is going to the market.
  Pronouns: He
  Changed: Yes

Row 2:
  Input: His book is on the table.
  Target: female
  Output: Her book is on the table.
  Pronouns: His
  Changed: Yes

Row 3:
  Input: I saw him yesterday.
  Target: female
  Output: I saw her yesterday.
  Pronouns: him
  Changed: Yes

Row 4:
  Input: He hurt himself.
  Target: female
  Output: She hurt herself.
  Pronouns: He, himself
  Changed: Yes

Row 5:
  Input: I called him last night.
  Target: female
  Output: I called her last night.
  Pronouns: him
  Changed: Yes

Row 6:
  Input: That is his car.
  Target: female
  Output: That is her car.
  Pronouns: his
  Changed: Yes

Row 7:
  Input: He told me about his trip.
  Target: female
  Output: