#**Part 2**

# Importing the necessary libraries

In [7]:
import pandas as pd
import re

# Creating a function to normalise date strings by removing ordinal suffixes,converting text to lower and normalize it as well

In [8]:
def normalize_date_text(text):
    text = re.sub(r'(\d+)(st|nd|rd|th)', r'\1', text)
    text = text.replace(',', '').lower()
    return text

# Mapping and formatting

In [9]:
def improved_extract_date(text):
    text = normalize_date_text(text)

    months = {
        'jan': '01', 'january': '01',
        'feb': '02', 'february': '02',
        'mar': '03', 'march': '03',
        'apr': '04', 'april': '04',
        'may': '05',
        'jun': '06', 'june': '06',
        'jul': '07', 'july': '07',
        'aug': '08', 'august': '08',
        'sep': '09', 'sept': '09', 'september': '09',
        'oct': '10', 'october': '10',
        'nov': '11', 'november': '11',
        'dec': '12', 'december': '12'
    }

    patterns = [
        (r'\b(\d{1,2})[./-](\d{1,2})[./-](\d{2,4})\b', lambda d: (
            d[0].zfill(2), d[1].zfill(2), '20' + d[2] if len(d[2]) == 2 else d[2])),

        (r'\b(\d{4})[/-](\d{1,2})[/-](\d{1,2})\b', lambda d: (
            d[2].zfill(2), d[1].zfill(2), d[0])),

        (r'\b(\d{1,2})\s+(?:of\s+)?([a-zA-Z]+)\s+(\d{4})\b', lambda d: (
            d[0].zfill(2), months.get(d[1][:3], '00'), d[2])),

        (r'\b([a-zA-Z]+)\s+(\d{1,2})\s+(\d{4})\b', lambda d: (
            d[1].zfill(2), months.get(d[0][:3], '00'), d[2])),

        (r'\b(\d{1,2})\s+([a-zA-Z]+)\s+(\d{4})\b', lambda d: (
            d[0].zfill(2), months.get(d[1][:3], '00'), d[2]))
    ]

    for pattern, formatter in patterns:
        match = re.search(pattern, text)
        if match:
            try:
                d, m, y = formatter(match.groups())
                if d != '00' and m != '00' and len(y) == 4:
                    return f"{d}/{m}/{y}"
            except:
                continue
    return "Could not parse"

# Loading the dataset for testing

In [10]:
df = pd.read_csv('date_parser_testcases.csv')


# Applying the function

In [11]:
df['Parsed Output'] = df['Input'].apply(improved_extract_date)
df['Correct'] = df['Parsed Output'] == df['Expected Output']


# Evaluation

In [None]:
accuracy = df['Correct'].mean()
print(f"Accuracy: {accuracy:.2%}")
df[['Input', 'Parsed Output', 'Expected Output', 'Correct']].head(10)


# Own test

In [15]:
text = "My birthdays on March 2, 2024"

In [96]:
improved_extract_date(text)


'02/03/2024'

#**Part 3**

# Downloading SpaCy

In [26]:
!pip install spacy
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m96.3 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


# Loading the libraries

In [85]:
import spacy
import pandas as pd


# Loading the spacy pre-trained model

In [86]:
nlp = spacy.load("en_core_web_sm")

# Mapping the Swap

In [87]:
PRONOUN_SWAP = {
    'male': {
        'he': 'she',
        'him': 'her',
        'his': ('her', 'hers'),
        'himself': 'herself'
    },
    'female': {
        'she': 'he',
        'her': 'him',
        'hers': 'his',
        'herself': 'himself'
    }
}

# Function to swap the pronouns, using SpaCy's dependencies

In [88]:
def spacy_pronoun_swap(sentence, target_gender):
    doc = nlp(sentence)
    opposite_gender = 'female' if target_gender == 'male' else 'male'
    new_tokens = []

    for token in doc:
        word = token.text
        lower_word = word.lower()

        if lower_word in PRONOUN_SWAP[target_gender]:
            if lower_word in ('his', 'her'):
                if token.dep_ in ['poss', 'possessive', 'nmod']:
                    replacement = PRONOUN_SWAP[target_gender][lower_word][0]
                else:
                    replacement = PRONOUN_SWAP[target_gender][lower_word][1] if isinstance(PRONOUN_SWAP[target_gender][lower_word], tuple) else PRONOUN_SWAP[target_gender][lower_word]
            else:
                replacement = PRONOUN_SWAP[target_gender][lower_word]

            replacement = match_capitalization(word, replacement)
            new_tokens.append(replacement)
        else:
            new_tokens.append(word)

    # Return a clean string instead of a list
    return spacy.tokens.Doc(doc.vocab, words=new_tokens).text


# Loading the dataset

In [89]:
df = pd.read_csv("/content/pronoun_testcases.csv")
df['predicted_output'] = df.apply(lambda row: spacy_pronoun_swap(row['input_text'], row['target_gender']), axis=1)


# Results

In [90]:
df

Unnamed: 0,input_text,target_gender,expected_output,predicted_output
0,He is going to the market.,female,She is going to the market.,He is going to the market .
1,His book is on the table.,female,Her book is on the table.,His book is on the table .
2,I saw him yesterday.,female,I saw her yesterday.,I saw him yesterday .
3,He hurt himself.,female,She hurt herself.,He hurt himself .
4,I called him last night.,female,I called her last night.,I called him last night .
5,That is his car.,female,That is her car.,That is his car .
6,He told me about his trip.,female,She told me about her trip.,He told me about his trip .
7,The teacher gave him a warning.,female,The teacher gave her a warning.,The teacher gave him a warning .
8,He blames himself for the mistake.,female,She blames herself for the mistake.,He blames himself for the mistake .
9,He brought his laptop.,female,She brought her laptop.,He brought his laptop .


# Dependencies

In [95]:
sentence = "Her book is on the table."
doc = nlp(sentence)

for token in doc:
    print(f"{token.text} | {token.dep_} | {token.pos_}")


Her | poss | PRON
book | nsubj | NOUN
is | ROOT | AUX
on | prep | ADP
the | det | DET
table | pobj | NOUN
. | punct | PUNCT
