# Part 2

In [27]:
import pandas as pd
import re

def parse_date(text):
    month_mapping = {
        "January": "01", "February": "02", "March": "03", "April": "04",
        "May": "05", "June": "06", "July": "07", "August": "08",
        "September": "09", "October": "10", "November": "11", "December": "12", 
        'january': '01', 'february': '02', 'march': '03', 'april': '04',
        'may': '05', 'june': '06', 'july': '07', 'august': '08',
        'september': '09', 'october': '10', 'november': '11', 'december': '12',
        'jan': '01', 'feb': '02', 'mar': '03', 'apr': '04', 'jun': '06',
        'jul': '07', 'aug': '08', 'sep': '09', 'oct': '10', 'nov': '11', 'dec': '12',
        'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04', 'Jun': '06',
        'Jul': '07', 'Aug': '08', 'Sep': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12'
    }

    patterns = [
        r'(\d{1,2})(?:st|nd|rd|th)?\s(of\s)?(\b\w+\b)\s(\d{4})',  
        r'(\b\w+\b)\s(\d{1,2})(?:st|nd|rd|th)?,?\s(\d{4})',               
        r'(\d{4})[-/\.](\d{1,2})[-/\.](\d{1,2})',                         
        r'(\d{1,2})[-/\.](\d{1,2})[-/\.](\d{4})',                         
        r'(\d{1,2})[-/\.](\d{1,2})[-/\.](\d{2})',                         
        r'(\d{4})[-/](\d{1,2})[-/](\d{1,2})',                             
        r'(\b\w+\b)\s(\d{1,2})(?:st|nd|rd|th)?\s?,\s?(\d{4})',            
        r'(\b\w+\b)\s(\d{1,2})(?:st|nd|rd|th)?(\s|,)\s?(\d{4})',          
        r'(\d{1,2})(?:st|nd|rd|th)?\s(of\s)?(\b\w+\b),?\s(\d{4})',
        r'(\b\w+\b)\s(\d{1,2})(?:st|nd|rd|th)?,?\s(\d{4})'
    ]

    pattern_with_including = r"(\b\d{1,2})(st|nd|rd|th)?\s*([a-zA-Z]+)(?:,)?(?:\s+including\s+(\d{4}))"
    match = re.search(pattern_with_including, text)
    if match:
        day = match.group(1).zfill(2)
        month_str = match.group(3).lower()
        year = match.group(4)
        month = month_mapping.get(month_str)
        return f"{day}/{month}/{year}"

    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            if len(match.groups()) == 5:  
                day = match.group(1).zfill(2)
                month_str = match.group(4).lower()
                year = match.group(5)
                month = month_mapping.get(month_str)
                return f"{day}/{month}/{year}"

            elif len(match.groups()) == 3 and re.match(r"\d{4}", match.group(3)):  
                day = match.group(1).zfill(2)
                month = match.group(2).zfill(2)
                year = match.group(3)
                if len(year) == 2:
                    year = f"20{year}" if int(year) <= 50 else f"19{year}"
                return f"{day}/{month}/{year}"

            elif len(match.groups()) == 3 and "," in pattern:  
                month_str = match.group(1).lower()
                day = match.group(2).zfill(2)
                year = match.group(3)
                month = month_mapping.get(month_str)
                return f"{day}/{month}/{year}"

            elif len(match.groups()) == 3 and re.match(r"\d{2,4}", match.group(3)): 
                day = match.group(1).zfill(2)
                month_str = match.group(2).lower()
                year = match.group(3)
                month = month_mapping.get(month_str)
                return f"{day}/{month}/{year}"

    print(f"Could not parse date from text: {text}")
    return None

df = pd.read_csv('date_parser_testcases.csv')

# Apply the date parser to the 'Input' column
df['parsed_date'] = df['Input'].apply(parse_date)

# Print the Input column along with the parsed dates
print(df[['Input', 'parsed_date']])

Could not parse date from text: We met on 1st of January 2000.
Could not parse date from text: The concert is scheduled for 15th September, 2021.
Could not parse date from text: Christmas is on 25th Dec 2024.
Could not parse date from text: Her appointment is on the 2nd of March, 2021.
Could not parse date from text: The course starts on 1st July 2023.
Could not parse date from text: Independence Day is on 4th of July, 2022.
Could not parse date from text: The conference will be held on 5th May 2023.
Could not parse date from text: His wedding is on 6th of August, 2020.
Could not parse date from text: She was born on 3rd March 1998.
Could not parse date from text: The last date is 30th November 2022.
Could not parse date from text: The conference is on 15th October 2023.
Could not parse date from text: The festival is on 12th August 2024.
Could not parse date from text: Input
Could not parse date from text: We first met on the 1st of January 2000 at the conference.
Could not parse date

In [28]:
df['Match'] = df['parsed_date'] == df['Expected Output']
df

Unnamed: 0,Input,Expected Output,parsed_date,Match
0,"The event will take place on March 5, 2023.",05/03/2023,March/05/2023,False
1,Her birthday is on 07/08/1990.,07/08/1990,07/08/1990,True
2,The deadline is 2022-12-31.,31/12/2022,12/None/31,False
3,We met on 1st of January 2000.,01/01/2000,,False
4,"The concert is scheduled for 15th September, 2...",15/09/2021,,False
...,...,...,...,...
95,"We celebrate Independence Day on 2023-07-04, a...",04/07/2023,07/None/04,False
96,The final date for submission is 30th November...,30/11/2022,,False
97,"The annual conference is on 15th October 2023,...",15/10/2023,,False
98,"His birthdate, noted as 1990-05-20, is in the ...",20/05/1990,05/None/20,False


In [29]:
df[df['Match'] == True]['Match'].count()


21