In [31]:
import pandas as pd
import re
from datetime import datetime

In [32]:
# Load the labelled test sentences and leave out the unwanted row
# (index label 50) in a single expression instead of mutating in place.
df = pd.read_csv('date_parser_testcases.csv').drop(index=50)
df

Unnamed: 0,Input,Expected Output
0,"The event will take place on March 5, 2023.",05-03-2023
1,Her birthday is on 07/08/1990.,07-08-1990
2,The deadline is 2022-12-31.,31-12-2022
3,We met on 1st of January 2000.,01-01-2000
4,"The concert is scheduled for 15th September, 2...",15-09-2021
...,...,...
94,"We celebrate Independence Day on 2023-07-04, a...",04-07-2023
95,The final date for submission is 30th November...,30-11-2022
96,"The annual conference is on 15th October 2023,...",15-10-2023
97,"His birthdate, noted as 1990-05-20, is in the ...",20-05-1990


In [33]:
def _month_number(month_name):
    """Translate a month word into a two-digit month string.

    The word is tried as a full month name ("%B") and then as an
    abbreviated one ("%b") using ``datetime.strptime``.

    Returns:
        The month as a zero-padded string ("01".."12"), or ``None`` when
        neither directive accepts the word.
    """
    for directive in ("%B", "%b"):
        try:
            return datetime.strptime(month_name, directive).strftime("%m")
        except ValueError:
            continue
    return None


def Format(text):
    """Extract the first recognisable date from ``text`` as ``DD-MM-YYYY``.

    Patterns are tried in priority order; within a pattern, candidates are
    examined left to right. The first candidate that resolves to a date wins.

    Supported shapes (examples):
        * ``March 5, 2023`` / ``March 5th 2023``
        * ``1st of January 2000``
        * ``07/08/1990`` and ``5/6/21`` (day first; if the second number is
          greater than 12 the two are swapped, i.e. read as month/day)
        * ``2022-12-31`` and ``2023.07.04`` / ``2023/07/04``
        * ``04.07.2023`` / ``04-07-2023``
        * ``15th September, 2021``
        * ``4th July, including 2023`` and
          ``4th of July every year, including 2023``

    Args:
        text: Sentence that may contain a date.

    Returns:
        The date formatted as ``DD-MM-YYYY``, or ``None`` when ``text`` is not
        a string or no pattern yields a date.

    Notes:
        * Two-digit years are expanded by prefixing ``20`` (``21`` -> ``2021``).
        * A candidate whose month word is not a month name is skipped rather
          than being reported with month ``00``.
        * Numeric day/month values are not checked against the calendar.
    """
    # Non-string cells (None / NaN) cannot contain a date.
    if not isinstance(text, str):
        return None

    # Each entry: (regex, (day group, month group, year group), kind)
    #   "name"    - the month group is a word that must be looked up
    #   "slash"   - numeric d/m/y where day and month may need swapping
    #   "numeric" - numeric fields used exactly as captured
    # Separators written as [^\d\n] accept any single non-digit character on
    # the same line; a digit must never act as a separator, otherwise long
    # digit runs such as phone numbers are mistaken for dates.
    patterns = [
        (r'([A-Za-z]+)\s+(\d{1,2})(?:st|nd|rd|th)?,?\s+(\d{4})', (2, 1, 3), "name"),
        (r'(\d{1,2})(st|nd|rd|th)?\s+of\s+([A-Za-z]+)\s*,?\s*(\d{4})', (1, 3, 4), "name"),
        (r'(\d{1,2})/(\d{1,2})/(\d{4})', (1, 2, 3), "slash"),
        (r'(\d{4})-(\d{2})-(\d{2})', (3, 2, 1), "numeric"),
        (r'(\d{1,2})(st|nd|rd|th)?\s+([A-Za-z]+)\s*,?\s*(\d{4})', (1, 3, 4), "name"),
        (r'(\d{4})[^\d\n](\d{2})[^\d\n](\d{2})', (3, 2, 1), "numeric"),
        (r'(\d{2})[^\d\n](\d{2})[^\d\n](\d{4})', (1, 2, 3), "numeric"),
        (r'(\d{1,2})/(\d{1,2})/(\d{2})', (1, 2, 3), "slash"),
        (r'(\d{1,2})(st|nd|rd|th)?\s+([A-Za-z]+),?\s+including\s+(\d{4})', (1, 3, 4), "name"),
        (r'(\d{1,2})(st|nd|rd|th)?\s+of\s+([A-Za-z]+)\s+every\s+year,\s+including\s+(\d{4})', (1, 3, 4), "name"),
    ]

    for pattern, (day_idx, month_idx, year_idx), kind in patterns:
        for match in re.finditer(pattern, text):
            day = match.group(day_idx)
            month = match.group(month_idx)
            year = match.group(year_idx)

            if kind == "name":
                month = _month_number(month)
                if month is None:
                    # e.g. "Room 12 2023": the word is not a month, so keep looking.
                    continue
            elif kind == "slash" and int(month) > 12:
                # A "month" above 12 means the text was written month/day.
                day, month = month, day

            # Only the two-digit-year pattern captures a short year.
            if len(year) == 2:
                year = "20" + year

            return f"{day.zfill(2)}-{month.zfill(2)}-{year}"
    return None

In [34]:
# Run the extractor over every input sentence and store the result.
df['Parsed Date'] = df['Input'].map(Format)

# A row counts as a match when the parsed string equals the reference string.
df['Match'] = df['Parsed Date'].eq(df['Expected Output'])

In [35]:
# Share of rows where the parser agreed with the reference, as a percentage.
accuracy = 100 * df['Match'].mean()
print(f"Accuracy: {accuracy:.2f}%")

# Display the scored table so individual rows can be inspected.
df

Accuracy: 100.00%


Unnamed: 0,Input,Expected Output,Parsed Date,Match
0,"The event will take place on March 5, 2023.",05-03-2023,05-03-2023,True
1,Her birthday is on 07/08/1990.,07-08-1990,07-08-1990,True
2,The deadline is 2022-12-31.,31-12-2022,31-12-2022,True
3,We met on 1st of January 2000.,01-01-2000,01-01-2000,True
4,"The concert is scheduled for 15th September, 2...",15-09-2021,15-09-2021,True
...,...,...,...,...
94,"We celebrate Independence Day on 2023-07-04, a...",04-07-2023,04-07-2023,True
95,The final date for submission is 30th November...,30-11-2022,30-11-2022,True
96,"The annual conference is on 15th October 2023,...",15-10-2023,15-10-2023,True
97,"His birthdate, noted as 1990-05-20, is in the ...",20-05-1990,20-05-1990,True
