### Part 2

Build a date parser using basic text processing and rules. (No ML models)

- Dataset:

[date_parser_testcases.csv](https://prod-files-secure.s3.us-west-2.amazonaws.com/2ad6026b-7cdc-4780-99a4-6e4e0034cf90/aabbc537-a7c4-478b-ba8a-2afc146a8d23/date_parser_testcases.csv)

- Given a piece of text, extract the day, month and year info and present it in DD/MM/YYYY format.
    - Example: “I went to London on 21st June, 2024” → 21/06/2024
- Use only default python packages and regex (no ML models OR external libraries)


In [1]:
import csv
import re

# Load the dataset
def load_dataset(file_path):
    with open(file_path, mode='r') as file:
        reader = csv.reader(file)
        data = list(reader)
    return data

# Preprocess text
def preprocess_text(text):
    text = text.lower().strip()
    return text

# Extract date components using regex
def extract_date(text):
    # Define regex patterns for date components
    day_pattern = r'\b(?:0?[1-9]|[12][0-9]|3[01])\b'
    month_pattern = r'\b(?:january|february|march|april|may|june|july|august|september|october|november|december|0?[1-9]|1[0-2])\b'
    year_pattern = r'\b\d{4}\b'

    day_match = re.search(day_pattern, text)
    month_match = re.search(month_pattern, text)
    year_match = re.search(year_pattern, text)

    if not (day_match and month_match and year_match):
        return None  # Return None for dates that cannot be parsed

    day = day_match.group()
    month = month_match.group()
    year = year_match.group()

    # Convert month name to month number
    month_names = {
        'january': '01', 'february': '02', 'march': '03', 'april': '04',
        'may': '05', 'june': '06', 'july': '07', 'august': '08', 'september': '09',
        'october': '10', 'november': '11', 'december': '12'
    }

    if month.isdigit():
        month = month.zfill(2)
    else:
        month = month_names.get(month, '01')

    # Ensure day and year are in the correct format
    day = day.zfill(2)

    return f"{day}/{month}/{year}"

# Process the dataset
def process_dates(file_path):
    data = load_dataset(file_path)
    processed_dates = []

    for row in data:
        text = preprocess_text(row[0])  # Assuming text is in the first column
        date_str = extract_date(text)
        if date_str is not None:  # Only include non-null dates
            processed_dates.append(date_str)

    return processed_dates

# Example usage
file_path = '/content/date_parser_testcases.csv'
dates = process_dates(file_path)

for date in dates:
    print(date)


05/03/2023
07/07/1990
12/12/2022
02/02/2022
11/11/1987
03/04/2020
05/05/1997
11/11/2021
08/08/2021
10/10/1995
01/01/2023
03/03/2022
31/08/2020
02/02/2020
12/12/2019
17/03/2022
11/11/2021
07/07/2023
09/09/2021
01/01/2022
10/10/2022
10/10/2018
12/12/2020
31/12/2022
29/02/2024
07/07/2021
03/03/2022
08/08/2020
09/09/2020
01/01/2022
07/07/2023
05/05/1990
05/03/2023
07/07/1990
12/12/2022
02/02/2022
11/11/1987
03/04/2020
11/11/2021
08/08/2021
10/10/1995
01/01/2023
03/03/2022
31/08/2020
02/02/2020
12/12/2019
17/03/2022
11/11/2021
07/07/2023
09/09/2021
01/01/2022
10/10/2022
10/10/2018
12/12/2020
31/12/2022
29/02/2024
07/07/2021
03/03/2022
08/08/2020
09/09/2020
01/01/2022
07/07/2023
05/05/1990
