In [None]:
import pandas as pd
import numpy as np
import re
from datetime import datetime

# Load dataset (replace with your actual file path).
# The path is relative, so the file is looked up in the current working directory.
df = pd.read_csv('employees.csv')

# -----------------------------------
# 13. Date Format Standardization
# -----------------------------------
def standardize_date(date_str):
    """Normalize a date-like value to an ISO ``YYYY-MM-DD`` string.

    Args:
        date_str: A date string in any format ``pd.to_datetime`` can parse,
            or an existing date/datetime object.

    Returns:
        The date formatted with ``'%Y-%m-%d'``, or the string ``'Invalid'``
        when the value is missing, is a bare number, or cannot be parsed.
    """
    # pd.to_datetime interprets bare numbers as nanoseconds since the Unix
    # epoch, so an integer such as 20200115 would silently come back as
    # '1970-01-01'. Flag numbers instead of emitting a plausible but wrong date.
    if isinstance(date_str, (bool, int, float, np.number)):
        return 'Invalid'
    try:
        parsed_date = pd.to_datetime(date_str, errors='coerce')
        if pd.isnull(parsed_date):
            return 'Invalid'
        return parsed_date.strftime('%Y-%m-%d')
    except Exception:
        # Best-effort cleaning: any value pandas cannot handle as a single
        # date (e.g. a list) is reported as 'Invalid' rather than aborting
        # the whole column.
        return 'Invalid'

# Assuming 'DateOfJoining' is the column with date values.
# The column is overwritten element by element with standardize_date's result,
# so afterwards it holds 'YYYY-MM-DD' strings (or 'Invalid'), not datetimes.
df['DateOfJoining'] = df['DateOfJoining'].apply(standardize_date)
print("Date format standardized:\n", df['DateOfJoining'])

# -----------------------------------
# 14. Numeric Constraints Enforcement
# -----------------------------------
# Example: Age must be greater than 0.
# Rows with a missing Age are dropped as well, because NaN > 0 is False.
# .copy() gives df its own data instead of a slice of the original frame, so
# adding columns to it afterwards does not raise SettingWithCopyWarning.
df = df[df['Age'] > 0].copy()
print("\nAfter enforcing numeric constraint (Age > 0):\n", df[['Name', 'Age']])

# -----------------------------------
# 15. String Format Checks
# -----------------------------------
# Example: Check valid email format
# Compiled once and used with fullmatch(), so the entire string must be an
# address: local part (word chars, '.', '+', '-'), '@', domain, and a final
# label of at least two word characters.
_EMAIL_PATTERN = re.compile(r"[\w.+-]+@[\w.-]+\.\w{2,}")


def is_valid_email(email):
    """Return True if *email* looks like a well-formed e-mail address.

    This is a lightweight format check, not full RFC 5322 validation.

    Args:
        email: The value to check; it is converted with ``str()`` first.
            Missing values (``None``/NaN) are treated as invalid.

    Returns:
        ``True`` when the whole string matches the address pattern,
        ``False`` otherwise.
    """
    if pd.isnull(email):
        return False
    # fullmatch (rather than match + '$') rejects a trailing newline, which
    # '$' would otherwise tolerate.
    return _EMAIL_PATTERN.fullmatch(str(email)) is not None

# Store the check result in a new boolean 'ValidEmail' column; rows with a
# malformed or missing address are flagged here, not removed.
df['ValidEmail'] = df['Email'].apply(is_valid_email)
print("\nEmail validity:\n", df[['Email', 'ValidEmail']])


In [None]:
import pandas as pd
import re
from dateutil import parser

# Load dataset (replace with actual file path).
# NOTE: this rebinds df to a fresh read of the file, so changes made to any
# earlier df (e.g. by a previous cell) are not carried over.
df = pd.read_csv('employees.csv')

# -----------------------------------
# 16. Standardizing Date Formats
# -----------------------------------
def fix_inconsistent_date(date_str):
    """Parse a free-form date string and return it as ``YYYY-MM-DD``.

    Parsing is fuzzy, so surrounding text is ignored, but the string must
    itself supply the year, month and day.

    Args:
        date_str: The value to parse; it is converted with ``str()`` first.

    Returns:
        The date formatted with ``'%Y-%m-%d'``, or the string ``'Invalid'``
        when no complete date can be extracted.
    """
    # Imported locally so the helper does not rely on names brought into
    # scope by another notebook cell.
    from datetime import datetime
    from dateutil import parser as date_parser

    try:
        text = str(date_str)
        # dateutil fills any missing field from `default` (today's date when
        # omitted), so inputs like '12' or 'May 2021' would otherwise produce
        # a fabricated date that changes with the day the code is run.
        # Parsing against two defaults that differ in year, month and day
        # exposes that: the results agree only if the text supplied all three.
        first = date_parser.parse(text, fuzzy=True, default=datetime(2000, 1, 1))
        second = date_parser.parse(text, fuzzy=True, default=datetime(2001, 2, 2))
        if first.date() != second.date():
            return 'Invalid'
        return first.strftime('%Y-%m-%d')
    except Exception:
        # Best-effort cleaning: anything dateutil cannot parse is flagged
        # instead of aborting the whole column.
        return 'Invalid'

# Assuming 'JoiningDate' is the column with inconsistent date formats.
# NOTE(review): the previous cell reads the date column as 'DateOfJoining' —
# confirm which name the CSV actually uses.
df['JoiningDate'] = df['JoiningDate'].apply(fix_inconsistent_date)
print("Standardized Date Formats:\n", df['JoiningDate'])

# -----------------------------------
# 17. Pattern Matching for Consistency (Phone Number)
# -----------------------------------
def standardize_phone(phone):
    """Format a 10-digit phone number as ``(XXX) XXX-XXXX``.

    Args:
        phone: The raw phone value (string or number). Punctuation, spaces
            and other non-digit characters are ignored. A leading country
            code ``1`` (e.g. ``+1 555 123 4567``) is accepted and dropped.

    Returns:
        The formatted number, or the string ``'Invalid'`` when the value
        does not contain exactly ten digits.
    """
    # pandas reads a numeric column containing blanks as float64, so
    # 5551234567 arrives as 5551234567.0. Convert whole floats back to int,
    # otherwise the '0' after the decimal point counts as an eleventh digit.
    if isinstance(phone, float) and phone.is_integer():
        phone = int(phone)
    digits = re.sub(r'\D', '', str(phone))  # Remove all non-digit characters
    # Tolerate the North American country code prefix.
    if len(digits) == 11 and digits.startswith('1'):
        digits = digits[1:]
    if len(digits) != 10:
        return 'Invalid'
    return f"({digits[:3]}) {digits[3:6]}-{digits[6:]}"

# Assuming 'Phone' is the column with phone numbers.
# The column is overwritten with the formatted string, or 'Invalid' where
# standardize_phone rejects the value.
df['Phone'] = df['Phone'].apply(standardize_phone)
print("\nStandardized Phone Numbers:\n", df['Phone'])

# -----------------------------------
# 18. Handling Mixed Case Text
# -----------------------------------
# Example: Convert 'Department' column to uppercase
df['Department'] = df['Department'].astype(str
