**Task 1**: Checking Null Values for Completeness

**Description**: Verify if there are any null values in a dataset, which indicate incomplete data.

In [None]:
# Write your code from here
def check_null_values(dataset):
    """Summarise how many values in ``dataset`` are ``None``.

    Args:
        dataset: Iterable of dict-like records.

    Returns:
        A ``(has_nulls, null_count, null_percentage)`` tuple, where
        ``null_percentage`` is the share of ``None`` values among all values
        seen (``0`` when the dataset contains no values at all).
    """
    total_values = 0
    missing = 0
    for row in dataset:
        cells = list(row.values())
        total_values += len(cells)
        missing += sum(cell is None for cell in cells)

    # Guard the division: an empty dataset has no values to take a share of.
    if total_values:
        share = (missing / total_values) * 100
    else:
        share = 0
    return missing > 0, missing, share


**Task 2**: Checking Data Type Validity

**Description**: Ensure that columns contain data of expected types, e.g., ages are integers.

In [None]:
# Write your code from here
def _is_instance_strict(value, expected_type):
    """Return True if ``value`` matches ``expected_type``, without letting bool pass as a number."""
    if not isinstance(value, expected_type):
        return False
    if not isinstance(value, bool):
        return True
    # bool is a subclass of int, so isinstance(True, int) is True. In a data
    # quality check a boolean in a numeric column is a type error, so only
    # accept it when bool (or object) was explicitly requested.
    if isinstance(expected_type, tuple):
        candidates = expected_type
    else:
        candidates = (expected_type,)
    return any(
        candidate is bool
        or candidate is object
        # Non-class specs (nested tuples, unions): trust isinstance above.
        or not isinstance(candidate, type)
        for candidate in candidates
    )


def check_data_type_validity(dataset, expected_types):
    """Count, per field, the values that do not have the expected type.

    Args:
        dataset: Iterable of dict-like records.
        expected_types: Mapping of field name to the type (or tuple of types)
            that values of that field must be instances of.

    Returns:
        Dict mapping every field in ``expected_types`` to the number of
        records whose value for that field has the wrong type. ``None`` and
        missing values are skipped: they are a completeness problem, not a
        type problem. Booleans are counted as invalid unless ``bool`` (or
        ``object``) is among the expected types.
    """
    invalid_counts = {field: 0 for field in expected_types}
    for record in dataset:
        for field, expected_type in expected_types.items():
            value = record.get(field)
            if value is None:
                continue
            if not _is_instance_strict(value, expected_type):
                invalid_counts[field] += 1
    return invalid_counts


**Task 3**: Verify Uniqueness of Identifiers

**Description**: Check if a dataset has unique identifiers (e.g., emails).

In [None]:
# Write your code from here
def verify_uniqueness(dataset, key):
    """Return True when no two records share the same value for ``key``.

    Args:
        dataset: Iterable of dict-like records.
        key: Name of the identifier field to check.

    Returns:
        ``True`` if every record has a distinct value for ``key``, otherwise
        ``False``. Records without ``key`` are read as ``None`` via ``.get``,
        so two such records count as a repeated value. Values must be
        hashable.
    """
    seen = set()
    for record in dataset:
        val = record.get(key)
        # The first repeat already answers the question; stop scanning.
        if val in seen:
            return False
        seen.add(val)
    return True


Task 4: Validate Email Format Using Regex

Description: Validate if email addresses in a dataset have the correct format.

In [None]:
# Write your code from here
import re

def validate_email_format(emails):
    """Return the entries of ``emails`` that look like an email address.

    An address is accepted when the whole string has the shape
    ``local@domain.tld``, where local and domain consist of word characters,
    dots and hyphens.

    Args:
        emails: Iterable of candidate values.

    Returns:
        List of the string entries that match, in their original order.
        Non-string entries (e.g. ``None``) are treated as invalid rather than
        raising.
    """
    # fullmatch instead of match + '$': '$' also matches just before a
    # trailing newline, which would let "a@b.com\n" through.
    pattern = re.compile(r'[\w\.-]+@[\w\.-]+\.\w+')
    return [
        email
        for email in emails
        if isinstance(email, str) and pattern.fullmatch(email)
    ]


Task 5: Check for Logical Age Validity

Description: Ensure ages are within a reasonable human range (e.g., 0-120).

In [None]:
# Write your code from here
def check_age_validity(ages, min_age=0, max_age=120):
    """Return the ages that are integers within ``[min_age, max_age]``.

    Args:
        ages: Iterable of candidate age values.
        min_age: Smallest acceptable age (inclusive). Defaults to 0.
        max_age: Largest acceptable age (inclusive). Defaults to 120.

    Returns:
        List of the valid ages, in their original order. Non-integers are
        dropped, and so are booleans: ``bool`` subclasses ``int``, so without
        the explicit exclusion ``True``/``False`` would count as ages 1 and 0.
    """
    return [
        age
        for age in ages
        if isinstance(age, int)
        and not isinstance(age, bool)
        and min_age <= age <= max_age
    ]


Task 6: Identify and Handle Missing Data

Description: Identify missing values in a dataset and impute them using a simple strategy (e.g., mean).

In [None]:
# Write your code from here
import numpy as np

def impute_missing_with_mean(data):
    """Replace missing entries in ``data`` with the mean of the present ones.

    Args:
        data: Sequence of numbers in which missing entries are ``None`` or
            NaN. It is converted with ``np.array(data, dtype=float)``, which
            turns ``None`` into NaN.

    Returns:
        A list of floats with every NaN replaced by the mean of the non-NaN
        values. If there is nothing to impute, or no present value to impute
        from (empty or all-missing input), the converted data is returned
        unchanged.
    """
    arr = np.array(data, dtype=float)
    missing = np.isnan(arr)
    # With no present value (empty or all-missing input) there is no mean;
    # calling nanmean would only emit a "Mean of empty slice" RuntimeWarning
    # and return NaN, so skip it.
    if not missing.any() or missing.all():
        return arr.tolist()
    arr[missing] = np.nanmean(arr)
    return arr.tolist()


Task 7: Detect Duplicates

Description: Detect duplicate rows in the dataset.

In [None]:
# Write your code from here
def detect_duplicates(data):
    """Return the rows of ``data`` that repeat an earlier row.

    Args:
        data: Iterable of rows. A row is either a dict or an iterable of
            values (list, tuple, ...). All values must be hashable.

    Returns:
        List of the second and later occurrences of each repeated row, in
        encounter order. The first occurrence is not included.
    """
    seen = set()
    duplicates = []
    for row in data:
        if isinstance(row, dict):
            # frozenset, not tuple: two equal dicts may list their keys in a
            # different insertion order and must still compare as duplicates.
            fingerprint = frozenset(row.items())
        else:
            fingerprint = tuple(row)
        if fingerprint in seen:
            duplicates.append(row)
        else:
            seen.add(fingerprint)
    return duplicates


Task 8: Validate Correctness of Numerical Values

Description: Ensure numerical columns are within a specified range.

In [None]:
# Write your code from here
def validate_numerical_range(data, column, min_value, max_value):
    """Return the rows whose ``column`` value is not a number in range.

    Args:
        data: Iterable of dict-like rows.
        column: Name of the field to validate.
        min_value: Smallest acceptable value (inclusive).
        max_value: Largest acceptable value (inclusive).

    Returns:
        List of rows whose value cannot be converted to ``float``, is NaN, or
        lies outside ``[min_value, max_value]``. Rows where the value is
        ``None`` or the column is absent are skipped.
    """
    invalid_rows = []
    for row in data:
        value = row.get(column)
        if value is None:
            continue
        try:
            num = float(value)
        except (ValueError, TypeError, OverflowError):
            # Not numeric at all, or an int too large for a float.
            invalid_rows.append(row)
            continue
        # Positive containment test: NaN fails every comparison, so it is
        # flagged here, whereas "num < min or num > max" would let it pass.
        if not (min_value <= num <= max_value):
            invalid_rows.append(row)
    return invalid_rows


Task 9: Custom Completeness Rule Violation Report

Description: Create a report showing which rows violate specific completeness rules, such as mandatory fields being empty.

In [None]:
# Write your code from here
def completeness_violation_report(data, mandatory_fields):
    """List every mandatory field that is missing or empty, row by row.

    Args:
        data: Iterable of dict-like rows.
        mandatory_fields: Iterable of field names that every row must fill.

    Returns:
        List of ``{'row_index', 'field', 'value'}`` dicts, one per violation,
        ordered by row and then by field. A field violates the rule when it
        is absent, ``None`` or the empty string.
    """
    report = []
    for position, entry in enumerate(data):
        for name in mandatory_fields:
            # .get() folds "absent" into None, covering both cases at once.
            current = entry.get(name)
            if current is None or current == "":
                report.append({'row_index': position, 'field': name, 'value': current})
    return report


Task 10: Advanced Regex for Data Validity Check

Description: Check for validity with advanced regex patterns, such as validating complex fields with multi-level rules.

In [None]:
# Write your code from here
import re

def validate_advanced_regex(data, field, pattern):
    """Report the records whose ``field`` does not fully match ``pattern``.

    Args:
        data: Iterable of dict-like records.
        field: Name of the field to validate.
        pattern: Regular expression the whole value must match.

    Returns:
        List of ``{'index', 'field', 'value'}`` dicts, one per invalid record,
        in encounter order. A missing field is validated as the empty string.
        A value that is not text (e.g. ``None`` or a number) is reported as
        invalid rather than raising.
    """
    regex = re.compile(pattern)
    # Text type the pattern operates on (str, or bytes for a bytes pattern);
    # fullmatch raises TypeError for anything else.
    text_type = type(regex.pattern)
    invalid_records = []
    for idx, record in enumerate(data):
        value = record.get(field, "")
        if not isinstance(value, text_type) or not regex.fullmatch(value):
            invalid_records.append({'index': idx, 'field': field, 'value': value})
    return invalid_records
