### Task 1: Measure Data Accuracy using a Trusted Source

**Description**: You have two datasets of product prices: `company_prices.csv` and
`trusted_prices.csv`. Check whether the prices in `company_prices.csv` match the prices in
`trusted_prices.csv`. Assume both files have a "product_id" and a "price" column.

In [None]:
# Write your code from here
import pandas as pd

def measure_price_accuracy(company_file, trusted_file):
    """Return the percentage of company prices that equal the trusted prices.

    Both inputs are read with ``pd.read_csv`` and inner-joined on
    ``product_id``, so only products present in both files are scored.
    Prices are compared with exact equality.

    Args:
        company_file: CSV source with ``product_id`` and ``price`` columns.
        trusted_file: CSV source with ``product_id`` and ``price`` columns.

    Returns:
        The share of joined rows whose prices match, as a percentage
        rounded to two decimals; ``0`` when the join produces no rows.
    """
    company = pd.read_csv(company_file)
    trusted = pd.read_csv(trusted_file)

    # Inner join: each row pairs the company price with the trusted price.
    paired = company.merge(
        trusted,
        on="product_id",
        suffixes=('_company', '_trusted'),
    )

    n_paired = len(paired)
    n_matching = paired['price_company'].eq(paired['price_trusted']).sum()

    # Nothing to score when the two files share no product ids.
    if not n_paired > 0:
        return 0

    return round(n_matching / n_paired * 100, 2)


### Task 2: Detect Incorrect Values

**Description**: In `company_prices.csv`, detect any negative values in the "price" column; a negative price is invalid.

In [None]:
# Write your code from here
import pandas as pd

def detect_negative_prices(file_path):
    """Return the rows of a price CSV whose ``price`` is below zero.

    Args:
        file_path: CSV source (read with ``pd.read_csv``) containing a
            ``price`` column.

    Returns:
        A DataFrame holding only the rows with a negative price, keeping
        their original index labels.
    """
    prices = pd.read_csv(file_path)

    # Boolean mask marking the invalid (negative) prices.
    is_negative = prices['price'].lt(0)

    return prices.loc[is_negative]


### Task 3: Check Missing Data Rates

**Description**: Calculate the percentage of missing values in `customer_data.csv`, both per column and overall.

In [None]:
# Write your code from here
import pandas as pd

def missing_data_percentage(file_path):
    """Compute how much of a CSV dataset is missing.

    Args:
        file_path: CSV source, read with ``pd.read_csv``.

    Returns:
        A ``(missing_per_column, total_missing_percentage)`` tuple:

        * ``missing_per_column`` - Series indexed by column name holding the
          percentage of null cells in each column.
        * ``total_missing_percentage`` - float, the percentage of null cells
          across the whole table.

        When the file has a header but no data rows, every percentage is
        ``0.0`` rather than NaN.
    """
    df = pd.read_csv(file_path)

    # Build the null mask once and reuse it for both metrics.
    null_mask = df.isnull()

    if len(df) == 0:
        # No rows: a mean over nothing would be NaN, so report 0% instead.
        missing_per_column = pd.Series(0.0, index=df.columns)
    else:
        missing_per_column = null_mask.mean() * 100

    total_cells = df.size
    if total_cells == 0:
        # Avoid the 0/0 division (NaN plus RuntimeWarning) on an empty table.
        total_missing_percentage = 0.0
    else:
        total_missing_percentage = float(
            null_mask.to_numpy().sum() / total_cells * 100
        )

    return missing_per_column, total_missing_percentage


### Task 4: Handling Partially Available Records

**Description**: In `customer_data.csv`, identify records with a missing "email" or phone number (the "phone" column) and decide whether to drop them or fill them with placeholder values.

In [None]:
# Write your code from here
import pandas as pd

def handle_partial_records(file_path, strategy="drop"):
    """Find records lacking contact details and either drop or fill them.

    Args:
        file_path: CSV source (read with ``pd.read_csv``) containing
            ``email`` and ``phone`` columns.
        strategy: ``"drop"`` removes every row where ``email`` or ``phone``
            is missing; ``"fill"`` keeps all rows and substitutes placeholder
            values for the missing entries.

    Returns:
        A ``(df_cleaned, partial_missing)`` tuple: the cleaned DataFrame and
        the original rows that had a missing ``email`` or ``phone``.

    Raises:
        ValueError: If ``strategy`` is neither ``"drop"`` nor ``"fill"``.
    """
    # Fail fast on a bad strategy instead of reading the file first.
    if strategy not in ("drop", "fill"):
        raise ValueError("Strategy must be 'drop' or 'fill'.")

    df = pd.read_csv(file_path)

    # Identify records with missing 'email' or 'phone'
    partial_missing = df[df['email'].isnull() | df['phone'].isnull()]

    if strategy == "drop":
        # Drop records where 'email' or 'phone' is missing
        df_cleaned = df.dropna(subset=['email', 'phone'])
    else:
        # Fill via a per-column mapping on the frame itself. The previous
        # `df_cleaned[col].fillna(..., inplace=True)` acted on a column
        # selection, which warns on pandas 2.x and has no effect on the
        # frame once Copy-on-Write is enabled.
        df_cleaned = df.fillna({
            'email': 'no_email@example.com',
            'phone': '000-000-0000',
        })

    return df_cleaned, partial_missing
