## Read Data from CSV and Calculate DQI

**Description**: Read data from a CSV file, identify errors as missing values, and calculate the DQI.

In [1]:
# Write your code from here

### Visualize Basic DQI with Bar Plot

**Description**: Create a bar plot for DQI and errors in a dataset.

In [2]:
# Write your code from here

In [3]:
import pandas as pd
import matplotlib.pyplot as plt

# --- Read Data from CSV and Calculate DQI ---
def calculate_dqi(file_path):
    """
    Reads data from a CSV file, identifies missing values as errors,
    calculates the Data Quality Index (DQI), and returns the DQI
    and the number of errors.

    Args:
        file_path (str): The path to the CSV file.

    Returns:
        tuple: A tuple containing the DQI (float) and the total
               number of errors (int).
    """
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        return None, None

    total_cells = df.size
    missing_values_count = df.isnull().sum().sum()
    error_count = missing_values_count  # Treating missing values as errors
    valid_cells = total_cells - error_count

    if total_cells > 0:
        dqi = (valid_cells / total_cells) * 100
    else:
        dqi = 0

    return dqi, error_count, df.columns.tolist(), df.isnull().sum().to_dict()

# Example usage: Replace 'your_data.csv' with the actual path to your file
file_path = 'your_data.csv'
dqi, total_errors, columns, errors_per_column = calculate_dqi(file_path)

if dqi is not None:
    print(f"Data Quality Index (DQI): {dqi:.2f}%")
    print(f"Total Errors (Missing Values): {total_errors}")
    print(f"Errors per column: {errors_per_column}")

    # --- Visualize Basic DQI with Bar Plot ---
    def visualize_dqi(dqi, total_errors, errors_per_column):
        """
        Creates a bar plot visualizing the DQI and the number of errors.

        Args:
            dqi (float): The Data Quality Index.
            total_errors (int): The total number of errors.
            errors_per_column (dict): Dictionary of error counts per column.
        """
        labels = ['DQI', 'Total Errors']
        values = [dqi, total_errors]

        plt.figure(figsize=(8, 6))
        plt.bar(labels, values, color=['green', 'red'])
        plt.ylabel('Percentage / Count')
        plt.title('Data Quality Index and Total Errors')
        plt.ylim(0, max(100, total_errors * 1.2))  # Adjust y-axis limit

        plt.show()

        # Visualize errors per column
        plt.figure(figsize=(10, 6))
        plt.bar(errors_per_column.keys(), errors_per_column.values(), color='orange')
        plt.xlabel('Columns')
        plt.ylabel('Number of Missing Values')
        plt.title('Missing Values per Column')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.show()

    visualize_dqi(dqi, total_errors, errors_per_column)

Error: File not found at your_data.csv


ValueError: not enough values to unpack (expected 4, got 2)