## Read Data from CSV and Calculate DQI

**Description**: Read data from a CSV file, identify errors as missing values, and calculate the DQI.

In [1]:
# Write your code from here
import pandas as pd

# Function to calculate the Data Quality Index (DQI)
def calculate_dqi(csv_file_path):
    # Read the CSV file into a DataFrame
    try:
        data = pd.read_csv(csv_file_path)
    except FileNotFoundError:
        return "Error: The specified file was not found."
    
    # Calculate the total number of values
    total_values = data.size  # total number of elements in the DataFrame
    
    # Calculate the number of missing (NaN) values
    missing_values = data.isnull().sum().sum()  # sum of NaN values across the entire DataFrame
    
    # Calculate DQI
    if total_values == 0:
        return "Error: No data available in the file."
    
    dqi = 100 - (missing_values / total_values * 100)
    
    return dqi

# Example usage
csv_file_path = 'data.csv'  # Path to your CSV file
dqi = calculate_dqi(csv_file_path)
print(f"Data Quality Index (DQI): {dqi}%")

Data Quality Index (DQI): Error: The specified file was not found.%


### Visualize Basic DQI with Bar Plot

**Description**: Create a bar plot for DQI and errors in a dataset.

In [2]:
# Write your code from here

import pandas as pd
import matplotlib.pyplot as plt

# Function to calculate the Data Quality Index (DQI)
def calculate_dqi(csv_file_path):
    try:
        # Read the CSV file into a DataFrame
        data = pd.read_csv(csv_file_path)
    except FileNotFoundError:
        return "Error: The specified file was not found."
    
    # Calculate the total number of values
    total_values = data.size  # total number of elements in the DataFrame
    
    # Calculate the number of missing (NaN) values
    missing_values = data.isnull().sum().sum()  # sum of NaN values across the entire DataFrame
    
    # Calculate DQI
    if total_values == 0:
        return "Error: No data available in the file."
    
    dqi = 100 - (missing_values / total_values * 100)
    
    return dqi, missing_values, total_values

# Function to visualize DQI and errors
def visualize_dqi_and_errors(csv_file_path):
    dqi, missing_values, total_values = calculate_dqi(csv_file_path)
    
    if isinstance(dqi, str):  # If an error message was returned
        print(dqi)
        return
    
    # Create the bar plot
    fig, ax = plt.subplots(figsize=(8, 6))
    
    # Data for the plot
    labels = ['Data Quality Index (DQI)', 'Missing Values']
    values = [dqi, missing_values]
    
    ax.bar(labels, values, color=['green', 'red'])
    
    # Adding labels and title
    ax.set_ylabel('Percentage / Count')
    ax.set_title('Data Quality Index and Missing Values')
    
    # Display the DQI percentage on the plot
    ax.text(0, dqi + 5, f'{dqi:.2f}%', ha='center', color='black')
    ax.text(1, missing_values + 5, f'{missing_values}', ha='center', color='black')
    
    plt.show()

# Example usage
csv_file_path = 'data.csv'  # Path to your CSV file
visualize_dqi_and_errors(csv_file_path)

ValueError: too many values to unpack (expected 3)