## Build a Data Quality Dashboard

**Description**: Create a simple dashboard that displays data quality metrics using a library like `dash` or `streamlit`.

**Steps:**
1. Install Streamlit: `pip install streamlit`
2. Create a Python script named `dashboard.py`.
3. Run the dashboard: `streamlit run dashboard.py`

In [1]:
# Write your code from here

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Function to calculate the Data Quality Index (DQI)
def calculate_dqi(data):
    """Compute the Data Quality Index (DQI) of a DataFrame.

    The DQI is the percentage of cells that are not null, i.e.
    ``100 - missing_values / total_values * 100``.

    Args:
        data: The pandas DataFrame to inspect.

    Returns:
        A 3-tuple ``(dqi, missing_values, total_values)``. When the frame
        contains no cells at all, ``dqi`` is an error message string and
        both counts are 0, so callers can always unpack three values and
        detect the error case with ``isinstance(dqi, str)``.
    """
    total_values = int(data.size)  # Total number of cells in the DataFrame

    # Guard against division by zero (e.g. a CSV with a header row only).
    # The message travels in the first slot so the return shape stays uniform.
    if total_values == 0:
        return "Error: No data available in the file.", 0, 0

    # Count NaN/None cells across every column; int() drops the NumPy scalar
    # type so the count formats and compares like a plain Python integer.
    missing_values = int(data.isnull().sum().sum())

    dqi = 100 - (missing_values / total_values * 100)

    return dqi, missing_values, total_values

# Function to visualize DQI and errors using a bar plot
def visualize_dqi_and_errors(dqi, missing_values):
    """Render a two-bar chart of the DQI and the missing-value count.

    The figure is drawn with Matplotlib and handed to Streamlit via
    ``st.pyplot``. Both bars share one y-axis even though the first is a
    percentage and the second is a count.

    Args:
        dqi: Data Quality Index as a number (formatted with two decimals).
        missing_values: Number of missing cells.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        # Create bar chart
        labels = ['Data Quality Index (DQI)', 'Missing Values']
        values = [dqi, missing_values]

        ax.bar(labels, values, color=['green', 'red'])
        ax.set_ylabel('Percentage / Count')
        ax.set_title('Data Quality Index and Missing Values')

        # Label each bar just above its top. The offset is given in points
        # rather than data units so the gap stays the same whether the
        # y-axis spans 0-100 or many thousands of missing cells.
        captions = [f'{dqi:.2f}%', f'{missing_values}']
        for position, (height, caption) in enumerate(zip(values, captions)):
            ax.annotate(
                caption,
                xy=(position, height),
                xytext=(0, 4),
                textcoords='offset points',
                ha='center',
                va='bottom',
                color='black',
            )

        # Show the plot
        st.pyplot(fig)
    finally:
        # Streamlit re-executes the script on every interaction; without an
        # explicit close, each rerun would leave another figure registered
        # with pyplot.
        plt.close(fig)

# Streamlit App Layout
st.title('Data Quality Dashboard')

# Sidebar for file upload
st.sidebar.header('Upload CSV File')
uploaded_file = st.sidebar.file_uploader("Choose a CSV file", type="csv")

if uploaded_file is None:
    # Nothing uploaded yet: prompt the user instead of rendering metrics.
    st.info('Please upload a CSV file to view data quality metrics.')
else:
    try:
        # Read the CSV file into a DataFrame
        data = pd.read_csv(uploaded_file)

        # Show the uploaded data
        st.subheader('Uploaded Data:')
        st.write(data)

        # Calculate DQI
        result = calculate_dqi(data)

        # calculate_dqi reports an empty dataset with a message string in
        # place of the numeric DQI. Inspect the result BEFORE unpacking, and
        # accept the message either as the bare return value or as the first
        # tuple element, so it is shown instead of an unpacking failure.
        message = result if isinstance(result, str) else result[0]

        if isinstance(message, str):
            st.error(message)
        else:
            dqi, missing_values, total_values = result

            # Display data quality metrics
            st.subheader('Data Quality Metrics:')
            st.write(f"Data Quality Index (DQI): {dqi:.2f}%")
            st.write(f"Missing Values: {missing_values}")
            st.write(f"Total Values: {total_values}")

            # Visualize DQI and Missing Values
            visualize_dqi_and_errors(dqi, missing_values)

    except Exception as e:
        # Top-level UI boundary: surface any read/parse failure to the user
        # rather than letting the app show a raw traceback.
        st.error(f"Error: {str(e)}")

2025-05-18 05:48:56.333 
  command:

    streamlit run /home/vscode/.local/lib/python3.10/site-packages/ipykernel_launcher.py [ARGUMENTS]
