## Build a Data Quality Dashboard

**Description**: Create a simple dashboard that displays data quality metrics using a library like `dash` or `streamlit`.

**Steps:**
1. Install the dependencies: `pip install streamlit pandas matplotlib`
2. Create a Python script named `dashboard.py`.
3. Run the dashboard: `streamlit run dashboard.py`

In [2]:

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt

# Function to calculate Data Quality Index (DQI)
def calculate_dqi(data):
    """Compute the Data Quality Index (DQI) of a DataFrame.

    The DQI is the percentage of cells that are not null, i.e.
    ``100 - missing_values / total_values * 100``.

    Args:
        data: pandas DataFrame to inspect.

    Returns:
        A 3-tuple ``(dqi, missing_values, total_values)`` where ``dqi`` is a
        ``float`` and both counts are ``int``. If the frame contains no
        cells, the first element is an error-message string and both counts
        are 0, so callers can detect failure with
        ``isinstance(result[0], str)``.
    """
    # ``DataFrame.empty`` is True whenever any axis has length 0, which is
    # exactly the ``size == 0`` case, so this one guard also protects the
    # division below.
    if data.empty:
        return "Error: No data available in the file.", 0, 0

    # Convert to native Python numbers so callers do not receive NumPy
    # scalar types.
    total_values = int(data.size)
    missing_values = int(data.isnull().sum().sum())

    dqi = 100.0 - (missing_values / total_values * 100.0)
    return float(dqi), missing_values, total_values

# Function to validate the uploaded file
def validate_file(uploaded_file, required_columns=("ColumnName1", "ColumnName2")):
    """Read an uploaded CSV and check that it has the required columns.

    Args:
        uploaded_file: A path or file-like object accepted by
            ``pandas.read_csv``, or ``None`` when nothing was uploaded.
        required_columns: Iterable of column names that must all be present
            in the parsed data. Defaults to the placeholder names
            ``ColumnName1`` and ``ColumnName2``; pass the real schema of
            your dataset, or an empty iterable / ``None`` to skip the check.

    Returns:
        The parsed ``pandas.DataFrame`` on success, otherwise a ``str``
        describing the problem. Callers distinguish the two cases with
        ``isinstance(result, str)``.
    """
    if uploaded_file is None:
        return "No file uploaded"

    try:
        # Only the read can fail here, so keep the try body to this call.
        data = pd.read_csv(uploaded_file)
    except Exception as e:
        # Deliberately broad: any read/parse failure is reported to the
        # caller as a message rather than crashing the app.
        return f"Error reading the file: {e}"

    if any(column not in data.columns for column in (required_columns or ())):
        return "Error: Missing required columns in the dataset."

    return data

# Function to visualize DQI and errors using a bar plot
def visualize_dqi_and_errors(dqi, missing_values):
    """Render a two-bar chart of the DQI and the missing-value count.

    Both bars share one y-axis, so the DQI bar is a percentage and the
    missing-values bar is an absolute count (hence the axis label
    'Percentage / Count').

    Args:
        dqi: Data Quality Index as a percentage.
        missing_values: Number of missing cells.

    Returns:
        None. The chart is drawn into the Streamlit app via ``st.pyplot``.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        # Create bar chart
        labels = ['Data Quality Index (DQI)', 'Missing Values']
        values = [dqi, missing_values]

        ax.bar(labels, values, color=['green', 'red'])
        ax.set_ylabel('Percentage / Count')
        ax.set_title('Data Quality Index and Missing Values')

        # Annotate each bar with its value, placed slightly above the bar top.
        ax.text(0, dqi + 5, f'{dqi:.2f}%', ha='center', color='black')
        ax.text(1, missing_values + 5, f'{missing_values}', ha='center', color='black')

        # Show the plot
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is closed.
        # Close this one even if rendering fails, so repeated calls do not
        # accumulate open figures.
        plt.close(fig)

# Streamlit App Layout
def main():
    """Lay out the dashboard: upload a CSV, validate it, and show its metrics.

    The page shows an error and stops if validation fails or if the DQI
    cannot be computed. Otherwise it shows the uploaded data, the three
    quality metrics, and the bar chart.
    """
    st.title('Data Quality Dashboard')

    # The upload widget lives in the sidebar.
    st.sidebar.header('Upload CSV File')
    csv_upload = st.sidebar.file_uploader("Choose a CSV file", type="csv")

    # validate_file returns a message string on failure, a DataFrame otherwise.
    outcome = validate_file(csv_upload)
    if isinstance(outcome, str):
        st.error(outcome)
        return

    st.subheader('Uploaded Data:')
    st.write(outcome)

    # calculate_dqi signals failure by returning a string as its first element.
    score, n_missing, n_total = calculate_dqi(outcome)
    if isinstance(score, str):
        st.error(score)
        return

    st.subheader('Data Quality Metrics:')
    metric_lines = (
        f"Data Quality Index (DQI): {score:.2f}%",
        f"Missing Values: {n_missing}",
        f"Total Values: {n_total}",
    )
    for metric_line in metric_lines:
        st.write(metric_line)

    # Chart the DQI next to the missing-value count.
    visualize_dqi_and_errors(score, n_missing)

# Build the dashboard only when this file is executed as a script (e.g. via
# `streamlit run dashboard.py`), not when it is imported as a module.
if __name__ == "__main__":
    main()

