## Build a Data Quality Dashboard

**Description**: Create a simple dashboard that displays data quality metrics using a library like `dash` or `streamlit`.

**Steps:**
1. Install Streamlit: pip install streamlit
2. Create a Python script dashboard.py.
3. Run the dashboard: streamlit run dashboard.py

In [1]:
pip install streamlit
streamlit run dashboard.py

SyntaxError: invalid syntax (2922879143.py, line 1)

In [None]:
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns  # For potentially better-looking plots

# --- (Reusing the data quality calculation function from the previous step) ---
def calculate_dqi(file_path):
    """
    Reads data from a CSV file, identifies missing values as errors,
    calculates the Data Quality Index (DQI), and returns the DQI
    and the number of errors, along with per-column error details.

    Args:
        file_path (str): The path to the CSV file.

    Returns:
        tuple: A tuple containing the DQI (float), the total
               number of errors (int), a list of column names (list),
               and a dictionary of error counts per column (dict).
    """
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        st.error(f"Error: File not found at {file_path}")
        return None, None, None, None

    total_cells = df.size
    missing_values_count = df.isnull().sum().sum()
    error_count = missing_values_count  # Treating missing values as errors
    valid_cells = total_cells - error_count

    if total_cells > 0:
        dqi = (valid_cells / total_cells) * 100
    else:
        dqi = 0

    return dqi, error_count, df.columns.tolist(), df.isnull().sum().to_dict()

# --- Streamlit Dashboard ---
st.title("Data Quality Dashboard")

# Allow the user to upload a CSV file
uploaded_file = st.file_uploader("Upload your CSV file", type="csv")

if uploaded_file is not None:
    # Read the uploaded CSV file into a pandas DataFrame
    try:
        df = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"Error reading CSV file: {e}")
        df = None

    if df is not None:
        # Calculate DQI and error metrics
        dqi, total_errors, columns, errors_per_column = calculate_dqi(uploaded_file)

        if dqi is not None:
            st.subheader("Overall Data Quality")
            st.metric("Data Quality Index (DQI)", f"{dqi:.2f}%")
            st.metric("Total Errors (Missing Values)", total_errors)

            st.subheader("Errors per Column")

            # Display errors per column in a table
            errors_df = pd.DataFrame(list(errors_per_column.items()), columns=['Column', 'Missing Values'])
            st.dataframe(errors_df)

            # Create a bar chart of errors per column using Matplotlib
            st.subheader("Visualization of Missing Values per Column")
            fig_col_errors, ax_col_errors = plt.subplots()
            sns.barplot(x=list(errors_per_column.keys()), y=list(errors_per_column.values()), ax=ax_col_errors)
            ax_col_errors.set_xlabel("Columns")
            ax_col_errors.set_ylabel("Number of Missing Values")
            plt.xticks(rotation=45, ha='right')
            plt.tight_layout()
            st.pyplot(fig_col_errors)

            # Optional: Display the first few rows of the dataframe
            st.subheader("Sample Data")
            st.dataframe(df.head())

ModuleNotFoundError: No module named 'streamlit'