## Data Quality Dashboard in Python

**Description**: Create a basic dashboard using a Python library (e.g., Plotly Dash) to visualize data quality metrics for a given dataset.

In [5]:
pip install pandas dash

Note: you may need to restart the kernel to use updated packages.


In [6]:
import pandas as pd
import numpy as np

data = {
    'ID': range(1, 11),
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve', 'Frank', 'Grace', 'Heidi', 'Ivan', 'Judy'],
    'Age': [25, 30, np.nan, 40, 28, 35, np.nan, 22, 31, 27],
    'Income': [50000, 60000, 45000, 70000, np.nan, 55000, 62000, 48000, 68000, 52000],
    'City': ['Bangalore', 'Mumbai', 'Chennai', 'Bangalore', 'Delhi', 'Mumbai', 'Chennai', np.nan, 'Delhi', 'Bangalore'],
    'Score': [0.85, 0.92, 0.78, 0.88, 0.95, 0.80, 0.90, 0.75, np.nan, 0.82]
}
df = pd.DataFrame(data)

In [7]:
from dash import Dash, dcc, html
from dash.dependencies import Input, Output
import plotly.express as px

app = Dash(__name__)

def calculate_data_quality(df):
    """Calculates data quality metrics."""
    total_rows = len(df)
    missing_percentage = (df.isnull().sum() / total_rows) * 100
    data_types = df.dtypes.astype(str)
    unique_counts = df.nunique()
    return missing_percentage, data_types, unique_counts

missing_percentage, data_types, unique_counts = calculate_data_quality(df)

app.layout = html.Div(children=[
    html.H1(children='Data Quality Dashboard'),

    html.Div(children='''
        A basic dashboard to visualize data quality metrics.
    '''),

    # Missing Value Percentage Bar Chart
    dcc.Graph(
        id='missing-value-chart',
        figure=px.bar(
            x=missing_percentage.index,
            y=missing_percentage.values,
            labels={'y': 'Missing Percentage', 'index': 'Column'},
            title='Missing Value Percentage per Column'
        )
    ),

    # Data Type Table
    html.Div([
        html.H3("Data Types per Column"),
        html.Table(
            # Header
            [html.Tr([html.Th(col) for col in ['Column', 'Data Type']])] +
            # Body
            [html.Tr([html.Td(col), html.Td(data_types[col])]) for col in data_types.index]
        )
    ], style={'margin-top': '20px'}),

    # Unique Value Count Bar Chart
    dcc.Graph(
        id='unique-value-chart',
        figure=px.bar(
            x=unique_counts.index,
            y=unique_counts.values,
            labels={'y': 'Number of Unique Values', 'index': 'Column'},
            title='Number of Unique Values per Column'
        )
    ),
])

if __name__ == '__main__':
    app.run_server(debug=True)

ObsoleteAttributeException: app.run_server has been replaced by app.run

In [None]:
python data_quality_dashboard.py