In [1]:
!pip install dash jupyter_dash pandas plotly



In [4]:
from dash import Dash, dcc, html
import pandas as pd
import plotly.express as px
from jupyter_dash import JupyterDash

# Read the raw breach report into a DataFrame
df = pd.read_csv("breach_report.csv")

# Parse the submission dates; unparseable values are coerced to NaT
date_col = 'Breach Submission Date'
df[date_col] = pd.to_datetime(df[date_col], errors='coerce')

# Replace missing categorical values with a readable placeholder for plotting
placeholders = {
    'Covered Entity Type': 'Unknown Entity',
    'Type of Breach': 'Unknown Breach',
    'Location of Breached Information': 'Unknown Location',
}
for column, placeholder in placeholders.items():
    df[column] = df[column].fillna(placeholder)

# ===== Graph 1: the ten states with the most breaches =====
# Count rows per state, keep the ten largest counts, and flatten the
# result into a two-column frame (State, Count).
state_counts = (
    df['State']
    .value_counts()
    .nlargest(10)
    .rename_axis('State')
    .reset_index(name='Count')
)
fig1 = px.bar(
    state_counts,
    x='State',
    y='Count',
    text='Count',
    title='Top 10 States by Number of Breaches',
)
# Place each count label outside (above) its bar
fig1.update_traces(textposition='outside')

# ===== Graph 2: share of the five most frequent breach types =====
# Count rows per breach type and keep the five largest counts as a
# two-column frame (Type of Breach, Count).
breach_counts = (
    df['Type of Breach']
    .value_counts()
    .nlargest(5)
    .rename_axis('Type of Breach')
    .reset_index(name='Count')
)
fig2 = px.pie(
    breach_counts,
    names='Type of Breach',
    values='Count',
    title='Top 5 Breach Types',
)

# ===== Graph 3: ten breach types with the highest mean individuals affected =====
# Mean of 'Individuals Affected' within each breach type
mean_by_type = df.groupby('Type of Breach')['Individuals Affected'].mean()
# Keep the ten largest means and turn the index back into a column
avg_affected = mean_by_type.nlargest(10).reset_index()
fig3 = px.bar(
    avg_affected,
    x='Type of Breach',
    y='Individuals Affected',
    text='Individuals Affected',
    title='Top 10 Breach Types by Avg Individuals Affected',
)
# Show the labels outside the bars, formatted with no decimal places
fig3.update_traces(textposition='outside', texttemplate='%{text:.0f}')

# ===== Graph 4: five most common locations of breached information =====
# Count rows per location and keep the five largest counts as a
# two-column frame (Location of Breached Information, Count).
loc_counts = (
    df['Location of Breached Information']
    .value_counts()
    .nlargest(5)
    .rename_axis('Location of Breached Information')
    .reset_index(name='Count')
)
fig4 = px.bar(
    loc_counts,
    x='Location of Breached Information',
    y='Count',
    text='Count',
    title='Top 5 Locations of Breached Information',
)
# Place each count label outside (above) its bar
fig4.update_traces(textposition='outside')

# ===== Graph 5: monthly number of breaches =====
# Bucket every submission date into its calendar month (a pandas Period)
submission_month = df['Breach Submission Date'].dt.to_period('M')
# Count rows per month, then cast the Period column to plain strings
# before handing the frame to Plotly
breaches_over_time = (
    df.groupby(submission_month)
    .size()
    .reset_index(name='Count')
    .astype({'Breach Submission Date': str})
)
fig5 = px.line(
    breaches_over_time,
    x='Breach Submission Date',
    y='Count',
    title='Breaches Over Time',
)

# ===== Graph 6: treemap of entity type -> breach type =====
# Hierarchy levels from outermost to innermost; tiles are sized by the
# 'Individuals Affected' column.
treemap_levels = ['Covered Entity Type', 'Type of Breach']
fig6 = px.treemap(
    df,
    path=treemap_levels,
    values='Individuals Affected',
    title='Treemap: Entity Type and Breach Type',
)

# ===== Assemble the dashboard =====
app = Dash(__name__)

# Figures in the order they should appear on the page
dashboard_figures = (fig1, fig2, fig3, fig4, fig5, fig6)

# Page layout: a heading followed by one graph component per figure
app.layout = html.Div(children=[
    html.H1('Healthcare Data Breach Dashboard'),
    *(dcc.Graph(figure=figure) for figure in dashboard_figures),
])

# ===== Run app in Jupyter Notebook =====
# `app` is a plain dash.Dash instance, and Dash.run() has no `mode`
# parameter: that keyword belonged to the deprecated
# JupyterDash.run_server(). Passing it to Dash.run() forwards it to the
# underlying Flask server as an unknown option and raises a TypeError.
# The built-in notebook support (Dash >= 2.11) is selected with
# `jupyter_mode`; 'inline' renders the app in the cell output.
if __name__ == '__main__':
    app.run(jupyter_mode='inline', port=8051, debug=True)
