In [20]:
import pandas as pd
import plotly.graph_objects as go



# Load the data
df = pd.read_csv("../data/data-security-incidents-trends-q1-2019-to-q4-2024.csv") 


# Create unique incident identifier
df['Incident_ID'] = df['BI Reference'] + '_' + df['Year'].astype(str) + '_' + df['Quarter']

# Calculate yearly totals and percentages
yearly_data = []
total_all_years = df['Incident_ID'].nunique()

for year in sorted(df['Year'].unique()):
    year_data = df[df['Year'] == year]
    total_incidents = year_data['Incident_ID'].nunique()
    percentage = round((total_incidents/total_all_years * 100), 1)
    yearly_data.append([year, total_incidents, percentage])


# Create table
fig = go.Figure(data=[go.Table(
    header=dict(
        values=['Year', 'Total Incidents', 'Percentage'],
        font=dict(size=14, color='white'),
        fill_color='darkblue',
        align='center'
    ),
    cells=dict(
        values=[
            [year for year, _, _ in yearly_data],
            [total for _, total, _ in yearly_data],
            [f"{pct}%" for _, _, pct in yearly_data]
        ],
        font=dict(size=13),
        align='center',
        format=[None, ',',None],  # Add thousands separator for numbers
        height=30
    )
)])

# Update layout
fig.update_layout(
    title={
        'text': 'Yearly Frequency of Incidents</sub>',
        'y':0.85,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': dict(size=14)
    },
    width=600,
    height=400
)





# Save the figure 
fig.write_html('../figures/yearly_frequency_table.PNG')

fig.show()


In [21]:
# Frequency Distribution of Yearly Incidents
import plotly.graph_objects as go 

# Print the structure of yearly_data to debug
print("Structure of yearly_data:")
print(yearly_data)

# Extract just the year and total values
years = [item[0] for item in yearly_data]
totals = [item[1] for item in yearly_data]
perc = [item[2] for item in yearly_data]
total_sum = sum(totals)

# Create bar graph
fig = go.Figure(data=[
    go.Bar(
        x=years,  # Use simple lists instead of list comprehension
        y=totals,
        text=[f'{p}%' for p in perc],
        textposition='outside',
        marker_color='rgb(49, 130, 189)'
    )
])

# Update layout
fig.update_layout(
    title={
        'text': 'Figure 1: Yearly Distribution of Unique Incidents',
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': dict(size=14)
    },
    xaxis_title="Year",
    yaxis_title="Number of Unique Incidents",
    width=800,
    height=500,
    showlegend=False,
    plot_bgcolor='white',
    yaxis=dict(
        gridcolor='lightgrey',
        gridwidth=1
    )
)
import plotly.io as pio
import os

# Make sure the figures directory exists
os.makedirs('../figures', exist_ok=True)

# Configure kaleido
pio.kaleido.scope.chromium_args = ()

# Try saving with explicit settings
pio.write_image(
    fig,
    '../figures/yearly_frequency_table.png',
    format='png',
    engine='kaleido',
    scale=2,
    width=1200,
    height=800
)

Structure of yearly_data:
[[np.int64(2019), 11366, 18.3], [np.int64(2020), 9702, 15.6], [np.int64(2021), 9742, 15.7], [np.int64(2022), 8129, 13.1], [np.int64(2023), 11069, 17.8], [np.int64(2024), 12193, 19.6]]


ValueError: Failed to start Kaleido subprocess. Error stream:

/Users/personal/Library/CloudStorage/GoogleDrive-olga.czerwik@gmail.com/My Drive/uni/Analysis and Reporting/project/.venv/lib/python3.13/site-packages/kaleido/executable/kaleido: line 4: cd: /Users/personal/Library/CloudStorage/GoogleDrive-olga.czerwik@gmail.com/My: No such file or directory
/Users/personal/Library/CloudStorage/GoogleDrive-olga.czerwik@gmail.com/My Drive/uni/Analysis and Reporting/project/.venv/lib/python3.13/site-packages/kaleido/executable/kaleido: line 5: ./bin/kaleido: No such file or directory
