# Cybersecurity news
Interactive exploratory notebook for trend analysis and marketing-ready visuals.


In [None]:
import os
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
# The article-level export is the one mandatory input: a missing file raises here.
articles = pd.read_csv('cybersecurity-news-en-title-3000.csv')

# The two weekly aggregates are optional. When a file is absent the name is
# still bound (to an empty frame) so later cells can test `.empty`.
weekly_path, peaks_path = 'weekly_counts.csv', 'weekly_peak_headlines.csv'

if os.path.exists(weekly_path):
    weekly = pd.read_csv(weekly_path)
else:
    weekly = pd.DataFrame()

if os.path.exists(peaks_path):
    peaks = pd.read_csv(peaks_path)
else:
    peaks = pd.DataFrame()

# Preview the first rows of the raw articles table.
articles.head()


In [None]:
# Parse publication timestamps as UTC; values that cannot be parsed become NaT.
parsed_dates = pd.to_datetime(articles['pub_date'], errors='coerce', utc=True)

# Keep only the rows whose timestamp parsed successfully.
articles = articles.assign(pub_date=parsed_dates).dropna(subset=['pub_date']).copy()

# Drop the timezone (after pinning it to UTC) so the column is tz-naive.
naive_utc = articles['pub_date'].dt.tz_convert('UTC').dt.tz_localize(None)
articles['pub_date'] = naive_utc

# Bucket each article into its weekly period and store that period's start.
articles['week'] = naive_utc.dt.to_period('W').dt.start_time

articles.shape


In [None]:
keyword_columns = ["mentions_ransomware", "mentions_ddos", "mentions_data_breach", "mentions_vulnerability", "mentions_hacking", "mentions_cyber_attack"]


def _shorten_headline(text, limit=110):
    """Return the string Series *text* with over-long entries truncated.

    Missing values become ''. Entries longer than *limit* characters are cut
    to ``limit - 3`` characters followed by '...'; shorter ones are unchanged.
    """
    text = text.fillna('').astype(str)
    return text.where(text.str.len() <= limit, text.str.slice(0, limit - 3) + '...')


# Chart only the flags the weekly table actually provides: melting a column
# that is absent from the CSV would raise KeyError.
present_flags = [col for col in keyword_columns if col in weekly.columns]

if not weekly.empty and present_flags:
    weekly['week'] = pd.to_datetime(weekly['week'])

    # Wide -> long: one row per (week, mention flag) with its article count.
    long_df = weekly.melt(
        id_vars=['week'],
        value_vars=present_flags,
        var_name='mention_flag',
        value_name='count'
    )
    long_df['count'] = pd.to_numeric(long_df['count'], errors='coerce').fillna(0).astype(int)

    # Share of each flag within the week's total across all charted flags.
    long_df['week_total'] = long_df.groupby('week')['count'].transform('sum')
    long_df['share_pct'] = np.where(long_df['week_total'] > 0, (long_df['count'] / long_df['week_total']) * 100, 0.0)

    # Week-over-week change per flag. The first week of a flag (NaN) and any
    # change from a zero count (inf) are both reported as 0.
    long_df = long_df.sort_values(['mention_flag', 'week']).reset_index(drop=True)
    long_df['wow_pct'] = (
        long_df.groupby('mention_flag')['count']
        .pct_change()
        .replace([np.inf, -np.inf], np.nan)
        .fillna(0.0) * 100
    )

    # '1.0' covers 0/1 flag columns that pandas parsed as float.
    truthy_values = ['true', '1', '1.0']

    # One representative article per (week, flag): the earliest flagged
    # article that has a title.
    weekly_rep_parts = []
    for flag in present_flags:
        if flag not in articles.columns:
            continue
        flagged = articles[articles[flag].astype(str).str.lower().isin(truthy_values)]
        # Select a whole row per week. GroupBy.first() would instead take the
        # first non-null value of each column independently, which can pair
        # one article's headline with another article's source.
        rep = (
            flagged.dropna(subset=['title'])
            .sort_values('pub_date', kind='stable')
            .drop_duplicates(subset='week', keep='first')
            [['week', 'title', 'source_title']]
            .copy()
        )
        rep['mention_flag'] = flag
        weekly_rep_parts.append(rep)

    if weekly_rep_parts:
        weekly_rep = pd.concat(weekly_rep_parts, ignore_index=True)
        long_df = long_df.merge(weekly_rep, on=['week', 'mention_flag'], how='left')
    else:
        long_df['title'] = ''
        long_df['source_title'] = ''

    long_df['title'] = long_df['title'].fillna('')
    long_df['source_title'] = long_df['source_title'].fillna('')
    long_df['headline_short'] = _shorten_headline(long_df['title'])

    fig = px.line(
        long_df,
        x='week',
        y='count',
        color='mention_flag',
        markers=True,
        custom_data=[
            'share_pct',
            'wow_pct',
            'headline_short',
            'source_title',
        ],
        title='Weekly Mention Counts (Interactive)'
    )

    # customdata indices below follow the order of `custom_data` above.
    fig.update_traces(
        hovertemplate=(
            '<b>Mention: %{fullData.name}</b><br>'
            'Week: %{x|%Y-%m-%d}<br>'
            'Count: %{y:,}<br>'
            'Share: %{customdata[0]:.1f}%<br>'
            'Week-over-week change: %{customdata[1]:+.1f}%<br>'
            'Example headline: %{customdata[2]}<br>'
            'Source: %{customdata[3]}'
            '<extra></extra>'
        ),
        line={'width': 2.6},
        marker={'size': 6},
    )

    if not peaks.empty:
        # `peaks` is updated in place, so later cells see the parsed dates
        # and the shortened headline column as well.
        peaks['peak_week'] = pd.to_datetime(peaks['peak_week'])
        peaks['headline_short'] = _shorten_headline(peaks['representative_headline'])
        custom = peaks[['headline_short', 'source_title']].fillna('').to_numpy()
        # Overlay every peak as a diamond marker in a single extra trace.
        fig.add_trace(go.Scatter(
            x=peaks['peak_week'],
            y=peaks['peak_count'],
            mode='markers',
            marker=dict(size=10, symbol='diamond', line=dict(width=1)),
            name='Peak week',
            customdata=custom,
            text=peaks['mention_flag'],
            hovertemplate=(
                '<b>Mention: %{text}</b><br>'
                'Week: %{x|%Y-%m-%d}<br>'
                'Count: %{y}<br>'
                'Example headline: %{customdata[0]}<br>'
                'Source: %{customdata[1]}'
                '<extra></extra>'
            )
        ))

    fig.update_layout(
        template='plotly_white',
        hovermode='closest',
        legend_title_text='',
        xaxis_title='Week',
        yaxis_title='Article count',
        font=dict(size=13),
        margin=dict(l=30, r=30, t=70, b=30),
        hoverlabel=dict(bgcolor='white', font_size=12),
    )
    fig.update_xaxes(showgrid=True, gridcolor='#EFEFEF')
    fig.update_yaxes(showgrid=True, gridcolor='#EFEFEF')
    fig.show()
else:
    print('No weekly data available for interactive mention chart.')


In [None]:
# Count articles per source and keep the 20 most frequent ones.
source_counts = articles['source_title'].value_counts().iloc[:20]
top_sources = pd.DataFrame({
    'source_title': source_counts.index,
    'articles': source_counts.to_numpy(),
})

# Horizontal bars, shaded by the same count that sets their length.
fig = px.bar(
    top_sources,
    y='source_title',
    x='articles',
    color='articles',
    color_continuous_scale='Blues',
    orientation='h',
    title='Top 20 Sources by Article Count (Interactive)',
)

# Order the bars by total and hide the colour bar.
fig.update_layout(
    template='plotly_white',
    yaxis=dict(categoryorder='total ascending'),
    coloraxis_showscale=False,
    margin={'l': 30, 'r': 20, 't': 60, 'b': 20},
)
fig.show()


In [None]:
# Columns shown in the peak-week summary table.
peak_columns = ['mention_flag', 'peak_week', 'peak_count', 'representative_headline', 'source_title', 'article_link']

if peaks.empty:
    print('No peak headline rows generated for this dataset.')
    peak_table = None
else:
    peak_table = peaks[peak_columns]

# A notebook renders only the last top-level expression of a cell; a bare
# expression inside an `if` body is evaluated and discarded. Ending the cell
# with the table itself makes it display (None renders nothing).
peak_table
