# Wikipedia Current Events — Data Pull
Fetches the monthly curated event lists from Wikipedia's Current Events Portal
and saves them as structured CSV data for downstream analysis.

## 1. Setup

In [1]:
from _notebook_setup import *

# hit_api: fetch from the Wikipedia helper when True, otherwise load the cached CSV.
# save_output: when fetching, also persist the result to the cached CSV.
hit_api = True
save_output = True

# Date range - adjust as needed
START_YEAR  = 2024
START_MONTH = 1

# End at the last *completed* calendar month. The clock is read once so year and
# month cannot disagree, and the year is rolled back when the current month is
# January (otherwise the range would end in December of the current year).
_today = datetime.now()
if _today.month == 1:
    END_YEAR, END_MONTH = _today.year - 1, 12
else:
    END_YEAR, END_MONTH = _today.year, _today.month - 1

‚úÖ Notebook setup complete!
‚úÖ Available APIs: wikipedia
‚úÖ Available libraries: pd, np, plt, sns, px, go, datetime
‚úÖ Helper functions: save_data(), load_data(), save_plotly_figure()
üìÅ Project directory: /Users/annebode/dev/selfevidence.github.io/projects/news_tracker
üìÅ Output directory:  /Users/annebode/dev/selfevidence.github.io/projects/news_tracker/output
üìä Ready for analysis!


## 2. Fetch Data

In [None]:
# Either pull fresh data through the Wikipedia helper (optionally caching it to
# CSV) or reuse the previously cached CSV.
if hit_api:
    df_events = wikipedia.get_months(
        start_year=START_YEAR,
        start_month=START_MONTH,
        end_year=END_YEAR,
        end_month=END_MONTH,
    )
    if save_output:
        save_data(df=df_events, filename='00_wikipedia_events.csv')
else:
    df_events = load_data(filename='00_wikipedia_events.csv')

# Normalise dtypes on BOTH paths: everything below relies on the `.dt` accessor,
# so `date` must be datetime64 whether it came from the API or from the CSV.
# pd.to_datetime leaves an already-datetime column unchanged.
df_events['date'] = pd.to_datetime(df_events['date'])
df_events['category'] = df_events['category'].str.lower()

n_months = df_events['date'].dt.to_period('M').nunique()
print(f"\n{len(df_events)} total events across {n_months} months")
df_events.head(10)

Unnamed: 0,date,year,month,category,sub_topic,description,wiki_links,sources
0,2024-01-01,2024,1,armed conflicts and attacks,Israel‚ÄìHamas war,"The IDF withdraws five brigades , consisting o...",Israel Defense Force|Brigade|Israeli Ground Fo...,Al Jazeera
1,2024-01-01,2024,1,armed conflicts and attacks,Israel‚ÄìHamas war,At least 27 rockets are fired by Hamas shortly...,Hamas|Central District (Israel)|Southern Distr...,Times of Israel |Ynet
2,2024-01-01,2024,1,armed conflicts and attacks,Nagorno-Karabakh conflict,The Republic of Artsakh officially ceases to e...,Republic of Artsakh|President of Artsakh|Samve...,Interfax
3,2024-01-01,2024,1,armed conflicts and attacks,Ambush,Six people are killed during an ambush in the ...,Ambush|Abyei|Sudan|South Sudan,Al Jazeera
4,2024-01-01,2024,1,arts and culture,Public Domain Day,"The animated short film Steamboat Willie , the...",Steamboat Willie|German language|All Quiet on ...,Mashable
5,2024-01-01,2024,1,arts and culture,Abu Dhabi,"The Sheikh Zayed Festival at Abu Dhabi , Unite...",Abu Dhabi|United Arab Emirates|Guinness World ...,Gulf News
6,2024-01-01,2024,1,disasters and accidents,2024 Noto earthquake,A magnitude 7.6 earthquake strikes the Noto Pe...,Richter scale|List of earthquakes in Japan|Not...,Reuters
7,2024-01-01,2024,1,disasters and accidents,Graz,One person is killed and 21 others are injured...,Graz|Austria|New Year's Eve,AP
8,2024-01-01,2024,1,international relations,Foreign relations of Ethiopia,Ethiopia announces an agreement with Somalilan...,Ethiopia|Somaliland|Berbera|Somaliland Declara...,Reuters
9,2024-01-01,2024,1,international relations,Egypt,"Egypt , Ethiopia , Iran , Saudi Arabia , and t...",Egypt|Ethiopia|Iran|Saudi Arabia|United Arab E...,Tehran Times


## 3. Explore

In [3]:
# Monthly event totals: bucket each event by the calendar month of its date.
(
    df_events
    .groupby(df_events['date'].dt.to_period('M'))
    .size()
    .rename('event_count')
)

date
2024-01    314
2024-02    263
2024-03    306
2024-04    324
2024-05    503
2024-06    486
2024-07    551
2024-08    578
2024-09    477
2024-10    409
2024-11    464
2024-12    615
2025-01    572
2025-02    439
2025-03    458
2025-04    411
2025-05    432
2025-06    384
2025-07    370
2025-08    372
2025-09    595
2025-10    495
2025-11    495
2025-12    403
2026-01    436
Freq: M, Name: event_count, dtype: int64

In [11]:
# Overall distribution of events across categories (most frequent first)
df_events.loc[:, 'category'].value_counts()

category
armed conflicts and attacks    3524
law and crime                  1953
disasters and accidents        1909
politics and elections         1444
international relations         911
business and economy            414
sports                          370
science and technology          225
health and environment          209
arts and culture                187
attacks and armed conflicts       3
entertainment                     1
disaster and accidents            1
disasters and incidents           1
Name: count, dtype: int64

In [12]:
# Rare spelling variants of a category, mapped onto the canonical label.
category_aliases = {
    'attacks and armed conflicts': 'armed conflicts and attacks',
    'entertainment': 'arts and culture',
    'disaster and accidents': 'disasters and accidents',
    'disasters and incidents': 'disasters and accidents',
}

# Work on a copy so the raw frame stays untouched; labels without an alias
# pass through unchanged.
df_events_clean = df_events.copy()
df_events_clean['category'] = df_events_clean['category'].map(
    lambda label: category_aliases.get(label, label)
)
df_events_clean['category'].value_counts()

category
armed conflicts and attacks    3527
law and crime                  1953
disasters and accidents        1911
politics and elections         1444
international relations         911
business and economy            414
sports                          370
science and technology          225
health and environment          209
arts and culture                188
Name: count, dtype: int64

In [None]:
# Inspect the events of one month (given as 'YYYY-MM')
sample_month = '2024-06'
in_sample_month = df_events_clean['date'].dt.to_period('M').astype(str) == sample_month
df_events_clean.loc[in_sample_month, ['date', 'category', 'description']]

Unnamed: 0,date,category,description
1710,2024-06-01,Armed conflicts and attacks,Five people and a Hezbollah militant killed an...
1711,2024-06-01,Armed conflicts and attacks,The Sudanese Armed Forces bomb a hospital in K...
1712,2024-06-01,Armed conflicts and attacks,Around eleven civilians are killed and 42 othe...
1713,2024-06-01,Armed conflicts and attacks,Sudan ‚Äôs Ambassador to Russia confirms willing...
1714,2024-06-01,Armed conflicts and attacks,Russia launches missile and drone strikes acro...
...,...,...,...
2191,2024-06-30,Politics and elections,French citizens vote in the first round of leg...
2192,2024-06-30,Politics and elections,Uruguayans vote to elect the presidential cand...
2193,2024-06-30,Politics and elections,Thousands of Haredi Jewish men protest in Jeru...
2194,2024-06-30,Politics and elections,The Bulgarian Orthodox Church elects Metropoli...


In [14]:
# Show every event recorded on one specific day
df_events_clean.loc[df_events_clean['date'] == '2026-01-15']

Unnamed: 0,date,year,month,category,sub_topic,description,wiki_links,sources
10905,2026-01-15,2026,1,armed conflicts and attacks,Operation Southern Spear,The United States Coast Guard boards and seize...,United States|United States Coast Guard|Guyana...,The Guardian
10906,2026-01-15,2026,1,business and economy,Frigidaire,"Frigidaire issues a recall for 330,000 mini-fr...",Frigidaire|Product recall|Refrigerator,AP
10907,2026-01-15,2026,1,disasters and accidents,2026 Utrecht explosions,At least four people are injured and several b...,Utrecht|Netherlands|Gas explosion,NOS in Dutch |AFP via Al Arabiya|BBC News
10908,2026-01-15,2026,1,disasters and accidents,South Africa,At least 19 people are killed and hundreds are...,South Africa|Provinces of South Africa|Limpopo...,AP
10909,2026-01-15,2026,1,disasters and accidents,Crane (machine),Two people are killed when a construction cran...,Crane (machine)|Rama II Road|Samut Sakhon|Thai...,BBC News|Reuters
10910,2026-01-15,2026,1,international relations,Greenland crisis,French president Emmanuel Macron announces the...,France|President of France|Emmanuel Macron|Fre...,France 24 in French
10911,2026-01-15,2026,1,international relations,Japan‚ÄìPhilippines relations,Japan and the Philippines sign a defense pact ...,Japan|Philippines|Defense pact|Ammunition,AP
10912,2026-01-15,2026,1,international relations,United Arab Emirates‚ÄìYemen relations,Faraj Al-Bahsani is dismissed from Yemen 's Pr...,Faraj Al-Bahsani|Yemen|Presidential Leadership...,The New Arab
10913,2026-01-15,2026,1,politics and elections,2026 Ugandan general election,Ugandans vote to elect their president and 529...,Ugandans|President of Uganda|Parliament of Uganda,AP
10914,2026-01-15,2026,1,politics and elections,Singapore,Singaporean prime minister Lawrence Wong resci...,Singapore|Prime Minister of Singapore|Lawrence...,CNA


In [17]:
# The 20 most frequent sub-topics across the whole date range
df_events_clean['sub_topic'].value_counts().iloc[:20]

sub_topic
Russian invasion of Ukraine                   588
Gaza war                                      400
Syrian civil war                              238
Israel‚ÄìHezbollah conflict (2023‚Äìpresent)      196
Israel‚ÄìHamas war                              165
Sudanese civil war (2023‚Äìpresent)             110
Middle Eastern crisis (2023‚Äìpresent)          103
Red Sea crisis                                 93
Israeli‚ÄìPalestinian conflict                   84
Somali Civil War (2009‚Äìpresent)                84
2024 Iran‚ÄìIsrael conflict                      81
Kivu conflict                                  77
Insurgency in Khyber Pakhtunkhwa               75
Middle Eastern crisis (2023-present)           60
Russo-Ukrainian war (2022‚Äìpresent)             58
2024 Summer Olympics                           55
Mexican drug war                               52
2024 United States presidential election       47
Tariffs in the second Trump administration     47
Mass shooting         

## 4. Top 25 Topics — Last 3 Months

Two approaches compared side by side:
- **Option A — Plotly `go.Table`**: consistent with existing chart stack, same iframe embed. No column sorting.
- **Option B — DataTables.js**: sortable columns, live search, pagination. Generated as a standalone HTML file that embeds via iframe exactly like the Plotly charts. The page loads jQuery and DataTables from their CDNs, because an iframe cannot reuse the jQuery already loaded by the minimal-mistakes theme.


In [None]:
# -- Colour palette (shared by both table renderers) --------------------------
CATEGORY_COLORS = {
    'armed conflicts and attacks': '#dc2626',
    'politics and elections':      '#2563eb',
    'law and crime':               '#d97706',
    'disasters and accidents':     '#7c3aed',
    'international relations':     '#0891b2',
    'business and economy':        '#16a34a',
    'science and technology':      '#0d9488',
    'health and environment':      '#65a30d',
    'arts and culture':            '#db2777',
    'sports':                      '#ea580c',
}


def _dominant_category(values):
    """Return the most frequent category in a group, or None if the group has none."""
    modes = values.mode()
    # mode() is empty when every value in the group is missing; indexing [0]
    # on it would raise, so fall back to None (rendered with the grey default).
    return modes.iat[0] if not modes.empty else None


def _format_day(dates):
    """Format datetimes as e.g. 'Jan 5'.

    Avoids the '%-d' strftime directive, which is platform-specific and is not
    accepted on Windows.
    """
    return dates.dt.strftime('%b') + ' ' + dates.dt.day.astype(str)


# -- Last 3 months -------------------------------------------------------------
# The window is anchored on the newest event in the data, not on today's date.
latest_date = df_events_clean['date'].max()
cutoff      = latest_date - pd.DateOffset(months=3)

top25 = (
    df_events_clean[
        (df_events_clean['date'] > cutoff) &
        # Drop blank sub-topics; rows with a missing sub_topic pass this test
        # but are discarded by groupby, which skips NaN keys by default.
        (df_events_clean['sub_topic'].str.strip() != '')
    ]
    .groupby('sub_topic')
    .agg(
        event_count = ('description', 'count'),
        category    = ('category',    _dominant_category),
        first_seen  = ('date', 'min'),
        last_seen   = ('date', 'max'),
    )
    .sort_values('event_count', ascending=False)
    .head(25)
    .reset_index()
)

top25.insert(0, 'rank', range(1, len(top25) + 1))
top25['period'] = (
    _format_day(top25['first_seen']) + ' – ' + _format_day(top25['last_seen'])
)
# Categories missing from the palette fall back to a neutral grey.
top25['color'] = top25['category'].map(CATEGORY_COLORS).fillna('#6b7280')

top25[['rank', 'sub_topic', 'category', 'event_count', 'period']]

### Option A — Plotly `go.Table`

In [None]:
# Geometry shared by the table trace and the figure layout, so the overall
# figure height is derived from the same numbers the table is drawn with.
TABLE_ROW_HEIGHT    = 36
TABLE_HEADER_HEIGHT = 44
TABLE_MARGIN        = dict(l=16, r=16, t=56, b=16)

n_rows = len(top25)

# Zebra striping: one fill colour per row, applied to every column.
row_fills = ['#f8fafc' if i % 2 == 0 else 'white' for i in range(n_rows)]

fig = go.Figure(data=[go.Table(
    columnwidth=[30, 260, 200, 65, 130],
    header=dict(
        values=['<b>#</b>', '<b>Topic</b>', '<b>Category</b>', '<b>Events</b>', '<b>Period</b>'],
        fill_color='#1e293b',
        font=dict(color='white', size=13),
        align=['center', 'left', 'left', 'center', 'center'],
        height=TABLE_HEADER_HEIGHT,
        line_color='#334155',
    ),
    cells=dict(
        values=[
            top25['rank'],
            top25['sub_topic'],
            top25['category'].str.title(),
            top25['event_count'],
            top25['period'],
        ],
        fill_color=[row_fills],
        font=dict(
            size=12,
            # One colour list per column, one entry per row.
            color=[
                ['#94a3b8']  * n_rows,          # rank - muted
                ['#0f172a']  * n_rows,          # topic - dark
                top25['color'].tolist(),        # category - coloured
                ['#0f172a']  * n_rows,          # count - dark
                ['#94a3b8']  * n_rows,          # period - muted
            ],
        ),
        align=['center', 'left', 'left', 'center', 'center'],
        height=TABLE_ROW_HEIGHT,
        line_color='#e2e8f0',
    ),
)])

fig.update_layout(
    title=dict(
        text=(
            '<b>Top 25 Most Covered Topics</b>'
            '<span style="font-size:13px; color:#64748b">'
            '  ·  last 3 months  ·  Wikipedia Current Events</span>'
        ),
        x=0.02, xanchor='left', font=dict(size=18),
    ),
    margin=TABLE_MARGIN,
    # Header + all rows + top/bottom margins. The previous `rows * 36 + 100`
    # was 16px short of that sum. The extra 4px is slack for cell borders
    # (NOTE(review): slack value is a guess - confirm against the rendered figure).
    height=(
        TABLE_HEADER_HEIGHT
        + n_rows * TABLE_ROW_HEIGHT
        + TABLE_MARGIN['t'] + TABLE_MARGIN['b']
        + 4
    ),
    paper_bgcolor='white',
)

save_plotly_figure(fig, filename='00_top25_topics_plotly', for_blog=True)
go.FigureWidget(fig)

### Option B — DataTables.js

In [None]:
def save_datatable(df, filename, title, columns, page_length=25, for_blog=True,
                   order=None, output_dir=None):
    """
    Generate a DataTables.js HTML page from a DataFrame and write it to disk.

    The page loads jQuery and DataTables from their public CDNs. All text taken
    from `df`, `title` and the column labels is HTML-escaped before being
    written, so characters such as '&' or '<' in the data cannot break the
    markup or inject script.

    Parameters
    ----------
    df : DataFrame whose rows become the table rows, in order.
    filename : output file stem; '.html' is appended.
    title : heading shown above the table.
    columns : list of dicts, each with:
        key        - DataFrame column name
        label      - header label
        align      - 'left' | 'center' | 'right'  (default 'left')
        badge_col  - optional: column name holding a hex colour for pill badges
    page_length : initial number of rows per page.
    for_blog : when true, also write a copy into DOCS_CHARTS_DIR and print an
        iframe embed snippet.
    order : optional DataTables `order` value, e.g. [[3, 'desc']]. When omitted,
        the fourth column is sorted descending if the table has at least four
        columns; otherwise no initial ordering is applied.
    output_dir : optional directory for the primary copy; defaults to
        FIGURES_DIR.

    Returns
    -------
    Path of the primary HTML file that was written.
    """
    import json
    from html import escape
    from pathlib import Path

    def _cell(row, col):
        # Escape every interpolated value: text goes into element content and
        # align/colour go into a double-quoted style attribute.
        val = escape(str(row[col['key']]))
        align = escape(str(col.get('align', 'left')))
        if 'badge_col' in col:
            c = escape(str(row[col['badge_col']]))
            # '{c}18' appends a two-digit hex alpha to the colour for a faint
            # tinted background behind the fully coloured text.
            val = (
                f'<span style="background:{c}18;color:{c};padding:2px 9px;'
                f'border-radius:10px;font-size:11px;font-weight:600;'
                f'white-space:nowrap;letter-spacing:.3px">{val}</span>'
            )
        return f'<td style="text-align:{align};vertical-align:middle">{val}</td>'

    rows_html = ''.join(
        '<tr>' + ''.join(_cell(row, col) for col in columns) + '</tr>'
        for _, row in df.iterrows()
    )
    headers_html = ''.join(f'<th>{escape(str(c["label"]))}</th>' for c in columns)
    title_html = escape(str(title))

    # A fixed column index of 3 makes DataTables fail on tables with fewer than
    # four columns, so only apply that default when the column exists.
    if order is None:
        order = [[3, 'desc']] if len(columns) > 3 else []
    order_js = json.dumps(order)

    page = f"""<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width,initial-scale=1">
  <link rel="stylesheet" href="https://cdn.datatables.net/1.13.7/css/jquery.dataTables.min.css">
  <style>
    *, *::before, *::after {{ box-sizing: border-box; }}
    body {{
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
      margin: 0; padding: 16px 20px 20px;
      background: #fff; color: #0f172a; font-size: 13px;
    }}
    h3 {{ margin: 0 0 14px; font-size: 15px; font-weight: 700; color: #1e293b; }}
    table.dataTable {{ border-collapse: collapse !important; width: 100% !important; }}
    table.dataTable thead th {{
      background: #1e293b; color: #fff;
      font-size: 12px; font-weight: 600; letter-spacing: .4px; text-transform: uppercase;
      border: none !important; padding: 12px 14px !important;
    }}
    table.dataTable thead th.sorting::after,
    table.dataTable thead th.sorting_asc::after,
    table.dataTable thead th.sorting_desc::after {{ opacity: .7; }}
    table.dataTable tbody tr:nth-child(even) {{ background: #f8fafc; }}
    table.dataTable tbody tr:hover {{ background: #eff6ff !important; transition: background .1s; }}
    table.dataTable tbody td {{
      padding: 9px 14px !important; border-bottom: 1px solid #e2e8f0 !important;
    }}
    .dataTables_wrapper {{  }}
    .dataTables_wrapper .dataTables_filter label,
    .dataTables_wrapper .dataTables_length label {{ font-size: 12px; color: #475569; }}
    .dataTables_wrapper .dataTables_filter input {{
      border: 1px solid #cbd5e1; border-radius: 6px;
      padding: 5px 10px; font-size: 12px; outline: none; margin-left: 6px;
    }}
    .dataTables_wrapper .dataTables_filter input:focus {{
      border-color: #3b82f6; box-shadow: 0 0 0 3px #dbeafe;
    }}
    .dataTables_wrapper .dataTables_length select {{
      border: 1px solid #cbd5e1; border-radius: 6px;
      padding: 4px 8px; font-size: 12px; margin: 0 4px;
    }}
    .dataTables_wrapper .dataTables_info {{ font-size: 12px; color: #64748b; padding-top: 10px; }}
    .dataTables_wrapper .dataTables_paginate {{ padding-top: 8px; }}
    .dataTables_wrapper .dataTables_paginate .paginate_button {{
      font-size: 12px; border-radius: 4px !important; border: none !important; padding: 4px 8px !important;
    }}
    .dataTables_wrapper .dataTables_paginate .paginate_button.current {{
      background: #2563eb !important; color: #fff !important;
    }}
    .dataTables_wrapper .dataTables_paginate .paginate_button:not(.current):hover {{
      background: #eff6ff !important; color: #1d4ed8 !important;
    }}
  </style>
</head>
<body>
  <h3>{title_html}</h3>
  <table id="dt" class="display" style="width:100%">
    <thead><tr>{headers_html}</tr></thead>
    <tbody>{rows_html}</tbody>
  </table>
  <script src="https://code.jquery.com/jquery-3.7.1.min.js"></script>
  <script src="https://cdn.datatables.net/1.13.7/js/jquery.dataTables.min.js"></script>
  <script>
    $(function() {{
      $('#dt').DataTable({{
        pageLength: {page_length},
        order: {order_js},
        language: {{ search: '', searchPlaceholder: 'Search topics…' }},
      }});
    }});
  </script>
</body>
</html>"""

    # Primary copy: FIGURES_DIR unless the caller supplied a directory.
    target_dir = FIGURES_DIR if output_dir is None else Path(output_dir)
    out_path = target_dir / f"{filename}.html"
    out_path.write_text(page, encoding='utf-8')
    print(f"💾 DataTable saved: {out_path}")

    if for_blog:
        blog_path = DOCS_CHARTS_DIR / f"{filename}.html"
        blog_path.write_text(page, encoding='utf-8')
        print(f"📝 Blog version saved: {blog_path}")
        # The quadrupled braces print a literal '{{ site.baseurl }}' Liquid tag.
        print(f'✨ Embed with:\n<iframe src="{{{{ site.baseurl }}}}/assets/charts/news_tracker/{filename}.html"'
              f' width="100%" height="680" frameborder="0"></iframe>')

    return out_path


# Render the ranked topics as a sortable DataTables page. The category column
# is drawn as a pill badge tinted with each row's 'color' value.
save_datatable(
    df=top25,
    filename='00_top25_topics_datatable',
    # Title separators are middle dots (they were mis-encoded before).
    title='Top 25 Most Covered Topics · last 3 months · Wikipedia Current Events',
    columns=[
        dict(key='rank',        label='#',        align='center'),
        dict(key='sub_topic',   label='Topic',    align='left'),
        dict(key='category',    label='Category', align='left',   badge_col='color'),
        dict(key='event_count', label='Events',   align='center'),
        dict(key='period',      label='Period',   align='center'),
    ],
    page_length=25,
    for_blog=True,
)