In [1]:
#imports
from bs4 import BeautifulSoup
import requests

In [2]:
# Source article: Wikipedia's list of Apple Inc. media events.
url = "https://en.wikipedia.org/wiki/List_of_Apple_Inc._media_events"

In [3]:
# Download the article. The timeout keeps a stalled connection from hanging the
# kernel, and raise_for_status() stops the notebook on a 4xx/5xx response
# instead of parsing an error page as if it were the event list.
page = requests.get(url, timeout=30)
page.raise_for_status()

# Name the parser explicitly: the generic 'html' feature resolves to whichever
# HTML parser happens to be installed, which can differ between environments.
soup = BeautifulSoup(page.text, 'html.parser')

In [4]:
# Preview every <span class="mw-headline"> element found in the parsed page.
soup.find_all("span", attrs={"class": "mw-headline"})

[<span class="mw-headline" id="1984">1984</span>,
 <span class="mw-headline" id="Macintosh_event_(January_24,_1984)">Macintosh event (January 24, 1984)</span>,
 <span class="mw-headline" id="Macintosh_event_(Boston_Computer_Society)">Macintosh event (Boston Computer Society)</span>,
 <span class="mw-headline" id="Apple_II_Forever_Event_(April_1984)">Apple II Forever Event (April 1984)</span>,
 <span class="mw-headline" id="1997">1997</span>,
 <span class="mw-headline" id="MacWorld_Expo_and_NeXT_technology_announcement">MacWorld Expo and NeXT technology announcement</span>,
 <span class="mw-headline" id="1998">1998</span>,
 <span class="mw-headline" id="Seybold_Seminars_New_York_1998">Seybold Seminars New York 1998</span>,
 <span class="mw-headline" id="May_1998_Event">May 1998 Event</span>,
 <span class="mw-headline" id="1999">1999</span>,
 <span class="mw-headline" id="Macworld_SanFrancisco_(January_5,_1999)">Macworld SanFrancisco (January 5, 1999)</span>,
 <span class="mw-headline" i

In [5]:
import re

# Accumulators for the scrape; `soup` must already hold the parsed article.

events_data = list()   # [date, event name] rows appended by the heading loop
current_year = str()   # most recent four-digit year heading ("" until one is seen)

def extract_date(event_name, current_year):
    """Pull the most specific date available out of an event heading.

    The checks run in order and the first hit wins:

    1. A full date such as ``January 24, 1984`` is returned verbatim.
    2. A month and year such as ``April 1984`` is returned verbatim, or as
       ``"April 1984 from 5 to 9"`` when a bare day range written with an
       en dash (``5–9``) also appears somewhere in the heading.
    3. A day range inside a single month such as ``June 6–10, 2005`` (en dash
       or hyphen) is returned as ``"June 2005 from 6 to 10"``.
    4. Otherwise ``current_year`` is returned, or the string ``"NULL"`` when
       ``current_year`` is empty.

    Args:
        event_name: Heading text to search for a date.
        current_year: Fallback value used when the heading holds no date.

    Returns:
        A string describing the date, as listed above.
    """
    month = (r'(?:January|February|March|April|May|June|July|August'
             r'|September|October|November|December)')

    # Regular expression patterns
    full_date_pattern = r'\b' + month + r'\s\d{1,2},\s\d{4}\b'
    month_year_pattern = r'\b' + month + r'\s\d{4}\b'
    date_range_pattern = r'\b\d{1,2}–\d{1,2}\b'
    # "June 6–10, 2005": neither pattern above matches this form, because the
    # day range sits between the month and the year.
    month_range_pattern = (r'\b(' + month + r')\s(\d{1,2})\s?[–-]\s?(\d{1,2})'
                           r',\s(\d{4})\b')

    # Try to extract the full date (e.g., January 24, 1984)
    full_date_match = re.search(full_date_pattern, event_name)
    if full_date_match:
        return full_date_match.group(0)

    # Try to extract a month and year (e.g., April 1984)
    month_year_match = re.search(month_year_pattern, event_name)
    if month_year_match:
        # Check for a bare day range elsewhere in the heading (e.g., 5–9)
        date_range_match = re.search(date_range_pattern, event_name)
        if date_range_match:
            start_date, end_date = date_range_match.group(0).split("–")
            return f"{month_year_match.group(0)} from {start_date} to {end_date}"
        return month_year_match.group(0)

    # Try a day range within one month (e.g., June 6–10, 2005). This runs last
    # so every heading matched by the checks above keeps its previous result.
    month_range_match = re.search(month_range_pattern, event_name)
    if month_range_match:
        month_name, start_date, end_date, year = month_range_match.groups()
        return f"{month_name} {year} from {start_date} to {end_date}"

    # If the event name contains no recognizable date, return the year or null
    return current_year if current_year else "NULL"

# Walk the headline spans in the order find_all returns them. A four-digit
# numeric id marks a year heading; any other heading is treated as an event
# and paired with the most recent year seen.
skipped_headings = ('See also', 'References')
for heading in soup.find_all('span', class_='mw-headline'):
    heading_id = heading['id']
    if len(heading_id) == 4 and heading_id.isdigit():
        current_year = heading_id
        continue

    event_name = heading.text
    if event_name in skipped_headings:
        continue

    date = extract_date(event_name, current_year)
    print(event_name)
    print(date)
    events_data.append([date, event_name])

Macintosh event (January 24, 1984)
January 24, 1984
Macintosh event (Boston Computer Society)
1984
Apple II Forever Event (April 1984)
April 1984
MacWorld Expo and NeXT technology announcement
1997
Seybold Seminars New York 1998
1998
May 1998 Event
May 1998
Macworld SanFrancisco (January 5, 1999)
January 5, 1999
WWDC (May 1999)
May 1999
Macworld Expo NY '99 (July 21, 1999)
July 21, 1999
Seybold 1999
1999
Apple special event (October 5, 1999)
October 5, 1999
Macworld 2000 (San Francisco)
2000
Macworld Expo, New York (July 19, 2000)
July 19, 2000
Macworld 2001 (San Francisco)
2001
Macworld 2002 San Francisco (January 7, 2002)
January 7, 2002
Macworld 2002 New York (July 17, 2002)
July 17, 2002
Apple Expo 2002, Paris (September 10, 2002)
September 10, 2002
Macworld 2003 San Francisco (January 20, 2003)
January 20, 2003
WWDC 2003 (June 23, 2003)
June 23, 2003
Apple Expo Paris 2003 (September 16, 2003)
September 16, 2003
Macworld 2004
2004
Music Event London (June 15, 2004)
June 15, 2004
WW

In [6]:
# Show every collected [date, event name] row.
print(events_data)

[['January 24, 1984', 'Macintosh event (January 24, 1984)'], ['1984', 'Macintosh event (Boston Computer Society)'], ['April 1984', 'Apple II Forever Event (April 1984)'], ['1997', 'MacWorld Expo and NeXT technology announcement'], ['1998', 'Seybold Seminars New York 1998'], ['May 1998', 'May 1998 Event'], ['January 5, 1999', 'Macworld SanFrancisco (January 5, 1999)'], ['May 1999', 'WWDC (May 1999)'], ['July 21, 1999', "Macworld Expo NY '99 (July 21, 1999)"], ['1999', 'Seybold 1999'], ['October 5, 1999', 'Apple special event (October 5, 1999)'], ['2000', 'Macworld 2000 (San Francisco)'], ['July 19, 2000', 'Macworld Expo, New York (July 19, 2000)'], ['2001', 'Macworld 2001 (San Francisco)'], ['January 7, 2002', 'Macworld 2002 San Francisco (January 7, 2002)'], ['July 17, 2002', 'Macworld 2002 New York (July 17, 2002)'], ['September 10, 2002', 'Apple Expo 2002, Paris (September 10, 2002)'], ['January 20, 2003', 'Macworld 2003 San Francisco (January 20, 2003)'], ['June 23, 2003', 'WWDC 200

In [8]:
import csv
# Save the collected rows to a CSV file, with the column header as the first row.
csv_file_path = "apple_events.csv"
header_row = ["Date", "Event Name"]
with open(csv_file_path, mode='w', newline='', encoding='utf-8') as csv_out:
    csv.writer(csv_out).writerows([header_row, *events_data])