In [1]:
import json
import pandas as pd
from bs4 import BeautifulSoup

# Load the JSON file
file_path = "JSON.json"
with open(file_path, "r", encoding="utf-8") as file:
    data = json.load(file)

# Extract the HTML content
html_content = data["data"]["html"]

# Parse the HTML
soup = BeautifulSoup(html_content, "html.parser")

# Extract events
events = []
for event_div in soup.find_all("div", class_="grid-item"):
    title_tag = event_div.find("div", class_="title")
    date_tag = event_div.find("div", class_="xcal-date")
    location_tag = event_div.find("div", class_="xcal-location")
    link_tag = event_div.find("a", class_="grid-link")
    image_tag = event_div.find("img")

    title = title_tag.text.strip() if title_tag else "N/A"
    date = date_tag.text.strip() if date_tag else "N/A"
    location = location_tag.text.strip() if location_tag else "N/A"
    link = "https://helmet.finna.fi" + link_tag["href"] if link_tag else "N/A"
    image_url = image_tag["data-src"] if image_tag else "N/A"

    events.append({
        "Title": title,
        "Date": date,
        "Location": location,
        "Link": link,
        "Image URL": image_url
    })

# Convert to DataFrame
events_df = pd.DataFrame(events)

# Save to CSV
output_csv_path = "./helmet_events.csv"
events_df.to_csv(output_csv_path, index=False)

# Show first few rows
print(events_df.head())

print(f"✅ Events saved to {output_csv_path}")


                                               Title  \
0                     Exhibition: Aino Nuotio: VAPAA   
1         Ville Hiltunen: Passengers 27.1.-16.2.2025   
2  Diana Bazhan: Flowers in the middle of winter ...   
3                   Art exhibition: Sininen sinfonia   
4     Art exhibition:  Pikkuväkeä ja muita persoonia   

                         Date                   Location  \
0  Date 26.1.2025 – 14.2.2025     Location Sello Library   
1  Date 27.1.2025 – 16.2.2025  Location Entresse Library   
2  Date 27.1.2025 – 16.2.2025  Location Entresse Library   
3    Date 3.2.2025 – 1.3.2025   Location Tapiola Library   
4    Date 3.2.2025 – 1.3.2025   Location Tapiola Library   

                                                Link  \
0  https://helmet.finna.fi/FeedContent/LinkedEven...   
1  https://helmet.finna.fi/FeedContent/LinkedEven...   
2  https://helmet.finna.fi/FeedContent/LinkedEven...   
3  https://helmet.finna.fi/FeedContent/LinkedEven...   
4  https://helmet.finn

In [9]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import time

# Base API URL for fetching events
base_api_url = "https://helmet.finna.fi/AJAX/JSON?method=getLinkedEvents&params%5Bpage_size%5D=30&url=https%3A%2F%2Fapi.hel.fi%2Flinkedevents%2Fv1%2Fevent%2F%3Finclude%3Dlocation%26keyword%3Dhelmet%253A12006%252Cyso%253Ap11617%252Cyso%253Ap16485%252Chelmet%253A11733%252Cyso%253Ap14004%252Cyso%253Ap556%252Chelmet%253A11193%252Cyso%253Ap8113%252Chelmet%253A12005%252Cyso%253Ap4354%252Cyso%253Ap13050%252Chelmet%253A11727%252Cyso%253Ap11406%252C%250A%2B%2B%2B%2B%2B%2B%2B%2Byso%253Ap14004%252Cyso%253Ap1808%252Cyso%253Ap11185%252Cyso%253Ap20421%252Chelmet%253A10778%252Chelmet%253A11699%252Cyso%253Ap2149%252Cyso%253Ap9270%252Chelmet%253A11687%252Cyso%253Ap6062%252Chelmet%253A11777%252Cyso%253Ap14710%252Chelmet%253A10675%252Cyso%253Ap2433%252Chelmet%253A11689%252Cyso%253Ap360%252Cyso%253Ap2787%252C%250A%2B%2B%2B%2B%2B%2B%2B%2Bhelsinki%253Aagjffu7tgq%252Chelsinki%253Aagjffvmzeu%26language%3Den%26page={}&page_size=30"

# Headers (mimic a real browser request)
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest"
}

all_events = []
page = 1

while True:
    print(f"Fetching page {page}...")
    response = requests.get(base_api_url.format(page), headers=headers)

    if response.status_code != 200:
        print(f"❌ Failed to fetch page {page}, status code: {response.status_code}")
        break

    data = response.json()

    # Extract the HTML content from JSON
    if "data" in data and "html" in data["data"]:
        html_content = data["data"]["html"]
        soup = BeautifulSoup(html_content, "html.parser")

        # Extract event details
        event_divs = soup.find_all("div", class_="grid-item")
        if not event_divs:
            print("✅ No more events found. Stopping.")
            break

        for event_div in event_divs:
            title_tag = event_div.find("div", class_="title")
            date_tag = event_div.find("div", class_="xcal-date")
            location_tag = event_div.find("div", class_="xcal-location")
            link_tag = event_div.find("a", class_="grid-link")
            image_tag = event_div.find("img")

            title = title_tag.text.strip() if title_tag else "N/A"
            date = date_tag.text.strip() if date_tag else "N/A"
            location = location_tag.text.strip() if location_tag else "N/A"
            link = "https://helmet.finna.fi" + link_tag["href"] if link_tag else "N/A"
            image_url = image_tag["data-src"] if image_tag else "N/A"

            all_events.append({
                "Title": title,
                "Date": date,
                "Location": location,
                "Link": link,
                "Image URL": image_url
            })

        page += 1
        time.sleep(1)  # Prevent rate limiting
    else:
        print("❌ Unexpected response format, stopping.")
        break

# Save all events to CSV
events_df = pd.DataFrame(all_events)
output_csv_path = "helmet_all_events.csv"
events_df.to_csv(output_csv_path, index=False)

print(f"✅ All events saved to {output_csv_path}")


Fetching page 1...
Fetching page 2...
Fetching page 3...
Fetching page 4...
Fetching page 5...
Fetching page 6...
Fetching page 7...
Fetching page 8...
Fetching page 9...
Fetching page 10...
Fetching page 11...
Fetching page 12...
Fetching page 13...
Fetching page 14...
Fetching page 15...
Fetching page 16...
Fetching page 17...
Fetching page 18...
Fetching page 19...
Fetching page 20...
Fetching page 21...
Fetching page 22...
Fetching page 23...
Fetching page 24...
Fetching page 25...
Fetching page 26...
Fetching page 27...
Fetching page 28...
Fetching page 29...
Fetching page 30...
Fetching page 31...
Fetching page 32...
Fetching page 33...
Fetching page 34...
Fetching page 35...
Fetching page 36...
Fetching page 37...
Fetching page 38...
Fetching page 39...
Fetching page 40...
Fetching page 41...
Fetching page 42...
Fetching page 43...
Fetching page 44...
Fetching page 45...
Fetching page 46...
Fetching page 47...
Fetching page 48...
Fetching page 49...
Fetching page 50...
Fetching 

In [10]:
events_df

Unnamed: 0,Title,Date,Location,Link,Image URL
0,Koko perheen after-ski-keidas,Päivämäärä 2.3.2025,Sijainti Stoa,https://helmet.finna.fi/FeedContent/LinkedEven...,/FeedContent/EventImage?query%5Bpage_size%5D=3...
1,Muumi-päivä,Päivämäärä 22.2.2025,Sijainti Keskustakirjasto Oodi,https://helmet.finna.fi/FeedContent/LinkedEven...,/FeedContent/EventImage?query%5Bpage_size%5D=3...
2,Talviloma leikkipuisto Lorussa,Päivämäärä 17.2.2025 – 21.2.2025,Sijainti Keskustakirjasto Oodi,https://helmet.finna.fi/FeedContent/LinkedEven...,/FeedContent/EventImage?query%5Bpage_size%5D=3...
3,Talviloma leikkipuisto Lorussa,Päivämäärä 21.2.2025,Sijainti Keskustakirjasto Oodi,https://helmet.finna.fi/FeedContent/LinkedEven...,/FeedContent/EventImage?query%5Bpage_size%5D=3...
4,Talviloma leikkipuisto Lorussa,Päivämäärä 20.2.2025,Sijainti Keskustakirjasto Oodi,https://helmet.finna.fi/FeedContent/LinkedEven...,/FeedContent/EventImage?query%5Bpage_size%5D=3...
...,...,...,...,...,...
11295,Ville Aalto: Superseder - An Environment,Päivämäärä 19.6.2024,Sijainti MUU Helsinki Nykytaidekeskus,https://helmet.finna.fi/FeedContent/LinkedEven...,/FeedContent/EventImage?query%5Bpage_size%5D=3...
11296,Ville Aalto: Superseder - An Environment,Päivämäärä 18.6.2024,Sijainti MUU Helsinki Nykytaidekeskus,https://helmet.finna.fi/FeedContent/LinkedEven...,/FeedContent/EventImage?query%5Bpage_size%5D=3...
11297,Ville Aalto: Superseder - An Environment,Päivämäärä 16.6.2024,Sijainti MUU Helsinki Nykytaidekeskus,https://helmet.finna.fi/FeedContent/LinkedEven...,/FeedContent/EventImage?query%5Bpage_size%5D=3...
11298,Ville Aalto: Superseder - An Environment,Päivämäärä 15.6.2024,Sijainti MUU Helsinki Nykytaidekeskus,https://helmet.finna.fi/FeedContent/LinkedEven...,/FeedContent/EventImage?query%5Bpage_size%5D=3...
