## Assignment 01

Write a function ``assignment_01`` that reads a Wikipedia HTML page and extracts the infobox key-value pairs as strings. The infobox is the blue table in the top-right corner of a Wikipedia page.


In [4]:
from bs4 import BeautifulSoup
import requests


# German Wikipedia article used as the example input for assignment_01.
bht_url = "https://de.wikipedia.org/wiki/Berliner_Hochschule_f%C3%BCr_Technik"
# Browser-like User-Agent header sent along with the page request in assignment_01.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'}

def assignment_01(url):
    """Extract the infobox of a Wikipedia page as a dict of strings.

    Downloads ``url``, locates the first ``<table>`` carrying the CSS class
    ``infobox`` and collects every row that contains both a header cell
    (``<th>``) and a data cell (``<td>``).

    Args:
        url: Address of the Wikipedia article to read.

    Returns:
        A dict mapping each stripped header text to the stripped text of the
        first data cell in the same row. The dict is empty when the page has
        no infobox table. If the same header text occurs in several rows,
        the last row wins.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or a timeout.
    """
    # A timeout keeps the call from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on an error response instead of parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')

    # Not every article has an infobox; without this guard the loop below
    # would crash with an AttributeError on ``None``.
    infobox = soup.find('table', class_='infobox')
    if infobox is None:
        return {}

    result = {}
    for row in infobox.find_all('tr'):
        key_cell = row.find('th')
        value_cell = row.find('td')
        # Rows without both cells (captions, images, section headers)
        # carry no key-value pair.
        if key_cell is None or value_cell is None:
            continue
        result[key_cell.text.strip()] = value_cell.text.strip()

    return result

In [5]:
# Check: the infobox extracted from the example article must list
# "Berlin-Wedding" under the key "Ort".
infobox = assignment_01(bht_url)
assert infobox["Ort"] == "Berlin-Wedding"

## Assignment 02

Write a function ``assignment_02`` that reads the information about all Christmas markets in Berlin and returns the name of the district that has the most registered Christmas markets.

In [8]:
import json
import requests
import pandas as pd


# Endpoint passed to assignment_02; the ".json" path suggests it serves the
# Berlin Christmas market listing as JSON.
christmas_markets_url = "https://www.berlin.de/sen/web/service/maerkte-feste/weihnachtsmaerkte/index.php/index/all.json?q="

def assignment_02(url):
    """Return the district with the most registered Christmas markets.

    Args:
        url: Endpoint returning a JSON document whose top-level ``'index'``
            key holds the list of market records; each record is expected
            to carry a ``'bezirk'`` (district) field.

    Returns:
        The ``'bezirk'`` value that occurs most often among the records.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or a timeout.
        KeyError: If the payload has no ``'index'`` key or the records have
            no ``'bezirk'`` field.
    """
    # A timeout keeps the call from hanging forever on a stalled connection.
    response = requests.get(url, timeout=10)
    # Fail loudly on an error response instead of trying to decode it as JSON.
    response.raise_for_status()

    markets = pd.DataFrame(response.json()['index'])

    # value_counts() tallies markets per district; idxmax() returns the
    # label (district name) with the highest tally.
    return markets['bezirk'].value_counts().idxmax()


In [9]:
# Check: the district with the most markets is expected to be
# "Charlottenburg-Wilmersdorf".
assert assignment_02(christmas_markets_url) == "Charlottenburg-Wilmersdorf"