In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
def _extract_email(cell):
    """Return the address from the first link in ``cell``, or '' if there is none.

    An ``<a>`` tag without an ``href`` attribute yields '' instead of raising,
    so one malformed row cannot abort the whole scrape.
    """
    link = cell.find('a')
    if link is None:
        return ''
    href = link.get('href') or ''
    # Drop the scheme, then any "?subject=..." style parameters after the address.
    return href.replace('mailto:', '').split('?', 1)[0].strip()


def scrape_representatives(url, timeout=30):
    """Scrape names, parties and e-mail addresses from the table rows of a page.

    Every ``<tr>`` in the page is inspected. A row is used when it contains no
    ``<th>`` cell and has at least three ``<td>`` cells, which are read as
    name, party and a cell holding a ``mailto:`` link. Rows that end up without
    a name or without an e-mail address are skipped.

    Parameters
    ----------
    url : str
        Address of the page to scrape.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled server cannot hang the
        notebook indefinitely. Defaults to 30 seconds.

    Returns
    -------
    list of dict
        One dict with the keys ``'name'``, ``'party'`` and ``'email'`` per
        usable row, in page order. An empty list is returned, after printing
        the error, when the request fails or the page cannot be processed.
    """
    # Set up headers to mimic a browser request
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raise an exception for bad status codes

        soup = BeautifulSoup(response.text, 'html.parser')
        representatives = []

        for row in soup.find_all('tr'):
            # Skip header rows
            if row.find('th'):
                continue

            cells = row.find_all('td')
            if len(cells) < 3:
                continue

            name = cells[0].text.strip()
            party = cells[1].text.strip()
            email = _extract_email(cells[2])

            if name and email:
                representatives.append({
                    'name': name,
                    'party': party,
                    'email': email
                })

        return representatives

    except requests.RequestException as e:
        print(f"Error fetching the webpage: {e}")
        return []
    except Exception as e:
        print(f"Error processing the data: {e}")
        return []

In [3]:
# Stortinget page of the representatives' e-mail addresses ("Representantenes e-postadresser").
url = (
    "https://www.stortinget.no/no/Stottemeny/kontakt/"
    "representanter-og-partigrupper/Representantenes-e-postadresser/"
)
results = scrape_representatives(url)

In [4]:
# Name the columns explicitly: if the scrape returned an empty list, the frame
# still carries the name/party/email columns instead of having none at all.
no_parl = pd.DataFrame(results, columns=['name', 'party', 'email'])
no_parl.head()

Unnamed: 0,name,party,email
0,"Abusland, Anja Ninasdotter",Senterpartiet,anja.ninasdotter.abusland@stortinget.no
1,"Almeland, Grunde",Venstre,grunde.kreken.almeland@stortinget.no
2,"Amundsen, Per-Willy",Fremskrittspartiet,per-willy.amundsen@stortinget.no
3,"Arnstad, Marit",Senterpartiet,marit.arnstad@stortinget.no
4,"Asheim, Henrik",Høyre,henrik.asheim@stortinget.no


In [5]:
# Write the scraped table to the data directory; the row index is left out of the file.
no_parl.to_csv(
    "../data/no_parliament.csv",
    index=False,
)