In [2]:
from robobrowser import RoboBrowser
from bs4 import BeautifulSoup as bs
import pandas as pd
import numpy as np

def get_number(s):
    """Convert a scraped table cell into a float.

    Commas are removed from string input before conversion, so
    ``"1,234.5"`` becomes ``1234.5``. Surrounding whitespace is tolerated
    because ``float`` ignores it.

    Args:
        s: Cell text. ``None`` and plain numbers are accepted as well, so a
            missing cell does not abort the caller.

    Returns:
        The parsed value as ``float``, or ``None`` when ``s`` cannot be
        interpreted as a number (empty cell, placeholder text, ``None``).
    """
    # Only strings carry separators; calling .replace on anything else
    # would raise AttributeError instead of reporting "no number".
    if isinstance(s, str):
        s = s.replace(",", "")
    try:
        return float(s)
    except (TypeError, ValueError):
        return None

# Download the EPEX SPOT intraday-continuous tables (market area DE) page by
# page, collect the hourly values of every day and export them as one CSV.

# Open URL and create RoboBrowser instance
browser = RoboBrowser(parser='html.parser', history=False)

# The date in the URL is the last date shown on the website, and the website
# shows two dates for one request -- hence the two-day step.
# strftime replaces the former `.format('YYYY-MM-dd')[1:]`: that string was
# consumed as the truthy `name` flag (not as a date format), which prepended a
# header entry that `[1:]` then removed again.
dates = pd.date_range('2016-01-02', periods=366, freq='2D').strftime('%Y-%m-%d').tolist()
print(dates)

# Row labels for the 24 hourly intervals; "'" at beginning for opening in excel.
# Built once because it does not depend on the requested date.
hours = ["'00 - 01", "'01 - 02", "'02 - 03", "'03 - 04", "'04 - 05", "'05 - 06",
         "'06 - 07", "'07 - 08", "'08 - 09", "'09 - 10", "'10 - 11", "'11 - 12",
         "'12 - 13", "'13 - 14", "'14 - 15", "'15 - 16", "'16 - 17", "'17 - 18",
         "'18 - 19", "'19 - 20", "'20 - 21", "'21 - 22", "'22 - 23", "'23 - 24"]

# One DataFrame per (page, day); concatenated once after the loop instead of
# growing `results` with DataFrame.append on every iteration.
frames = []

for d in dates:
    url = 'https://www.epexspot.com/en/market-data/intradaycontinuous/intraday-table/' + d + '/DE'
    print(url)
    browser.open(url)
    soup = bs(str(browser.parsed), 'html.parser')  # get html
    dayData = soup.find("table")  # select table
    if dayData is None:
        # Fail with the offending URL rather than an AttributeError on None.
        raise RuntimeError("no <table> found at " + url)

    # The two dates shown on this page: the day before `d`, then `d` itself.
    myDates = pd.date_range(pd.to_datetime(d) - pd.to_timedelta(1, 'D'), periods=2, freq="D")

    # NOTE(review): all slice offsets below ([2:11], [3:12], [13:]) assume the
    # page layout the original author scraped -- confirm if the site changes.
    plainRows = dayData.find_all("tr", class_=None)
    tableHeader = plainRows[0] if plainRows else dayData.find("tr", class_=None)  # table header row
    columnNames = [th.get_text() for th in tableHeader.find_all("th")][2:11]
    priceRows = plainRows[1:]  # rows with €/Mwh with hourly interval

    # prices[day, hour, column] for both days on the page
    prices = np.zeros((2, 24, 9))
    for i in range(24):
        # Parse the row once and split it into the two day sections.
        cells = [get_number(td.get_text()) for td in priceRows[i].find_all("td")]
        prices[0][i] = cells[3:12]
        prices[1][i] = cells[13:]

    for i in range(2):
        df = pd.DataFrame(prices[i])
        df.columns = columnNames
        df["hours"] = hours
        df = df.set_index("hours", drop=False)
        df["date"] = myDates[i]
        frames.append(df)

results = pd.concat(frames)
results = results.set_index(["date", "hours"])
# The last request also returns 2018-01-01; drop its 24 hourly rows.
results = results.iloc[:-24]
print(results)
results.to_csv("intraday.csv", sep=";", decimal=",")


['2016-01-02', '2016-01-04', '2016-01-06']
https://www.epexspot.com/en/market-data/intradaycontinuous/intraday-table/2016-01-02/DE
https://www.epexspot.com/en/market-data/intradaycontinuous/intraday-table/2016-01-04/DE
https://www.epexspot.com/en/market-data/intradaycontinuous/intraday-table/2016-01-06/DE
                     Low(€/MWh)  High(€/MWh)  Last(€/MWh)  \
date       hours                                            
2016-01-01 '00 - 01       23.00        38.00        36.00   
           '01 - 02       17.15        30.00        24.99   
           '02 - 03       12.00        35.00        32.85   
           '03 - 04       13.00        32.00        20.00   
           '04 - 05       14.00        34.00        30.00   
           '05 - 06       14.00        35.00        34.00   
           '06 - 07       12.00        34.00        25.11   
           '07 - 08       12.90        37.50        36.00   
           '08 - 09       17.73        41.00        31.90   
           '09 - 10   