## <span style='color:#ff5f27'> 📝 Imports </span>

In [1]:
import requests
from datetime import datetime
from bs4 import BeautifulSoup
import pandas as pd
import re

## <span style='color:#ff5f27'> 🧑🏻‍🏫 Functions </span>

In [2]:
def parse_data(parser,tag: str, class_: str):
    """Return the text of every element that ``parser.find_all`` yields.

    Args:
        parser: Object exposing ``find_all(tag, class_)``; the notebook
            passes a ``BeautifulSoup`` instance.
        tag: Tag name forwarded as the first positional argument.
        class_: Forwarded as the second positional argument (with
            BeautifulSoup a string here is presumably matched against the
            CSS class -- confirm against the bs4 docs).

    Returns:
        list: The ``.text`` attribute of each match, in the order returned.
    """
    return [element.text for element in parser.find_all(tag, class_)]


def parse_page(page_name, timeout=30):
    """Scrape the four headline prices from one elbruk.se hourly-price page.

    Args:
        page_name: URL suffix appended to
            ``https://www.elbruk.se/timpriser-`` (e.g. ``'se1-lulea'``).
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` can block indefinitely.

    Returns:
        list[float]: One price per info box, taken from the first four
        ``div.info-box-content`` elements in page order.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        IndexError: If an info box has fewer than three lines or its third
            line contains no ``<digits>,<digits>`` number (page layout
            changed).
    """
    response = requests.get(
        f'https://www.elbruk.se/timpriser-{page_name}',
        timeout=timeout,
    )
    # Fail loudly on an error page instead of trying to parse it.
    response.raise_for_status()

    parser = BeautifulSoup(response.content, 'html.parser')

    data = parse_data(parser,'div','info-box-content')

    electricity_prices = []
    for info in data[:4]:
        # The price sits on the third line of the box text and uses a
        # decimal comma, which float() does not understand.
        price_text = re.findall(r'\d+\,\d+', info.split('\n')[2])[0]
        electricity_prices.append(float(price_text.replace(',', '.')))

    return electricity_prices


def timestamp_2_time(x):
    """Convert a ``'%Y-%m-%d %H:%M'`` value to integer epoch milliseconds.

    ``x`` is passed through ``str()`` before parsing. The parsed datetime
    is naive, so ``.timestamp()`` interprets it in the machine's local
    time zone.
    """
    parsed = datetime.strptime(str(x), '%Y-%m-%d %H:%M')
    epoch_millis = parsed.timestamp() * 1000
    return int(epoch_millis)

## <span style='color:#ff5f27'> 🕵🏻‍♂️ Parsing Electricity Data </span>

In [3]:
page_names = ['se1-lulea','se2-sundsvall','se3-stockholm','se4-malmo']

# Take the scrape timestamp once, before any request is made, so that all
# rows of this run share the same value even when the downloads straddle a
# minute boundary.
scrape_time = datetime.now().strftime("%Y-%m-%d %H:%M")

# One row per page: [timestamp, city, *four prices from parse_page].
# The city is the part after the dash in the page name, capitalised.
data = [
    [scrape_time, page_name.split('-')[1].capitalize(), *parse_page(page_name)]
    for page_name in page_names
]

data

[['2022-09-05 21:03', 'Lulea', 202.32, 91.28, 57.18, 544.49],
 ['2022-09-05 21:03', 'Sundsvall', 202.32, 91.28, 57.18, 544.49],
 ['2022-09-05 21:03', 'Stockholm', 398.0, 544.84, 159.07, 626.7],
 ['2022-09-05 21:03', 'Malmo', 398.0, 544.84, 159.07, 626.7]]

## <span style='color:#ff5f27'> 👩🏻‍🔬 DataFrame Creation/Update </span>

In [4]:
# Column labels for the scraped rows: a timestamp, a city name, then the
# four prices in the order parse_page returns them.
columns = ['date', 'city', 'price_day', 'price_current', 'price_min', 'price_max']

In [5]:
# Build the frame from the scraped rows and replace the textual 'date'
# column with integer epoch milliseconds in a single chained expression.
dataframe = pd.DataFrame(data=data, columns=columns).assign(
    date=lambda frame: frame['date'].apply(timestamp_2_time)
)

dataframe.head()

Unnamed: 0,date,city,price_day,price_current,price_min,price_max
0,1662400980000,Lulea,202.32,91.28,57.18,544.49
1,1662400980000,Sundsvall,202.32,91.28,57.18,544.49
2,1662400980000,Stockholm,398.0,544.84,159.07,626.7
3,1662400980000,Malmo,398.0,544.84,159.07,626.7


## <span style="color:#ff5f27;"> 🔮 Connecting to Hopsworks Feature Store </span>

In [6]:
import hopsworks

# Authenticate against Hopsworks and keep the project handle.
# NOTE(review): how credentials are supplied (prompt, env var, cached key)
# is decided by the hopsworks client and is not visible here -- confirm.
project = hopsworks.login()

# Feature store handle of the logged-in project; the feature-group helpers
# below use it as their default `fs` argument.
fs = project.get_feature_store() 

Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/167
Connected. Call `.close()` to terminate connection gracefully.


## <span style="color:#ff5f27;">🪄 👩🏻‍🔬 Retrieving or Creating Feature Group</span>

In [7]:
def retrieve_feature_group(name='electricity_prices',fs=fs,version=1):
    """Fetch an existing feature group from the feature store.

    Args:
        name: Feature group name.
        fs: Feature store handle. The default is the module-level ``fs``
            as it was when this function was defined; rerun this cell
            after logging in again, or pass ``fs`` explicitly.
        version: Feature group version to fetch (previously fixed at 1).

    Returns:
        Whatever ``fs.get_feature_group`` returns for ``name``/``version``.
        NOTE(review): behaviour for a missing group (raise vs. ``None``)
        depends on the installed Hopsworks client -- confirm.
    """
    return fs.get_feature_group(name=name, version=version)

def create_feature_group(data,name='electricity_prices',fs=fs,version=1):
    """Get or create the feature group and insert ``data`` into it.

    Args:
        data: DataFrame to upload. Its index is turned into an ``index``
            column via ``reset_index()`` and used as the primary key.
        name: Feature group name.
        fs: Feature store handle. The default is the module-level ``fs``
            as it was when this function was defined.
        version: Feature group version (previously fixed at 1).

    Returns:
        The feature group object that the data was inserted into.
    """
    feature_group = fs.get_or_create_feature_group(
        name=name,
        description='Characteristics of each day',
        version=version,
        # NOTE(review): the key is the positional row index, so every run
        # reuses the same keys (0..n-1) -- confirm this is intended rather
        # than keying on e.g. city.
        primary_key=['index'],
        online_enabled=True,
        # event_time takes the name of a single feature, not a list.
        event_time='date',
    )

    feature_group.insert(data.reset_index())

    return feature_group

In [8]:
# create_feature_group() calls fs.get_or_create_feature_group() and then
# inserts, so it covers both the "already exists" and the "first run" case.
# The previous bare `except:` swallowed every error (including
# KeyboardInterrupt and genuine insert failures) and then retried the
# insert through the create path, hiding the real problem.
feature_group = create_feature_group(dataframe)

Uploading Dataframe: 0.00% |          | Rows 0/4 | Elapsed Time: 00:00 | Remaining Time: ?

Launching offline feature group backfill job...
Backfill Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/167/jobs/named/electricity_prices_1_offline_fg_backfill/executions


---