# <span style="font-weight:bold; font-size: 3rem; color:#1EB182;"><img src="images/icon102.png" width="38px"></img> **Hopsworks Feature Store** </span><span style="font-weight:bold; font-size: 3rem; color:#333;">- Part 03: Training Data & Feature views</span>

<span style="font-weight:bold; font-size: 1.4rem;">This is the third part of the advanced tutorials about the Hopsworks Feature Store. This notebook explains how to read from feature groups and create training datasets within the feature store.</span>

## 🗒️ In this notebook you will see how to create a training dataset from the feature groups: 

1. Retrieving Feature Groups.
2. Defining Transformation functions.
3. Feature View creation.
4. Training Dataset with training, validation and test data.

![part2](images/02_training-dataset.png) 

### <span style="color:#ff5f27;"> 📝 Imports</span>

In [None]:
# Standard library
import datetime
import warnings

# Third-party
import pandas as pd

# Silence library warnings so the tutorial output stays readable.
warnings.filterwarnings('ignore')

## <span style="color:#ff5f27;"> 🔮 Connecting to Hopsworks Feature Store </span>

In [None]:
# Hopsworks client library; login() below opens the session used by this notebook.
import hopsworks

# Log in and keep the returned project handle.
project = hopsworks.login()

# Feature store handle for the project; later cells use `fs` to look up
# feature groups, transformation functions and to create the feature view.
fs = project.get_feature_store()

---

## <span style="color:#ff5f27;">🪝 Retrieving Feature Groups </span>

In [None]:
# Look up the existing Bitcoin price feature group (expected to have been
# created earlier in this tutorial series). get_feature_group() is used rather
# than get_or_create_feature_group() so that a missing group fails right here
# instead of handing back an unsaved stub that only errors when it is read.
btc_price_fg = fs.get_feature_group(
    name='bitcoin_price_fg',
    version=1,
)

# Preview three rows. show() matches the sibling cells and avoids pulling the
# whole feature group into a DataFrame just to display its head.
btc_price_fg.show(3)

In [None]:
# Look up the existing TextBlob tweet-sentiment feature group.
# get_feature_group() is used rather than get_or_create_feature_group() so that
# a missing group fails right here instead of handing back an unsaved stub.
tweets_textblob_fg = fs.get_feature_group(
    name='bitcoin_tweets_textblob_fg',
    version=1,
)

# Preview three rows.
tweets_textblob_fg.show(3)

In [None]:
# Look up the existing VADER tweet-sentiment feature group.
# get_feature_group() is used rather than get_or_create_feature_group() so that
# a missing group fails right here instead of handing back an unsaved stub.
tweets_vader_fg = fs.get_feature_group(
    name='bitcoin_tweets_vader_fg',
    version=1,
)

# Preview three rows.
tweets_vader_fg.show(3)

---

## <span style="color:#ff5f27;"> 🖍 Query Preparation</span>

In [None]:
# Build the feature query:
#   * every price feature except the raw "date" and "unix" columns,
#   * "subjectivity" and "polarity" from the TextBlob sentiment group,
#   * "compound" from the VADER sentiment group.
# No `on=` argument is passed to join(), so the feature store's default join
# key is used (presumably the feature groups' primary keys -- TODO confirm).
fg_query = (
    btc_price_fg.select_except(["date", "unix"])
    .join(tweets_textblob_fg.select(["subjectivity", "polarity"]))
    # select() is documented to take a list of features, so pass one even for
    # a single column (consistent with the select() call above).
    .join(tweets_vader_fg.select(["compound"]))
)

# Preview the joined result.
fg_query.show(5)

--- 

## <span style="color:#ff5f27;"> 🔮 Feature View Creation and Retrieving </span>

In [None]:
# Fetch the min-max scaler transformation function from the feature store.
min_max_scaler = fs.get_transformation_function(name="min_max_scaler")

# Attach that one scaler to every feature listed below. The mapping is keyed by
# feature name; all keys share the same `min_max_scaler` object.
transformation_functions = dict.fromkeys(
    [
        # Raw price / volume features
        'open', 'high', 'low', 'close',
        'volume', 'quote_av', 'trades',
        'tb_base_av', 'tb_quote_av',
        # Rolling means and signal
        'mean_7_days', 'mean_14_days', 'mean_56_days',
        'signal',
        # 7-day window statistics
        'std_7_days', 'exp_mean_7_days', 'exp_std_7_days',
        'momentum_7_days', 'rate_of_change_7_days', 'strength_index_7_days',
        # 14-day window statistics
        'std_14_days', 'exp_mean_14_days', 'exp_std_14_days',
        'momentum_14_days', 'rate_of_change_14_days', 'strength_index_14_days',
        # 56-day window statistics
        'std_56_days', 'exp_mean_56_days', 'exp_std_56_days',
        'momentum_56_days', 'rate_of_change_56_days', 'strength_index_56_days',
        # Tweet sentiment features
        'subjectivity', 'polarity', 'compound',
    ],
    min_max_scaler,
)

In [None]:
# Register version 1 of the Bitcoin feature view: the joined feature query plus
# the per-feature transformation functions mapped above.
feature_view = fs.create_feature_view(
    name='bitcoin_feature_view',
    version=1,
    query=fg_query,
    transformation_functions=transformation_functions,
)

---

## <span style="color:#ff5f27;"> 🏋️ Training Dataset Creation</span>
---

### <span style="color:#ff5f27;">🪓 TimeSeriesSplit</span>

In [None]:
# Timestamp layout used by the split boundaries in the cells below.
date_format = "%Y-%m-%d %H:%M:%S"

# NOTE: from here on `datetime` names the class, shadowing the `datetime`
# module imported at the top of the notebook.
from datetime import datetime

In [None]:
# Training split: create a training dataset filtered by event time.
# The boundaries are converted to epoch milliseconds. strptime() returns a
# naive datetime, so timestamp() interprets it in the kernel's local time zone.
start_time = int(datetime.strptime("2021-02-05 10:00:00", date_format).timestamp() * 1000)
end_time = int(datetime.strptime("2022-01-01 23:59:59", date_format).timestamp() * 1000)

# write_options asks the call to wait for the materialization job to finish;
# the call returns the new training dataset version and the job handle.
td_train_version, td_job = feature_view.create_training_data(
    start_time=start_time,
    end_time=end_time,
    description='bitcoin price training dataset 2021-02-05 to 2022-01-01',
    data_format="csv",
    coalesce=True,
    write_options={'wait_for_job': True},
)

In [None]:
# Validation split: create a training dataset filtered by event time.
# The boundaries are converted to epoch milliseconds. strptime() returns a
# naive datetime, so timestamp() interprets it in the kernel's local time zone.
start_time = int(datetime.strptime("2022-01-02 00:00:00", date_format).timestamp() * 1000)
end_time = int(datetime.strptime("2022-04-30 23:59:59", date_format).timestamp() * 1000)

# write_options asks the call to wait for the materialization job to finish;
# the call returns the new training dataset version and the job handle.
td_validation_version, td_job = feature_view.create_training_data(
    start_time=start_time,
    end_time=end_time,
    description='bitcoin price validation dataset 2022-01-02 to 2022-04-30',
    data_format="csv",
    coalesce=True,
    write_options={'wait_for_job': True},
)

In [None]:
# Test split: create a training dataset filtered by event time.
# The boundaries are converted to epoch milliseconds. strptime() returns a
# naive datetime, so timestamp() interprets it in the kernel's local time zone.
start_time = int(datetime.strptime("2022-05-01 00:00:00", date_format).timestamp() * 1000)
end_time = int(datetime.strptime("2022-06-04 23:59:59", date_format).timestamp() * 1000)

# write_options asks the call to wait for the materialization job to finish;
# the call returns the new training dataset version and the job handle.
td_test_version, td_job = feature_view.create_training_data(
    start_time=start_time,
    end_time=end_time,
    description='bitcoin price test dataset 2022-05-01 to 2022-06-04',
    data_format="csv",
    coalesce=True,
    write_options={'wait_for_job': True},
)

---