# <span style="color:#ff5f27;">🔮 Creating Feature Groups, Feature View and Training Dataset</span>
---

### <span style="color:#ff5f27;"> 📝 Imports</span>

In [None]:
import pandas as pd

import datetime

import warnings
# Suppress all warnings so cell output stays uncluttered.
# Note: this also hides deprecation notices emitted by the libraries used below.
warnings.filterwarnings('ignore')

---

### <span style="color:#ff5f27;"> 💽 Loading Data</span>

#### <span style="color:#ff5f27;"> ⛳️ Tweets Textblob</span>

In [None]:
# Load the TextBlob tweets dataset; the first CSV column becomes the DataFrame index.
tweets_textblob = pd.read_csv('data/tweets_textblob.csv', index_col=0)

# Preview the first rows to check that the file parsed as expected.
tweets_textblob.head()

In [None]:
# Print column names, dtypes, non-null counts and memory usage.
tweets_textblob.info()

In [None]:
# Descriptive statistics of the columns (numeric ones by default).
tweets_textblob.describe()

In [None]:
# (rows, columns) of the loaded frame.
tweets_textblob.shape

#### <span style="color:#ff5f27;"> ⛳️ Tweets Vader</span>

In [None]:
# Load the VADER tweets dataset; the first CSV column becomes the DataFrame index.
tweets_vader = pd.read_csv('data/tweets_vader.csv', index_col=0)

# Preview the first rows to check that the file parsed as expected.
tweets_vader.head()

In [None]:
# Print column names, dtypes, non-null counts and memory usage.
tweets_vader.info()

In [None]:
# Descriptive statistics of the columns (numeric ones by default).
tweets_vader.describe()

In [None]:
# (rows, columns) of the loaded frame.
tweets_vader.shape

#### <span style="color:#ff5f27;"> ⛳️ BTC resampled</span>

In [None]:
# Load the processed BTC price data; the first CSV column becomes the DataFrame index.
btc_df = pd.read_csv('data/btc_processed.csv', index_col=0)

# Preview the first rows to check that the file parsed as expected.
btc_df.head()

In [None]:
# Print column names, dtypes, non-null counts and memory usage.
btc_df.info()

--- 

## <span style="color:#ff5f27;"> 🪄 Creating Feature Groups </span>



In [None]:
import hopsworks

# Log in to Hopsworks; the returned object is the project handle used below.
project = hopsworks.login()

# Handle to the project's feature store, used for the feature group and
# feature view calls in the rest of this notebook.
fs = project.get_feature_store()

In [None]:
# Get (or create on first run) the feature group holding the TextBlob tweet features.
# `event_time` is passed as a plain feature name: the parameter is documented as a
# single string, not a list of names.
tweets_textblob_fg = fs.get_or_create_feature_group(
    name='tweets_textblob_fg',
    version=1,
    primary_key=['unix'],
    online_enabled=True,
    event_time='unix',
)

# Write the DataFrame rows into the feature group.
tweets_textblob_fg.insert(tweets_textblob)

In [None]:
# Get (or create on first run) the feature group holding the VADER tweet features.
# `event_time` is passed as a plain feature name: the parameter is documented as a
# single string, not a list of names.
tweets_vader_fg = fs.get_or_create_feature_group(
    name='tweets_vader_fg',
    version=1,
    primary_key=['unix'],
    online_enabled=True,
    event_time='unix',
)

# Write the DataFrame rows into the feature group.
tweets_vader_fg.insert(tweets_vader)

In [None]:
# Get (or create on first run) the feature group holding the BTC price features.
# `event_time` is passed as a plain feature name: the parameter is documented as a
# single string, not a list of names.
btc_price_fg = fs.get_or_create_feature_group(
    name='btc_price_fg',
    description='Bitcoin price aggregated for days',
    version=1,
    primary_key=['index'],
    online_enabled=True,
    event_time='unix',
)

# reset_index() turns the DataFrame index into a regular column so it can be
# inserted as a feature and serve as the primary key.
# NOTE(review): the resulting column is only called 'index' if the CSV's first
# column has no header name — confirm against data/btc_processed.csv.
btc_price_fg.insert(btc_df.reset_index())

---

## <span style="color:#ff5f27;">🪝 Retrieving Feature Groups </span>

In [None]:
# Fetch the existing feature group. `get_feature_group` is a pure lookup, so a
# missing group fails right here instead of handing back a new, unsaved
# metadata object that only breaks on the read below.
btc_price_fg = fs.get_feature_group(
    name='btc_price_fg',
    version=1,
)

# Read the feature group into a DataFrame and show its (rows, columns).
btc_price_fg.read().shape

In [None]:
# Fetch the existing feature group. `get_feature_group` is a pure lookup, so a
# missing group fails right here instead of handing back a new, unsaved
# metadata object that only breaks on the preview below.
tweets_textblob_fg = fs.get_feature_group(
    name='tweets_textblob_fg',
    version=1,
)

# Preview the first 5 rows stored in the feature group.
tweets_textblob_fg.show(5)

In [None]:
# Fetch the existing feature group. `get_feature_group` is a pure lookup, so a
# missing group fails right here instead of handing back a new, unsaved
# metadata object that only breaks on the preview below.
tweets_vader_fg = fs.get_feature_group(
    name='tweets_vader_fg',
    version=1,
)

# Preview the first 5 rows stored in the feature group.
tweets_vader_fg.show(5)

---

## <span style="color:#ff5f27;"> 🖍 Query Preparation</span>

In [None]:
# Build one query that joins the BTC price features with both tweet feature
# groups, matching rows on 'date'.
# NOTE(review): assumes every one of the three groups exposes a 'date'
# feature — confirm against the source CSVs.
fg_query = (
    btc_price_fg.select_all()
    .join(tweets_textblob_fg.select_all(), on='date')
    .join(tweets_vader_fg.select_all(), on='date')
)

# Preview the first 5 joined rows.
fg_query.show(5)

--- 

## <span style="color:#ff5f27;"> 🔮 Feature View Creation and Retrieval </span>

In [None]:
# Register version 1 of the feature view on top of the joined query;
# 'close' is declared as the label column.
fs.create_feature_view(
    name='btc_feature_view',
    version=1,
    labels=['close'],
    query=fg_query,
)

In [None]:
# Look up version 1 of the feature view by name.
feature_view = fs.get_feature_view(name='btc_feature_view', version=1)

---

## <span style="color:#ff5f27;"> 🏋️ Training Dataset Creation</span>

In [None]:
# Create a training dataset from the feature view, stored in CSV format.
feature_view.create_training_data(
    description='training_dataset',
    data_format='csv',
)

In [None]:
# Create a training dataset that holds out a 0.2 fraction of the data as a test split.
feature_view.create_train_test_split(test_size=0.2)

### <span style="color:#ff5f27;">🪝 Retrieving</span>

In [None]:
# Load the features and labels of training dataset version 1.
# NOTE(review): the version is hard-coded — presumably the dataset produced by
# the create_training_data call above; it will differ if that call has been
# run more than once. Confirm before re-running.
X_train, y_train = feature_view.get_training_data(training_dataset_version=1)

In [None]:
# Preview the first rows of the training features.
X_train.head()

In [None]:
# Preview the first rows of the training labels.
y_train.head()

In [None]:
# (rows, columns) of X_train.
X_train.shape

In [None]:
# Load the train/test split of training dataset version 2.
# get_train_test_split returns (X_train, X_test, y_train, y_test) — both feature
# frames first, then both label frames — so the names must be unpacked in that
# order; otherwise y_train and X_test end up swapped.
# NOTE(review): the version is hard-coded — presumably the dataset produced by
# the create_train_test_split call above; confirm before re-running.
X_train, X_test, y_train, y_test = feature_view.get_train_test_split(
    training_dataset_version=2,
)

In [None]:
# (rows, columns) of X_train as unpacked from the train/test split.
X_train.shape

In [None]:
# (rows, columns) of X_test as unpacked from the train/test split.
X_test.shape

---