# <span style="font-weight:bold; font-size: 3rem; color:#1EB182;"><img src="images/icon102.png" width="38px"></img> **Hopsworks Feature Store** </span><span style="font-weight:bold; font-size: 3rem; color:#333;">- Part 01: Backfill Features to the Feature Store</span>


## 🗒️ This notebook is divided into 3 sections:
1. Load the data.
2. Connect to the Hopsworks Feature Store.
3. Create feature groups and insert them into the Feature Store.

![tutorial-flow](images/01_featuregroups.png)

## API keys are stored in a `.env` file in the following format:
`BINANCE_API_KEY = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"`

`BINANCE_API_SECRET = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"`


`TWITTER_API_KEY = "cccccccccccccccccccccccccccc"`

`TWITTER_API_SECRET = "ddddddddddddddddddddddddddddddddddd"`

### <span style="color:#ff5f27;"> 📝 Imports</span>

In [None]:
!pip install -U unicorn-binance-rest-api --quiet
!pip install -U python-dotenv --quiet

In [None]:
# NOTE(review): nothing else in this notebook imports `pd`, `re`, `tqdm` or the helpers
# used below (`parse_btc_data`, `process_btc_data`, `textblob_processing`,
# `vader_processing`), so they appear to come into scope through this wildcard import
# of the local `functions` module — confirm, and prefer explicit imports.
from functions import *

from dotenv import load_dotenv
# Read the key/value pairs from the local `.env` file into the process environment.
load_dotenv()

## <span style="color:#ff5f27;"> 💽 Loading Data</span>

#### <span style='color:#ff5f27'> 📈 Bitcoin Data</span>

In [None]:
# Fetch the BTC history from the project helper, requesting `number_of_days_ago=2000`.
btc_history = parse_btc_data(number_of_days_ago=2000)

# Keep only the rows inside the backfill window (both bounds inclusive),
# then renumber the surviving rows from 0.
in_backfill_window = btc_history.date.between('2021-02-05 10:00:00', '2022-06-04 23:00:00')
df_bitcoin = btc_history[in_backfill_window].reset_index(drop=True)

df_bitcoin.head(3)

In [None]:
# Run the project's BTC processing helper on the windowed price data. The resulting
# frame is the one inserted into the `bitcoin_price_fg` feature group further down.
df_bitcoin_processed = process_btc_data(df_bitcoin)
# Show the last rows as a quick sanity check of the processed output.
df_bitcoin_processed.tail(3)

#### <span style='color:#ff5f27'> 🔮 Tweets Data</span>

In [None]:
# Remote location of the raw Bitcoin tweets CSV.
TWEETS_CSV_URL = "https://repo.hops.works/dev/davit/bitcoin/bitcoin_tweets.csv"

# Download and parse the CSV, then preview the first rows.
df_tweets = pd.read_csv(TWEETS_CSV_URL)
df_tweets.head(3)

In [None]:
# --- 1. Repair rows whose columns are misaligned ------------------------------------
# A row is treated as well-formed when `user_verified` reads "True" or "False".
# Every other row is shifted two columns to the right before being merged back.
# NOTE(review): presumably those rows lost two leading fields in the raw CSV — confirm.
df_tweets["user_verified"] = df_tweets["user_verified"].astype(str)
is_well_formed = df_tweets["user_verified"].isin(["False", "True"])

df_tweets_correct = df_tweets[is_well_formed]
df_tweets_incorrect = df_tweets[~is_well_formed]
df_tweets_corrected = df_tweets_incorrect.shift(periods=2, axis="columns")

# Merge both parts and discard rows that still have no date.
# (No sort here: the frame is sorted by date once, after filtering, below.)
df_tweets_processed = pd.concat([df_tweets_correct, df_tweets_corrected])
df_tweets_processed = df_tweets_processed[df_tweets_processed["date"].notna()]

# NOTE(review): hard-coded row label tied to this exact CSV snapshot; `drop` raises
# KeyError if the label is absent — confirm why this row must be removed.
df_tweets_processed = df_tweets_processed.drop(2612133)

# --- 2. Drop tweets whose `source` contains "bot" (case-insensitive) -----------------
# Use a temporary series instead of overwriting the column on a filtered frame;
# `source` is not among the columns kept below anyway.
tweet_source = df_tweets_processed["source"].apply(str).str.lower()
df_tweets_processed = df_tweets_processed[~tweet_source.str.contains("bot")]

# --- 3. Keep the needed columns, order by date, renumber rows from 0 -----------------
df_tweets_processed = (
    df_tweets_processed
    .loc[:, ["date", "text", "user_followers", "user_friends", "user_favourites"]]
    .sort_values(by="date")
    .reset_index(drop=True)
)

# --- 4. Clean the tweet text (vectorized; replaces the per-row `.loc` loop) ----------
# Patterns are raw strings so `\w` / `\d` are not parsed as (invalid) string escapes.
# NOTE(review): the URL pattern is kept unchanged; it stops at the first "/" after the
# host, so the path part of a link stays in the text — confirm this is intended.
df_tweets_processed["text"] = (
    df_tweets_processed["text"]
    .apply(str)
    .str.replace("#", "", regex=False)                                         # strip hashtag markers
    .str.replace(r"https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+", "", regex=True)   # strip URL scheme + host
    .str.replace(r"@\w+ *", "", regex=True)                                    # strip @mentions
)

In [None]:
# Preview the cleaned tweets (first five rows) before computing sentiment features.
df_tweets_processed.head()

In [None]:
# Build the TextBlob-based tweet features with the project helper.
# NOTE(review): the helper's output schema is not visible here; the feature group below
# uses a `unix` column as primary key and event time, so the result presumably has one.
tweets_textblob = textblob_processing(df_tweets_processed)
tweets_textblob.head()

In [None]:
# Build the VADER-based tweet features with the project helper.
# NOTE(review): the helper's output schema is not visible here; the feature group below
# uses a `unix` column as primary key and event time, so the result presumably has one.
tweets_vader = vader_processing(df_tweets_processed)
tweets_vader.head()

---
## <span style="color:#ff5f27;"> 🔮 Connecting to Hopsworks Feature Store </span>

In [None]:
import hopsworks

# Log in to Hopsworks and obtain a handle to the project.
# NOTE(review): login() is expected to read credentials from the environment or prompt
# for an API key — confirm for your setup.
project = hopsworks.login()

# Feature store handle used by all feature-group cells below.
fs = project.get_feature_store()

## <span style="color:#ff5f27;"> 🪄 Creating Feature Groups </span>

#### <span style='color:#ff5f27'> 📈 Bitcoin Feature Group</span>

In [None]:
# Get version 1 of the Bitcoin price feature group, or define it if it does not exist.
btc_price_fg = fs.get_or_create_feature_group(
    name='bitcoin_price_fg',
    description='Bitcoin price aggregated for days',
    version=1,
    primary_key=['unix'],   # list of feature names that identify a row
    online_enabled=True,    # also serve the features from the online store
    event_time='unix',      # a single feature name (string), not a list
)

# Backfill: write the processed BTC data into the feature group.
btc_price_fg.insert(df_bitcoin_processed)

#### <span style='color:#ff5f27'> 🔮 Tweets Feature Groups</span>

In [None]:
# Get version 1 of the TextBlob tweets feature group, or define it if it does not exist.
tweets_textblob_fg = fs.get_or_create_feature_group(
    name='bitcoin_tweets_textblob_fg',
    version=1,
    primary_key=['unix'],   # list of feature names that identify a row
    online_enabled=True,    # also serve the features from the online store
    event_time='unix',      # a single feature name (string), not a list
)

# Backfill: write the TextBlob tweet features into the feature group.
tweets_textblob_fg.insert(tweets_textblob)

In [None]:
# Get version 1 of the VADER tweets feature group, or define it if it does not exist.
tweets_vader_fg = fs.get_or_create_feature_group(
    name='bitcoin_tweets_vader_fg',
    version=1,
    primary_key=['unix'],   # list of feature names that identify a row
    online_enabled=True,    # also serve the features from the online store
    event_time='unix',      # a single feature name (string), not a list
)

# Backfill: write the VADER tweet features into the feature group.
tweets_vader_fg.insert(tweets_vader)

---