## <span style='color:#ff5f27'> 📝 Imports</span>

In [1]:
import pandas as pd

In [2]:
from hops import hdfs

# Resolve the project's root path on HDFS; later cells prepend it to the
# relative paths of the CSV files they read.
project_path = hdfs.project_path()
# Echo the resolved path as the cell output.
project_path

'hdfs://rpc.namenode.service.consul:8020/Projects/Model_Serving/'

## <span style='color:#ff5f27'> 💽 Loading Historical Data</span>

#### <span style='color:#ff5f27'> 🪪 Profiles Data</span>

In [3]:
# Read the passenger profiles CSV from the project's HDFS directory and
# lower-case every column label in the same expression.
df_profiles = (
    pd.read_csv(project_path + 'Jupyter/data/profiles.csv')
    .rename(columns=str.lower)
)
# Preview the first rows.
df_profiles.head()

  self.client = HadoopFileSystem(


Unnamed: 0,passengerid,name,sex,age,survived
0,1,"Braund, Mr. Owen Harris",male,22.0,0
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1
2,3,"Heikkinen, Miss. Laina",female,26.0,1
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1
4,5,"Allen, Mr. William Henry",male,35.0,0


In [4]:
# Remove the free-text `name` column from the profiles frame.
# Reassigning (instead of `inplace=True`) together with `errors='ignore'`
# keeps this cell safe to re-run: executing it a second time no longer raises
# KeyError once the column is already gone.
df_profiles = df_profiles.drop(columns=['name'], errors='ignore')
# Preview the frame without the dropped column.
df_profiles.head()

Unnamed: 0,passengerid,sex,age,survived
0,1,male,22.0,0
1,2,female,38.0,1
2,3,female,26.0,1
3,4,female,35.0,1
4,5,male,35.0,0


In [5]:
# Impute missing ages: within each (sex, survived) group, replace NaN ages
# with that group's median age.
df_profiles['age'] = (
    df_profiles
    .groupby(['sex', 'survived'])['age']
    .transform(lambda group_ages: group_ages.fillna(group_ages.median()))
)

In [6]:
# Count the remaining missing values per column of the profiles frame.
df_profiles.isna().sum()

passengerid    0
sex            0
age            0
survived       0
dtype: int64

#### <span style='color:#ff5f27'> 🛳 Tickets Data</span>

In [7]:
# Read the tickets CSV from the project's HDFS directory and lower-case every
# column label in the same expression.
df_tickets = (
    pd.read_csv(project_path + 'Jupyter/data/tickets.csv')
    .rename(columns=str.lower)
)
# Preview the first rows.
df_tickets.head()

Unnamed: 0,passengerid,pclass,sibsp,parch,ticket,fare,cabin,embarked
0,1,3,1,0,A/5 21171,7.25,,S
1,2,1,1,0,PC 17599,71.2833,C85,C
2,3,3,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,0,113803,53.1,C123,S
4,5,3,0,0,373450,8.05,,S


In [8]:
# Remove the `ticket` and `cabin` columns from the tickets frame.
# Reassigning (instead of `inplace=True`) together with `errors='ignore'`
# keeps this cell safe to re-run: executing it a second time no longer raises
# KeyError once the columns are already gone.
df_tickets = df_tickets.drop(columns=['ticket', 'cabin'], errors='ignore')

# Replace missing embarkation values with 'S'.
# NOTE(review): 'S' is presumably the most frequent port in the data - confirm.
df_tickets['embarked'] = df_tickets['embarked'].fillna('S')

# Preview the cleaned frame.
df_tickets.head()

Unnamed: 0,passengerid,pclass,sibsp,parch,fare,embarked
0,1,3,1,0,7.25,S
1,2,1,1,0,71.2833,C
2,3,3,0,0,7.925,S
3,4,1,1,0,53.1,S
4,5,3,0,0,8.05,S


In [9]:
# Count the remaining missing values per column of the tickets frame.
df_tickets.isna().sum()

passengerid    0
pclass         0
sibsp          0
parch          0
fare           0
embarked       0
dtype: int64

## <span style="color:#ff5f27;"> 🔮 Connecting to Hopsworks Feature Store </span>

In [10]:
import hopsworks

# Log in to Hopsworks and keep the returned project handle.
project = hopsworks.login()

# Feature store handle of the project; the cells below use it to get or
# create feature groups.
fs = project.get_feature_store()           

Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://2176a0f0-3503-11ed-be64-b1a4781e5f0a.cloud.hopsworks.ai/p/128
Connected. Call `.close()` to terminate connection gracefully.


## <span style="color:#ff5f27;">🪄 Creating Feature Groups</span>

#### <span style='color:#ff5f27'> 🪪 Profile Data</span>

In [21]:
# Cast the `passengerid` key column of the profiles frame to string.
# NOTE(review): `df_tickets.passengerid` is not cast in the visible cells -
# confirm the key types agree if the two feature groups are meant to be joined.
df_profiles['passengerid'] = df_profiles['passengerid'].astype(str)

In [25]:
# Show the column dtypes of the profiles frame.
df_profiles.dtypes

passengerid     object
sex             object
age            float64
survived         int64
dtype: object

In [23]:
# Get version 2 of the `profile_fg` feature group, creating it if it does not
# exist yet, keyed on `passengerid` and enabled for online serving.
# `primary_key` is passed as a list of feature names, matching the other
# feature-group cells in this notebook; a bare string is not a list of names
# and must not be relied on when this cell is the one that creates the group.
profile_fg = fs.get_or_create_feature_group(
    name='profile_fg',
    description='Personal information of each passenger',
    version=2,
    primary_key=['passengerid'],
    online_enabled=True,
)

# Upload the profiles dataframe into the feature group.
profile_fg.insert(df_profiles)

Uploading Dataframe: 0.00% |          | Rows 0/891 | Elapsed Time: 00:00 | Remaining Time: ?

Launching offline feature group backfill job...
Backfill Job started successfully, you can follow the progress at 
https://2176a0f0-3503-11ed-be64-b1a4781e5f0a.cloud.hopsworks.ai/p/128/jobs/named/profile_fg_2_offline_fg_backfill/executions


(<hsfs.core.job.Job at 0x7f094c5d9b50>, None)

In [22]:
# Get (or create) version 2 of the online-enabled `profile_fg` feature group,
# keyed on `passengerid`.
# NOTE(review): the preceding cell also gets/creates `profile_fg` version 2 and
# inserts the same dataframe - confirm whether both inserts are intended.
profile_fg = fs.get_or_create_feature_group(
    name='profile_fg',
    version=2,
    description='Personal information of each passenger',
    primary_key=['passengerid'],
    online_enabled=True,
)

# Upload the profiles dataframe into the feature group.
profile_fg.insert(df_profiles)

Feature Group created successfully, explore it at 
https://2176a0f0-3503-11ed-be64-b1a4781e5f0a.cloud.hopsworks.ai/p/128/fs/76/fg/79


Uploading Dataframe: 0.00% |          | Rows 0/891 | Elapsed Time: 00:00 | Remaining Time: ?

Launching offline feature group backfill job...
Backfill Job started successfully, you can follow the progress at 
https://2176a0f0-3503-11ed-be64-b1a4781e5f0a.cloud.hopsworks.ai/p/128/jobs/named/profile_fg_2_offline_fg_backfill/executions


(<hsfs.core.job.Job at 0x7f094d0fd370>, None)

In [11]:
# Get (or create) version 1 of the online-enabled `profile_fg` feature group,
# keyed on `passengerid`. This rebinds `profile_fg` to the version 1 handle.
profile_fg = fs.get_or_create_feature_group(
    name='profile_fg',
    version=1,
    description='Personal information of each passenger',
    primary_key=['passengerid'],
    online_enabled=True,
)

# Upload the profiles dataframe into the feature group.
profile_fg.insert(df_profiles)

Feature Group created successfully, explore it at 
https://2176a0f0-3503-11ed-be64-b1a4781e5f0a.cloud.hopsworks.ai/p/128/fs/76/fg/30


Uploading Dataframe: 0.00% |          | Rows 0/891 | Elapsed Time: 00:00 | Remaining Time: ?

Launching offline feature group backfill job...
Backfill Job started successfully, you can follow the progress at 
https://2176a0f0-3503-11ed-be64-b1a4781e5f0a.cloud.hopsworks.ai/p/128/jobs/named/profile_fg_1_offline_fg_backfill/executions


(<hsfs.core.job.Job at 0x7f094d105fa0>, None)

#### <span style='color:#ff5f27'> 🛳 Ticket Data</span>

In [12]:
# Get (or create) version 1 of the online-enabled `tickets_fg` feature group,
# keyed on `passengerid`.
tickets_fg = fs.get_or_create_feature_group(
    name='tickets_fg',
    version=1,
    description='Trip info depending on each passenger',
    primary_key=['passengerid'],
    online_enabled=True,
)

# Upload the tickets dataframe into the feature group.
tickets_fg.insert(df_tickets)

Feature Group created successfully, explore it at 
https://2176a0f0-3503-11ed-be64-b1a4781e5f0a.cloud.hopsworks.ai/p/128/fs/76/fg/31


Uploading Dataframe: 0.00% |          | Rows 0/891 | Elapsed Time: 00:00 | Remaining Time: ?

Launching offline feature group backfill job...
Backfill Job started successfully, you can follow the progress at 
https://2176a0f0-3503-11ed-be64-b1a4781e5f0a.cloud.hopsworks.ai/p/128/jobs/named/tickets_fg_1_offline_fg_backfill/executions


(<hsfs.core.job.Job at 0x7f094c58e490>, None)

---