# <span style="font-weight:bold; font-size: 3rem; color:#1EB182;">**Hopsworks Feature Store** </span><span style="font-weight:bold; font-size: 3rem; color:#333;">- Part 02: Feature Pipeline</span>

## 🗒️ This notebook is divided into 3 sections:
1. Parsing Data.
2. Preparing dataframes.
3. Feature Group Insertion.

## <span style='color:#ff5f27'> 📝 Imports </span>

In [1]:
import pandas as pd
from datetime import datetime
import time 
import os 

from functions import *

## <span style='color:#ff5f27'> 🧑🏻‍🏫 Dataset Preparation </span>

#### <span style='color:#ff5f27'> 🚖 Rides Data </span>

In [2]:
# Build a fresh batch of ride records with the helper star-imported from `functions`.
# NOTE(review): 150 is presumably the number of rows requested — confirm against the helper.
df_rides = generate_rides_data(150)

# Bare last expression so the notebook renders the frame as the cell output.
df_rides

Unnamed: 0,ride_id,pickup_datetime,pickup_longitude,dropoff_longitude,pickup_latitude,dropoff_latitude,passenger_count,taxi_id,driver_id
0,02aced448db3214511ae6b5cded4be2c,1594448000000,-73.92737,-74.04951,41.43796,40.50445,3,186,105
1,a1069aa4c5ba217241fa4617ec384213,1601264800000,-73.55189,-74.20641,40.57860,41.35486,2,125,30
2,105982e4208fc5a3c740e0a224e6d46e,1582430900000,-74.43926,-73.67484,41.75341,41.73447,4,164,88
3,5860a22ed7c4960bd18c8e5ac89118ef,1588611800000,-72.88311,-73.06721,41.11562,41.69151,2,68,92
4,7ecbcceb5b3818b7d7e69d6b857dfc83,1580524800000,-73.00463,-73.76347,40.76049,41.07748,1,18,123
...,...,...,...,...,...,...,...,...,...
145,916c8e241f6ed6442b27241e684f5c84,1583816100000,-74.22544,-72.87204,41.47547,40.65869,4,177,43
146,dd960f6e15e7781f69e488df1b714100,1607897500000,-73.53736,-73.10414,41.35457,41.01956,1,103,151
147,99429b2952a14eb22e3af100ca08f2b8,1592493400000,-73.24330,-73.93402,40.95460,40.74437,1,73,175
148,b8ea20f84c81d50a86c73ec3d8d1ab4b,1586235500000,-73.65874,-74.10058,41.46944,40.52795,1,47,195


In [3]:
# Rebind df_rides to the result of calculate_distance_features (helper from `functions`).
# NOTE(review): presumably derives distance columns from the pickup/dropoff
# coordinates — the helper is not visible here, confirm there.
# Input and output share one name, so re-running this cell on its own feeds the
# helper an already-processed frame.
df_rides = calculate_distance_features(df_rides)

In [None]:
# Rebind df_rides to the result of calculate_datetime_features (helper from `functions`).
# NOTE(review): presumably expands pickup_datetime into calendar features — the
# helper is not visible here, confirm there.
# Input and output share one name, so re-running this cell on its own feeds the
# helper an already-processed frame.
df_rides = calculate_datetime_features(df_rides)

In [None]:
# Hold on to the ride_id column of this freshly generated batch.
# The fares frame built further down reuses these ids for fares_fg.
ride_ids = df_rides["ride_id"]

In [None]:
# Cast the count/id columns to int64 in a single astype call.
df_rides = df_rides.astype(
    {name: "int64" for name in ("passenger_count", "taxi_id", "driver_id")}
)


#### <span style='color:#ff5f27'> 💸 Fares Data </span>

In [None]:
# Build a fresh batch of fare records with the helper star-imported from `functions`.
# NOTE(review): 150 is presumably the number of rows requested and is meant to match
# the rides batch generated earlier — confirm against the helper.
df_fares = generate_fares_data(150)

# Bare last expression so the notebook renders the frame as the cell output.
df_fares

In [None]:
# Cast every column currently in df_fares to int64.
# NOTE(review): this covers tolls and total_fare too, which a later cell converts to
# double — any fractional part would already be truncated here. Confirm that
# generate_fares_data only emits whole numbers.
df_fares = df_fares.astype("int64")

In [None]:
# Attach the ride ids captured from the rides batch as a new ride_id column.
# pandas matches a Series to the frame by index label, not by position.
# NOTE(review): assumes df_fares and ride_ids carry the same index — rows without a
# matching label would end up with a missing ride_id. Confirm against the generators.
df_fares = df_fares.assign(ride_id=ride_ids)

In [None]:
# Convert the monetary columns to double precision in a single astype call.
df_fares = df_fares.astype(
    {name: "double" for name in ("tolls", "total_fare")}
)

## <span style="color:#ff5f27;"> 🔮 Connecting to Hopsworks Feature Store </span>

In [None]:
# Imported here, next to its first use, rather than in the imports cell at the top.
import hopsworks

# Open a session with Hopsworks and keep the returned project handle.
# NOTE(review): how credentials are supplied (prompt, env var, cached key) is not
# visible in this notebook — confirm before running unattended.
project = hopsworks.login()

# Handle to the project's feature store; the cells below use it to look up feature groups.
fs = project.get_feature_store() 

## <span style="color:#ff5f27;">🪄 Retrieving Feature Groups</span>

In [None]:
# Handle to version 1 of the rides feature group.
# NOTE(review): only name and version are passed (no keys or schema), so this
# presumably relies on the group already existing from an earlier part — confirm.
rides_fg = fs.get_or_create_feature_group(
    name="rides_fg",
    version=1,
)

# Handle to version 1 of the fares feature group (same assumption as above).
fares_fg = fs.get_or_create_feature_group(
    name="fares_fg",
    version=1,
)

## <span style="color:#ff5f27;">🧬 Inserting into Feature Groups</span>

In [None]:
# Write the prepared rides batch into the rides feature group.
# NOTE(review): df_rides columns and dtypes presumably have to match the feature
# group's existing schema — confirm against the notebook that created it.
rides_fg.insert(df_rides)

In [None]:
# Write the prepared fares batch (including the ride_id column added above) into
# the fares feature group.
# NOTE(review): df_fares columns and dtypes presumably have to match the feature
# group's existing schema — confirm against the notebook that created it.
fares_fg.insert(df_fares)

---

## <span style="color:#ff5f27;">⏭️ **Next:** Part 03 </span>

In the next notebook, we will create a feature view and training dataset.