# Iris - Feature Pipeline

Set `BACKFILL=True` to create features from the historical data in the `iris.csv` file. Leave it as `False` to generate a single random synthetic iris flower instead.

In [1]:
import random
import pandas as pd
import hopsworks

# True  -> load the historical iris.csv dataset.
# False -> generate a single synthetic flower.
BACKFILL = False

In [2]:
def generate_flower(name, sepal_len_max, sepal_len_min, sepal_width_max, sepal_width_min,
                    petal_len_max, petal_len_min, petal_width_max, petal_width_min):
    """
    Build a one-row DataFrame describing a single synthetic iris flower.

    Each of the four measurements is sampled with `random.uniform` between
    the corresponding pair of bounds; `name` is stored in the `variety` column.
    """
    bounds_by_column = (
        ("sepal_length", sepal_len_max, sepal_len_min),
        ("sepal_width", sepal_width_max, sepal_width_min),
        ("petal_length", petal_len_max, petal_len_min),
        ("petal_width", petal_width_max, petal_width_min),
    )
    # Values are drawn in column order, one single-element list per column.
    measurements = {
        column: [random.uniform(first_bound, second_bound)]
        for column, first_bound, second_bound in bounds_by_column
    }
    flower_df = pd.DataFrame(measurements)
    flower_df['variety'] = name
    return flower_df


def get_random_iris_flower():
    """
    Returns a DataFrame containing one random iris flower

    One of the three varieties (Virginica, Versicolor, Setosa) is picked with
    equal probability, and a single row is generated for it with
    `generate_flower`. This function only builds the DataFrame; it does not
    write anything to the feature store.
    """
    # Bounds per variety, in the order generate_flower expects them:
    # (sepal_len_max, sepal_len_min, sepal_width_max, sepal_width_min,
    #  petal_len_max, petal_len_min, petal_width_max, petal_width_min)
    variety_bounds = {
        "Virginica": (8, 5.5, 3.8, 2.2, 7, 4.5, 2.5, 1.4),
        "Versicolor": (7.5, 4.5, 3.5, 2.1, 5.5, 3.1, 1.8, 1.0),
        "Setosa": (6, 4.5, 4.5, 2.3, 2, 1.2, 0.7, 0.3),
    }

    # Pick the variety first so only the flower that is returned gets built.
    variety = random.choice(list(variety_bounds))
    return generate_flower(variety, *variety_bounds[variety])

In [3]:
# Backfill mode reads the historical dataset from the tutorial repository;
# otherwise a single synthetic flower is generated.
if BACKFILL:
    iris_df = pd.read_csv("https://repo.hops.works/master/hopsworks-tutorials/data/iris.csv")
else:
    iris_df = get_random_iris_flower()

# Preview the rows that will be written to the feature group.
iris_df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,variety
0,6.782198,3.440803,3.333318,1.009991,Versicolor


## Adding data to Hopsworks

In [5]:
# Log in to Hopsworks; the recorded output of this cell shows it asking for an API key.
project = hopsworks.login()
# Handle to the project's feature store, used when creating the feature group.
fs = project.get_feature_store()

Copy your Api Key (first register/login): https://c.app.hopsworks.ai/account/api/generated
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/2266




Connected. Call `.close()` to terminate connection gracefully.


## Write to a feature group

In [6]:
# Identity and metadata of the feature group.
NAME = "iris"
VERSION = 1
DESCRIPTION = "Iris flower dataset"
# All four measurement columns together form the primary key.
PRIMARY_KEYS = [
    "sepal_length",
    "sepal_width",
    "petal_length",
    "petal_width",
]

# Fetch the feature group, or create it if it does not exist yet.
iris_fg = fs.get_or_create_feature_group(
    name=NAME,
    version=VERSION,
    primary_key=PRIMARY_KEYS,
    description=DESCRIPTION,
)

# Insert the rows; kept as the last expression so the returned value is displayed.
iris_fg.insert(iris_df)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/2266/fs/2211/fg/2805


Uploading Dataframe: 0.00% |          | Rows 0/1 | Elapsed Time: 00:00 | Remaining Time: ?

Launching offline feature group backfill job...
Backfill Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/2266/jobs/named/iris_1_offline_fg_backfill/executions


(<hsfs.core.job.Job at 0x7f3eeb404b80>, None)