In [5]:
import featureform as ff
import pandas as pd
import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq

In [6]:
# The source data has to be stored as CSV or Parquet, so the pickled
# DataFrame is converted to a Parquet file here.
# NOTE(review): unpickling can execute arbitrary code -- only load pickle
# files that come from a trusted source.
df = pd.read_pickle('5_8_weekly_loc_data.pkl')
table = pa.Table.from_pandas(df)
pq.write_table(table, where='5_8_weekly_loc_data.parquet')

In [8]:
# Preview the first rows to see which columns are available for the feature
# and label definitions.
df.head()

Unnamed: 0,created,location_age,location_name,decision_score,day_of_week,created_time,created_date,created:hour,created:day_part,created:day_of_week,loc_app_delta
0,2023-05-08 14:57:07,308,"Aspen Dental - Worcester, MA (Lincoln)",732,Monday,14:57:07,2023-05-08,14,afternoon,0,NaT
1,2023-05-08 17:23:57,467,"Aspen Dental - Pocatello, ID",674,Monday,17:23:57,2023-05-08,17,evening,0,NaT
2,2023-05-08 19:47:53,467,"Aspen Dental - Elk Grove, CA",735,Monday,19:47:53,2023-05-08,19,evening,0,NaT
3,2023-05-09 14:17:09,308,"Aspen Dental - Amsterdam, NY",712,Tuesday,14:17:09,2023-05-09,14,afternoon,1,NaT
4,2023-05-10 11:58:32,489,"Aspen Dental - Hamilton, OH",717,Wednesday,11:58:32,2023-05-10,11,noon,2,NaT


In [7]:
# Register the user "r4z4" and make it the default owner.
ff.register_user("r4z4").make_default_owner()

# Local provider; it is used for the file source, the transformation, and
# as the inference store of the feature defined in this notebook.
local = ff.register_local()

# Register the Parquet file as a source. Other resources refer to it by the
# (name, variant) pair given here.
applications = local.register_file(
    path="5_8_weekly_loc_data.parquet",
    name="5_8_weekly_loc_data_applications",
    variant="quickstart",
    description="A dataset of applications for the week of 5/8/23",
)

In [9]:
# Define a DataFrame transformation on the registered applications source.
# The input tuple has to match the (name, variant) passed to register_file:
# the file is registered as "5_8_weekly_loc_data_applications", so a source
# named "applications" would not resolve.
@local.df_transformation(variant="quickstart",
                         inputs=[("5_8_weekly_loc_data_applications", "quickstart")])
def average_location_decision_score(applications):
    """Return the mean application decision score for each location.

    Args:
        applications: DataFrame of the registered applications source. It
            must contain the "location_name" and "decision_score" columns.

    Returns:
        The mean "decision_score" per group, indexed by "location_name".
    """
    return applications.groupby("location_name")["decision_score"].mean()

In [None]:
# Register an entity (keyed on location_name) to associate with a feature and a label.

In [10]:
@ff.entity
class User:
    # NOTE(review): both resources below are keyed on "location_name", so this
    # entity appears to represent a location rather than a user. The class name
    # is left unchanged so the registered entity name stays the same.

    # Mean decision score per location, taken from the
    # average_location_decision_score transformation. A mean is a
    # floating-point value, so the feature is typed Float32 rather than Int32.
    avg_decision_scores = ff.Feature(
        average_location_decision_score[["location_name", "decision_score"]],
        variant="quickstart",
        type=ff.Float32,
        inference_store=local,
    )

    # NOTE(review): "IsFraud" is not among the columns shown by df.head()
    # earlier in this notebook -- confirm the dataset really has this column,
    # or point the label at an existing one.
    fraudulent = ff.Label(
        applications[["location_name", "IsFraud"]],
        variant="quickstart",
        type=ff.Bool,
    )

In [None]:
# join together the feature and label into a training set.

In [11]:
# Combine the ff.Label and ff.Feature defined on the entity above into a
# training set; each is referenced by its (name, variant) pair.
fraud_label = ("fraudulent", "quickstart")
fraud_features = [("avg_decision_scores", "quickstart")]
ff.register_training_set(
    "fraud_training",
    "quickstart",
    label=fraud_label,
    features=fraud_features,
)

In [None]:
# Now that our definitions are complete, we can apply them to our Featureform instance.
## featureform apply definitions.py --local