In [1]:
import json

import pandas as pd
from datetime import datetime, timedelta

import trane
import featuretools as ft
import numpy as np

In [2]:
# Load the sampled bike trip table (path is relative to the notebook's
# working directory).
df = pd.read_csv("data/bike-sampled.csv")
# Parse the "YYYY-MM-DD" strings in a single vectorized call instead of a
# per-row datetime.strptime through .apply; the resulting column is the same
# datetime64 dtype.
df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d")
# Order the rows chronologically; "date" is the time column used downstream.
df = df.sort_values(by=["date"])
df.tail()

Unnamed: 0,date,hour,usertype,gender,tripduration,temperature,from_station_id,dpcapacity_start,to_station_id,dpcapacity_end
3304,2017-01-31,10,Subscriber,Female,6.966667,37.9,247,19.0,247,19.0
3305,2017-01-31,10,Subscriber,Male,6.483333,37.9,425,15.0,426,19.0
3306,2017-01-31,10,Subscriber,Female,8.25,37.9,175,19.0,45,15.0
3299,2017-01-31,10,Subscriber,Male,16.266667,37.9,202,15.0,317,23.0
0,2017-01-31,23,Subscriber,Male,3.316667,35.1,230,19.0,131,15.0


In [3]:
entity_col = "__fake_root_entity__"

# Read the table metadata inside a context manager so the file handle is
# closed deterministically (the bare open(...).read() left it to the GC).
with open("data/meta.json") as meta_file:
    meta = trane.TableMeta(json.load(meta_file))
# NOTE(review): presumably this adds the constant `entity_col` column that
# shows up in the df_ft preview below - confirm against the trane helper.
df, meta = trane.overall_prediction_helper(df, meta)

# Map string-valued columns to integer ids on a copy used for feature
# generation; `df` keeps the original strings unless the column is the
# entity column itself.
df_ft = df.copy()
str_col_list = ["usertype", "gender"]
str_mappers = {}  # column name -> (str_to_id dict, id_to_str list)
for str_col in str_col_list:
    # dict.fromkeys de-duplicates exactly like set() but preserves
    # first-appearance order, so the assigned ids are reproducible across
    # kernel restarts (set iteration order for strings varies per process).
    id_to_str = list(dict.fromkeys(df_ft[str_col]))
    str_to_id = {item: idx for idx, item in enumerate(id_to_str)}
    n_entity = len(id_to_str)  # kept so the cell defines the same names as before

    if str_col == entity_col:
        # The entity column must carry the same ids in both frames.
        df[str_col] = df[str_col].apply(lambda x: str_to_id[x])
    df_ft[str_col] = df_ft[str_col].apply(lambda x: str_to_id[x])
    str_mappers[str_col] = (str_to_id, id_to_str)


df_ft.tail()

Unnamed: 0,date,hour,usertype,gender,tripduration,temperature,from_station_id,dpcapacity_start,to_station_id,dpcapacity_end,__fake_root_entity__
3304,2017-01-31,10,1,1,6.966667,37.9,247,19.0,247,19.0,0
3305,2017-01-31,10,1,0,6.483333,37.9,425,15.0,426,19.0,0
3306,2017-01-31,10,1,1,8.25,37.9,175,19.0,45,15.0,0
3299,2017-01-31,10,1,0,16.266667,37.9,202,15.0,317,23.0,0
0,2017-01-31,23,1,0,3.316667,35.1,230,19.0,131,15.0,0


In [4]:
# NOTE(review): both imports repeat the notebook's first cell; consider
# dropping them so all imports live in one place.
from datetime import datetime
import trane

# First and last cutoff dates, parsed from ISO "YYYY-MM-DD" strings.
cutoff_base, cutoff_end = (
    datetime.strptime(day, "%Y-%m-%d") for day in ("2017-01-06", "2017-01-31")
)
# NOTE(review): the meaning of the trailing positional `1` is not visible
# here - confirm against trane.CutoffStrategy's signature.
cutoff_strategy = trane.CutoffStrategy(entity_col, cutoff_base, cutoff_end, 1)

# Columns that hold integer codes but should be treated as categories.
categorical_columns = [
    "hour",
    "usertype",
    "gender",
    "from_station_id",
    "to_station_id",
]
features = trane.FeaturetoolsWrapper(
    df=df_ft,
    entity_col=entity_col,
    time_col="date",
    logical_types={column: "Categorical" for column in categorical_columns},
    name="bikes",
)
# NOTE(review): the recorded output of this cell ends in
# "TypeError: Cannot interpret '<...CutoffStrategy object...>' as a data type",
# so this call (or something it triggers) fails as written - verify the
# expected arguments of compute_features for the installed trane version.
features.compute_features(df_ft, cutoff_strategy, 5)

Built 43 features


TypeError: Cannot interpret '<trane.core.cutoff_strategy.CutoffStrategy object at 0x14f73e190>' as a data type

In [None]:
# Build a problem generator from the table metadata, using the same entity
# and time columns as the feature wrapper.
problem_generator = trane.PredictionProblemGenerator(
    time_col="date",
    entity_col=entity_col,
    table_meta=meta,
)

# Generated problems are handed to the evaluation step further down.
problems = problem_generator.generate()

In [None]:
# Keyword settings for the evaluator, gathered in one place.
# NOTE(review): the precise semantics of the min_* / *_k_* values are defined
# by trane.PredictionProblemEvaluator - confirm before tuning.
evaluator_options = dict(
    entity_col=entity_col,
    cutoff_strategy=cutoff_strategy,
    min_train_set=5,
    min_test_set=5,
    previous_k_as_feature=2,
    latest_k_as_test=8,
)
# The evaluator receives `df` (string columns intact), not the
# integer-encoded `df_ft`.
evaluator = trane.PredictionProblemEvaluator(df, **evaluator_options)

In [None]:
# Evaluate every generated problem with the evaluator and feature wrapper.
result = trane.multi_process_evaluation(evaluator, problems, features)

# Persist the evaluation result as JSON in the working directory.
with open("prob_with_acc.json", "w") as out_file:
    json.dump(result, fp=out_file)