In [1]:
import json
import os
import warnings

# The relative 'Example/...' paths used by later cells resolve from the
# directory one level above the notebook. Only move up when that directory is
# not already visible, so re-running this cell does not keep climbing the tree.
if not os.path.isdir('Example'):
    os.chdir('..')
# Silence every warning for the rest of the session.
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd

# NOTE(review): imported after the working-directory change above, presumably
# so the local trane checkout is importable -- keep this order; confirm.
import trane

In [2]:
# Sanity check: show the working directory that the relative 'Example/...'
# paths in the following cells are resolved against.
os.getcwd()

'/Users/arcarter/code/Trane-Refactor/Trane'

In [4]:
# Load the example table, parsing the two date columns, and keep only the
# first 5000 rows.
df = pd.read_csv(
    'Example/medical_no_show.csv',
    parse_dates=['appointment_day', 'scheduled_day'],
).head(5000)

# Load the table metadata. The context manager guarantees the file handle is
# closed even if JSON parsing fails.
with open('Example/meta.json') as meta_file:
    meta = trane.TableMeta(json.load(meta_file))


# Define a cutoff strategy.
def cutoff_fn(rows, entity_id):
    """Return the same fixed cutoff, 1980-02-25, regardless of the arguments."""
    return np.datetime64('1980-02-25')


cutoff_strategy = trane.CutoffStrategy(
    generate_fn=cutoff_fn,
    description='with a fixed cutoff of 1980-02-25',
)

# Define the operations, in the order they are passed to the problem:
# filter -> row -> transformation -> aggregation.
# NOTE(review): LessFilterOp with hyper-parameter 65 presumably keeps rows
# whose 'age' is below 65 -- confirm against trane.ops.
filter_op = trane.ops.LessFilterOp(column_name='age')
filter_op.set_hyper_parameter(65)
row_op = trane.ops.IdentityRowOp(column_name='no_show')
transformation_op = trane.ops.IdentityTransformationOp(column_name='no_show')
aggregation_op = trane.ops.LastAggregationOp(column_name='no_show')

operations = [filter_op, row_op, transformation_op, aggregation_op]

# Create the prediction problem from the operations, metadata and cutoff.
problem = trane.PredictionProblem(
    operations=operations,
    entity_id_col='appointment_id',
    label_col='no_show',
    table_meta=meta,
    cutoff_strategy=cutoff_strategy)

In [4]:
# Run the prediction problem defined above on the loaded dataframe.
res = problem.execute(df)
# Preview the first 10 rows of the result.
res.head(10)

appointment_id,cutoff,label
5030230,1980-02-25,0
5303666,1980-02-25,0
5304747,1980-02-25,0
5322246,1980-02-25,0
5338898,1980-02-25,0
5351190,1980-02-25,0
5351199,1980-02-25,0
5351207,1980-02-25,1
5351216,1980-02-25,1
5351222,1980-02-25,1


In [10]:
# You can also generate questions
# Draw a 20% sample of the rows. The fixed random_state makes the sample the
# same on every fresh run of the notebook.
# NOTE(review): generate() may have randomness of its own -- confirm whether
# it also needs seeding for fully reproducible output.
sampled_df = df.sample(frac=.2, random_state=0)

problem_generator = trane.PredictionProblemGenerator(
    table_meta=meta, entity_col='appointment_id', label_col='no_show', filter_col='age')

# Generate candidate prediction problems from the sample and display them.
problems = problem_generator.generate(sampled_df)
problems

[<trane.core.prediction_problem.PredictionProblem at 0x1157e8208>,
 <trane.core.prediction_problem.PredictionProblem at 0x1157b44e0>,
 <trane.core.prediction_problem.PredictionProblem at 0x1157b4710>,
 <trane.core.prediction_problem.PredictionProblem at 0x1161c6278>,
 <trane.core.prediction_problem.PredictionProblem at 0x1157e1748>,
 <trane.core.prediction_problem.PredictionProblem at 0x1157a2b70>,
 <trane.core.prediction_problem.PredictionProblem at 0x109a1e2b0>,
 <trane.core.prediction_problem.PredictionProblem at 0x1157b4048>,
 <trane.core.prediction_problem.PredictionProblem at 0x1036455c0>,
 <trane.core.prediction_problem.PredictionProblem at 0x1183bf208>,
 <trane.core.prediction_problem.PredictionProblem at 0x10b2e37f0>,
 <trane.core.prediction_problem.PredictionProblem at 0x115466e10>,
 <trane.core.prediction_problem.PredictionProblem at 0x1157b47b8>,
 <trane.core.prediction_problem.PredictionProblem at 0x10b2e3860>,
 <trane.core.prediction_problem.PredictionProblem at 0x1157a2f

In [12]:
# Render the first generated problem as a string to see its chain of operations.
str(problems[0])

'AllFilterOp(age)->IdentityRowOp(no_show)->IdentityTransformationOp(no_show)->FirstAggregationOp(no_show)'