In [1]:
import json

import pandas as pd
from datetime import datetime, timedelta

import trane
import featuretools as ft
import numpy as np



In [2]:
df = pd.read_csv("flight-delays/flight-sampled.csv", dtype={"TAIL_NUMBER": str})
df['DATE'] = df['DATE'].apply(lambda x: datetime.strptime(x, "%Y-%m-%d"))
df = df.sort_values(by=['DATE'])
df.tail()


Unnamed: 0,DATE,DAY_OF_WEEK,AIRLINE,FLIGHT_NUMBER,TAIL_NUMBER,ORIGIN_AIRPORT,DESTINATION_AIRPORT,SCHEDULED_DEPARTURE_HOUR,SCHEDULED_TIME,ELAPSED_TIME,DEPARTURE_DELAY,ARRIVAL_DELAY,CANCELLED,CANCELLATION_REASON,AIR_SYSTEM_DELAY,SECURITY_DELAY,AIRLINE_DELAY,LATE_AIRCRAFT_DELAY,WEATHER_DELAY
461945,2015-01-31,6,OO,OO5411,N980SW,HYS,DEN,10,74.0,88.0,-1.0,13.0,0,-,0.0,0.0,0.0,0.0,0.0
461946,2015-01-31,6,UA,UA1711,N24211,EWR,IAH,10,256.0,236.0,21.0,1.0,0,-,0.0,0.0,0.0,0.0,0.0
461947,2015-01-31,6,B6,B6298,N629JB,MCO,LGA,10,148.0,162.0,-9.0,5.0,0,-,0.0,0.0,0.0,0.0,0.0
461938,2015-01-31,6,OO,OO6343,N710SK,SNA,SFO,10,85.0,80.0,-6.0,-11.0,0,-,0.0,0.0,0.0,0.0,0.0
469967,2015-01-31,6,UA,UA1104,N73251,ANC,DEN,23,309.0,312.0,-1.0,2.0,0,-,0.0,0.0,0.0,0.0,0.0


In [3]:
# entity_col = "__fake_root_entity__"
# df, meta = trane.overall_prediction_helper(df, meta)
entity_col = "AIRLINE"

# MAP str to int
df_ft = df.copy()
str_col_list = ['AIRLINE', 'FLIGHT_NUMBER', 'TAIL_NUMBER', "ORIGIN_AIRPORT", "DESTINATION_AIRPORT", "CANCELLATION_REASON"]
str_mappers = {}
for str_col in str_col_list:
    str_to_id = {}
    id_to_str = []
    n_entity = 0
    
    for item in set(df_ft[str_col]):
        str_to_id[item] = n_entity
        id_to_str.append(item)
        n_entity += 1

    if str_col == entity_col:
        df[str_col] = df[str_col].apply(lambda x: str_to_id[x])
    df_ft[str_col] = df_ft[str_col].apply(lambda x: str_to_id[x])
    str_mappers[str_col] = (str_to_id, id_to_str)

    
meta = trane.TableMeta(json.loads(open('flight-delays/meta.json').read()))

df_ft.tail()

Unnamed: 0,DATE,DAY_OF_WEEK,AIRLINE,FLIGHT_NUMBER,TAIL_NUMBER,ORIGIN_AIRPORT,DESTINATION_AIRPORT,SCHEDULED_DEPARTURE_HOUR,SCHEDULED_TIME,ELAPSED_TIME,DEPARTURE_DELAY,ARRIVAL_DELAY,CANCELLED,CANCELLATION_REASON,AIR_SYSTEM_DELAY,SECURITY_DELAY,AIRLINE_DELAY,LATE_AIRCRAFT_DELAY,WEATHER_DELAY
461945,2015-01-31,6,2,6491,6,57,228,10,74.0,88.0,-1.0,13.0,0,1,0.0,0.0,0.0,0.0,0.0
461946,2015-01-31,6,13,882,940,266,206,10,256.0,236.0,21.0,1.0,0,1,0.0,0.0,0.0,0.0,0.0
461947,2015-01-31,6,12,580,2630,291,279,10,148.0,162.0,-9.0,5.0,0,1,0.0,0.0,0.0,0.0,0.0
461938,2015-01-31,6,2,12264,3055,267,252,10,85.0,80.0,-6.0,-11.0,0,1,0.0,0.0,0.0,0.0,0.0
469967,2015-01-31,6,13,6752,2593,103,228,23,309.0,312.0,-1.0,2.0,0,1,0.0,0.0,0.0,0.0,0.0


In [4]:
cutoff_base = datetime.strptime("2015-01-06", "%Y-%m-%d")
cutoff_end = datetime.strptime("2015-01-31", "%Y-%m-%d")
cutoff_strategy = trane.FixWindowCutoffStrategy(entity_col, cutoff_base, cutoff_end, 1)

features = trane.FeaturetoolsWrapper(df_ft, entity_col, 'DATE', 
                                        {'DAY_OF_WEEK': ft.variable_types.Categorical,
                                         'AIRLINE': ft.variable_types.Categorical,
                                         'FLIGHT_NUMBER': ft.variable_types.Categorical,
                                         'TAIL_NUMBER': ft.variable_types.Categorical,
                                         'ORIGIN_AIRPORT': ft.variable_types.Categorical,
                                         'DESTINATION_AIRPORT': ft.variable_types.Categorical,
                                         'CANCELLED': ft.variable_types.Categorical,
                                         'CANCELLATION_REASON': ft.variable_types.Categorical
                                        }, 'flights')
features.compute_features(df_ft, cutoff_strategy, 5)

Built 83 features
Elapsed: 00:00 | Progress:   4%|▍         

  "not set on entity %s" % (dt.id)


Elapsed: 00:04 | Progress: 100%|██████████


In [5]:
problem_generator = trane.PredictionProblemGenerator(
    table_meta=meta, entity_col=entity_col, time_col="DATE")

problems = problem_generator.generate()


Success/Attempt = 0/0Success/Attempt = 1/1Success/Attempt = 1/2Success/Attempt = 1/3Success/Attempt = 1/4Success/Attempt = 1/5Success/Attempt = 1/6Success/Attempt = 1/7Success/Attempt = 2/8Success/Attempt = 3/9Success/Attempt = 4/10Success/Attempt = 5/11Success/Attempt = 6/12Success/Attempt = 6/13Success/Attempt = 6/14Success/Attempt = 7/15Success/Attempt = 8/16Success/Attempt = 9/17Success/Attempt = 10/18Success/Attempt = 11/19Success/Attempt = 11/20Success/Attempt = 12/21Success/Attempt = 13/22Success/Attempt = 14/23Success/Attempt = 15/24Success/Attempt = 16/25Success/Attempt = 16/26Success/Attempt = 16/27Success/Attempt = 16/28Success/Attempt = 16/29Success/Attempt = 16/30Success/Attempt = 17/31Success/Attempt = 18/32Success/Attempt = 18/33Success/Attempt = 18/34Success/Attempt = 18/35Success/Attempt = 18/36Success/Attempt = 18/37Success/Attempt = 18/38Success/Attempt = 19/39Success/Attempt = 20/40Success/Attempt = 21/41Success/Attempt = 2

Success/Attempt = 265/883Success/Attempt = 265/884Success/Attempt = 265/885Success/Attempt = 265/886Success/Attempt = 265/887Success/Attempt = 265/888Success/Attempt = 265/889Success/Attempt = 265/890Success/Attempt = 265/891Success/Attempt = 265/892Success/Attempt = 265/893Success/Attempt = 265/894Success/Attempt = 265/895Success/Attempt = 265/896Success/Attempt = 265/897Success/Attempt = 265/898Success/Attempt = 265/899Success/Attempt = 265/900Success/Attempt = 265/901Success/Attempt = 265/902Success/Attempt = 265/903Success/Attempt = 265/904Success/Attempt = 265/905Success/Attempt = 265/906Success/Attempt = 265/907Success/Attempt = 265/908Success/Attempt = 265/909Success/Attempt = 265/910Success/Attempt = 265/911Success/Attempt = 265/912Success/Attempt = 265/913Success/Attempt = 265/914Success/Attempt = 265/915Success/Attempt = 265/916Success/Attempt = 265/917Success/Attempt = 265/918Success/Attempt = 265/919Success/Attempt = 265/920Success/Att

Success/Attempt = 540/1831Success/Attempt = 540/1832Success/Attempt = 541/1833Success/Attempt = 542/1834Success/Attempt = 543/1835Success/Attempt = 544/1836Success/Attempt = 545/1837Success/Attempt = 545/1838Success/Attempt = 545/1839Success/Attempt = 545/1840Success/Attempt = 545/1841Success/Attempt = 545/1842Success/Attempt = 545/1843Success/Attempt = 545/1844Success/Attempt = 545/1845Success/Attempt = 545/1846Success/Attempt = 545/1847Success/Attempt = 545/1848Success/Attempt = 545/1849Success/Attempt = 545/1850Success/Attempt = 545/1851Success/Attempt = 545/1852Success/Attempt = 545/1853Success/Attempt = 545/1854Success/Attempt = 545/1855Success/Attempt = 545/1856Success/Attempt = 545/1857Success/Attempt = 545/1858Success/Attempt = 545/1859Success/Attempt = 545/1860Success/Attempt = 545/1861Success/Attempt = 545/1862Success/Attempt = 545/1863Success/Attempt = 545/1864Success/Attempt = 545/1865Success/Attempt = 545/1866Success/Attempt = 545/1867

Success/Attempt = 685/2593Success/Attempt = 685/2594Success/Attempt = 685/2595Success/Attempt = 685/2596Success/Attempt = 685/2597Success/Attempt = 685/2598Success/Attempt = 685/2599Success/Attempt = 685/2600Success/Attempt = 685/2601Success/Attempt = 685/2602Success/Attempt = 685/2603Success/Attempt = 685/2604Success/Attempt = 685/2605Success/Attempt = 685/2606Success/Attempt = 685/2607Success/Attempt = 685/2608Success/Attempt = 685/2609Success/Attempt = 685/2610Success/Attempt = 685/2611Success/Attempt = 685/2612Success/Attempt = 685/2613Success/Attempt = 685/2614Success/Attempt = 685/2615Success/Attempt = 685/2616Success/Attempt = 685/2617Success/Attempt = 686/2618Success/Attempt = 687/2619Success/Attempt = 688/2620Success/Attempt = 689/2621Success/Attempt = 690/2622Success/Attempt = 690/2623Success/Attempt = 690/2624Success/Attempt = 691/2625Success/Attempt = 692/2626Success/Attempt = 693/2627Success/Attempt = 694/2628Success/Attempt = 695/2629

Success/Attempt = 915/3382Success/Attempt = 915/3383Success/Attempt = 915/3384Success/Attempt = 915/3385Success/Attempt = 915/3386Success/Attempt = 915/3387Success/Attempt = 915/3388Success/Attempt = 915/3389Success/Attempt = 915/3390Success/Attempt = 915/3391Success/Attempt = 916/3392Success/Attempt = 917/3393Success/Attempt = 918/3394Success/Attempt = 919/3395Success/Attempt = 920/3396Success/Attempt = 920/3397Success/Attempt = 920/3398Success/Attempt = 921/3399Success/Attempt = 922/3400Success/Attempt = 923/3401Success/Attempt = 924/3402Success/Attempt = 925/3403Success/Attempt = 925/3404Success/Attempt = 925/3405Success/Attempt = 925/3406Success/Attempt = 925/3407Success/Attempt = 925/3408Success/Attempt = 925/3409Success/Attempt = 926/3410Success/Attempt = 927/3411Success/Attempt = 928/3412Success/Attempt = 929/3413Success/Attempt = 930/3414Success/Attempt = 930/3415Success/Attempt = 930/3416Success/Attempt = 931/3417Success/Attempt = 932/3418

Success/Attempt = 1175/4326Success/Attempt = 1175/4327Success/Attempt = 1176/4328Success/Attempt = 1177/4329Success/Attempt = 1178/4330Success/Attempt = 1179/4331Success/Attempt = 1180/4332Success/Attempt = 1180/4333Success/Attempt = 1180/4334Success/Attempt = 1181/4335Success/Attempt = 1182/4336Success/Attempt = 1183/4337Success/Attempt = 1184/4338Success/Attempt = 1185/4339Success/Attempt = 1185/4340Success/Attempt = 1185/4341Success/Attempt = 1185/4342Success/Attempt = 1185/4343Success/Attempt = 1185/4344Success/Attempt = 1185/4345Success/Attempt = 1186/4346Success/Attempt = 1187/4347Success/Attempt = 1188/4348Success/Attempt = 1189/4349Success/Attempt = 1190/4350Success/Attempt = 1190/4351Success/Attempt = 1190/4352Success/Attempt = 1191/4353Success/Attempt = 1192/4354Success/Attempt = 1193/4355Success/Attempt = 1194/4356Success/Attempt = 1195/4357Success/Attempt = 1195/4358Success/Attempt = 1195/4359Success/Attempt = 1195/4360Success/Attempt = 1

Success/Attempt = 1345/5136Success/Attempt = 1345/5137Success/Attempt = 1346/5138Success/Attempt = 1347/5139Success/Attempt = 1348/5140Success/Attempt = 1349/5141Success/Attempt = 1350/5142Success/Attempt = 1350/5143Success/Attempt = 1350/5144Success/Attempt = 1351/5145Success/Attempt = 1352/5146Success/Attempt = 1353/5147Success/Attempt = 1354/5148Success/Attempt = 1355/5149Success/Attempt = 1355/5150Success/Attempt = 1355/5151Success/Attempt = 1355/5152Success/Attempt = 1355/5153Success/Attempt = 1355/5154Success/Attempt = 1355/5155Success/Attempt = 1356/5156Success/Attempt = 1357/5157Success/Attempt = 1358/5158Success/Attempt = 1359/5159Success/Attempt = 1360/5160Success/Attempt = 1360/5161Success/Attempt = 1360/5162Success/Attempt = 1361/5163Success/Attempt = 1362/5164Success/Attempt = 1363/5165Success/Attempt = 1364/5166Success/Attempt = 1365/5167Success/Attempt = 1365/5168Success/Attempt = 1365/5169Success/Attempt = 1365/5170Success/Attempt = 1

Success/Attempt = 1561/5932Success/Attempt = 1561/5933Success/Attempt = 1561/5934Success/Attempt = 1561/5935Success/Attempt = 1561/5936Success/Attempt = 1561/5937Success/Attempt = 1561/5938Success/Attempt = 1561/5939Success/Attempt = 1561/5940Success/Attempt = 1561/5941Success/Attempt = 1561/5942Success/Attempt = 1561/5943Success/Attempt = 1561/5944Success/Attempt = 1561/5945Success/Attempt = 1561/5946Success/Attempt = 1561/5947Success/Attempt = 1561/5948Success/Attempt = 1561/5949Success/Attempt = 1561/5950Success/Attempt = 1561/5951Success/Attempt = 1561/5952Success/Attempt = 1561/5953Success/Attempt = 1561/5954Success/Attempt = 1561/5955Success/Attempt = 1561/5956Success/Attempt = 1561/5957Success/Attempt = 1561/5958Success/Attempt = 1561/5959Success/Attempt = 1561/5960Success/Attempt = 1561/5961Success/Attempt = 1561/5962Success/Attempt = 1561/5963Success/Attempt = 1561/5964Success/Attempt = 1561/5965Success/Attempt = 1561/5966Success/Attempt = 1

In [6]:
evaluator = trane.PredictionProblemEvaluator(df, 
                                             entity_col=entity_col, 
                                             cutoff_strategy=cutoff_strategy, 
                                             min_train_set=20,
                                             min_test_set=20,
                                             previous_k_as_feature=2,
                                             latest_k_as_test=8
                                             )


In [8]:
result = trane.multi_process_evaluation(evaluator, problems, features)
# with open("prob_with_acc.json", "w") as f:
#     json.dump(result, f)

100%|██████████| 10/10 [00:09<00:00,  1.07it/s]
