In [1]:
import pandas as pd
import numpy as np

from datetime import datetime as dt
from functools import partial, wraps

from plasticc.featurize import process_meta, featurize
from plasticc.training import path_from_cv_score, train_and_validate
from plasticc.final import featurize_test, predict_test

Using TensorFlow backend.


In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
fcp = {
    'flux': {
        'longest_strike_above_mean': None,
        'longest_strike_below_mean': None,
        'mean_change': None,
        'mean_abs_change': None,
        'length': None,
    },

    'flux_by_flux_ratio_sq': {
        'longest_strike_above_mean': None,
        'longest_strike_below_mean': None,       
    },

    'flux_passband': {
        'fft_coefficient': [
                {'coeff': 0, 'attr': 'abs'}, 
                {'coeff': 1, 'attr': 'abs'}
            ],
        'kurtosis' : None, 
        'skewness' : None,
    },

    'mjd': {
        'maximum': None, 
        'minimum': None,
        'mean_change': None,
        'mean_abs_change': None,
    },
}

In [4]:
aggs = {
    'flux': ['min', 'max', 'mean', 'median', 'std', 'skew'],
    'flux_err': ['min', 'max', 'mean', 'median', 'std', 'skew'],
    'detected': ['mean'],
    'flux_ratio_sq':['sum', 'skew'],
    'flux_by_flux_ratio_sq':['sum','skew'],
}

### Generate features for model training

In [5]:
%%time
meta_train = process_meta('../data/raw/training_set_metadata.csv')
train = pd.read_csv('../data/raw/training_set.csv')

CPU times: user 2.7 s, sys: 328 ms, total: 3.03 s
Wall time: 1.77 s


In [6]:
%%time
X = featurize(train, meta_train, aggs, fcp, n_jobs=4)

Feature Extraction: 100%|██████████| 20/20 [00:09<00:00,  2.44it/s]
Feature Extraction: 100%|██████████| 20/20 [00:01<00:00, 10.47it/s]
Feature Extraction: 100%|██████████| 20/20 [00:01<00:00, 11.53it/s]
Feature Extraction: 100%|██████████| 20/20 [00:01<00:00, 19.84it/s]


CPU times: user 1min 10s, sys: 3.01 s, total: 1min 13s
Wall time: 32.3 s


In [7]:
if 'target' in X:
    y = X['target']
    del X['target']
else:
    print("What the duck")
    3//0

In [8]:
%%time
train_mean = X.mean(axis=0)
#train_mean.to_hdf('train_data.hdf5', 'data')
# pd.set_option('display.max_rows', 500)
#import pdb; pdb.set_trace()
X.fillna(0, inplace=True)

CPU times: user 144 ms, sys: 0 ns, total: 144 ms
Wall time: 7.16 ms


### Train model with CV

In [18]:
lgbm_params = {
    'device': 'cpu',
    'objective': 'multiclass',
    'num_class': 14,
    'boosting_type': 'gbdt',
    'n_jobs': 16,
    'max_depth': 6,
    'n_estimators': 1024,
    'subsample_freq': 2,
    'subsample_for_bin': 5000,
    'min_data_per_group': 100,
    'max_cat_to_onehot': 4,
    'cat_l2': 1.0,
    'cat_smooth': 59.5,
    'max_cat_threshold': 32,
    'metric_freq': 10,
    'verbosity': -1,
    'metric': 'multi_logloss',
    'xgboost_dart_mode': False,
    'uniform_drop': False,
    'colsample_bytree': 0.5,
    'drop_rate': 0.173,
    'learning_rate': 0.0267,
    'max_drop': 5,
    'min_child_samples': 10,
    'min_child_weight': 100.0,
    'min_split_gain': 0.126,
    'num_leaves': 7,
    'reg_alpha': 0.1,
    'reg_lambda': 0.00023,
    'skip_drop': 0.44,
    'subsample': 0.75,
    'max_bin': 32,
    'min_data_in_leaf': 13,
    'lambda_l1': 2,
}


xgb_params = {
    'objective': 'multiclass',
    'booster': 'gbdtree',
    'n_jobs': 16,
    'max_depth': 7,
    'n_estimators': 1024,
    'verbosity': -1,
    'colsample_bytree': 0.5,
    'learning_rate': 0.0267,
    'min_child_weight': 100.0,
    'reg_alpha': 0.1,
    'reg_lambda': 0.00023,
    'subsample': 0.75
}

In [19]:
colnames_to_ignore = set([
    'object_id',
    'hostgal_specz',
    'ra',
    'decl',
    'gal_l',
    'gal_b',
    'ddf',
    'latlon1',
    'haversine',
])
feature_colnames = [col for col in X.columns if col not in colnames_to_ignore]
id_colname = 'object_id'

In [20]:
len(feature_colnames)

60

In [21]:
%%time
# modeling from CV
clfs, score, importances = train_and_validate(
    X=X, 
    y=y, 
    feature_colnames=feature_colnames, 
    id_colname=id_colname, 
    model='lgbm', 
    model_params=lgbm_params, 
    nr_fold=6, 
    random_state=1
)

Training until validation scores don't improve for 50 rounds.
[100]	training's multi_logloss: 0.340002	training's wloss: 0.8295	valid_1's multi_logloss: 1.72625	valid_1's wloss: 1.02895
[200]	training's multi_logloss: 0.182281	training's wloss: 0.552799	valid_1's multi_logloss: 1.35969	valid_1's wloss: 0.774142
[300]	training's multi_logloss: 0.135836	training's wloss: 0.44372	valid_1's multi_logloss: 1.19622	valid_1's wloss: 0.705004
[400]	training's multi_logloss: 0.112114	training's wloss: 0.380978	valid_1's multi_logloss: 1.10718	valid_1's wloss: 0.681572
[500]	training's multi_logloss: 0.0965806	training's wloss: 0.337432	valid_1's multi_logloss: 1.0476	valid_1's wloss: 0.6709
[600]	training's multi_logloss: 0.0850397	training's wloss: 0.303567	valid_1's multi_logloss: 0.999548	valid_1's wloss: 0.666795
Early stopping, best iteration is:
[612]	training's multi_logloss: 0.0838321	training's wloss: 0.299934	valid_1's multi_logloss: 0.994546	valid_1's wloss: 0.666546
no 1-fold loss: 

In [22]:
importances.sort_values(by='mean_gain', ascending=False).head(15)

Unnamed: 0,feature,gain,fold,mean_gain
54,mjd_diff_det,2485,5,2623.5
54,mjd_diff_det,2308,2,2623.5
54,mjd_diff_det,2845,3,2623.5
54,mjd_diff_det,2656,4,2623.5
54,mjd_diff_det,3017,6,2623.5
54,mjd_diff_det,2430,1,2623.5
57,distmod,2692,6,2182.833333
57,distmod,2050,5,2182.833333
57,distmod,2085,4,2182.833333
57,distmod,2461,3,2182.833333


In [23]:
submission_file_path = path_from_cv_score(score)
submission_file_path

'/home/kk385830/astronomical-classification/submissions/subm_0.633887_2018-12-12-20-10.csv'

### Test set features

In [14]:
chunk_size_one_100 = 453653105 // 100 + 1
# chunk_size_one_100 = 1000001 // 100 + 1
chunk_size_one_100

4536532

In [None]:
%%time
# should take 100x (time after 1st iteration)
featurize_test(
    featurize_configs={'aggs': aggs, 'fcp': fcp}, 
    n_jobs=12,
    meta_path='../data/raw/test_set_metadata.csv',
    test_path='../data/raw/test_set.csv',
    output_path='../data/features/test-all-feat-from-kernel-repro.csv',
    id_colname='object_id',
    chunks=chunk_size_one_100,  # alternatively: 5000000
)

Feature Extraction: 100%|██████████| 60/60 [00:10<00:00,  5.83it/s]
Feature Extraction: 100%|██████████| 60/60 [00:02<00:00, 23.79it/s]
Feature Extraction: 100%|██████████| 60/60 [00:02<00:00, 29.98it/s]
Feature Extraction: 100%|██████████| 60/60 [00:01<00:00, 37.97it/s]


        4536532 done in   1.0 minutes


Feature Extraction: 100%|██████████| 60/60 [00:12<00:00,  4.89it/s]
Feature Extraction: 100%|██████████| 60/60 [00:02<00:00, 27.38it/s]
Feature Extraction: 100%|██████████| 60/60 [00:01<00:00, 32.15it/s]
Feature Extraction: 100%|██████████| 60/60 [00:01<00:00, 38.33it/s]


        9073064 done in   1.9 minutes


Feature Extraction: 100%|██████████| 60/60 [00:21<00:00,  2.74it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.76it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 16.32it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 18.97it/s]


       13609596 done in   3.4 minutes


Feature Extraction: 100%|██████████| 60/60 [00:28<00:00,  2.48it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.41it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.92it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.12it/s]


       18146128 done in   5.4 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.38it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.12it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.71it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.09it/s]


       22682660 done in   7.3 minutes


Feature Extraction: 100%|██████████| 60/60 [00:27<00:00,  2.17it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.30it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.34it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.16it/s]


       27219192 done in   9.3 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.69it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.41it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.81it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.16it/s]


       31755724 done in  11.2 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.23it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 15.03it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.47it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.69it/s]


       36292256 done in  13.1 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.33it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.39it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.95it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.10it/s]


       40828788 done in  15.0 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.24it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.05it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.51it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 15.10it/s]


       45365320 done in  16.9 minutes


Feature Extraction: 100%|██████████| 60/60 [00:27<00:00,  2.70it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.27it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.95it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.35it/s]


       49901852 done in  18.8 minutes


Feature Extraction: 100%|██████████| 60/60 [00:28<00:00,  2.46it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.72it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.60it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 10.89it/s]


       54438384 done in  20.7 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.71it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 14.13it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.31it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.96it/s]


       58974916 done in  22.6 minutes


Feature Extraction: 100%|██████████| 60/60 [00:27<00:00,  2.18it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 15.67it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.85it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.82it/s]


       63511448 done in  24.5 minutes


Feature Extraction: 100%|██████████| 60/60 [00:28<00:00,  2.36it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 13.39it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.46it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.12it/s]


       68047980 done in  26.4 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.47it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 12.71it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.77it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.20it/s]


       72584512 done in  28.3 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  2.90it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.57it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.17it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.68it/s]


       77121044 done in  30.1 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.34it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.21it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.83it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.58it/s]


       81657576 done in  32.0 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.47it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 13.16it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.95it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 16.41it/s]


       86194108 done in  33.9 minutes


Feature Extraction: 100%|██████████| 60/60 [00:27<00:00,  3.14it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.27it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.64it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.39it/s]


       90730640 done in  35.8 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.62it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.46it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.86it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.05it/s]


       95267172 done in  37.7 minutes


Feature Extraction: 100%|██████████| 60/60 [00:27<00:00,  2.19it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.93it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.33it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 15.90it/s]


       99803704 done in  39.6 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.30it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 12.85it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.22it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.55it/s]


      104340236 done in  41.5 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  3.18it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.48it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.15it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.84it/s]


      108876768 done in  43.3 minutes


Feature Extraction: 100%|██████████| 60/60 [00:27<00:00,  2.43it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.94it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.67it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.73it/s]


      113413300 done in  45.2 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.25it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.94it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 15.81it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 15.18it/s]


      117949832 done in  47.1 minutes


Feature Extraction: 100%|██████████| 60/60 [00:27<00:00,  2.22it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.52it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.48it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.39it/s]


      122486364 done in  49.0 minutes


Feature Extraction: 100%|██████████| 60/60 [00:28<00:00,  2.59it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 14.65it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.42it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.35it/s]


      127022896 done in  50.9 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.58it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.17it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.34it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.31it/s]


      131559428 done in  52.7 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.86it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.24it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.54it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.50it/s]


      136095960 done in  54.5 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.92it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 12.00it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.83it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.33it/s]


      140632492 done in  56.3 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.24it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.24it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 15.57it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 16.84it/s]


      145169024 done in  58.1 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  3.05it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.46it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.64it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.65it/s]


      149705556 done in  59.9 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.45it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 12.16it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.84it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 16.09it/s]


      154242088 done in  61.7 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.28it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.63it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.05it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.59it/s]


      158778620 done in  63.6 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  2.87it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.62it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.02it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.99it/s]


      163315152 done in  65.4 minutes


Feature Extraction: 100%|██████████| 60/60 [00:27<00:00,  2.23it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.06it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00,  9.86it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.77it/s]


      167851684 done in  67.2 minutes


Feature Extraction: 100%|██████████| 60/60 [00:27<00:00,  2.39it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 12.08it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.25it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 15.79it/s]


      172388216 done in  69.1 minutes


Feature Extraction: 100%|██████████| 60/60 [00:27<00:00,  2.07it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 10.09it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.38it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.45it/s]


      176924748 done in  70.9 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  3.00it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.36it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.58it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.92it/s]


      181461280 done in  72.7 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.76it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.02it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.31it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.61it/s]


      185997812 done in  74.5 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.74it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 10.24it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.11it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 11.11it/s]


      190534344 done in  76.3 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.50it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 10.21it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.68it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.95it/s]


      195070876 done in  78.1 minutes


Feature Extraction: 100%|██████████| 60/60 [00:27<00:00,  2.06it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.98it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.56it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.54it/s]


      199607408 done in  79.9 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.59it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.49it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.60it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 16.57it/s]


      204143940 done in  81.8 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.71it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.45it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.81it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.56it/s]


      208680472 done in  83.6 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  2.75it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00,  7.91it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.29it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.91it/s]


      213217004 done in  85.4 minutes


Feature Extraction: 100%|██████████| 60/60 [00:24<00:00,  2.81it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.34it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.68it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.19it/s]


      217753536 done in  87.2 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  2.81it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.27it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.93it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.10it/s]


      222290068 done in  89.0 minutes


Feature Extraction: 100%|██████████| 60/60 [00:27<00:00,  2.02it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.07it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.23it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 15.89it/s]


      226826600 done in  90.9 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.46it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.07it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.19it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.21it/s]


      231363132 done in  92.7 minutes


Feature Extraction: 100%|██████████| 60/60 [00:27<00:00,  2.07it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 13.17it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.67it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.79it/s]


      235899664 done in  94.5 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  2.92it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 13.35it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.50it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.66it/s]


      240436196 done in  96.3 minutes


Feature Extraction: 100%|██████████| 60/60 [00:27<00:00,  2.75it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.73it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 10.67it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 19.90it/s]


      244972728 done in  98.2 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  3.09it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.77it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 15.55it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.71it/s]


      249509260 done in 100.0 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  2.72it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.33it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.12it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.04it/s]


      254045792 done in 101.8 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.59it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.83it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 12.63it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 16.25it/s]


      258582324 done in 103.6 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  2.65it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 13.93it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 15.20it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.43it/s]


      263118856 done in 105.4 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  3.07it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.93it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.64it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.43it/s]


      267655388 done in 107.2 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.59it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.56it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.24it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 16.07it/s]


      272191920 done in 109.0 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.26it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 13.02it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 13.99it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.52it/s]


      276728452 done in 110.9 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.47it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 13.54it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 12.50it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.01it/s]


      281264984 done in 112.7 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.50it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.73it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.84it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 18.15it/s]


      285801516 done in 114.5 minutes


Feature Extraction: 100%|██████████| 60/60 [00:27<00:00,  2.05it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 10.64it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.54it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 15.81it/s]


      290338048 done in 116.4 minutes


Feature Extraction: 100%|██████████| 60/60 [00:27<00:00,  2.31it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.19it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.83it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.57it/s]


      294874580 done in 118.2 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  2.82it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 10.00it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.07it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.56it/s]


      299411112 done in 120.0 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.30it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 12.82it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.83it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.52it/s]


      303947644 done in 121.8 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  3.24it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 14.62it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 15.90it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 20.45it/s]


      308484176 done in 123.6 minutes


Feature Extraction: 100%|██████████| 60/60 [00:26<00:00,  2.52it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 15.34it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.18it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.44it/s]


      313020708 done in 125.4 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  2.94it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.98it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 17.79it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.05it/s]


      317557240 done in 127.2 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  3.04it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00,  9.83it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.51it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.35it/s]


      322093772 done in 129.0 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  2.72it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.25it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 15.17it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 15.36it/s]


      326630304 done in 130.8 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  2.58it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.98it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 15.09it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 16.76it/s]


      331166836 done in 132.6 minutes


Feature Extraction: 100%|██████████| 60/60 [00:24<00:00,  2.69it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.65it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.13it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 17.84it/s]


      335703368 done in 134.3 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  2.62it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.35it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.57it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 15.28it/s]


      340239900 done in 136.1 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  2.54it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.55it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.73it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 16.64it/s]


      344776432 done in 137.9 minutes


Feature Extraction: 100%|██████████| 60/60 [00:24<00:00,  2.73it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.62it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 15.64it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 15.33it/s]


      349312964 done in 139.6 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  2.32it/s]
Feature Extraction: 100%|██████████| 60/60 [00:05<00:00, 11.37it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 17.06it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.89it/s]


      353849496 done in 141.4 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  2.38it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.52it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 13.87it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 15.46it/s]


      358386028 done in 143.2 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  2.61it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.58it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 17.82it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 15.05it/s]


      362922560 done in 145.0 minutes


Feature Extraction: 100%|██████████| 60/60 [00:24<00:00,  2.88it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.96it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.20it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 15.53it/s]


      367459092 done in 146.8 minutes


Feature Extraction: 100%|██████████| 60/60 [00:25<00:00,  2.83it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.13it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 14.59it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 15.05it/s]


      371995624 done in 148.6 minutes


Feature Extraction: 100%|██████████| 60/60 [00:24<00:00,  3.15it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 12.46it/s]
Feature Extraction: 100%|██████████| 60/60 [00:04<00:00, 19.15it/s]
Feature Extraction: 100%|██████████| 60/60 [00:03<00:00, 17.05it/s]


      376532156 done in 150.3 minutes


### Test set predictions

In [24]:
%%time
submission = predict_test(
    clfs=clfs, 
    feature_colnames=feature_colnames, 
    id_colname=id_colname, 
    input_path='../data/features/test-all-feat-from-kernel-repro.csv', 
    output_path=submission_file_path, 
    verbose=True
)

Loading data...


  0%|          | 0/6 [00:00<?, ?it/s]

Generating predictions...


100%|██████████| 6/6 [12:49<00:00, 132.60s/it]


Postprocessing...
Submission shape before grouping: (3492890, 16)
Submission shape after grouping: (3492890, 15)
Submission shape after postprocessing: (3492890, 15)
Validating submission file...
Saving submission...
Submission saved to f/home/kk385830/astronomical-classification/submissions/subm_0.633887_2018-12-12-20-10.csv
CPU times: user 3h 19min 16s, sys: 1min 48s, total: 3h 21min 4s
Wall time: 15min 27s


In [25]:
submission.head()

Unnamed: 0_level_0,class_6,class_15,class_16,class_42,class_52,class_53,class_62,class_64,class_65,class_67,class_88,class_90,class_92,class_95,class_99
object_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
13,3.9e-05,0.000657,2.9e-05,0.366398,0.368153,6.4e-05,0.084674,1.9e-05,1.9e-05,0.000734,0.000127,0.038328,3.7e-05,0.000157,0.140565
14,5.4e-05,0.005733,6.6e-05,0.099824,0.029541,9e-05,0.055105,0.000179,0.00044,0.015483,0.000307,0.67656,6.5e-05,0.016401,0.100153
17,0.000124,0.007659,0.00018,0.069897,0.100743,0.000183,0.025223,0.006217,0.000202,0.06385,0.0032,0.592817,6.5e-05,0.010686,0.118956
23,0.000117,0.001379,0.000108,0.02524,0.011016,0.00016,0.036232,0.005721,0.000174,0.386109,0.000227,0.274889,5.6e-05,0.113153,0.145418
34,3e-05,0.001124,2.6e-05,0.072985,0.139158,5.2e-05,0.023494,3.5e-05,5.3e-05,0.007502,4.6e-05,0.649205,2.1e-05,0.000144,0.106124
