In [2]:
import pandas as pd
import numpy as np
import pickle as p

from sklearn.metrics import roc_auc_score
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

Compile the training data

In [3]:
meta_data = pd.read_csv('/home/ec2-user/training_data_meta.csv')

In [4]:
#Join cleaned class data with training metadata

class_data = pd.read_csv('/home/ec2-user/stanford_labels_cleaned.csv')

In [5]:
meta_data = pd.merge(meta_data, class_data, on = 'class', how = 'left')

In [6]:
meta_data['Body Type'].value_counts()

Sedan          2075
SUV            1558
Coupe          1540
Convertible    1036
Crew Cab        381
Hatchback       380
Cab             297
Van             291
Wagon           253
Minivan         250
Quad Cab         44
Club Cab         39
Name: Body Type, dtype: int64

In [7]:
meta_data = meta_data.loc[meta_data['Body Type'].isin(['Coupe', 'Sedan'])].copy()

In [8]:
meta_data['is_sedan_target'] = (meta_data['Body Type'] == 'Sedan').astype(int)

In [9]:
image_dict = p.load(open('/home/ec2-user/scaled_grayscale_dict.p', 'rb'))

In [10]:
training_data = []
for i in meta_data[['is_sedan_target', 'fname']].iterrows():
    row = [i[1]['is_sedan_target']]
    row.extend(image_dict[i[1]['fname']].flatten())
    training_data.append(row)
training_data = np.array(training_data).astype(float)

Fit the model

In [11]:
X_train, X_test, y_train, y_test = train_test_split(training_data[:,1:], 
                                                    training_data[:,0], 
                                                    test_size=0.33, 
                                                    random_state=42)

In [12]:
scaler = StandardScaler()

scaler.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [13]:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [14]:
dtrain = xgb.DMatrix(X_train, label = y_train)
dtest = xgb.DMatrix(X_test, label = y_test)

In [27]:
params = {'booster' : 'gbtree', 
          'verbosity' : 1, 
          'learning_rate' : 0.01, #aka eta
          'max_depth' : 3,
          'subsample' : .5,
          'reg_lambda' : 0.01, #L2 reg
          'reg_alpha' : 0.02, #L1 reg
          'objective' : 'binary:logistic',
          'eval_metric' : 'auc'
         }

eval_list = [(dtest, 'eval'), (dtrain, 'train')]

model = xgb.train(params, dtrain, num_boost_round = 300, evals = eval_list, early_stopping_rounds = 50)

[03:56:46] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[0]	eval-auc:0.538094	train-auc:0.578429
Multiple eval metrics have been passed: 'train-auc' will be used for early stopping.

Will train until train-auc hasn't improved in 50 rounds.
[03:56:46] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[1]	eval-auc:0.531254	train-auc:0.62725
[03:56:46] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[2]	eval-auc:0.526033	train-auc:0.651923
[03:56:46] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=3
[3]	eval-auc:0.531549	train-auc:0.663576
[03:56:46] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[4]	eval-auc:0.526064	train-auc:0.677084
[03:56:46] /workspace/src/tree/updater

[03:56:47] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[49]	eval-auc:0.560937	train-auc:0.762457
[03:56:47] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[50]	eval-auc:0.559563	train-auc:0.762952
[03:56:47] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[51]	eval-auc:0.560718	train-auc:0.766236
[03:56:47] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 0 pruned nodes, max_depth=3
[52]	eval-auc:0.561119	train-auc:0.76581
[03:56:47] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 0 pruned nodes, max_depth=3
[53]	eval-auc:0.56174	train-auc:0.766239
[03:56:47] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[54]	eval-auc:0.561656	train-auc:0.765608
[03:56:47] /

[03:56:47] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[99]	eval-auc:0.559703	train-auc:0.79651
[03:56:47] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 0 pruned nodes, max_depth=3
[100]	eval-auc:0.559534	train-auc:0.797318
[03:56:47] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[101]	eval-auc:0.5598	train-auc:0.797485
[03:56:47] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[102]	eval-auc:0.558887	train-auc:0.797768
[03:56:47] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[103]	eval-auc:0.558584	train-auc:0.798363
[03:56:47] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[104]	eval-auc:0.559436	train-auc:0.799507
[03:56:47

[03:56:48] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[148]	eval-auc:0.565038	train-auc:0.826593
[03:56:48] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[149]	eval-auc:0.564189	train-auc:0.827635
[03:56:48] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 0 pruned nodes, max_depth=3
[150]	eval-auc:0.563969	train-auc:0.828161
[03:56:48] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 0 pruned nodes, max_depth=3
[151]	eval-auc:0.564019	train-auc:0.828993
[03:56:48] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[152]	eval-auc:0.564209	train-auc:0.830756
[03:56:48] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[153]	eval-auc:0.564775	train-auc:0.830704
[03:

[03:56:49] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[198]	eval-auc:0.569797	train-auc:0.854976
[03:56:49] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[199]	eval-auc:0.569339	train-auc:0.855111
[03:56:49] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[200]	eval-auc:0.569209	train-auc:0.855476
[03:56:49] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[201]	eval-auc:0.569841	train-auc:0.85601
[03:56:49] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 0 pruned nodes, max_depth=3
[202]	eval-auc:0.569577	train-auc:0.855783
[03:56:49] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[203]	eval-auc:0.569511	train-auc:0.856248
[03:5

[03:56:49] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 0 pruned nodes, max_depth=3
[248]	eval-auc:0.567689	train-auc:0.870863
[03:56:49] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[249]	eval-auc:0.567658	train-auc:0.871001
[03:56:49] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 0 pruned nodes, max_depth=3
[250]	eval-auc:0.567674	train-auc:0.871333
[03:56:49] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 0 pruned nodes, max_depth=3
[251]	eval-auc:0.567445	train-auc:0.871432
[03:56:49] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[252]	eval-auc:0.567414	train-auc:0.87168
[03:56:49] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[253]	eval-auc:0.567385	train-auc:0.872066
[03:56

[03:56:50] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[298]	eval-auc:0.567752	train-auc:0.891512
[03:56:50] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=3
[299]	eval-auc:0.56723	train-auc:0.891478


In [None]:
model.predict(dtrain)

In [87]:
print('training auroc : %f'%(roc_auc_score(y_train, model.predict(dtrain))))

training auroc : 0.577371


In [88]:
print('testing auroc : %f'%(roc_auc_score(y_test, model.predict(dtest))))

testing auroc : 0.527468
