In [1]:
import pandas as pd
import os
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score

import numpy as np

import xgboost as xgb

In [2]:
# Seed used for the train/validation splits, and full float precision when frames are displayed.
RANDOM_STATE = 1234
pd.set_option('display.precision', 20)

In [3]:
# Folder containing train.csv and test.csv; the prediction CSV is written here as well.
DATA_DIR = 'data'

In [4]:
# Read the labelled training set and the unlabelled test set from DATA_DIR.
train_path = os.path.join(DATA_DIR, 'train.csv')
test_path = os.path.join(DATA_DIR, 'test.csv')

df_train = pd.read_csv(train_path, sep=',')
df_test = pd.read_csv(test_path, sep=',')

In [5]:
df_train.head()

Unnamed: 0,Id,DetectedCamera,AngleOfSign,SignAspectRatio,SignWidth,SignHeight,SignFacing (Target)
0,2c9180975a056a64015a1e0a52e57021,Rear,195,1.02,46,45,Rear
1,2c9180975a056a64015a1e17b32171e4,Rear,203,1.09,59,54,Rear
2,2c9180975a056a64015a1de4deb16bd5,Front,26,0.96,104,108,Front
3,2c9180975a056a64015a1de4deb16bdd,Rear,199,0.81,38,47,Rear
4,2c9180975a056a64015a1de4deb16bd6,Rear,208,0.93,54,58,Rear


In [6]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38485 entries, 0 to 38484
Data columns (total 7 columns):
Id                     38485 non-null object
DetectedCamera         38485 non-null object
AngleOfSign            38485 non-null int64
SignAspectRatio        38485 non-null float64
SignWidth              38485 non-null int64
SignHeight             38485 non-null int64
SignFacing (Target)    38485 non-null object
dtypes: float64(1), int64(3), object(3)
memory usage: 2.1+ MB


In [7]:
df_test.head()

Unnamed: 0,Id,DetectedCamera,AngleOfSign,SignAspectRatio,SignWidth,SignHeight
0,2c9180975a056a64015a1e10d3f270fe,Right,67,0.63,107,169
1,2c9180975a056a64015a1de4deb16bdc,Front,16,0.88,61,69
2,2c9180975a056a64015a1e0e70ea70ce,Right,44,1.15,232,202
3,2c9180975a056a64015a1dfed0c46ec6,Right,50,1.1,137,125
4,2c9180975a056a64015a1dfed0c46ec7,Front,30,0.95,99,104


In [8]:
df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31485 entries, 0 to 31484
Data columns (total 6 columns):
Id                 31485 non-null object
DetectedCamera     31485 non-null object
AngleOfSign        31485 non-null int64
SignAspectRatio    31485 non-null float64
SignWidth          31485 non-null int64
SignHeight         31485 non-null int64
dtypes: float64(1), int64(3), object(2)
memory usage: 1.4+ MB


In [9]:
pd.crosstab(df_train['DetectedCamera'], df_train['SignFacing (Target)'], margins=True)

SignFacing (Target),Front,Left,Rear,Right,All
DetectedCamera,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Front,10759,101,0,50,10910
Left,1005,541,7752,0,9298
Rear,0,33,7657,71,7761
Right,9563,0,253,700,10516
All,21327,675,15662,821,38485


#### Feature Engineering

In [10]:
def get_sine(row):
    """Return the sine of the row's 'AngleOfSign' value, which is read as degrees."""
    angle_rad = np.deg2rad(row['AngleOfSign'])
    return np.sin(angle_rad)

def get_cos(row):
    """Return the cosine of the row's 'AngleOfSign' value, which is read as degrees."""
    angle_rad = np.deg2rad(row['AngleOfSign'])
    return np.cos(angle_rad)

def get_tan(row):
    """Return the tangent of the row's 'AngleOfSign' value, which is read as degrees."""
    angle_rad = np.deg2rad(row['AngleOfSign'])
    return np.tan(angle_rad)

def convert_angle(row):
    """Re-express 'AngleOfSign' against a reference angle chosen by 'DetectedCamera'.

    'Front' returns the angle unchanged. 'Left', 'Right' and 'Rear' return
    270, 90 and 180 minus the angle respectively (as floats). Any other
    camera value yields None.
    """
    camera = row['DetectedCamera']
    if camera == 'Front':
        return row['AngleOfSign']

    reference_by_camera = {'Left': 270.0, 'Right': 90.0, 'Rear': 180.0}
    if camera not in reference_by_camera:
        # Unknown camera: no conversion is defined.
        return None
    return reference_by_camera[camera] - row['AngleOfSign']

def add_trig_features(df):
    """Rewrite 'AngleOfSign' via convert_angle and add its sine, cosine and tangent.

    The frame is modified in place and is also returned. 'AngleOfSign' is
    converted first, so 'sine_angle', 'cos_angle' and 'tan_angle' are computed
    from the converted angle.
    """
    # Order matters: the trig helpers read the already-converted 'AngleOfSign'.
    column_builders = (
        ('AngleOfSign', convert_angle),
        ('sine_angle', get_sine),
        ('cos_angle', get_cos),
        ('tan_angle', get_tan),
    )
    for column_name, row_function in column_builders:
        df.loc[:, column_name] = df.apply(row_function, axis=1)

    return df

# Add the converted angle and its trig features to both frames.
# Caution: add_trig_features overwrites 'AngleOfSign', and the results are bound
# back to the same names, so running this cell a second time converts the
# already-converted angle again. Reload the CSVs before re-running it.
df_train = add_trig_features(df_train)
df_test = add_trig_features(df_test)

df_train.head()

Unnamed: 0,Id,DetectedCamera,AngleOfSign,SignAspectRatio,SignWidth,SignHeight,SignFacing (Target),sine_angle,cos_angle,tan_angle
0,2c9180975a056a64015a1e0a52e57021,Rear,-15.0,1.02,46,45,Rear,-0.2588190451025207,0.9659258262890684,-0.2679491924311226
1,2c9180975a056a64015a1e17b32171e4,Rear,-23.0,1.09,59,54,Rear,-0.3907311284892737,0.9205048534524404,-0.4244748162096047
2,2c9180975a056a64015a1de4deb16bd5,Front,26.0,0.96,104,108,Front,0.4383711467890774,0.898794046299167,0.4877325885658613
3,2c9180975a056a64015a1de4deb16bdd,Rear,-19.0,0.81,38,47,Rear,-0.3255681544571566,0.9455185755993168,-0.3443276132896652
4,2c9180975a056a64015a1de4deb16bd6,Rear,-28.0,0.93,54,58,Rear,-0.4694715627858908,0.8829475928589269,-0.5317094316614787


In [11]:
df_test.head()

Unnamed: 0,Id,DetectedCamera,AngleOfSign,SignAspectRatio,SignWidth,SignHeight,sine_angle,cos_angle,tan_angle
0,2c9180975a056a64015a1e10d3f270fe,Right,23.0,0.63,107,169,0.3907311284892737,0.9205048534524404,0.4244748162096047
1,2c9180975a056a64015a1de4deb16bdc,Front,16.0,0.88,61,69,0.2756373558169991,0.9612616959383188,0.2867453857588079
2,2c9180975a056a64015a1e0e70ea70ce,Right,46.0,1.15,232,202,0.719339800338651,0.6946583704589973,1.0355303137905691
3,2c9180975a056a64015a1dfed0c46ec6,Right,40.0,1.1,137,125,0.6427876096865393,0.7660444431189781,0.8390996311772798
4,2c9180975a056a64015a1dfed0c46ec7,Front,30.0,0.95,99,104,0.4999999999999999,0.8660254037844387,0.5773502691896256


#### Label Encoding

In [12]:
def label_encoder(df):
    """Return a copy of ``df`` with an integer-coded twin for every text column.

    For each object-dtype column other than 'Id', a fresh LabelEncoder is
    fitted on that column's values and the codes are stored in
    '<column>_encoded'. The encoders are fitted on this frame alone and are
    not kept, so the codes depend on which values are present in ``df``.

    The new columns are written to a copy, which leaves the caller's frame
    untouched and avoids pandas' SettingWithCopyWarning when ``df`` is a
    filtered slice of another frame.
    """
    encoded = df.copy()
    # Iterate over the input's columns so the '*_encoded' columns added below
    # are never revisited.
    for column in df.columns:
        if column == 'Id':
            continue
        if df[column].dtype == 'object':
            encoder = preprocessing.LabelEncoder()
            encoded[column + '_encoded'] = encoder.fit_transform(list(df[column].values))
    return encoded

In [13]:
df_train['SignFacing (Target)'].value_counts()

Front    21327
Rear     15662
Right      821
Left       675
Name: SignFacing (Target), dtype: int64

In [14]:
df_train['DetectedCamera'].value_counts()

Front    10910
Right    10516
Left      9298
Rear      7761
Name: DetectedCamera, dtype: int64

In [15]:
df_test.head()

Unnamed: 0,Id,DetectedCamera,AngleOfSign,SignAspectRatio,SignWidth,SignHeight,sine_angle,cos_angle,tan_angle
0,2c9180975a056a64015a1e10d3f270fe,Right,23.0,0.63,107,169,0.3907311284892737,0.9205048534524404,0.4244748162096047
1,2c9180975a056a64015a1de4deb16bdc,Front,16.0,0.88,61,69,0.2756373558169991,0.9612616959383188,0.2867453857588079
2,2c9180975a056a64015a1e0e70ea70ce,Right,46.0,1.15,232,202,0.719339800338651,0.6946583704589973,1.0355303137905691
3,2c9180975a056a64015a1dfed0c46ec6,Right,40.0,1.1,137,125,0.6427876096865393,0.7660444431189781,0.8390996311772798
4,2c9180975a056a64015a1dfed0c46ec7,Front,30.0,0.95,99,104,0.4999999999999999,0.8660254037844387,0.5773502691896256


#### Let's build direction-based models (one per detecting camera)

In [16]:
# One training frame per detecting camera. Each subset is label-encoded on its
# own, so the target codes are relative to the classes present for that camera.
# reset_index() keeps the original row label in an 'index' column.
df_train_front, df_train_left, df_train_rear, df_train_right = [
    label_encoder(df_train[df_train['DetectedCamera'] == camera_name]).reset_index()
    for camera_name in ('Front', 'Left', 'Rear', 'Right')
]

# The test set is encoded as a whole, so its camera codes span every camera present in it.
df_test = label_encoder(df_test)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [17]:
df_train_front.head()

Unnamed: 0,index,Id,DetectedCamera,AngleOfSign,SignAspectRatio,SignWidth,SignHeight,SignFacing (Target),sine_angle,cos_angle,tan_angle,DetectedCamera_encoded,SignFacing (Target)_encoded
0,2,2c9180975a056a64015a1de4deb16bd5,Front,26.0,0.96,104,108,Front,0.4383711467890774,0.898794046299167,0.4877325885658613,0,0
1,5,2c9180975a056a64015a1e44ba4377f6,Front,29.0,0.8299999999999998,65,78,Front,0.484809620246337,0.8746197071393957,0.5543090514527689,0,0
2,8,2c9180975a056a64015a1e01fd466f4f,Front,25.0,0.95,104,109,Front,0.4226182617406994,0.90630778703665,0.4663076581549986,0,0
3,10,2c9180975a056a64015a1e1db63072a6,Front,12.0,0.93,41,44,Front,0.2079116908177593,0.9781476007338056,0.2125565616700221,0,0
4,15,2c9180975a056a64015a1df8dc786e26,Front,21.0,1.0,75,75,Front,0.3583679495453002,0.9335804264972016,0.3838640350354157,0,0


In [18]:
df_train_left.head()

Unnamed: 0,index,Id,DetectedCamera,AngleOfSign,SignAspectRatio,SignWidth,SignHeight,SignFacing (Target),sine_angle,cos_angle,tan_angle,DetectedCamera_encoded,SignFacing (Target)_encoded
0,6,2c9180975a056a64015a1e01fd466f56,Left,25.0,0.93,109,117,Rear,0.4226182617406994,0.90630778703665,0.4663076581549986,0,2
1,14,2c9180975a056a64015a1e04ad9f6f8b,Left,12.0,0.4799999999999999,39,81,Rear,0.2079116908177593,0.9781476007338056,0.2125565616700221,0,2
2,29,2c9180975a056a64015a1e41bbc2775f,Left,17.0,0.69,37,54,Rear,0.2923717047227367,0.9563047559630354,0.3057306814586603,0,2
3,30,2c9180975a056a64015a1e41bbc27758,Left,22.0,0.75,44,59,Rear,0.374606593415912,0.9271838545667874,0.4040262258351567,0,2
4,33,2c9180975a056a64015a1e13b6c7713e,Left,17.0,0.4299999999999999,37,87,Rear,0.2923717047227367,0.9563047559630354,0.3057306814586603,0,2


In [19]:
df_train_rear.head()

Unnamed: 0,index,Id,DetectedCamera,AngleOfSign,SignAspectRatio,SignWidth,SignHeight,SignFacing (Target),sine_angle,cos_angle,tan_angle,DetectedCamera_encoded,SignFacing (Target)_encoded
0,0,2c9180975a056a64015a1e0a52e57021,Rear,-15.0,1.02,46,45,Rear,-0.2588190451025207,0.9659258262890684,-0.2679491924311226,0,1
1,1,2c9180975a056a64015a1e17b32171e4,Rear,-23.0,1.09,59,54,Rear,-0.3907311284892737,0.9205048534524404,-0.4244748162096047,0,1
2,3,2c9180975a056a64015a1de4deb16bdd,Rear,-19.0,0.81,38,47,Rear,-0.3255681544571566,0.9455185755993168,-0.3443276132896652,0,1
3,4,2c9180975a056a64015a1de4deb16bd6,Rear,-28.0,0.93,54,58,Rear,-0.4694715627858908,0.8829475928589269,-0.5317094316614787,0,1
4,7,2c9180975a056a64015a1e01fd466f50,Rear,-24.0,1.04,56,54,Rear,-0.4067366430758002,0.9135454576426008,-0.4452286853085361,0,1


In [20]:
df_train_right.head()

Unnamed: 0,index,Id,DetectedCamera,AngleOfSign,SignAspectRatio,SignWidth,SignHeight,SignFacing (Target),sine_angle,cos_angle,tan_angle,DetectedCamera_encoded,SignFacing (Target)_encoded
0,9,2c9180975a056a64015a1e01fd466f51,Right,16.0,0.51,53,103,Front,0.2756373558169991,0.9612616959383188,0.2867453857588079,0,0
1,21,2c9180975a056a64015a1e04ad9f6f83,Right,41.0,1.04,127,122,Front,0.6560590289905072,0.754709580222772,0.8692867378162266,0,0
2,27,2c9180975a056a64015a1e1d735c7293,Right,14.0,0.64,65,102,Front,0.2419218955996677,0.9702957262759964,0.2493280028431807,0,0
3,31,2c9180975a056a64015a1e41bbc27756,Right,18.0,0.55,52,94,Front,0.3090169943749473,0.9510565162951536,0.3249196962329062,0,0
4,37,2c9180975a056a64015a1dceefde69de,Right,47.0,1.2,200,167,Front,0.7313537016191703,0.6819983600624984,1.0723687100246824,0,0


In [21]:
pd.crosstab(df_train_front['DetectedCamera'], df_train_front['SignFacing (Target)'], margins=True)

SignFacing (Target),Front,Left,Right,All
DetectedCamera,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Front,10759,101,50,10910
All,10759,101,50,10910


In [22]:
pd.crosstab(df_train_left['DetectedCamera'], df_train_left['SignFacing (Target)'], margins=True)

SignFacing (Target),Front,Left,Rear,All
DetectedCamera,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Left,1005,541,7752,9298
All,1005,541,7752,9298


In [23]:
pd.crosstab(df_train_rear['DetectedCamera'], df_train_rear['SignFacing (Target)'], margins=True)

SignFacing (Target),Left,Rear,Right,All
DetectedCamera,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Rear,33,7657,71,7761
All,33,7657,71,7761


In [24]:
pd.crosstab(df_train_right['DetectedCamera'], df_train_right['SignFacing (Target)'], margins=True)

SignFacing (Target),Front,Rear,Right,All
DetectedCamera,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Right,9563,253,700,10516
All,9563,253,700,10516


In [25]:
df_test[df_test['Id'] == '2c9180975a056a64015a1e10d3f270fe']

Unnamed: 0,Id,DetectedCamera,AngleOfSign,SignAspectRatio,SignWidth,SignHeight,sine_angle,cos_angle,tan_angle,DetectedCamera_encoded
0,2c9180975a056a64015a1e10d3f270fe,Right,23.0,0.63,107,169,0.3907311284892737,0.9205048534524404,0.4244748162096047,3


In [26]:
# Columns carried through the per-camera training frames. get_train_test drops
# 'Id' and 'SignFacing (Target)_encoded' from this set, leaving the remaining
# columns, in this order, as the model features.
req_cols = [
    'sine_angle',
    'cos_angle',
    'tan_angle',
    'Id',
    'AngleOfSign',
    'SignAspectRatio',
    'SignWidth',
    'SignHeight',
    'DetectedCamera_encoded',
    'SignFacing (Target)_encoded'
]

def get_train_test(df):
    """Split a per-camera frame into training and validation features and labels.

    Only the columns in req_cols are used. 'Id' and the encoded target are
    removed from the features, the encoded target becomes the label, and 20%
    of the rows are held out with RANDOM_STATE as the split seed.

    Returns the four-way result of train_test_split:
    (X_train, X_test, y_train, y_test).
    """
    target_column = 'SignFacing (Target)_encoded'
    selected = df[req_cols]
    labels = selected[target_column]
    predictors = selected.drop([target_column, 'Id'], axis=1)

    return train_test_split(
        predictors,
        labels,
        test_size=0.2,
        random_state=RANDOM_STATE
    )

def train_model(X_train, X_test, y_train, y_test,
                num_class=3, num_boost_round=2000, early_stopping_rounds=50):
    """Fit a multi-class XGBoost booster with early stopping on a held-out set.

    Parameters
    ----------
    X_train, y_train
        Training features and integer class labels.
    X_test, y_test
        Held-out features and labels; used as the 'valid' entry of the
        watchlist that drives early stopping.
    num_class
        Number of target classes passed to XGBoost (default 3, the value
        previously hard-coded here).
    num_boost_round
        Upper bound on the number of boosting rounds (default 2000).
    early_stopping_rounds
        Training stops once 'valid-mlogloss' has not improved for this many
        rounds (default 50).

    Returns
    -------
    The booster returned by ``xgb.train``.
    """
    # 'n_estimators' is a scikit-learn-wrapper parameter; with xgb.train the
    # number of rounds comes from num_boost_round, so it is not set here.
    params = {
        'objective': 'multi:softprob',
        'eval_metric': 'mlogloss',
        'colsample_bytree': 0.75,
        'min_child_weight': 4,
        'subsample': 0.8,
        'eta': 0.01,
        'max_depth': 4,
        'gamma': 0,
        'seed': 1,
        'num_class': num_class
    }

    d_train = xgb.DMatrix(X_train, label=y_train)
    d_test = xgb.DMatrix(X_test, label=y_test)

    # The last watchlist entry ('valid') is the one used for early stopping.
    watchlist = [(d_train, 'train'), (d_test, 'valid')]

    model = xgb.train(
        params,
        d_train,
        num_boost_round,
        watchlist,
        early_stopping_rounds=early_stopping_rounds,
        verbose_eval=10
    )

    return model

#### Front Camera

In [27]:
df_train_front.columns

Index([u'index', u'Id', u'DetectedCamera', u'AngleOfSign', u'SignAspectRatio',
       u'SignWidth', u'SignHeight', u'SignFacing (Target)', u'sine_angle',
       u'cos_angle', u'tan_angle', u'DetectedCamera_encoded',
       u'SignFacing (Target)_encoded'],
      dtype='object')

In [28]:
# Keep only the modelling columns (this drops 'index', 'DetectedCamera' and the raw target).
df_train_front = df_train_front[req_cols]
df_train_front.head()

Unnamed: 0,sine_angle,cos_angle,tan_angle,Id,AngleOfSign,SignAspectRatio,SignWidth,SignHeight,DetectedCamera_encoded,SignFacing (Target)_encoded
0,0.4383711467890774,0.898794046299167,0.4877325885658613,2c9180975a056a64015a1de4deb16bd5,26.0,0.96,104,108,0,0
1,0.484809620246337,0.8746197071393957,0.5543090514527689,2c9180975a056a64015a1e44ba4377f6,29.0,0.8299999999999998,65,78,0,0
2,0.4226182617406994,0.90630778703665,0.4663076581549986,2c9180975a056a64015a1e01fd466f4f,25.0,0.95,104,109,0,0
3,0.2079116908177593,0.9781476007338056,0.2125565616700221,2c9180975a056a64015a1e1db63072a6,12.0,0.93,41,44,0,0
4,0.3583679495453002,0.9335804264972016,0.3838640350354157,2c9180975a056a64015a1df8dc786e26,21.0,1.0,75,75,0,0


In [29]:
# Re-selecting req_cols repeats the previous cell (and get_train_test selects
# them again), so it has no further effect here.
df_train_front = df_train_front[req_cols]
# 80/20 split of the front-camera rows into training and validation sets.
X_train, X_test, y_train, y_test = get_train_test(df_train_front)
X_train.head()

Unnamed: 0,sine_angle,cos_angle,tan_angle,AngleOfSign,SignAspectRatio,SignWidth,SignHeight,DetectedCamera_encoded
3976,0.3255681544571566,0.9455185755993168,0.3443276132896652,19.0,0.8399999999999999,42,50,0
3663,0.1736481776669303,0.984807753012208,0.1763269807084649,10.0,0.94,49,52,0
8251,0.4694715627858908,0.8829475928589269,0.5317094316614787,28.0,0.92,152,165,0
5406,0.3907311284892737,0.9205048534524404,0.4244748162096047,23.0,0.96,65,68,0
8911,0.3907311284892737,0.9205048534524404,0.4244748162096047,23.0,1.04,227,218,0


In [30]:
# Fit the front-camera booster; the held-out split is the early-stopping validation set.
model_front = train_model(X_train, X_test, y_train, y_test)

[0]	train-mlogloss:1.08409	valid-mlogloss:1.08425
Multiple eval metrics have been passed: 'valid-mlogloss' will be used for early stopping.

Will train until valid-mlogloss hasn't improved in 50 rounds.
[10]	train-mlogloss:0.952353	valid-mlogloss:0.953888
[20]	train-mlogloss:0.841302	valid-mlogloss:0.844033
[30]	train-mlogloss:0.746535	valid-mlogloss:0.750362
[40]	train-mlogloss:0.6649	valid-mlogloss:0.669783
[50]	train-mlogloss:0.594063	valid-mlogloss:0.599908
[60]	train-mlogloss:0.532225	valid-mlogloss:0.538998
[70]	train-mlogloss:0.478005	valid-mlogloss:0.485557
[80]	train-mlogloss:0.430232	valid-mlogloss:0.438636
[90]	train-mlogloss:0.388085	valid-mlogloss:0.397184
[100]	train-mlogloss:0.350731	valid-mlogloss:0.360513
[110]	train-mlogloss:0.317549	valid-mlogloss:0.327929
[120]	train-mlogloss:0.288026	valid-mlogloss:0.299056
[130]	train-mlogloss:0.261697	valid-mlogloss:0.273325
[140]	train-mlogloss:0.238207	valid-mlogloss:0.250367
[150]	train-mlogloss:0.217185	valid-mlogloss:0.22989

#### Left Camera

In [31]:
# Left camera: keep the modelling columns, split 80/20 and fit the left-camera booster.
# X_train/X_test/y_train/y_test are rebound here, replacing the previous camera's split.
df_train_left = df_train_left[req_cols]
X_train, X_test, y_train, y_test = get_train_test(df_train_left)
model_left = train_model(X_train, X_test, y_train, y_test)

[0]	train-mlogloss:1.08587	valid-mlogloss:1.08599
Multiple eval metrics have been passed: 'valid-mlogloss' will be used for early stopping.

Will train until valid-mlogloss hasn't improved in 50 rounds.
[10]	train-mlogloss:0.970986	valid-mlogloss:0.971843
[20]	train-mlogloss:0.874233	valid-mlogloss:0.875692
[30]	train-mlogloss:0.791166	valid-mlogloss:0.793301
[40]	train-mlogloss:0.719532	valid-mlogloss:0.722192
[50]	train-mlogloss:0.657459	valid-mlogloss:0.660773
[60]	train-mlogloss:0.603221	valid-mlogloss:0.607069
[70]	train-mlogloss:0.555613	valid-mlogloss:0.560054
[80]	train-mlogloss:0.513764	valid-mlogloss:0.518643
[90]	train-mlogloss:0.476577	valid-mlogloss:0.48202
[100]	train-mlogloss:0.443845	valid-mlogloss:0.449697
[110]	train-mlogloss:0.414824	valid-mlogloss:0.421076
[120]	train-mlogloss:0.389008	valid-mlogloss:0.395693
[130]	train-mlogloss:0.365978	valid-mlogloss:0.373145
[140]	train-mlogloss:0.345357	valid-mlogloss:0.352986
[150]	train-mlogloss:0.327015	valid-mlogloss:0.3351

#### Rear Camera

In [32]:
# Rear camera: keep the modelling columns, split 80/20 and fit the rear-camera booster.
# X_train/X_test/y_train/y_test are rebound here, replacing the previous camera's split.
df_train_rear = df_train_rear[req_cols]
X_train, X_test, y_train, y_test = get_train_test(df_train_rear)
model_rear = train_model(X_train, X_test, y_train, y_test)

[0]	train-mlogloss:1.0841	valid-mlogloss:1.08405
Multiple eval metrics have been passed: 'valid-mlogloss' will be used for early stopping.

Will train until valid-mlogloss hasn't improved in 50 rounds.
[10]	train-mlogloss:0.952652	valid-mlogloss:0.952241
[20]	train-mlogloss:0.841823	valid-mlogloss:0.841087
[30]	train-mlogloss:0.747243	valid-mlogloss:0.746227
[40]	train-mlogloss:0.66575	valid-mlogloss:0.664544
[50]	train-mlogloss:0.595094	valid-mlogloss:0.593713
[60]	train-mlogloss:0.533399	valid-mlogloss:0.531838
[70]	train-mlogloss:0.479298	valid-mlogloss:0.477517
[80]	train-mlogloss:0.431728	valid-mlogloss:0.429792
[90]	train-mlogloss:0.389696	valid-mlogloss:0.387655
[100]	train-mlogloss:0.35248	valid-mlogloss:0.350332
[110]	train-mlogloss:0.319475	valid-mlogloss:0.31721
[120]	train-mlogloss:0.290089	valid-mlogloss:0.287719
[130]	train-mlogloss:0.263882	valid-mlogloss:0.261469
[140]	train-mlogloss:0.240504	valid-mlogloss:0.238048
[150]	train-mlogloss:0.219626	valid-mlogloss:0.217092


#### Right Camera

In [33]:
# Right camera: keep the modelling columns, split 80/20 and fit the right-camera booster.
# X_train/X_test/y_train/y_test are rebound here, replacing the previous camera's split.
df_train_right = df_train_right[req_cols]
X_train, X_test, y_train, y_test = get_train_test(df_train_right)
model_right = train_model(X_train, X_test, y_train, y_test)

[0]	train-mlogloss:1.08526	valid-mlogloss:1.08511
Multiple eval metrics have been passed: 'valid-mlogloss' will be used for early stopping.

Will train until valid-mlogloss hasn't improved in 50 rounds.
[10]	train-mlogloss:0.964604	valid-mlogloss:0.963372
[20]	train-mlogloss:0.86277	valid-mlogloss:0.860855
[30]	train-mlogloss:0.775704	valid-mlogloss:0.773175
[40]	train-mlogloss:0.700735	valid-mlogloss:0.697715
[50]	train-mlogloss:0.635548	valid-mlogloss:0.632151
[60]	train-mlogloss:0.578525	valid-mlogloss:0.574871
[70]	train-mlogloss:0.52854	valid-mlogloss:0.524759
[80]	train-mlogloss:0.484533	valid-mlogloss:0.48069
[90]	train-mlogloss:0.445581	valid-mlogloss:0.441836
[100]	train-mlogloss:0.411053	valid-mlogloss:0.407431
[110]	train-mlogloss:0.380355	valid-mlogloss:0.376861
[120]	train-mlogloss:0.353066	valid-mlogloss:0.349711
[130]	train-mlogloss:0.328548	valid-mlogloss:0.32549
[140]	train-mlogloss:0.306692	valid-mlogloss:0.303886
[150]	train-mlogloss:0.287159	valid-mlogloss:0.284612


In [34]:
df_test[df_test['Id'] == '2c9180975a056a64015a1e10d3f270fe']

Unnamed: 0,Id,DetectedCamera,AngleOfSign,SignAspectRatio,SignWidth,SignHeight,sine_angle,cos_angle,tan_angle,DetectedCamera_encoded
0,2c9180975a056a64015a1e10d3f270fe,Right,23.0,0.63,107,169,0.3907311284892737,0.9205048534524404,0.4244748162096047,3


In [35]:
# Model inputs: every column in req_cols except the identifier and the encoded target.
# A new list is built instead of aliasing req_cols: removing items through an
# alias would also strip them from req_cols, which breaks get_train_test and
# makes a second run of this cell fail.
features = [
    column for column in req_cols
    if column not in ('SignFacing (Target)_encoded', 'Id')
]

# Output frame: one row per test Id plus an empty placeholder column per facing direction.
results = pd.DataFrame()
results['Id'] = df_test['Id']
for direction in ('Front', 'Left', 'Rear', 'Right'):
    results[direction] = None


def get_prediction(row):
    """Fill one row of ``results`` with class probabilities from its camera's model.

    The row's 'Id' is looked up in ``df_test``; the matching feature values are
    scored by the booster that belongs to that row's 'DetectedCamera_encoded'
    value (0 -> model_front, 1 -> model_left, 2 -> model_rear, 3 -> model_right).
    Each model returns three probabilities, which are written to the three
    directions assigned to that camera; the remaining direction is set to 0.0.

    Returns the same row with 'Front', 'Left', 'Rear' and 'Right' filled in.
    A row whose camera code is not 0-3 is returned unchanged.
    """
    sample_id = row['Id']  # not named `id`, to avoid shadowing the built-in
    # NOTE(review): this scans all of df_test for every row, so filling the
    # whole frame is quadratic in the number of test rows.
    df_row = df_test[df_test['Id'] == sample_id]
    dtest = xgb.DMatrix(df_row[features])

    detecting_camera = df_row['DetectedCamera_encoded'].values[0]

    # NOTE(review): predict() is called with default arguments; confirm whether
    # this xgboost version limits it to the early-stopped best iteration.
    # Placeholders are always the float 0.0 so every probability column holds
    # floats only.
    if detecting_camera == 0:
        # Front-camera model outputs: Front, Left, Right.
        y_pred = model_front.predict(dtest)[0]
        row['Front'] = y_pred[0]
        row['Left'] = y_pred[1]
        row['Rear'] = 0.0
        row['Right'] = y_pred[2]
    elif detecting_camera == 1:
        # Left-camera model outputs: Front, Left, Rear.
        y_pred = model_left.predict(dtest)[0]
        row['Front'] = y_pred[0]
        row['Left'] = y_pred[1]
        row['Rear'] = y_pred[2]
        row['Right'] = 0.0
    elif detecting_camera == 2:
        # Rear-camera model outputs: Left, Rear, Right.
        y_pred = model_rear.predict(dtest)[0]
        row['Front'] = 0.0
        row['Left'] = y_pred[0]
        row['Rear'] = y_pred[1]
        row['Right'] = y_pred[2]
    elif detecting_camera == 3:
        # Right-camera model outputs: Front, Rear, Right.
        y_pred = model_right.predict(dtest)[0]
        row['Front'] = y_pred[0]
        row['Left'] = 0.0
        row['Rear'] = y_pred[1]
        row['Right'] = y_pred[2]

    return row

df_test.head()

Unnamed: 0,Id,DetectedCamera,AngleOfSign,SignAspectRatio,SignWidth,SignHeight,sine_angle,cos_angle,tan_angle,DetectedCamera_encoded
0,2c9180975a056a64015a1e10d3f270fe,Right,23.0,0.63,107,169,0.3907311284892737,0.9205048534524404,0.4244748162096047,3
1,2c9180975a056a64015a1de4deb16bdc,Front,16.0,0.88,61,69,0.2756373558169991,0.9612616959383188,0.2867453857588079,0
2,2c9180975a056a64015a1e0e70ea70ce,Right,46.0,1.15,232,202,0.719339800338651,0.6946583704589973,1.0355303137905691,3
3,2c9180975a056a64015a1dfed0c46ec6,Right,40.0,1.1,137,125,0.6427876096865393,0.7660444431189781,0.8390996311772798,3
4,2c9180975a056a64015a1dfed0c46ec7,Front,30.0,0.95,99,104,0.4999999999999999,0.8660254037844387,0.5773502691896256,0


In [36]:
df_test[df_test['Id'] == '2c9180975a056a64015a1e10d3f270fe']

Unnamed: 0,Id,DetectedCamera,AngleOfSign,SignAspectRatio,SignWidth,SignHeight,sine_angle,cos_angle,tan_angle,DetectedCamera_encoded
0,2c9180975a056a64015a1e10d3f270fe,Right,23.0,0.63,107,169,0.3907311284892737,0.9205048534524404,0.4244748162096047,3


In [37]:
df_test.head()

Unnamed: 0,Id,DetectedCamera,AngleOfSign,SignAspectRatio,SignWidth,SignHeight,sine_angle,cos_angle,tan_angle,DetectedCamera_encoded
0,2c9180975a056a64015a1e10d3f270fe,Right,23.0,0.63,107,169,0.3907311284892737,0.9205048534524404,0.4244748162096047,3
1,2c9180975a056a64015a1de4deb16bdc,Front,16.0,0.88,61,69,0.2756373558169991,0.9612616959383188,0.2867453857588079,0
2,2c9180975a056a64015a1e0e70ea70ce,Right,46.0,1.15,232,202,0.719339800338651,0.6946583704589973,1.0355303137905691,3
3,2c9180975a056a64015a1dfed0c46ec6,Right,40.0,1.1,137,125,0.6427876096865393,0.7660444431189781,0.8390996311772798,3
4,2c9180975a056a64015a1dfed0c46ec7,Front,30.0,0.95,99,104,0.4999999999999999,0.8660254037844387,0.5773502691896256,0


In [38]:
results.head()

Unnamed: 0,Id,Front,Left,Rear,Right
0,2c9180975a056a64015a1e10d3f270fe,,,,
1,2c9180975a056a64015a1de4deb16bdc,,,,
2,2c9180975a056a64015a1e0e70ea70ce,,,,
3,2c9180975a056a64015a1dfed0c46ec6,,,,
4,2c9180975a056a64015a1dfed0c46ec7,,,,


In [39]:
# Fill the four probability columns one row at a time: get_prediction looks each
# Id up in df_test and scores it with the model for its detecting camera.
results = results.apply(
    get_prediction,
    axis=1
)

In [40]:
results.head(n=20)

Unnamed: 0,Id,Front,Left,Rear,Right
0,2c9180975a056a64015a1e10d3f270fe,0.9960609078407288,0.0,0.0021142859477549,0.0018248042324557
1,2c9180975a056a64015a1de4deb16bdc,0.997665524482727,0.0011386271798983,0.0,0.0011958684772253
2,2c9180975a056a64015a1e0e70ea70ce,0.9941774606704712,0.0,0.0040801777504384,0.0017423455137759
3,2c9180975a056a64015a1dfed0c46ec6,0.9971033930778505,0.0,0.00170045834966,0.0011961108539253
4,2c9180975a056a64015a1dfed0c46ec7,0.9978286623954772,0.0010827840305864,0.0,0.0010885744122788
5,2c9180975a056a64015a1e44ba4377f8,0.0337505899369716,0.0,0.8182708024978637,0.147978588938713
6,2c9180975a056a64015a1e010c8f6ee1,0.9982361793518066,0.0008942832937464,0.0,0.0008694643038325
7,2c9180975a056a64015a1e1db63072aa,0.99413001537323,0.0,0.0035165478475391,0.0023534283973276
8,2c9180975a056a64015a1e1db63072a8,0.0,0.0009388428297825,0.998353362083435,0.0007077967165969
9,2c9180975a056a64015a1e2354f5733c,0.0,0.0009512259275652,0.9983134269714355,0.0007353393011726


In [41]:
# Write the per-Id probabilities to DATA_DIR, without the index and with floats at 10 decimal places.
submission_path = os.path.join(
    DATA_DIR,
    'predict_road_sign_30_06_2017_4_directions_fe.csv'
)
results.to_csv(submission_path, index=False, float_format='%0.10f')