# Machine Learning

In [45]:
import warnings

import numpy as np
import pandas as pd
import tensorflow as tf, keras
from keras.layers import Dense
from keras.metrics import TopKCategoricalAccuracy
from keras.models import Sequential, load_model
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelBinarizer, LabelEncoder

In [28]:
train = pd.read_csv('./data/train_df.csv', index_col=0)

In [29]:
le = LabelEncoder()
train['target'] = le.fit_transform(train['country_destination'])
lb = LabelBinarizer()
lb.fit(train['target'])

LabelBinarizer()

In [30]:
mapping = dict(zip(range(len(le.classes_)), le.classes_))
mapping

{0: 'AU',
 1: 'CA',
 2: 'DE',
 3: 'ES',
 4: 'FR',
 5: 'GB',
 6: 'IT',
 7: 'NDF',
 8: 'NL',
 9: 'PT',
 10: 'US',
 11: 'other'}

In [31]:
target = train['target']
feature = train.drop(['target', 'country_destination'], axis=1)

In [32]:
print(target.shape, feature.shape)

(213451,) (213451, 125)


### Baseline Model

In [33]:
X_train, X_test, y_train, y_test = train_test_split(
    feature, target, train_size=.25, random_state=42)

In [34]:
def base_model():
    model = Sequential()
    model.add(Dense(120, input_dim=125, activation='relu'))
    model.add(Dense(60, activation='relu'))
    model.add(Dense(12, activation='sigmoid'))
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy', 'top_k_categorical_accuracy'])
    return model

In [39]:
estimator = KerasClassifier(
    build_fn=base_model, epochs=5, batch_size=128, verbose=0)
kfold = KFold(n_splits=3, shuffle=True)
results = cross_val_score(estimator, X_train, lb.transform(y_train), cv=kfold, scoring='roc_auc_ovr')

In [41]:
print("Baseline: %.2f%%" % (results.mean()*100))

Baseline: 65.11%


In [42]:
estimator.fit(feature, lb.transform(target), verbose=False)

<keras.callbacks.callbacks.History at 0x1fc818980b8>

In [43]:
estimator.model.save(filepath='./data/deep_learn.tf')