# Data Submission to Kaggle

In [25]:
import pickle
import warnings

import numpy as np
import pandas as pd
from keras.models import load_model
from lightgbm import LGBMClassifier
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.preprocessing import LabelBinarizer, LabelEncoder

# Silence every warning for the rest of the session. NOTE: this also hides
# deprecation warnings raised by pandas / scikit-learn / Keras in the cells below.
warnings.filterwarnings(action='ignore')

In [2]:
# Load the training frame; the first CSV column becomes the row index.
train = pd.read_csv('./data/train_df.csv', index_col=0)

In [3]:
# Load the test frame; the first CSV column becomes the row index, which
# submit_data() later writes out as the submission `id` column.
test = pd.read_csv('./data/test_df.csv', index_col=0)

In [4]:
# Map each destination country to an integer class id. `le` stays in scope so
# predicted class ids can be decoded back to country labels in submit_data().
le = LabelEncoder().fit(train['country_destination'])
train['target'] = le.transform(train['country_destination'])
# Fit a LabelBinarizer on the integer targets; the trailing fit call is also
# what the notebook echoes as this cell's output.
lb = LabelBinarizer()
lb.fit(train['target'])

LabelBinarizer()

In [5]:
# Model inputs: every column except the raw label and its integer encoding.
feature = train.drop(columns=['target', 'country_destination'])
# Model output: the integer-encoded destination country.
target = train['target']

In [6]:
def submit_data(name, pred, top_k=5):
    """Write the top-ranked country predictions to a Kaggle submission csv.

    For every row of the module-level ``test`` frame, the ``top_k`` classes
    with the highest score in ``pred`` are decoded back to country labels
    with the module-level ``le`` encoder and written as ``id,country`` rows,
    best guess first.

    Args:
        name: file name of the output csv, created under ``./data/``
        pred: 2-D array-like of class scores, one row per row of ``test``;
            column ``j`` must correspond to encoded class ``j`` of ``le``
        top_k: number of countries to emit per id (defaults to 5, the value
            that was previously hard-coded)

    Return:
        None; the submission is written to ``./data/<name>``

    Raises:
        ValueError: if ``top_k`` is smaller than 1, or if ``pred`` is not
            2-D or does not have exactly one row per row of ``test``
    """
    if top_k < 1:
        raise ValueError('top_k must be at least 1, got %r' % (top_k,))

    scores = np.asarray(pred)
    n_rows = len(test.index)
    if scores.ndim != 2 or scores.shape[0] != n_rows:
        raise ValueError(
            'pred must be 2-D with %d rows (one per test row), got shape %r'
            % (n_rows, scores.shape)
        )

    # Never ask for more countries than there are classes.
    k = min(top_k, scores.shape[1])

    # Column indices of the k highest scores in each row, best first.
    ranked = np.argsort(scores, axis=1)[:, ::-1][:, :k]

    # Read the ids straight from the index instead of doing ``series[i]``:
    # an integer key on a label-indexed Series is either a label lookup or a
    # deprecated positional fallback, so it can fail or pick the wrong row.
    ids = np.repeat(np.asarray(test.index), k)

    # Decode all class ids in a single call rather than once per test row.
    countries = le.inverse_transform(ranked.ravel())

    sub = pd.DataFrame({'id': ids, 'country': countries})
    sub.to_csv('./data/' + name, index=False)

### Base Models

In [7]:
# Baseline random forest with the library's default hyperparameters;
# fit() returns the fitted estimator itself.
rfc = RandomForestClassifier().fit(feature, target)
# Per-class probability scores for every test row.
rf_pred = rfc.predict_proba(test)

In [8]:
# Write the baseline random-forest submission to ./data/rfc_base.csv.
submit_data('rfc_base.csv', rf_pred)

In [9]:
# Baseline extra-trees ensemble with the library's default hyperparameters;
# fit() returns the fitted estimator itself.
et = ExtraTreesClassifier().fit(feature, target)
# Per-class probability scores for every test row.
et_pred = et.predict_proba(test)

In [10]:
# Write the baseline extra-trees submission to ./data/et_base.csv.
submit_data('et_base.csv', et_pred)

In [11]:
# Baseline LightGBM classifier with the library's default hyperparameters;
# fit() returns the fitted estimator itself.
lgb = LGBMClassifier().fit(feature, target)
# Per-class probability scores for every test row.
lgb_pred = lgb.predict_proba(test)

In [12]:
# Write the baseline LightGBM submission to ./data/lgb_base.csv.
submit_data('lgb_base.csv', lgb_pred)

### Tuned Models

In [13]:
# Load the pickled tuned random forest. The context manager closes the file
# handle, which a bare open() passed straight to pickle.load() never does.
# NOTE: unpickling can execute arbitrary code; only load trusted model files.
with open('./data/rf_tuned.sav', 'rb') as model_file:
    rf_tuned = pickle.load(model_file)
# Per-class probability scores for every test row.
rf_tuned_pred = rf_tuned.predict_proba(test)

  """Entry point for launching an IPython kernel.


In [14]:
# Echo the loaded estimator so the notebook shows its hyperparameters.
rf_tuned

RandomForestClassifier(max_depth=16, min_impurity_split=8, min_samples_leaf=8,
                       n_estimators=210)

In [15]:
# Write the tuned random-forest submission to ./data/rf_tuned.csv.
submit_data('rf_tuned.csv', rf_tuned_pred)

In [16]:
# Load the pickled tuned extra-trees model. The context manager closes the
# file handle, which a bare open() passed straight to pickle.load() never does.
# NOTE: unpickling can execute arbitrary code; only load trusted model files.
with open('./data/et_tuned.sav', 'rb') as model_file:
    et_tuned = pickle.load(model_file)
# Per-class probability scores for every test row.
et_tuned_pred = et_tuned.predict_proba(test)

  """Entry point for launching an IPython kernel.


In [17]:
# Echo the loaded estimator so the notebook shows its hyperparameters.
et_tuned

ExtraTreesClassifier(max_depth=24, min_samples_leaf=8, min_samples_split=8,
                     n_estimators=1990)

In [18]:
# Write the tuned extra-trees submission to ./data/et_tuned.csv.
submit_data('et_tuned.csv', et_tuned_pred)

In [19]:
# Load the pickled tuned LightGBM model. The context manager closes the file
# handle, which a bare open() passed straight to pickle.load() never does.
# NOTE: unpickling can execute arbitrary code; only load trusted model files.
with open('./data/lgb_tuned.sav', 'rb') as model_file:
    lgb_tuned = pickle.load(model_file)
# Per-class probability scores for every test row.
lgb_tuned_pred = lgb_tuned.predict_proba(test)

  """Entry point for launching an IPython kernel.


In [20]:
# Echo the loaded estimator so the notebook shows its hyperparameters.
lgb_tuned

LGBMClassifier(learning_rate=0.05, max_depth=25, min_child_samples=150,
               n_estimators=220, num_leaves=60)

In [21]:
# Write the tuned LightGBM submission to ./data/lgb_tuned.csv.
submit_data('lgb_tuned.csv', lgb_tuned_pred)

### Deep Learning Model

In [22]:
# Restore the saved Keras model from ./data/deep_learn.tf.
deep_mod = load_model('./data/deep_learn.tf')

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



In [23]:
# Use predict() rather than predict_proba(): the latter was a deprecated,
# Sequential-only alias that returned predict()'s output unchanged and has
# been removed from newer TensorFlow/Keras releases.
# NOTE(review): assumes the network's final layer already emits per-class
# probabilities (e.g. softmax) - confirm against the training notebook.
deep_mod_pred = deep_mod.predict(test)

In [24]:
# Write the deep-learning model's submission to ./data/deep_learn.csv.
submit_data('deep_learn.csv', deep_mod_pred)