# Wide and Deep Model

<br>

- [**Wide-and-deep**](https://arxiv.org/abs/1606.07792)
- using TensorFlow high-level Estimator API (v1.12)
- implementation of [Microsoft Recommenders](https://github.com/microsoft/recommenders)
- please see the source code of [my version](https://github.com/mjmingd/recommenders)

In [1]:
import sys
sys.path.append("../../")
import itertools
import math
import os
from tempfile import TemporaryDirectory

import numpy as np
import pandas as pd
import sklearn.preprocessing
import tensorflow as tf

from reco_utils.common.constants import (
    DEFAULT_USER_COL as USER_COL,
    DEFAULT_ITEM_COL as ITEM_COL,
    DEFAULT_RATING_COL as RATING_COL,
    DEFAULT_PREDICTION_COL as PREDICT_COL,
    SEED
)
from reco_utils.common import tf_utils, gpu_utils, plot
from reco_utils.dataset import movielens
from reco_utils.dataset.pandas_df_utils import user_item_pairs
from reco_utils.dataset.python_splitters import python_random_split
import reco_utils.evaluation.python_evaluation as evaluator
import reco_utils.recommender.wide_deep.wide_deep_utils as wide_deep

print("Tensorflow Version:", tf.VERSION)
print("GPUs:\n", gpu_utils.get_gpu_info())

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Tensorflow Version: 1.13.1
GPUs:
 []


### 0. Set parameters

In [2]:
# Metrics to use for evaluation.
# Names are looked up in `evaluator.metrics`; an empty list means no evaluation hook
# is registered for that metric family.
RANKING_METRICS = []
RATING_METRICS = []

# Use session hook to evaluate model while training
EVALUATE_WHILE_TRAINING = True

# Column names for this dataset.
# ITEM_COL and RATING_COL override the DEFAULT_* aliases imported above.
ITEM_COL = 'jobID'
# Comma-separated list of item feature columns (split on ',' when ITEM_FEAT_FLAG is True).
ITEM_FEAT_COL = 'tagID,companyID,companySize'
# True: ITEM_FEAT_COL names several feature columns; False: it names a single column.
ITEM_FEAT_FLAG = True
RATING_COL = 'applied'
# USER_FEAT_FLAG = True

# Set seed for deterministic result
RANDOM_SEED = 3737  # used for the data split, the input_fn shuffle and the estimator

# Directories
DATA_DIR = '../../../train_job/'      # input CSV files
RESULT_DIR = '../../../result/'       # prediction CSV is written here
EXPORT_DIR_BASE = '../../output/'

# Model checkpoints directory. If None, use temp-dir.
MODEL_DIR = None

#### Hyperparameters
MODEL_TYPE = 'wide_deep'
STEPS = 1000  # Number of batches to train
BATCH_SIZE = 32  # epochs = STEPS * BATCH_SIZE / number of training rows

# Wide (linear) model hyperparameters
# NOTE(review): the L1/L2/momentum values below are forwarded to tf_utils.build_optimizer;
# with 'adagrad' they are presumably ignored -- confirm against build_optimizer.
LINEAR_OPTIMIZER = 'adagrad'
LINEAR_OPTIMIZER_LR = 0.0621  # Learning rate
LINEAR_L1_REG = 0.5           # Regularization rate for FtrlOptimizer
LINEAR_L2_REG = 0.0
LINEAR_MOMENTUM = 0.99         # Momentum for MomentumOptimizer or RMSPropOptimizer
# DNN model hyperparameters
# NOTE(review): same caveat as above for 'adadelta' -- confirm which values are actually used.
DNN_OPTIMIZER = 'adadelta'
DNN_OPTIMIZER_LR = 0.005
DNN_L1_REG = 0.5           # Regularization rate for FtrlOptimizer
DNN_L2_REG = 0.0
DNN_MOMENTUM = 0.99         # Momentum for MomentumOptimizer or RMSPropOptimizer
# Layer dimensions. Defined as follows to make this notebook runnable from Hyperparameter tuning services like AzureML Hyperdrive
DNN_HIDDEN_LAYER_1 = 128     # Set 0 to not use this layer
DNN_HIDDEN_LAYER_2 = 64    # Set 0 to not use this layer
DNN_HIDDEN_LAYER_3 = 8   # Set 0 to not use this layer
DNN_HIDDEN_LAYER_4 = 256   # Note, at least one layer should have nodes.
# Layers set to 0 are dropped from the list that is handed to the model builder.
DNN_HIDDEN_UNITS = [h for h in [DNN_HIDDEN_LAYER_1, DNN_HIDDEN_LAYER_2, DNN_HIDDEN_LAYER_3, DNN_HIDDEN_LAYER_4] if h > 0]
DNN_USER_DIM = 32          # User embedding feature dimension
DNN_ITEM_DIM = 16          # Item embedding feature dimension
# NOTE(review): passed as `dnn_dropout` to wide_deep.build_model. If that reaches the TF
# estimator unchanged it is the probability of DROPPING a unit, so 0.8 is very aggressive
# (the recorded run predicts no positive class at all) -- confirm the intended value.
DNN_DROPOUT = 0.8
DNN_BATCH_NORM = 1         # 1 to use batch normalization, 0 if not.

In [3]:
# Resolve the checkpoint directory: a user-supplied MODEL_DIR must be empty (or absent),
# otherwise a throw-away temporary directory is created and kept alive through TMP_DIR.
if MODEL_DIR is not None and os.path.exists(MODEL_DIR) and os.listdir(MODEL_DIR):
    raise ValueError(
        "Model exists in {}. Use different directory name or "
        "remove the existing checkpoint files first".format(MODEL_DIR)
    )

TMP_DIR = TemporaryDirectory() if MODEL_DIR is None else None
model_dir = MODEL_DIR if TMP_DIR is None else TMP_DIR.name

### 1. Prepare Data

In [4]:
# Labelled user/job interactions. os.path.join keeps the path valid whether or not
# DATA_DIR ends with a separator.
train_data = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
train_data.head()

Unnamed: 0,userID,jobID,applied
0,fe292163d06253b716e9a0099b42031d,15de21c670ae7c3f6f3f1f37029303c9,0
1,6377fa90618fae77571e8dc90d98d409,55b37c5c270e5d84c793e486d798c01d,0
2,8ec0888a5b04139be0dfe942c7eb4199,0fcbc61acd0479dc77e3cccc0f5ffca7,1
3,f862b39f767d3a1991bdeb2ea1401c9c,3b5dca501ee1e6d8cd7b905f4e1bf723,0
4,cac14930c65d72c16efac2c51a6b7f71,287e03db1d99e0ec2edb90d079e142f3,0


In [5]:
# User/job pairs to score (no label column). os.path.join keeps the path valid whether
# or not DATA_DIR ends with a separator.
test_data = pd.read_csv(os.path.join(DATA_DIR, 'test_job.csv'))
test_data.head()

Unnamed: 0,userID,jobID
0,ebaee1af0c501f22ddfe242fc16dae53,352407221afb776e3143e8a1a0577885
1,9ab05403ac7808cbfba3da26665f7a9c,96b9bff013acedfb1d140579e2fbeb63
2,33349e909eba71677299d2fc97e158b7,58d4d1e7b1e97b258c9ed0b37e02d087
3,ac985a9db5faeb44c94a334430ccc241,ccb0989662211f61edae2e26d58ea92f
4,d41e0e6f6f1e29098d9d152511503ab2,4a213d37242bdcad8e7300e202e7caa4


In [6]:
test_data.describe()

Unnamed: 0,userID,jobID
count,2435,2435
unique,196,591
top,b052e2e0c0ad1b2d5036bd56e27d061c,da0d1111d2dc5d489242e60ebcbaf988
freq,49,24


#### 1.1 Load features 

##### job tags

In [7]:
# Job-to-tag assignments (columns: jobID, tagID). os.path.join keeps the path valid
# whether or not DATA_DIR ends with a separator.
job_tags_df = pd.read_csv(os.path.join(DATA_DIR, 'job_tags.csv'))
job_tags_df.head(2)

Unnamed: 0,jobID,tagID
0,320722549d1751cf3f247855f937b982,d38901788c533e8286cb6400b40b386d
1,e744f91c29ec99f0e662c9177946c627,3948ead63a9f2944218de038d8934305


##### tagID

In [8]:
# Tag vocabulary (columns: tagID, keyword). os.path.join keeps the path valid whether
# or not DATA_DIR ends with a separator.
tags_df = pd.read_csv(os.path.join(DATA_DIR, 'tags.csv'))
tags_df.head(2)

Unnamed: 0,tagID,keyword
0,602d1305678a8d5fdb372271e980da6a,Amazon Web Services(AWS)
1,e3251075554389fe91d17a794861d47b,Tensorflow


##### company

In [9]:
# Company of each job (columns: companyID, jobID, companySize). os.path.join keeps the
# path valid whether or not DATA_DIR ends with a separator.
companies_df = pd.read_csv(os.path.join(DATA_DIR, 'job_companies.csv'))
companies_df.head(2)

Unnamed: 0,companyID,jobID,companySize
0,00411460f7c92d2124a67ea0f4cb5f85,e5f6ad6ce374177eef023bf5d0c018b6,
1,1905aedab9bf2477edc068a355bba31a,185e65bc40581880c4f2c82958de8cfe,11-50


In [10]:
# Replace missing company sizes with the sentinel value 1.
# The result is assigned back instead of using `inplace=True` on a column selection:
# that chained in-place form is deprecated and does not modify the frame under
# pandas Copy-on-Write.
companies_df['companySize'] = companies_df['companySize'].fillna(1)

In [11]:
def companies_resize(s):
    """Map a company-size bucket to a single representative number.

    The sentinel ``1`` (used for missing sizes) stays ``1``; each known range label
    is mapped to the upper bound of its range; anything else maps to ``1001``.
    """
    known_buckets = (
        (1, 1),
        ('1-10', 10),
        ('11-50', 50),
        ('51-100', 100),
        ('101-200', 200),
        ('201-500', 500),
        ('501-1000', 1000),
    )
    # Compare with == in declaration order so the matching rules are exactly those
    # of an if/elif chain (no hashing of the input is involved).
    for bucket_label, representative in known_buckets:
        if s == bucket_label:
            return representative
    return 1001

In [12]:
# Convert every size bucket to its representative number.
# NOTE: not idempotent -- running this a second time on already converted values
# sends every value other than 1 to the fallback 1001.
companies_df['companySize'] = companies_df['companySize'].map(companies_resize)

#### 1.2 Encode job features

In [13]:
# One row per distinct job that has at least one tag assignment.
job_feature = pd.DataFrame({'jobID': job_tags_df['jobID'].unique()})

In [14]:
def _tag_ids_as_text(job_id):
    """Return all tag IDs of `job_id` rendered as one string (one tag per line)."""
    tag_rows = job_tags_df.loc[job_tags_df['jobID'] == job_id, 'tagID']
    return tag_rows.to_string(index=False).strip()


job_feature['tagID'] = job_feature['jobID'].apply(_tag_ids_as_text)

In [15]:
# Use the complete tag vocabulary as the class list so that the encoded vector
# layout is fixed by tags_df rather than by the tags that happen to occur.
all_tag_ids = tags_df['tagID'].unique().tolist()
tag_encoder = sklearn.preprocessing.MultiLabelBinarizer(classes=all_tag_ids)

In [16]:
# Turn the per-job tag string into a multi-hot list.
# The string comes from Series.to_string(), whose line padding differs between pandas
# versions; splitting on any whitespace (tag IDs contain none) works with or without
# the leading space that the previous "\n " separator relied on.
job_feature['tagID'] = tag_encoder.fit_transform(
    job_feature['tagID'].apply(lambda tag_text: tag_text.split())
).tolist()

In [17]:
# Look up the company of every job and store it as text.
# NOTE(review): a job with no row in companies_df yields the text of an empty Series,
# and a job with several rows yields a multi-line string -- both become their own category.
job_feature['companyID'] = [
    companies_df.loc[companies_df['jobID'] == job_id, 'companyID']
    .to_string(index=False)
    .strip()
    for job_id in job_feature['jobID']
]

In [18]:
# Encoder that maps each distinct company ID string to an integer label.
companyID_encoder = sklearn.preprocessing.LabelEncoder()

In [19]:
# Overwrite the company ID strings with their integer labels (the encoder is fitted here).
job_feature['companyID'] = companyID_encoder.fit_transform(job_feature['companyID'])

In [20]:
# One-hot encoder for the integer company labels; sparse=False asks for a dense array.
companyID_OH = sklearn.preprocessing.OneHotEncoder(sparse=False)

In [21]:
# Replace each integer label by its one-hot row; reshape(-1, 1) gives the encoder the
# single-feature 2-D input it expects, and .tolist() stores one Python list per job.
job_feature['companyID'] = companyID_OH.fit_transform(job_feature['companyID'].to_numpy().reshape(-1, 1)).tolist()

In [22]:
def _company_size_as_text(job_id):
    """Return the companySize value(s) recorded for `job_id`, rendered as text."""
    size_rows = companies_df.loc[companies_df['jobID'] == job_id, 'companySize']
    return size_rows.to_string(index=False).strip()


job_feature['companySize'] = job_feature['jobID'].map(_company_size_as_text)

In [23]:
# Encoder that maps each distinct company-size value to an integer label.
companySize_encoder = sklearn.preprocessing.LabelEncoder()

In [24]:
# Overwrite the size values with integer labels (the encoder is fitted here).
# NOTE(review): the values are text at this point, so labels presumably follow string
# order rather than numeric order -- harmless if they are only one-hot encoded afterwards.
job_feature['companySize'] = companySize_encoder.fit_transform(job_feature['companySize'])

In [25]:
# One-hot encoder for the integer size labels; sparse=False asks for a dense array.
companySize_OH = sklearn.preprocessing.OneHotEncoder(sparse=False)

In [26]:
# Replace each integer label by its one-hot row; reshape(-1, 1) gives the encoder the
# single-feature 2-D input it expects, and .tolist() stores one Python list per job.
job_feature['companySize'] = companySize_OH.fit_transform(job_feature['companySize'].to_numpy().reshape(-1, 1)).tolist()

##### train data

In [27]:
# Attach the encoded job features to every training interaction.
# - The join key is stated explicitly instead of relying on "all shared columns".
# - Feature columns left over from a previous execution of this cell are dropped first,
#   so re-running the cell no longer tries to join on list-valued columns.
# - validate='many_to_one' fails loudly if job_feature ever holds a job more than once.
_job_feature_cols = [c for c in job_feature.columns if c != ITEM_COL]
train_data = pd.merge(
    train_data.drop(columns=_job_feature_cols, errors='ignore'),
    job_feature,
    on=ITEM_COL,
    how='left',
    validate='many_to_one',
)

In [28]:
train_data.head()

Unnamed: 0,userID,jobID,applied,tagID,companyID,companySize
0,fe292163d06253b716e9a0099b42031d,15de21c670ae7c3f6f3f1f37029303c9,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]"
1,6377fa90618fae77571e8dc90d98d409,55b37c5c270e5d84c793e486d798c01d,0,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0]"
2,8ec0888a5b04139be0dfe942c7eb4199,0fcbc61acd0479dc77e3cccc0f5ffca7,1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
3,f862b39f767d3a1991bdeb2ea1401c9c,3b5dca501ee1e6d8cd7b905f4e1bf723,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]"
4,cac14930c65d72c16efac2c51a6b7f71,287e03db1d99e0ec2edb90d079e142f3,0,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]"


##### test data

In [29]:
# Attach the encoded job features to every test pair.
# - The join key is stated explicitly instead of relying on "all shared columns".
# - Feature columns left over from a previous execution of this cell are dropped first,
#   so re-running the cell no longer tries to join on list-valued columns.
# - validate='many_to_one' fails loudly if job_feature ever holds a job more than once.
_job_feature_cols = [c for c in job_feature.columns if c != ITEM_COL]
test_data = pd.merge(
    test_data.drop(columns=_job_feature_cols, errors='ignore'),
    job_feature,
    on=ITEM_COL,
    how='left',
    validate='many_to_one',
)

In [30]:
test_data.head()

Unnamed: 0,userID,jobID,tagID,companyID,companySize
0,ebaee1af0c501f22ddfe242fc16dae53,352407221afb776e3143e8a1a0577885,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]"
1,9ab05403ac7808cbfba3da26665f7a9c,96b9bff013acedfb1d140579e2fbeb63,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
2,33349e909eba71677299d2fc97e158b7,58d4d1e7b1e97b258c9ed0b37e02d087,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]"
3,ac985a9db5faeb44c94a334430ccc241,ccb0989662211f61edae2e26d58ea92f,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]"
4,d41e0e6f6f1e29098d9d152511503ab2,4a213d37242bdcad8e7300e202e7caa4,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]"


#### 1.3 Train and Test Split

In [31]:
# Random split of the labelled data; `valid` is the small hold-out used by the
# evaluation hooks (the printed message calls it "test").
train, valid = python_random_split(train_data, seed=RANDOM_SEED, ratio=0.999)

split_summary = "{} train samples and {} test samples".format(len(train), len(valid))
print(split_summary)

5994 train samples and 6 test samples


In [32]:
# Item table: one row per distinct item, optionally with its feature column(s).
# item_feat_shape is None (no features), an int (single feature column) or a list of
# ints (one length per feature column), taken from the first item's feature value(s).
items_dedup = train_data.drop_duplicates(ITEM_COL)
if ITEM_FEAT_COL is None:
    items = items_dedup[[ITEM_COL]].reset_index(drop=True)
    item_feat_shape = None
elif ITEM_FEAT_FLAG:
    ITEM_FEAT_COL_LIST = ITEM_FEAT_COL.split(',')
    items = items_dedup[[ITEM_COL] + ITEM_FEAT_COL_LIST].reset_index(drop=True)
    item_feat_shape = [len(items[feat_name][0]) for feat_name in ITEM_FEAT_COL_LIST]
else:
    items = items_dedup[[ITEM_COL, ITEM_FEAT_COL]].reset_index(drop=True)
    item_feat_shape = len(items[ITEM_FEAT_COL][0])

# User table: one row per distinct user.
users = train_data[[USER_COL]].drop_duplicates().reset_index(drop=True)

print("Total {} items and {} users in the dataset".format(len(items), len(users)))

Total 708 items and 196 users in the dataset


### 2. Build Model

In [33]:
# Checkpoint interval: one fifth of the training steps, but never less than one step.
save_checkpoints_steps = max(STEPS // 5, 1)

In [34]:
# Build the feature columns of the wide (linear) and deep (dnn) parts from the
# user/item vocabularies and the item feature shapes computed above.
wide_columns, deep_columns = wide_deep.build_feature_columns(
    users=users[USER_COL].values,
    items=items[ITEM_COL].values,
    user_col=USER_COL,
    item_col=ITEM_COL,
    item_feat_col=ITEM_FEAT_COL,
    item_feat_shape=item_feat_shape,
    item_feat_flag=ITEM_FEAT_FLAG,
    user_dim=DNN_USER_DIM,
    item_dim=DNN_ITEM_DIM,
    crossed_feat_dim=1000,
    model_type=MODEL_TYPE
)

# Print a truncated (100 characters) description of every column.
for heading, feature_columns in (
    ("Wide feature specs:", wide_columns),
    ("Deep feature specs:", deep_columns),
):
    print(heading)
    for column in feature_columns:
        print("\t", str(column)[:100], "...")

Wide feature specs:
	 VocabularyListCategoricalColumn(key='userID', vocabulary_list=('fe292163d06253b716e9a0099b42031d', ' ...
	 VocabularyListCategoricalColumn(key='jobID', vocabulary_list=('15de21c670ae7c3f6f3f1f37029303c9', '5 ...
	 CrossedColumn(keys=(VocabularyListCategoricalColumn(key='userID', vocabulary_list=('fe292163d06253b7 ...
Deep feature specs:
	 EmbeddingColumn(categorical_column=VocabularyListCategoricalColumn(key='userID', vocabulary_list=('f ...
	 EmbeddingColumn(categorical_column=VocabularyListCategoricalColumn(key='jobID', vocabulary_list=('15 ...
	 NumericColumn(key='tagID', shape=(887,), default_value=None, dtype=tf.float32, normalizer_fn=None) ...
	 NumericColumn(key='companyID', shape=(276,), default_value=None, dtype=tf.float32, normalizer_fn=Non ...
	 NumericColumn(key='companySize', shape=(8,), default_value=None, dtype=tf.float32, normalizer_fn=Non ...


In [35]:
# Optimizers for the two model parts; the regularization/momentum values are passed
# through as keyword arguments to tf_utils.build_optimizer.
linear_optimizer = tf_utils.build_optimizer(
    LINEAR_OPTIMIZER,
    LINEAR_OPTIMIZER_LR,
    l1_regularization_strength=LINEAR_L1_REG,
    l2_regularization_strength=LINEAR_L2_REG,
    momentum=LINEAR_MOMENTUM
)
dnn_optimizer = tf_utils.build_optimizer(
    DNN_OPTIMIZER,
    DNN_OPTIMIZER_LR,
    l1_regularization_strength=DNN_L1_REG,
    l2_regularization_strength=DNN_L2_REG,
    momentum=DNN_MOMENTUM
)

# Assemble the estimator from the parameters defined in the configuration cell.
model = wide_deep.build_model(
    model_dir=model_dir,
    wide_columns=wide_columns,
    deep_columns=deep_columns,
    linear_optimizer=linear_optimizer,
    dnn_optimizer=dnn_optimizer,
    dnn_hidden_units=DNN_HIDDEN_UNITS,
    dnn_dropout=DNN_DROPOUT,
    dnn_batch_norm=(DNN_BATCH_NORM == 1),
    log_every_n_iter=max(1, STEPS // 10),  # log 10 times
    save_checkpoints_steps=save_checkpoints_steps,
    seed=RANDOM_SEED
)

INFO:tensorflow:Using config: {'_model_dir': '/var/folders/_q/czkytp194_s6kb2464zr1n980000gn/T/tmp9qxkpeah', '_tf_random_seed': 3737, '_save_summary_steps': 100, '_save_checkpoints_steps': 200, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1338e9470>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [36]:
model

<tensorflow_estimator.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedClassifier at 0x1338e9a20>

### 3. Train and Evaluate Model

In [37]:
# Column-name keyword arguments shared by the evaluation functions.
cols = dict(
    col_user=USER_COL,
    col_item=ITEM_COL,
    col_rating=RATING_COL,
    col_prediction=PREDICT_COL,
)

In [38]:
# Define training hooks to track performance while training.
# One hook is registered per non-empty metric family (ranking first, then rating).
hooks = []
if EVALUATE_WHILE_TRAINING:
    evaluation_logger = tf_utils.MetricsLogger()

    def _make_eval_hook(metric_names, eval_df, **extra_eval_kwargs):
        """Build an evaluation hook that scores `eval_df` against the `valid` split."""
        return tf_utils.evaluation_log_hook(
            model,
            logger=evaluation_logger,
            true_df=valid,
            y_col=RATING_COL,
            eval_df=eval_df,
            every_n_iter=save_checkpoints_steps,
            model_dir=model_dir,
            eval_fns=[evaluator.metrics[m] for m in metric_names],
            **{**cols, **extra_eval_kwargs}
        )

    if len(RANKING_METRICS) > 0:
        # `ranking_pool` and `TOP_K` were referenced here without being defined anywhere
        # in this notebook, so enabling a ranking metric raised NameError.
        # NOTE(review): the keyword names below follow reco_utils' `user_item_pairs`
        # (imported at the top of the notebook) -- confirm them against this fork.
        ranking_pool = user_item_pairs(
            user_df=users,
            item_df=items,
            user_col=USER_COL,
            item_col=ITEM_COL,
            user_item_filter_df=train,  # leave out pairs already seen in training
            shuffle=True,
            seed=RANDOM_SEED
        )
        # Honour a TOP_K constant if the configuration cell defines one; otherwise use 10.
        top_k = globals().get('TOP_K', 10)
        hooks.append(_make_eval_hook(RANKING_METRICS, ranking_pool, k=top_k))

    if len(RATING_METRICS) > 0:
        # Separate branches (instead of comparing the lists with ==) keep the rating hook
        # correct even when both metric lists happen to hold the same names.
        hooks.append(_make_eval_hook(RATING_METRICS, valid))

# Define training input (sample feeding) function
train_fn = tf_utils.pandas_input_fn(
    df=train,
    y_col=RATING_COL,
    batch_size=BATCH_SIZE,
    num_epochs=None,  # Training length is bounded by steps=STEPS in model.train instead.
    shuffle=True,
    seed=RANDOM_SEED,
)

Let's train the model.

In [39]:
# Report how many passes over the training data the configured steps amount to.
epochs_covered = (STEPS * BATCH_SIZE) // len(train)
print(
    "Training steps = {}, Batch size = {} (num epochs = {})"
    .format(STEPS, BATCH_SIZE, epochs_covered)
)
tf.logging.set_verbosity(tf.logging.INFO)

# A NaN loss is reported as a warning instead of aborting the notebook.
try:
    model.train(input_fn=train_fn, hooks=hooks, steps=STEPS)
except tf.train.NanLossDuringTrainingError:
    import warnings
    nan_loss_message = (
        "Training stopped with NanLossDuringTrainingError. "
        "Try other optimizers, smaller batch size and/or smaller learning rate."
    )
    warnings.warn(nan_loss_message)

Training steps = 1000, Batch size = 32 (num epochs = 5)
Instructions for updating:
Colocations handled automatically by placer.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /var/folders/_q/czkytp194_s6kb2464zr1n980000gn/T/tmp9qxkpeah/model.ckpt.
INFO:tensorflow:loss = 26.35226, step = 1
INFO:tensorflow:global_step/sec: 40.5026
INFO:tensorflow:loss = 17.212816, step = 101 (2.46

### 4. Test

In [40]:
# Score the test pairs and keep the predicted class id of each row next to its inputs.
test_input_fn = tf_utils.pandas_input_fn(df=test_data)
predictions = list(model.predict(input_fn=test_input_fn))
predicted_class_ids = [prediction['class_ids'][0] for prediction in predictions]

prediction_df = test_data.copy()
prediction_df['applied'] = predicted_class_ids

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from /var/folders/_q/czkytp194_s6kb2464zr1n980000gn/T/tmp9qxkpeah/model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [41]:
prediction_df.head()

Unnamed: 0,userID,jobID,tagID,companyID,companySize,applied
0,ebaee1af0c501f22ddfe242fc16dae53,352407221afb776e3143e8a1a0577885,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]",0
1,9ab05403ac7808cbfba3da26665f7a9c,96b9bff013acedfb1d140579e2fbeb63,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]",0
2,33349e909eba71677299d2fc97e158b7,58d4d1e7b1e97b258c9ed0b37e02d087,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]",0
3,ac985a9db5faeb44c94a334430ccc241,ccb0989662211f61edae2e26d58ea92f,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]",0
4,d41e0e6f6f1e29098d9d152511503ab2,4a213d37242bdcad8e7300e202e7caa4,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]",0


In [42]:
res = prediction_df['applied']

In [43]:
res.loc[res==1].count()

0

In [44]:
# Write the predicted labels, one value per line.
# - The output directory is created when missing so the cell works on a fresh checkout.
# - os.path.join keeps the path valid whether or not RESULT_DIR ends with a separator.
# - `header` is passed explicitly: older pandas releases wrote a Series without a header
#   (and emitted a FutureWarning), newer ones write one. header=False keeps the
#   header-less layout; switch to True if the consumer expects a column name.
os.makedirs(RESULT_DIR, exist_ok=True)
res.to_csv(os.path.join(RESULT_DIR, 'wideNdeep_multi_final.csv'), index=False, header=False)

  """Entry point for launching an IPython kernel.
