#  Forked from [Baseline Modeling](https://www.kaggle.com/wrosinski/baselinemodeling)

## Added Image features from [Extract Image features from pretrained NN](https://www.kaggle.com/christofhenkel/extract-image-features-from-pretrained-nn)

## Added Image size features from [Extract Image Features](https://www.kaggle.com/kaerunantoka/extract-image-features)

In [1]:
import gc
import glob
import os
import json
import matplotlib.pyplot as plt
import seaborn as sns
import pprint
import warnings

import numpy as np
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from joblib import Parallel, delayed
from tqdm import tqdm, tqdm_notebook

%matplotlib inline

np.random.seed(seed=1337)
warnings.filterwarnings('ignore')

split_char = '/'
PETFINDER_ROOT_PATH = 'PATHTO/petfinder-adoption-prediction'
KERAS_PRETRAINED_PATH = 'PATHTO/keras-pretrained/densenet-keras'

In [2]:
train = pd.read_csv(PETFINDER_ROOT_PATH + '/train/train.csv')
test = pd.read_csv(PETFINDER_ROOT_PATH + '/test/test.csv')
sample_submission = pd.read_csv(PETFINDER_ROOT_PATH + '/test/sample_submission.csv')

## Image features

In [3]:
import cv2
import os
from keras.applications.densenet import preprocess_input, DenseNet121

Using TensorFlow backend.


In [4]:
def resize_to_square(im):
    old_size = im.shape[:2]
    ratio = float(img_size)/max(old_size)
    new_size = tuple([int(x*ratio) for x in old_size])
    im = cv2.resize(im, (new_size[1], new_size[0]))
    delta_w = img_size - new_size[1]
    delta_h = img_size - new_size[0]
    top, bottom = delta_h//2, delta_h-(delta_h//2)
    left, right = delta_w//2, delta_w-(delta_w//2)
    color = [0, 0, 0]
    new_im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT,value=color)
    return new_im

def load_image(path, pet_id):
    image = cv2.imread(f'{path}{pet_id}-1.jpg')
    new_image = resize_to_square(image)
    new_image = preprocess_input(new_image)
    return new_image

In [5]:
img_size = 256
batch_size = 256

In [6]:
from keras.models import Model
from keras.layers import GlobalAveragePooling2D, Input, Lambda, AveragePooling1D
import keras.backend as K
inp = Input((256,256,3))
backbone = DenseNet121(input_tensor = inp, 
                       weights=KERAS_PRETRAINED_PATH + "/DenseNet-BC-121-32-no-top.h5",
                       include_top = False)
x = backbone.output
x = GlobalAveragePooling2D()(x)
x = Lambda(lambda x: K.expand_dims(x,axis = -1))(x)
x = AveragePooling1D(4)(x)
out = Lambda(lambda x: x[:,:,0])(x)

m = Model(inp,out)

In [7]:
pet_ids = train['PetID'].values
n_batches = len(pet_ids) // batch_size + 1

features = {}
for b in tqdm(range(n_batches)):
    start = b*batch_size
    end = (b+1)*batch_size
    batch_pets = pet_ids[start:end]
    batch_images = np.zeros((len(batch_pets),img_size,img_size,3))
    for i,pet_id in enumerate(batch_pets):
        try:
            batch_images[i] = load_image(PETFINDER_ROOT_PATH +  + "/train_images/", pet_id)
        except:
            pass
    batch_preds = m.predict(batch_images)
    for i,pet_id in enumerate(batch_pets):
        features[pet_id] = batch_preds[i]

100%|██████████| 59/59 [01:01<00:00,  1.04it/s]


In [8]:
train_feats = pd.DataFrame.from_dict(features, orient='index')
train_feats.columns = [f'pic_{i}' for i in range(train_feats.shape[1])]

In [9]:
pet_ids = test['PetID'].values
n_batches = len(pet_ids) // batch_size + 1

features = {}
for b in tqdm(range(n_batches)):
    start = b*batch_size
    end = (b+1)*batch_size
    batch_pets = pet_ids[start:end]
    batch_images = np.zeros((len(batch_pets),img_size,img_size,3))
    for i,pet_id in enumerate(batch_pets):
        try:
            batch_images[i] = load_image(PETFINDER_ROOT_PATH + "/test_images/", pet_id)
        except:
            pass
    batch_preds = m.predict(batch_images)
    for i,pet_id in enumerate(batch_pets):
        features[pet_id] = batch_preds[i]

100%|██████████| 16/16 [00:26<00:00,  1.46s/it]


In [10]:
test_feats = pd.DataFrame.from_dict(features, orient='index')
test_feats.columns = [f'pic_{i}' for i in range(test_feats.shape[1])]

In [11]:
train_feats = train_feats.reset_index()
train_feats.rename({'index': 'PetID'}, axis='columns', inplace=True)

test_feats = test_feats.reset_index()
test_feats.rename({'index': 'PetID'}, axis='columns', inplace=True)

In [12]:
all_ids = pd.concat([train, test], axis=0, ignore_index=True, sort=False)[['PetID']]
all_ids.shape

(18941, 1)

In [13]:
n_components = 32
svd_ = TruncatedSVD(n_components=n_components, random_state=1337)

features_df = pd.concat([train_feats, test_feats], axis=0)
features = features_df[[f'pic_{i}' for i in range(256)]].values

svd_col = svd_.fit_transform(features)
svd_col = pd.DataFrame(svd_col)
svd_col = svd_col.add_prefix('IMG_SVD_')

img_features = pd.concat([all_ids, svd_col], axis=1)

## About metadata and sentiment

In [14]:
labels_breed = pd.read_csv(PETFINDER_ROOT_PATH + '/breed_labels.csv')
labels_state = pd.read_csv(PETFINDER_ROOT_PATH + '/color_labels.csv')
labels_color = pd.read_csv(PETFINDER_ROOT_PATH + '/state_labels.csv')

In [15]:
train_image_files = sorted(glob.glob(PETFINDER_ROOT_PATH + '/train_images/*.jpg'))
train_metadata_files = sorted(glob.glob(PETFINDER_ROOT_PATH + '/train_metadata/*.json'))
train_sentiment_files = sorted(glob.glob(PETFINDER_ROOT_PATH + '/train_sentiment/*.json'))

print(f'num of train images files: {len(train_image_files)}')
print(f'num of train metadata files: {len(train_metadata_files)}')
print(f'num of train sentiment files: {len(train_sentiment_files)}')


test_image_files = sorted(glob.glob(PETFINDER_ROOT_PATH + '/test_images/*.jpg'))
test_metadata_files = sorted(glob.glob(PETFINDER_ROOT_PATH + '/test_metadata/*.json'))
test_sentiment_files = sorted(glob.glob(PETFINDER_ROOT_PATH + '/test_sentiment/*.json'))

print(f'num of test images files: {len(test_image_files)}')
print(f'num of test metadata files: {len(test_metadata_files)}')
print(f'num of test sentiment files: {len(test_sentiment_files)}')

num of train images files: 58311
num of train metadata files: 58311
num of train sentiment files: 14442
num of test images files: 15040
num of test metadata files: 15040
num of test sentiment files: 3815


### Train

In [16]:
# Images:
train_df_ids = train[['PetID']]
print(train_df_ids.shape)

# Metadata:
train_df_ids = train[['PetID']]
train_df_metadata = pd.DataFrame(train_metadata_files)
train_df_metadata.columns = ['metadata_filename']
train_metadata_pets = train_df_metadata['metadata_filename'].apply(lambda x: x.split(split_char)[-1].split('-')[0])
train_df_metadata = train_df_metadata.assign(PetID=train_metadata_pets)
print(len(train_metadata_pets.unique()))

pets_with_metadatas = len(np.intersect1d(train_metadata_pets.unique(), train_df_ids['PetID'].unique()))
print(f'fraction of pets with metadata: {pets_with_metadatas / train_df_ids.shape[0]:.3f}')

# Sentiment:
train_df_ids = train[['PetID']]
train_df_sentiment = pd.DataFrame(train_sentiment_files)
train_df_sentiment.columns = ['sentiment_filename']
train_sentiment_pets = train_df_sentiment['sentiment_filename'].apply(lambda x: x.split(split_char)[-1].split('.')[0])
train_df_sentiment = train_df_sentiment.assign(PetID=train_sentiment_pets)
print(len(train_sentiment_pets.unique()))

pets_with_sentiments = len(np.intersect1d(train_sentiment_pets.unique(), train_df_ids['PetID'].unique()))
print(f'fraction of pets with sentiment: {pets_with_sentiments / train_df_ids.shape[0]:.3f}')

(14993, 1)
14652
fraction of pets with metadata: 0.977
14442
fraction of pets with sentiment: 0.963


### Test

In [17]:
# Images:
test_df_ids = test[['PetID']]
print(test_df_ids.shape)

# Metadata:
test_df_metadata = pd.DataFrame(test_metadata_files)
test_df_metadata.columns = ['metadata_filename']
test_metadata_pets = test_df_metadata['metadata_filename'].apply(lambda x: x.split(split_char)[-1].split('-')[0])
test_df_metadata = test_df_metadata.assign(PetID=test_metadata_pets)
print(len(test_metadata_pets.unique()))

pets_with_metadatas = len(np.intersect1d(test_metadata_pets.unique(), test_df_ids['PetID'].unique()))
print(f'fraction of pets with metadata: {pets_with_metadatas / test_df_ids.shape[0]:.3f}')

# Sentiment:
test_df_sentiment = pd.DataFrame(test_sentiment_files)
test_df_sentiment.columns = ['sentiment_filename']
test_sentiment_pets = test_df_sentiment['sentiment_filename'].apply(lambda x: x.split(split_char)[-1].split('.')[0])
test_df_sentiment = test_df_sentiment.assign(PetID=test_sentiment_pets)
print(len(test_sentiment_pets.unique()))

pets_with_sentiments = len(np.intersect1d(test_sentiment_pets.unique(), test_df_ids['PetID'].unique()))
print(f'fraction of pets with sentiment: {pets_with_sentiments / test_df_ids.shape[0]:.3f}')

(3948, 1)
3821
fraction of pets with metadata: 0.968
3815
fraction of pets with sentiment: 0.966


## Extract features from json

In [18]:
class PetFinderParser(object):
    
    def __init__(self, debug=False):
        
        self.debug = debug
        self.sentence_sep = ' '
        
        self.extract_sentiment_text = False
    
    def open_json_file(self, filename):
        with open(filename, 'r', encoding='utf-8') as f:
            json_file = json.load(f)
        return json_file
        
    def parse_sentiment_file(self, file):
        """
        Parse sentiment file. Output DF with sentiment features.
        """
        
        file_sentiment = file['documentSentiment']
        file_entities = [x['name'] for x in file['entities']]
        file_entities = self.sentence_sep.join(file_entities)
        
        file_sentences_sentiment = [x['sentiment'] for x in file['sentences']]
        
        file_sentences_sentiment = pd.DataFrame.from_dict(
            file_sentences_sentiment, orient='columns')
        file_sentences_sentiment_df = pd.DataFrame(
            {
                'magnitude_sum': file_sentences_sentiment['magnitude'].sum(axis=0),
                'score_sum': file_sentences_sentiment['score'].sum(axis=0),
                'magnitude_mean': file_sentences_sentiment['magnitude'].mean(axis=0),
                'score_mean': file_sentences_sentiment['score'].mean(axis=0),
                'magnitude_var': file_sentences_sentiment['magnitude'].var(axis=0),
                'score_var': file_sentences_sentiment['score'].var(axis=0),
            }, index=[0]
        )
        
        df_sentiment = pd.DataFrame.from_dict(file_sentiment, orient='index').T
        df_sentiment = pd.concat([df_sentiment, file_sentences_sentiment_df], axis=1)
            
        df_sentiment['entities'] = file_entities
        df_sentiment = df_sentiment.add_prefix('sentiment_')
        
        return df_sentiment
    
    def parse_metadata_file(self, file):
        """
        Parse metadata file. Output DF with metadata features.
        """
        
        file_keys = list(file.keys())
        
        if 'labelAnnotations' in file_keys:
            file_annots = file['labelAnnotations']
            file_top_score = np.asarray([x['score'] for x in file_annots]).mean()
            file_top_desc = [x['description'] for x in file_annots]
        else:
            file_top_score = np.nan
            file_top_desc = ['']
        
        file_colors = file['imagePropertiesAnnotation']['dominantColors']['colors']
        file_crops = file['cropHintsAnnotation']['cropHints']

        file_color_score = np.asarray([x['score'] for x in file_colors]).mean()
        file_color_pixelfrac = np.asarray([x['pixelFraction'] for x in file_colors]).mean()

        file_crop_conf = np.asarray([x['confidence'] for x in file_crops]).mean()
        
        if 'importanceFraction' in file_crops[0].keys():
            file_crop_importance = np.asarray([x['importanceFraction'] for x in file_crops]).mean()
        else:
            file_crop_importance = np.nan

        df_metadata = {
            'annots_score': file_top_score,
            'color_score': file_color_score,
            'color_pixelfrac': file_color_pixelfrac,
            'crop_conf': file_crop_conf,
            'crop_importance': file_crop_importance,
            'annots_top_desc': self.sentence_sep.join(file_top_desc)
        }
        
        df_metadata = pd.DataFrame.from_dict(df_metadata, orient='index').T
        df_metadata = df_metadata.add_prefix('metadata_')
        
        return df_metadata
    

def extract_additional_features(pet_id, mode='train'):
    
    sentiment_filename = f'{PETFINDER_ROOT_PATH}/{mode}_sentiment/{pet_id}.json'
    try:
        sentiment_file = pet_parser.open_json_file(sentiment_filename)
        df_sentiment = pet_parser.parse_sentiment_file(sentiment_file)
        df_sentiment['PetID'] = pet_id
    except FileNotFoundError:
        df_sentiment = []

    dfs_metadata = []
    metadata_filenames = sorted(glob.glob(f'{PETFINDER_ROOT_PATH}/{mode}_metadata/{pet_id}*.json'))
    if len(metadata_filenames) > 0:
        for f in metadata_filenames:
            metadata_file = pet_parser.open_json_file(f)
            df_metadata = pet_parser.parse_metadata_file(metadata_file)
            df_metadata['PetID'] = pet_id
            dfs_metadata.append(df_metadata)
        dfs_metadata = pd.concat(dfs_metadata, ignore_index=True, sort=False)
    dfs = [df_sentiment, dfs_metadata]
    
    return dfs


pet_parser = PetFinderParser()

In [19]:
debug = False
train_pet_ids = train.PetID.unique()
test_pet_ids = test.PetID.unique()

if debug:
    train_pet_ids = train_pet_ids[:1000]
    test_pet_ids = test_pet_ids[:500]


dfs_train = Parallel(n_jobs=-1, verbose=1)(
    delayed(extract_additional_features)(i, mode='train') for i in train_pet_ids)

train_dfs_sentiment = [x[0] for x in dfs_train if isinstance(x[0], pd.DataFrame)]
train_dfs_metadata = [x[1] for x in dfs_train if isinstance(x[1], pd.DataFrame)]

train_dfs_sentiment = pd.concat(train_dfs_sentiment, ignore_index=True, sort=False)
train_dfs_metadata = pd.concat(train_dfs_metadata, ignore_index=True, sort=False)

print(train_dfs_sentiment.shape, train_dfs_metadata.shape)


dfs_test = Parallel(n_jobs=-1, verbose=1)(
    delayed(extract_additional_features)(i, mode='test') for i in test_pet_ids)

test_dfs_sentiment = [x[0] for x in dfs_test if isinstance(x[0], pd.DataFrame)]
test_dfs_metadata = [x[1] for x in dfs_test if isinstance(x[1], pd.DataFrame)]

test_dfs_sentiment = pd.concat(test_dfs_sentiment, ignore_index=True, sort=False)
test_dfs_metadata = pd.concat(test_dfs_metadata, ignore_index=True, sort=False)

print(test_dfs_sentiment.shape, test_dfs_metadata.shape)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.4s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:    7.8s
[Parallel(n_jobs=-1)]: Done 784 tasks      | elapsed:   13.3s
[Parallel(n_jobs=-1)]: Done 1234 tasks      | elapsed:   20.7s
[Parallel(n_jobs=-1)]: Done 1784 tasks      | elapsed:   29.5s
[Parallel(n_jobs=-1)]: Done 2434 tasks      | elapsed:   39.8s
[Parallel(n_jobs=-1)]: Done 3184 tasks      | elapsed:   51.8s
[Parallel(n_jobs=-1)]: Done 4034 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 4984 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 6034 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 7184 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 8434 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 9784 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 11234 tasks      |

(14442, 10) (58311, 7)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done 124 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done 1024 tasks      | elapsed:    5.9s
[Parallel(n_jobs=-1)]: Done 2524 tasks      | elapsed:   14.5s
[Parallel(n_jobs=-1)]: Done 3948 out of 3948 | elapsed:   22.2s finished


(3815, 10) (15040, 7)


### group extracted features by PetID:

In [20]:
aggregates = ['sum', 'mean', 'var']
sent_agg = ['sum']


# Train
train_metadata_desc = train_dfs_metadata.groupby(['PetID'])['metadata_annots_top_desc'].unique()
train_metadata_desc = train_metadata_desc.reset_index()
train_metadata_desc[
    'metadata_annots_top_desc'] = train_metadata_desc[
    'metadata_annots_top_desc'].apply(lambda x: ' '.join(x))

prefix = 'metadata'
train_metadata_gr = train_dfs_metadata.drop(['metadata_annots_top_desc'], axis=1)
for i in train_metadata_gr.columns:
    if 'PetID' not in i:
        train_metadata_gr[i] = train_metadata_gr[i].astype(float)
train_metadata_gr = train_metadata_gr.groupby(['PetID']).agg(aggregates)
train_metadata_gr.columns = pd.Index([f'{c[0]}_{c[1].upper()}' for c in train_metadata_gr.columns.tolist()])
train_metadata_gr = train_metadata_gr.reset_index()


train_sentiment_desc = train_dfs_sentiment.groupby(['PetID'])['sentiment_entities'].unique()
train_sentiment_desc = train_sentiment_desc.reset_index()
train_sentiment_desc[
    'sentiment_entities'] = train_sentiment_desc[
    'sentiment_entities'].apply(lambda x: ' '.join(x))

prefix = 'sentiment'
train_sentiment_gr = train_dfs_sentiment.drop(['sentiment_entities'], axis=1)
for i in train_sentiment_gr.columns:
    if 'PetID' not in i:
        train_sentiment_gr[i] = train_sentiment_gr[i].astype(float)
train_sentiment_gr = train_sentiment_gr.groupby(['PetID']).agg(sent_agg)
train_sentiment_gr.columns = pd.Index([f'{c[0]}' for c in train_sentiment_gr.columns.tolist()])
train_sentiment_gr = train_sentiment_gr.reset_index()


# Test
test_metadata_desc = test_dfs_metadata.groupby(['PetID'])['metadata_annots_top_desc'].unique()
test_metadata_desc = test_metadata_desc.reset_index()
test_metadata_desc[
    'metadata_annots_top_desc'] = test_metadata_desc[
    'metadata_annots_top_desc'].apply(lambda x: ' '.join(x))

prefix = 'metadata'
test_metadata_gr = test_dfs_metadata.drop(['metadata_annots_top_desc'], axis=1)
for i in test_metadata_gr.columns:
    if 'PetID' not in i:
        test_metadata_gr[i] = test_metadata_gr[i].astype(float)
test_metadata_gr = test_metadata_gr.groupby(['PetID']).agg(aggregates)
test_metadata_gr.columns = pd.Index([f'{c[0]}_{c[1].upper()}' for c in test_metadata_gr.columns.tolist()])
test_metadata_gr = test_metadata_gr.reset_index()


test_sentiment_desc = test_dfs_sentiment.groupby(['PetID'])['sentiment_entities'].unique()
test_sentiment_desc = test_sentiment_desc.reset_index()
test_sentiment_desc[
    'sentiment_entities'] = test_sentiment_desc[
    'sentiment_entities'].apply(lambda x: ' '.join(x))

prefix = 'sentiment'
test_sentiment_gr = test_dfs_sentiment.drop(['sentiment_entities'], axis=1)
for i in test_sentiment_gr.columns:
    if 'PetID' not in i:
        test_sentiment_gr[i] = test_sentiment_gr[i].astype(float)
test_sentiment_gr = test_sentiment_gr.groupby(['PetID']).agg(sent_agg)
test_sentiment_gr.columns = pd.Index([f'{c[0]}' for c in test_sentiment_gr.columns.tolist()])
test_sentiment_gr = test_sentiment_gr.reset_index()

### merge processed DFs with base train/test DF:

In [21]:
# Train merges:
train_proc = train.copy()
train_proc = train_proc.merge(
    train_sentiment_gr, how='left', on='PetID')
train_proc = train_proc.merge(
    train_metadata_gr, how='left', on='PetID')
train_proc = train_proc.merge(
    train_metadata_desc, how='left', on='PetID')
train_proc = train_proc.merge(
    train_sentiment_desc, how='left', on='PetID')

# Test merges:
test_proc = test.copy()
test_proc = test_proc.merge(
    test_sentiment_gr, how='left', on='PetID')
test_proc = test_proc.merge(
    test_metadata_gr, how='left', on='PetID')
test_proc = test_proc.merge(
    test_metadata_desc, how='left', on='PetID')
test_proc = test_proc.merge(
    test_sentiment_desc, how='left', on='PetID')

print(train_proc.shape, test_proc.shape)
assert train_proc.shape[0] == train.shape[0]
assert test_proc.shape[0] == test.shape[0]

(14993, 49) (3948, 48)


In [22]:
train_breed_main = train_proc[['Breed1']].merge(
    labels_breed, how='left',
    left_on='Breed1', right_on='BreedID',
    suffixes=('', '_main_breed'))

train_breed_main = train_breed_main.iloc[:, 2:]
train_breed_main = train_breed_main.add_prefix('main_breed_')

train_breed_second = train_proc[['Breed2']].merge(
    labels_breed, how='left',
    left_on='Breed2', right_on='BreedID',
    suffixes=('', '_second_breed'))

train_breed_second = train_breed_second.iloc[:, 2:]
train_breed_second = train_breed_second.add_prefix('second_breed_')


train_proc = pd.concat(
    [train_proc, train_breed_main, train_breed_second], axis=1)


test_breed_main = test_proc[['Breed1']].merge(
    labels_breed, how='left',
    left_on='Breed1', right_on='BreedID',
    suffixes=('', '_main_breed'))

test_breed_main = test_breed_main.iloc[:, 2:]
test_breed_main = test_breed_main.add_prefix('main_breed_')

test_breed_second = test_proc[['Breed2']].merge(
    labels_breed, how='left',
    left_on='Breed2', right_on='BreedID',
    suffixes=('', '_second_breed'))

test_breed_second = test_breed_second.iloc[:, 2:]
test_breed_second = test_breed_second.add_prefix('second_breed_')


test_proc = pd.concat(
    [test_proc, test_breed_main, test_breed_second], axis=1)

print(train_proc.shape, test_proc.shape)

(14993, 53) (3948, 52)


In [23]:
X = pd.concat([train_proc, test_proc], ignore_index=True, sort=False)

In [24]:
X_temp = X.copy()

text_columns = ['Description', 'metadata_annots_top_desc', 'sentiment_entities']
categorical_columns = ['main_breed_BreedName', 'second_breed_BreedName']

to_drop_columns = ['PetID', 'Name', 'RescuerID']

In [25]:
rescuer_count = X.groupby(['RescuerID'])['PetID'].count().reset_index()
rescuer_count.columns = ['RescuerID', 'RescuerID_COUNT']

X_temp = X_temp.merge(rescuer_count, how='left', on='RescuerID')

In [26]:
for i in categorical_columns:
    X_temp.loc[:, i] = pd.factorize(X_temp.loc[:, i])[0]

In [27]:
X_text = X_temp[text_columns]

for i in X_text.columns:
    X_text.loc[:, i] = X_text.loc[:, i].fillna('none')

In [28]:
X_temp['Length_Description'] = X_text['Description'].map(len)
X_temp['Length_metadata_annots_top_desc'] = X_text['metadata_annots_top_desc'].map(len)
X_temp['Lengths_sentiment_entities'] = X_text['sentiment_entities'].map(len)

### TFIDF

In [29]:
n_components = 16
text_features = []

# Generate text features:
for i in X_text.columns:
    
    # Initialize decomposition methods:
    print(f'generating features from: {i}')
    tfv = TfidfVectorizer(min_df=2,  max_features=None,
                          strip_accents='unicode', analyzer='word', token_pattern=r'(?u)\b\w+\b',
                          ngram_range=(1, 3), use_idf=1, smooth_idf=1, sublinear_tf=1)
    svd_ = TruncatedSVD(
        n_components=n_components, random_state=1337)
    
    tfidf_col = tfv.fit_transform(X_text.loc[:, i].values)
    
    svd_col = svd_.fit_transform(tfidf_col)
    svd_col = pd.DataFrame(svd_col)
    svd_col = svd_col.add_prefix('TFIDF_{}_'.format(i))
    
    text_features.append(svd_col)
    
text_features = pd.concat(text_features, axis=1)

X_temp = pd.concat([X_temp, text_features], axis=1)

for i in X_text.columns:
    X_temp = X_temp.drop(i, axis=1)

generating features from: Description
generating features from: metadata_annots_top_desc
generating features from: sentiment_entities


### Merge image features

In [30]:
X_temp = X_temp.merge(img_features, how='left', on='PetID')

### Add image_size features

In [31]:
from PIL import Image
train_df_ids = train[['PetID']]
test_df_ids = test[['PetID']]

train_df_imgs = pd.DataFrame(train_image_files)
train_df_imgs.columns = ['image_filename']
train_imgs_pets = train_df_imgs['image_filename'].apply(lambda x: x.split(split_char)[-1].split('-')[0])

test_df_imgs = pd.DataFrame(test_image_files)
test_df_imgs.columns = ['image_filename']
test_imgs_pets = test_df_imgs['image_filename'].apply(lambda x: x.split(split_char)[-1].split('-')[0])

train_df_imgs = train_df_imgs.assign(PetID=train_imgs_pets)
test_df_imgs = test_df_imgs.assign(PetID=test_imgs_pets)

def getSize(filename):
    st = os.stat(filename)
    return st.st_size

def getDimensions(filename):
    img_size = Image.open(filename).size
    return img_size 

train_df_imgs['image_size'] = train_df_imgs['image_filename'].apply(getSize)
train_df_imgs['temp_size'] = train_df_imgs['image_filename'].apply(getDimensions)
train_df_imgs['width'] = train_df_imgs['temp_size'].apply(lambda x : x[0])
train_df_imgs['height'] = train_df_imgs['temp_size'].apply(lambda x : x[1])
train_df_imgs = train_df_imgs.drop(['temp_size'], axis=1)

test_df_imgs['image_size'] = test_df_imgs['image_filename'].apply(getSize)
test_df_imgs['temp_size'] = test_df_imgs['image_filename'].apply(getDimensions)
test_df_imgs['width'] = test_df_imgs['temp_size'].apply(lambda x : x[0])
test_df_imgs['height'] = test_df_imgs['temp_size'].apply(lambda x : x[1])
test_df_imgs = test_df_imgs.drop(['temp_size'], axis=1)

aggs = {
    'image_size': ['sum', 'mean', 'var'],
    'width': ['sum', 'mean', 'var'],
    'height': ['sum', 'mean', 'var'],
}

agg_train_imgs = train_df_imgs.groupby('PetID').agg(aggs)
new_columns = [
    k + '_' + agg for k in aggs.keys() for agg in aggs[k]
]
agg_train_imgs.columns = new_columns
agg_train_imgs = agg_train_imgs.reset_index()

agg_test_imgs = test_df_imgs.groupby('PetID').agg(aggs)
new_columns = [
    k + '_' + agg for k in aggs.keys() for agg in aggs[k]
]
agg_test_imgs.columns = new_columns
agg_test_imgs = agg_test_imgs.reset_index()

agg_imgs = pd.concat([agg_train_imgs, agg_test_imgs], axis=0).reset_index(drop=True)

In [32]:
X_temp = X_temp.merge(agg_imgs, how='left', on='PetID')

### Drop ID, name and rescuerID

In [33]:
X_temp = X_temp.drop(to_drop_columns, axis=1)

In [34]:
X_train = X_temp.loc[np.isfinite(X_temp.AdoptionSpeed), :]
X_test = X_temp.loc[~np.isfinite(X_temp.AdoptionSpeed), :]

X_test = X_test.drop(['AdoptionSpeed'], axis=1)

assert X_train.shape[0] == train.shape[0]
assert X_test.shape[0] == test.shape[0]

train_cols = X_train.columns.tolist()
train_cols.remove('AdoptionSpeed')

test_cols = X_test.columns.tolist()

assert np.all(train_cols == test_cols)

In [35]:
X_train_non_null = X_train.fillna(-1)
X_test_non_null = X_test.fillna(-1)

In [36]:
X_train_non_null.isnull().any().any(), X_test_non_null.isnull().any().any()

(False, False)

In [37]:
X_train_non_null.shape, X_test_non_null.shape

((14993, 140), (3948, 139))

In [38]:
Y_train_non_null = X_train_non_null['AdoptionSpeed']
X_train_non_null = X_train_non_null.drop(['AdoptionSpeed'], axis=1)

In [39]:
Y_train_non_null_one_hot = pd.DataFrame(np.zeros((len(Y_train_non_null), 5)))

In [40]:
for i in range(len(Y_train_non_null)):
    b = np.zeros((1, 5))
    b[0,int(Y_train_non_null[i])] = 1
    Y_train_non_null_one_hot.iloc[i] = b

In [41]:
X_valid = X_train_non_null.tail(1500)
Y_valid = Y_train_non_null_one_hot.tail(1500)
X_train_non_null = X_train_non_null.head(len(X_train_non_null) - 1500)
Y_train_non_null_one_hot = Y_train_non_null_one_hot.head(len(Y_train_non_null_one_hot) - 1500)

## Print final table of features

In [42]:
import scipy as sp

from collections import Counter
from functools import partial
from math import sqrt

from sklearn.metrics import cohen_kappa_score, mean_squared_error
from sklearn.metrics import confusion_matrix as sk_cmatrix


# FROM: https://www.kaggle.com/myltykritik/simple-lgbm-image-features

# The following 3 functions have been taken from Ben Hamner's github repository
# https://github.com/benhamner/Metrics
def confusion_matrix(rater_a, rater_b, min_rating=None, max_rating=None):
    """
    Returns the confusion matrix between rater's ratings
    """
    assert(len(rater_a) == len(rater_b))
    if min_rating is None:
        min_rating = min(rater_a + rater_b)
    if max_rating is None:
        max_rating = max(rater_a + rater_b)
    num_ratings = int(max_rating - min_rating + 1)
    conf_mat = [[0 for i in range(num_ratings)]
                for j in range(num_ratings)]
    for a, b in zip(rater_a, rater_b):
        conf_mat[a - min_rating][b - min_rating] += 1
    return conf_mat


def histogram(ratings, min_rating=None, max_rating=None):
    """
    Returns the counts of each type of rating that a rater made
    """
    if min_rating is None:
        min_rating = min(ratings)
    if max_rating is None:
        max_rating = max(ratings)
    num_ratings = int(max_rating - min_rating + 1)
    hist_ratings = [0 for x in range(num_ratings)]
    for r in ratings:
        hist_ratings[r - min_rating] += 1
    return hist_ratings


def quadratic_weighted_kappa(y, y_pred):
    """
    Calculates the quadratic weighted kappa
    axquadratic_weighted_kappa calculates the quadratic weighted kappa
    value, which is a measure of inter-rater agreement between two raters
    that provide discrete numeric ratings.  Potential values range from -1
    (representing complete disagreement) to 1 (representing complete
    agreement).  A kappa value of 0 is expected if all agreement is due to
    chance.
    quadratic_weighted_kappa(rater_a, rater_b), where rater_a and rater_b
    each correspond to a list of integer ratings.  These lists must have the
    same length.
    The ratings should be integers, and it is assumed that they contain
    the complete range of possible ratings.
    quadratic_weighted_kappa(X, min_rating, max_rating), where min_rating
    is the minimum possible rating, and max_rating is the maximum possible
    rating
    """
    rater_a = y
    rater_b = y_pred
    min_rating=None
    max_rating=None
    rater_a = np.array(rater_a, dtype=int)
    rater_b = np.array(rater_b, dtype=int)
    assert(len(rater_a) == len(rater_b))
    if min_rating is None:
        min_rating = min(min(rater_a), min(rater_b))
    if max_rating is None:
        max_rating = max(max(rater_a), max(rater_b))
    conf_mat = confusion_matrix(rater_a, rater_b,
                                min_rating, max_rating)
    num_ratings = len(conf_mat)
    num_scored_items = float(len(rater_a))

    hist_rater_a = histogram(rater_a, min_rating, max_rating)
    hist_rater_b = histogram(rater_b, min_rating, max_rating)

    numerator = 0.0
    denominator = 0.0

    for i in range(num_ratings):
        for j in range(num_ratings):
            expected_count = (hist_rater_a[i] * hist_rater_b[j]
                              / num_scored_items)
            d = pow(i - j, 2.0) / pow(num_ratings - 1, 2.0)
            numerator += d * conf_mat[i][j] / num_scored_items
            denominator += d * expected_count / num_scored_items

    return (1.0 - numerator / denominator)

### OptimizeRounder from [OptimizedRounder() - Improved](https://www.kaggle.com/naveenasaithambi/optimizedrounder-improved)

In [43]:
class OptimizedRounder(object):
    def __init__(self):
        self.coef_ = 0
    
    def _kappa_loss(self, coef, X, y):
        preds = pd.cut(X, [-np.inf] + list(np.sort(coef)) + [np.inf], labels = [0, 1, 2, 3, 4])
        return -cohen_kappa_score(y, preds, weights='quadratic')
    
    def fit(self, X, y):
        loss_partial = partial(self._kappa_loss, X = X, y = y)
        initial_coef = [0.5, 1.5, 2.5, 3.5]
        self.coef_ = sp.optimize.minimize(loss_partial, initial_coef, method='nelder-mead')
    
    def predict(self, X, coef):
        preds = pd.cut(X, [-np.inf] + list(np.sort(coef)) + [np.inf], labels = [0, 1, 2, 3, 4])
        return preds
    
    def coefficients(self):
        return self.coef_['x']

### Train Network

In [44]:
import keras
import sklearn.model_selection
import numpy as np
import pandas as pd

# Split train/valid datasets
x_train, x_valid, y_train, y_valid = sklearn.model_selection.train_test_split(X_train_non_null, Y_train_non_null_one_hot, test_size=0.1, random_state=0)

# Define model
model = keras.models.Sequential()
model.add(keras.layers.normalization.BatchNormalization(input_shape=tuple([x_train.shape[1]])))
model.add(keras.layers.core.Dense(139, activation='relu'))
model.add(keras.layers.core.Dropout(rate=0.8))
model.add(keras.layers.normalization.BatchNormalization())
model.add(keras.layers.core.Dense(200, activation='relu'))
model.add(keras.layers.core.Dropout(rate=0.8))
model.add(keras.layers.normalization.BatchNormalization())
model.add(keras.layers.core.Dense(200, activation='relu'))
model.add(keras.layers.core.Dropout(rate=0.8))
model.add(keras.layers.normalization.BatchNormalization())
model.add(keras.layers.core.Dense(200, activation='relu'))
model.add(keras.layers.core.Dropout(rate=0.8))
model.add(keras.layers.core.Dense(5, activation='softmax'))
model.compile(loss="categorical_crossentropy", optimizer="rmsprop", metrics=["accuracy"])
print(model.summary())

# Use Early-Stopping
callback_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=1000, verbose=0, mode='auto')

# Train model
model.fit(x_train, y_train, batch_size=1024, epochs=50000, validation_data=(x_valid, y_valid), verbose=1, callbacks=[callback_early_stopping])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_1 (Batch (None, 139)               556       
_________________________________________________________________
dense_1 (Dense)              (None, 139)               19460     
_________________________________________________________________
dropout_1 (Dropout)          (None, 139)               0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 139)               556       
_________________________________________________________________
dense_2 (Dense)              (None, 200)               28000     
_________________________________________________________________
dropout_2 (Dropout)          (None, 200)               0         
_________________________________________________________________
batch_normalization_3 (Batch (None, 200)               800       
__________

Epoch 103/50000
Epoch 104/50000
Epoch 105/50000
Epoch 106/50000
Epoch 107/50000
Epoch 108/50000
Epoch 109/50000
Epoch 110/50000
Epoch 111/50000
Epoch 112/50000
Epoch 113/50000
Epoch 114/50000
Epoch 115/50000
Epoch 116/50000
Epoch 117/50000
Epoch 118/50000
Epoch 119/50000
Epoch 120/50000
Epoch 121/50000
Epoch 122/50000
Epoch 123/50000
Epoch 124/50000
Epoch 125/50000
Epoch 126/50000
Epoch 127/50000
Epoch 128/50000
Epoch 129/50000
Epoch 130/50000
Epoch 131/50000
Epoch 132/50000
Epoch 133/50000
Epoch 134/50000
Epoch 135/50000
Epoch 136/50000
Epoch 137/50000
Epoch 138/50000
Epoch 139/50000
Epoch 140/50000
Epoch 141/50000
Epoch 142/50000
Epoch 143/50000
Epoch 144/50000
Epoch 145/50000
Epoch 146/50000
Epoch 147/50000
Epoch 148/50000
Epoch 149/50000
Epoch 150/50000
Epoch 151/50000
Epoch 152/50000
Epoch 153/50000
Epoch 154/50000
Epoch 155/50000
Epoch 156/50000
Epoch 157/50000
Epoch 158/50000
Epoch 159/50000
Epoch 160/50000
Epoch 161/50000
Epoch 162/50000
Epoch 163/50000
Epoch 164/50000
Epoch 16

Epoch 219/50000
Epoch 220/50000
Epoch 221/50000
Epoch 222/50000
Epoch 223/50000
Epoch 224/50000
Epoch 225/50000
Epoch 226/50000
Epoch 227/50000
Epoch 228/50000
Epoch 229/50000
Epoch 230/50000
Epoch 231/50000
Epoch 232/50000
Epoch 233/50000
Epoch 234/50000
Epoch 235/50000
Epoch 236/50000
Epoch 237/50000
Epoch 238/50000
Epoch 239/50000
Epoch 240/50000
Epoch 241/50000
Epoch 242/50000
Epoch 243/50000
Epoch 244/50000
Epoch 245/50000
Epoch 246/50000
Epoch 247/50000
Epoch 248/50000
Epoch 249/50000
Epoch 250/50000
Epoch 251/50000
Epoch 252/50000
Epoch 253/50000
Epoch 254/50000
Epoch 255/50000
Epoch 256/50000
Epoch 257/50000
Epoch 258/50000
Epoch 259/50000
Epoch 260/50000
Epoch 261/50000
Epoch 262/50000
Epoch 263/50000
Epoch 264/50000
Epoch 265/50000
Epoch 266/50000
Epoch 267/50000
Epoch 268/50000
Epoch 269/50000
Epoch 270/50000
Epoch 271/50000
Epoch 272/50000
Epoch 273/50000
Epoch 274/50000
Epoch 275/50000
Epoch 276/50000
Epoch 277/50000
Epoch 278/50000
Epoch 279/50000
Epoch 280/50000
Epoch 28

Epoch 335/50000
Epoch 336/50000
Epoch 337/50000
Epoch 338/50000
Epoch 339/50000
Epoch 340/50000
Epoch 341/50000
Epoch 342/50000
Epoch 343/50000
Epoch 344/50000
Epoch 345/50000
Epoch 346/50000
Epoch 347/50000
Epoch 348/50000
Epoch 349/50000
Epoch 350/50000
Epoch 351/50000
Epoch 352/50000
Epoch 353/50000
Epoch 354/50000
Epoch 355/50000
Epoch 356/50000
Epoch 357/50000
Epoch 358/50000
Epoch 359/50000
Epoch 360/50000
Epoch 361/50000
Epoch 362/50000
Epoch 363/50000
Epoch 364/50000
Epoch 365/50000
Epoch 366/50000
Epoch 367/50000
Epoch 368/50000
Epoch 369/50000
Epoch 370/50000
Epoch 371/50000
Epoch 372/50000
Epoch 373/50000
Epoch 374/50000
Epoch 375/50000
Epoch 376/50000
Epoch 377/50000
Epoch 378/50000
Epoch 379/50000
Epoch 380/50000
Epoch 381/50000
Epoch 382/50000
Epoch 383/50000
Epoch 384/50000
Epoch 385/50000
Epoch 386/50000
Epoch 387/50000
Epoch 388/50000
Epoch 389/50000
Epoch 390/50000
Epoch 391/50000
Epoch 392/50000
Epoch 393/50000
Epoch 394/50000
Epoch 395/50000
Epoch 396/50000
Epoch 39

Epoch 451/50000
Epoch 452/50000
Epoch 453/50000
Epoch 454/50000
Epoch 455/50000
Epoch 456/50000
Epoch 457/50000
Epoch 458/50000
Epoch 459/50000
Epoch 460/50000
Epoch 461/50000
Epoch 462/50000
Epoch 463/50000
Epoch 464/50000
Epoch 465/50000
Epoch 466/50000
Epoch 467/50000
Epoch 468/50000
Epoch 469/50000
Epoch 470/50000
Epoch 471/50000
Epoch 472/50000
Epoch 473/50000
Epoch 474/50000
Epoch 475/50000
Epoch 476/50000
Epoch 477/50000
Epoch 478/50000
Epoch 479/50000
Epoch 480/50000
Epoch 481/50000
Epoch 482/50000
Epoch 483/50000
Epoch 484/50000
Epoch 485/50000
Epoch 486/50000
Epoch 487/50000
Epoch 488/50000
Epoch 489/50000
Epoch 490/50000
Epoch 491/50000
Epoch 492/50000
Epoch 493/50000
Epoch 494/50000
Epoch 495/50000
Epoch 496/50000
Epoch 497/50000
Epoch 498/50000
Epoch 499/50000
Epoch 500/50000
Epoch 501/50000
Epoch 502/50000
Epoch 503/50000
Epoch 504/50000
Epoch 505/50000
Epoch 506/50000
Epoch 507/50000
Epoch 508/50000
Epoch 509/50000
Epoch 510/50000
Epoch 511/50000
Epoch 512/50000
Epoch 51

Epoch 567/50000
Epoch 568/50000
Epoch 569/50000
Epoch 570/50000
Epoch 571/50000
Epoch 572/50000
Epoch 573/50000
Epoch 574/50000
Epoch 575/50000
Epoch 576/50000
Epoch 577/50000
Epoch 578/50000
Epoch 579/50000
Epoch 580/50000
Epoch 581/50000
Epoch 582/50000
Epoch 583/50000
Epoch 584/50000
Epoch 585/50000
Epoch 586/50000
Epoch 587/50000
Epoch 588/50000
Epoch 589/50000
Epoch 590/50000
Epoch 591/50000
Epoch 592/50000
Epoch 593/50000
Epoch 594/50000
Epoch 595/50000
Epoch 596/50000
Epoch 597/50000
Epoch 598/50000
Epoch 599/50000
Epoch 600/50000
Epoch 601/50000
Epoch 602/50000
Epoch 603/50000
Epoch 604/50000
Epoch 605/50000
Epoch 606/50000
Epoch 607/50000
Epoch 608/50000
Epoch 609/50000
Epoch 610/50000
Epoch 611/50000
Epoch 612/50000
Epoch 613/50000
Epoch 614/50000
Epoch 615/50000
Epoch 616/50000
Epoch 617/50000
Epoch 618/50000
Epoch 619/50000
Epoch 620/50000
Epoch 621/50000
Epoch 622/50000
Epoch 623/50000
Epoch 624/50000
Epoch 625/50000
Epoch 626/50000
Epoch 627/50000
Epoch 628/50000
Epoch 62

Epoch 683/50000
Epoch 684/50000
Epoch 685/50000
Epoch 686/50000
Epoch 687/50000
Epoch 688/50000
Epoch 689/50000
Epoch 690/50000
Epoch 691/50000
Epoch 692/50000
Epoch 693/50000
Epoch 694/50000
Epoch 695/50000
Epoch 696/50000
Epoch 697/50000
Epoch 698/50000
Epoch 699/50000
Epoch 700/50000
Epoch 701/50000
Epoch 702/50000
Epoch 703/50000
Epoch 704/50000
Epoch 705/50000
Epoch 706/50000
Epoch 707/50000
Epoch 708/50000
Epoch 709/50000
Epoch 710/50000
Epoch 711/50000
Epoch 712/50000
Epoch 713/50000
Epoch 714/50000
Epoch 715/50000
Epoch 716/50000
Epoch 717/50000
Epoch 718/50000
Epoch 719/50000
Epoch 720/50000
Epoch 721/50000
Epoch 722/50000
Epoch 723/50000
Epoch 724/50000
Epoch 725/50000
Epoch 726/50000
Epoch 727/50000
Epoch 728/50000
Epoch 729/50000
Epoch 730/50000
Epoch 731/50000
Epoch 732/50000
Epoch 733/50000
Epoch 734/50000
Epoch 735/50000
Epoch 736/50000
Epoch 737/50000
Epoch 738/50000
Epoch 739/50000
Epoch 740/50000
Epoch 741/50000
Epoch 742/50000
Epoch 743/50000
Epoch 744/50000
Epoch 74

Epoch 799/50000
Epoch 800/50000
Epoch 801/50000
Epoch 802/50000
Epoch 803/50000
Epoch 804/50000
Epoch 805/50000
Epoch 806/50000
Epoch 807/50000
Epoch 808/50000
Epoch 809/50000
Epoch 810/50000
Epoch 811/50000
Epoch 812/50000
Epoch 813/50000
Epoch 814/50000
Epoch 815/50000
Epoch 816/50000
Epoch 817/50000
Epoch 818/50000
Epoch 819/50000
Epoch 820/50000
Epoch 821/50000
Epoch 822/50000
Epoch 823/50000
Epoch 824/50000
Epoch 825/50000
Epoch 826/50000
Epoch 827/50000
Epoch 828/50000
Epoch 829/50000
Epoch 830/50000
Epoch 831/50000
Epoch 832/50000
Epoch 833/50000
Epoch 834/50000
Epoch 835/50000
Epoch 836/50000
Epoch 837/50000
Epoch 838/50000
Epoch 839/50000
Epoch 840/50000
Epoch 841/50000
Epoch 842/50000
Epoch 843/50000
Epoch 844/50000
Epoch 845/50000
Epoch 846/50000
Epoch 847/50000
Epoch 848/50000
Epoch 849/50000
Epoch 850/50000
Epoch 851/50000
Epoch 852/50000
Epoch 853/50000
Epoch 854/50000
Epoch 855/50000
Epoch 856/50000
Epoch 857/50000
Epoch 858/50000
Epoch 859/50000
Epoch 860/50000
Epoch 86

Epoch 915/50000
Epoch 916/50000
Epoch 917/50000
Epoch 918/50000
Epoch 919/50000
Epoch 920/50000
Epoch 921/50000
Epoch 922/50000
Epoch 923/50000
Epoch 924/50000
Epoch 925/50000
Epoch 926/50000
Epoch 927/50000
Epoch 928/50000
Epoch 929/50000
Epoch 930/50000
Epoch 931/50000
Epoch 932/50000
Epoch 933/50000
Epoch 934/50000
Epoch 935/50000
Epoch 936/50000
Epoch 937/50000
Epoch 938/50000
Epoch 939/50000
Epoch 940/50000
Epoch 941/50000
Epoch 942/50000
Epoch 943/50000
Epoch 944/50000
Epoch 945/50000
Epoch 946/50000
Epoch 947/50000
Epoch 948/50000
Epoch 949/50000
Epoch 950/50000
Epoch 951/50000
Epoch 952/50000
Epoch 953/50000
Epoch 954/50000
Epoch 955/50000
Epoch 956/50000
Epoch 957/50000
Epoch 958/50000
Epoch 959/50000
Epoch 960/50000
Epoch 961/50000
Epoch 962/50000
Epoch 963/50000
Epoch 964/50000
Epoch 965/50000
Epoch 966/50000
Epoch 967/50000
Epoch 968/50000
Epoch 969/50000
Epoch 970/50000
Epoch 971/50000
Epoch 972/50000
Epoch 973/50000
Epoch 974/50000
Epoch 975/50000
Epoch 976/50000
Epoch 97

Epoch 1031/50000
Epoch 1032/50000
Epoch 1033/50000
Epoch 1034/50000
Epoch 1035/50000
Epoch 1036/50000
Epoch 1037/50000
Epoch 1038/50000
Epoch 1039/50000
Epoch 1040/50000
Epoch 1041/50000
Epoch 1042/50000
Epoch 1043/50000
Epoch 1044/50000
Epoch 1045/50000
Epoch 1046/50000
Epoch 1047/50000
Epoch 1048/50000
Epoch 1049/50000
Epoch 1050/50000
Epoch 1051/50000
Epoch 1052/50000
Epoch 1053/50000
Epoch 1054/50000
Epoch 1055/50000
Epoch 1056/50000
Epoch 1057/50000
Epoch 1058/50000
Epoch 1059/50000
Epoch 1060/50000
Epoch 1061/50000
Epoch 1062/50000
Epoch 1063/50000
Epoch 1064/50000
Epoch 1065/50000
Epoch 1066/50000
Epoch 1067/50000
Epoch 1068/50000
Epoch 1069/50000
Epoch 1070/50000
Epoch 1071/50000
Epoch 1072/50000
Epoch 1073/50000
Epoch 1074/50000
Epoch 1075/50000
Epoch 1076/50000
Epoch 1077/50000
Epoch 1078/50000
Epoch 1079/50000
Epoch 1080/50000
Epoch 1081/50000
Epoch 1082/50000
Epoch 1083/50000
Epoch 1084/50000
Epoch 1085/50000
Epoch 1086/50000
Epoch 1087/50000


Epoch 1088/50000
Epoch 1089/50000
Epoch 1090/50000
Epoch 1091/50000
Epoch 1092/50000
Epoch 1093/50000
Epoch 1094/50000
Epoch 1095/50000
Epoch 1096/50000
Epoch 1097/50000
Epoch 1098/50000
Epoch 1099/50000
Epoch 1100/50000
Epoch 1101/50000
Epoch 1102/50000
Epoch 1103/50000
Epoch 1104/50000
Epoch 1105/50000
Epoch 1106/50000
Epoch 1107/50000
Epoch 1108/50000
Epoch 1109/50000
Epoch 1110/50000
Epoch 1111/50000
Epoch 1112/50000
Epoch 1113/50000
Epoch 1114/50000
Epoch 1115/50000
Epoch 1116/50000
Epoch 1117/50000
Epoch 1118/50000
Epoch 1119/50000
Epoch 1120/50000
Epoch 1121/50000
Epoch 1122/50000
Epoch 1123/50000
Epoch 1124/50000
Epoch 1125/50000
Epoch 1126/50000
Epoch 1127/50000
Epoch 1128/50000
Epoch 1129/50000
Epoch 1130/50000
Epoch 1131/50000
Epoch 1132/50000
Epoch 1133/50000
Epoch 1134/50000
Epoch 1135/50000
Epoch 1136/50000
Epoch 1137/50000
Epoch 1138/50000
Epoch 1139/50000
Epoch 1140/50000
Epoch 1141/50000
Epoch 1142/50000
Epoch 1143/50000
Epoch 1144/50000
Epoch 1145/50000
Epoch 1146/500

Epoch 1203/50000
Epoch 1204/50000
Epoch 1205/50000
Epoch 1206/50000
Epoch 1207/50000
Epoch 1208/50000
Epoch 1209/50000
Epoch 1210/50000
Epoch 1211/50000
Epoch 1212/50000
Epoch 1213/50000
Epoch 1214/50000
Epoch 1215/50000
Epoch 1216/50000
Epoch 1217/50000
Epoch 1218/50000
Epoch 1219/50000
Epoch 1220/50000
Epoch 1221/50000
Epoch 1222/50000
Epoch 1223/50000
Epoch 1224/50000
Epoch 1225/50000
Epoch 1226/50000
Epoch 1227/50000
Epoch 1228/50000
Epoch 1229/50000
Epoch 1230/50000
Epoch 1231/50000
Epoch 1232/50000
Epoch 1233/50000
Epoch 1234/50000
Epoch 1235/50000
Epoch 1236/50000
Epoch 1237/50000
Epoch 1238/50000
Epoch 1239/50000
Epoch 1240/50000
Epoch 1241/50000
Epoch 1242/50000
Epoch 1243/50000
Epoch 1244/50000
Epoch 1245/50000
Epoch 1246/50000
Epoch 1247/50000
Epoch 1248/50000
Epoch 1249/50000
Epoch 1250/50000
Epoch 1251/50000
Epoch 1252/50000
Epoch 1253/50000
Epoch 1254/50000
Epoch 1255/50000
Epoch 1256/50000
Epoch 1257/50000
Epoch 1258/50000
Epoch 1259/50000
Epoch 1260/50000
Epoch 1261/500

Epoch 1318/50000
Epoch 1319/50000
Epoch 1320/50000
Epoch 1321/50000
Epoch 1322/50000
Epoch 1323/50000
Epoch 1324/50000
Epoch 1325/50000
Epoch 1326/50000
Epoch 1327/50000
Epoch 1328/50000
Epoch 1329/50000
Epoch 1330/50000
Epoch 1331/50000
Epoch 1332/50000
Epoch 1333/50000
Epoch 1334/50000
Epoch 1335/50000
Epoch 1336/50000
Epoch 1337/50000
Epoch 1338/50000
Epoch 1339/50000
Epoch 1340/50000
Epoch 1341/50000
Epoch 1342/50000
Epoch 1343/50000
Epoch 1344/50000
Epoch 1345/50000
Epoch 1346/50000
Epoch 1347/50000
Epoch 1348/50000
Epoch 1349/50000
Epoch 1350/50000
Epoch 1351/50000
Epoch 1352/50000
Epoch 1353/50000
Epoch 1354/50000
Epoch 1355/50000
Epoch 1356/50000
Epoch 1357/50000
Epoch 1358/50000
Epoch 1359/50000
Epoch 1360/50000
Epoch 1361/50000
Epoch 1362/50000
Epoch 1363/50000
Epoch 1364/50000
Epoch 1365/50000
Epoch 1366/50000
Epoch 1367/50000
Epoch 1368/50000
Epoch 1369/50000
Epoch 1370/50000
Epoch 1371/50000
Epoch 1372/50000
Epoch 1373/50000
Epoch 1374/50000
Epoch 1375/50000
Epoch 1376/500

<keras.callbacks.History at 0x7f177cc2c940>

In [45]:
keras_pred = model.predict(X_valid)

In [46]:
def onehotToAdoptionSpeed(one_hot_data):
    Y_pred = np.zeros((len(one_hot_data),))
    for rowIdx in range(len(one_hot_data)):
        maxNumIdx = np.where(one_hot_data[rowIdx] == np.amax(one_hot_data[rowIdx]))[0][0]
        Y_pred[rowIdx] = maxNumIdx
    return Y_pred
valid_pred = onehotToAdoptionSpeed(keras_pred)
Y_valid = onehotToAdoptionSpeed(Y_valid.values)

In [47]:
valid_pred = pd.Series(valid_pred, dtype='int32')
Y_valid = pd.Series(Y_valid, dtype='int32')

## Calculate final quadratic weighted kappa

In [48]:
qwk = quadratic_weighted_kappa(Y_valid, valid_pred)
print("CV QWK = ", qwk)

CV QWK =  0.33866061924592417
