# One vs. Rest Classifier
---

## Reference
* [Simple Keras embedding in 10 folds](https://www.kaggle.com/pourchot/simple-keras-embedding-in-10-folds) by [@pourchot](https://www.kaggle.com/pourchot)
* [Combining discrete and continuous features in neural networks](https://www.kaggle.com/hiro5299834/tps06-nn-w-discrete-and-continuous-features) by [@bizen](https://www.kaggle.com/hiro5299834)

For a one-vs-rest classifier, it makes sense to estimate the probability of belonging to a class by also multiplying in the probabilities of not belonging to each of the other classes.
In a one-vs-rest setup there are many more samples for the 'rest of the classes', so a model trained this way tends to be good at predicting the probability of *not* belonging to a class. I therefore believe the prediction can be improved by calculating the probability of belonging to a class as follows:

Prob['Class_1'] =  Prob['Class_1']  * (1 - Prob['Class_2']) * ( 1 - Prob['Class_3']) * ( 1 - Prob['Class_4']) * ( 1 - Prob['Class_5']) * ( 1 - Prob['Class_6']) * ( 1 - Prob['Class_7']) * ( 1 - Prob['Class_8']) * ( 1 - Prob['Class_9'])

Here, Prob['Class_1'] is the probability of belonging to Class_1; the same formula is applied to each of the nine classes in turn.

# **LIBRARIES**

In [None]:
import pandas as pd
import numpy as np
import datetime
import random
import time
import os
import gc

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss, confusion_matrix, classification_report
from sklearn.cluster import KMeans
from scipy.stats import mode, skew, kurtosis

from tensorflow.keras import backend as K
import tensorflow as tf
import tensorflow_addons as tfa

import matplotlib.pyplot as plt
import seaborn as sns

#----------
pd.options.display.max_rows = 50
pd.options.display.max_columns = 50

import warnings
warnings.simplefilter('ignore')
from IPython.display import clear_output 

# Load Data

In [None]:
# Read the three competition files from the read-only Kaggle input folder.
train, test, submission = (
    pd.read_csv(f'../input/tabular-playground-series-jun-2021/{stem}.csv')
    for stem in ('train', 'test', 'sample_submission')
)

# Stack train on top of test under a fresh 0..N-1 index so that both parts
# can be processed together and split again by row position later.
all_df = pd.concat([train, test], ignore_index=True)

In [None]:
# Show how many training rows carry each distinct value of the 'target' column.
train.value_counts('target')

# Create Targets for one vs. Rest learning

In [None]:
# Names of the 75 input columns: feature_0 ... feature_74.
all_features = [f'feature_{idx}' for idx in range(75)]

# Assign training and test data

In [None]:
# 9x9 matrix with zeros on the diagonal and ones elsewhere: row i is the 0/1
# encoding of the i-th one-vs-rest problem (0 = "is Class_{i+1}",
# 1 = "is any other class").
res = np.ones((9, 9), int)
np.fill_diagonal(res, 0)

for i in range(9):
    # Lookup table for problem i, e.g. for i == 0:
    # {'Class_1': 0, 'Class_2': 1, ..., 'Class_9': 1}.
    class_to_flag = {'Class_' + str(j + 1): res[i][j] for j in range(9)}
    # Translate only the target column instead of running replace() over the
    # whole frame; this also avoids rebinding the builtin `map`.
    # Rows whose target is missing (presumably the test rows appended by the
    # concat) stay NaN.
    all_df['target' + str(i)] = all_df['target'].map(class_to_flag)

# Notebook cell output: show the frame with the new target0..target8 columns.
all_df

# Define Model

In [None]:
# all_df holds the training rows first, followed by the test rows.
n_train = train.shape[0]

# Feature matrices for the two parts.
X = all_df.iloc[:n_train][all_features].to_numpy()
test_npy = all_df.iloc[n_train:][all_features].to_numpy()

# One (n_train, 1) label array per one-vs-rest problem. The labels are cut to
# the training rows so they line up with X; taking them from the whole frame
# would hand Keras more labels than samples and include the NaN labels of the
# rows that have no target.
t0, t1, t2, t3, t4, t5, t6, t7, t8 = (
    all_df.iloc[:n_train][['target' + str(k)]].to_numpy() for k in range(9)
)

In [None]:
def create_model(shape=(75,)):
    """Build and compile one binary (one-vs-rest) embedding network.

    Every input value is looked up in a shared embedding table, the
    embeddings are flattened, passed through dropout and batch
    normalisation, and fed through three ReLU dense layers into a single
    sigmoid unit.

    Args:
        shape: Shape of one input sample; defaults to ``(75,)``.
            Inputs are assumed to be non-negative integers below 400 (the
            size of the embedding table) -- TODO confirm against the data.

    Returns:
        A ``tf.keras.Model`` compiled with binary cross-entropy loss, the
        AdamW optimizer, and accuracy plus binary cross-entropy as metrics.
    """
    cat_input = tf.keras.layers.Input(shape=shape, name='cat_input')

    # Layers in the order they are applied; chained onto the input below.
    layer_stack = (
        tf.keras.layers.Embedding(400, 16, name='Embedding'),
        tf.keras.layers.Flatten(name='Flatten'),
        tf.keras.layers.Dropout(0.4, name='dropout_concatenated'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(128, activation='relu', name='dense1'),
        tf.keras.layers.Dense(64, activation='relu', name='dense2'),
        tf.keras.layers.Dense(32, activation='relu', name='dense3'),
        tf.keras.layers.Dense(1, activation='sigmoid', name='output'),
    )
    outputs = cat_input
    for layer in layer_stack:
        outputs = layer(outputs)

    model = tf.keras.Model(cat_input, outputs)

    model.compile(
        optimizer=tfa.optimizers.AdamW(
            weight_decay=1e-7,
            learning_rate=0.0001,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-07,
            amsgrad=True,
            name='AdamW',
        ),
        loss=tf.keras.losses.BinaryCrossentropy(
            from_logits=False,
            label_smoothing=0,
            reduction='auto',
            name='binary_crossentropy',
        ),
        metrics=[
            'accuracy',
            # Unsmoothed cross-entropy reported alongside the loss.
            tf.keras.metrics.BinaryCrossentropy(
                from_logits=False,
                label_smoothing=0,
                name='binary_crossentropy',
            ),
        ],
    )

    return model

# Print the layer overview of a freshly built network. The factory is called
# directly because no `model` object exists yet at this point in the notebook.
create_model().summary()

# Train model

In [None]:
# Learning-rate schedule: multiply the rate by `factor` whenever the
# validation loss has not improved by at least `min_delta` for `patience`
# epochs.
scheduler_cb = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', mode='auto', min_delta=0.0001,  # what is watched
    patience=2, cooldown=0,                             # when to react
    factor=0.5, min_lr=0,                               # how to react
    verbose=0,
)

# Early stopping: end training after `patience` epochs without improvement of
# the validation loss and roll the model back to its best weights.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', mode='auto', min_delta=0, baseline=None,
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

In [None]:
# Train one binary network per class and collect its predictions.
#   history -- one Keras History object per class, in class order
#   y       -- label arrays of the nine one-vs-rest problems
#   df      -- predicted probability of belonging to each class, one column
#              per class, one row per row of all_df
history = []
y = [t0, t1, t2, t3, t4, t5, t6, t7, t8]
df = pd.DataFrame()
K.clear_session()

# Features of every row of all_df; converted once instead of once per class.
all_rows = all_df[all_features].to_numpy()

for i, targets in enumerate(y):
    class_name = 'Class_' + str(i + 1)
    print('Training for ' + class_name + " :")
    # Size the input from the data rather than a hard-coded column count.
    model = create_model(shape=(X.shape[1],))

    history.append(
        model.fit(
            x=X,
            y=targets,
            # NOTE(review): how many batches one pass over the data yields
            # depends on the number of training rows, batch_size and
            # validation_split -- confirm 1000 steps per epoch is intended.
            steps_per_epoch=1000,
            batch_size=256,
            epochs=50,
            validation_split=0.2,
            callbacks=[scheduler_cb, early_stopping_cb]
        )
    )

    print('Predicting for ' + class_name + "\n\n")
    # The labels are 0 for the class itself and 1 for every other class, so
    # the sigmoid output is P(not this class); store its complement.
    df[class_name] = 1 - model.predict(all_rows).flatten()

# Notebook cell output: the per-class probabilities.
df

# Get probabilities for belonging to a class

In [None]:
# Load per-class probabilities from an external ensemble submission file; they
# are used below as the "belongs to this class" factor of each product.
# NOTE(review): assumes one row per test sample, in test-set order -- confirm.
class_probs = pd.read_csv('/kaggle/input/max-score-ensemble/ensemble_top_scores4.csv')
#classes = ['Class_' + str(i) for i in range(1,10)] 
#result = pd.DataFrame(tf.keras.utils.to_categorical(class_probs[classes].to_numpy(), axis=1), num_classes=9), columns=classes)
#log_loss(result, class_probs[classes])
#display(class_probs)
#display(df[200000:])

# Multiply by probabilities for not belonging to other classes

In [None]:
# One-vs-rest predictions for the test rows only. all_df (and therefore df)
# holds the training rows first, so everything after them is test data.
n_train = train.shape[0]
# Copy the slice so that adding a column does not write into a view of df.
rdf = df.iloc[n_train:].copy()
# Re-number the rows from 0 so they align with class_probs on the index.
rdf['id'] = np.arange(len(rdf))
rdf = rdf.set_index('id')
display(rdf)

classes = ['Class_' + str(k) for k in range(1, 10)]
# P(not class k) for every class k, computed once.
not_in_class = 1 - rdf[classes]

# result[c] = 10 * P_ensemble(c) * product over k != c of (1 - P_ovr(k)).
# One loop replaces nine hand-written formulas; the factors are multiplied in
# class order, so values can differ from the hand-written version only by
# floating-point rounding.
result = pd.DataFrame()
for cls in classes:
    combined = 10 * class_probs[cls]
    for other in classes:
        if other != cls:
            combined = combined * not_in_class[other]
    result[cls] = combined
#result['tg'] = all_df['target']
display(result)
class_probs

In [None]:
class display(object):
    """Display HTML representation of multiple objects side by side.

    Each argument is either

    * a string holding an expression (typically a variable name); it is
      evaluated in this module's global namespace and shown with the string
      itself as the caption, or
    * any other object; it is shown directly with its type name as the
      caption. Defining this class shadows IPython's ``display`` helper, so
      accepting plain objects keeps calls such as ``display(frame)`` working.

    NOTE: string arguments are passed to ``eval`` and can execute arbitrary
    code -- only pass trusted expressions.
    """
    template = """<div style="float: left; padding: 10px;">
    <p style='font-family:"Courier New", Courier, monospace'>{0}</p>{1}
    </div>"""

    def __init__(self, *args):
        self.args = args

    @staticmethod
    def _resolve(arg):
        """Return the ``(caption, object)`` pair for one constructor argument."""
        if isinstance(arg, str):
            return arg, eval(arg, globals())
        return type(arg).__name__, arg

    @staticmethod
    def _to_html(obj):
        """Return rich HTML for *obj*, or its escaped ``repr`` if it has none."""
        import html

        render = getattr(obj, '_repr_html_', None)
        if callable(render):
            rendered = render()
            if rendered is not None:
                return rendered
        return '<pre>' + html.escape(repr(obj)) + '</pre>'

    def _repr_html_(self):
        pairs = (self._resolve(a) for a in self.args)
        return '\n'.join(self.template.format(caption, self._to_html(obj))
                         for caption, obj in pairs)

    def __repr__(self):
        pairs = (self._resolve(a) for a in self.args)
        return '\n\n'.join(caption + '\n' + repr(obj)
                           for caption, obj in pairs)

# Submission

In [None]:
#test_pred.shape
#sample = read_csv('/kaggle/input/tabular-playground-series-jun-2021/sample_submission.csv')
#print(sample)

In [None]:
#sub = pd.DataFrame(test_pred.data, columns=['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5', 'Class_6', 'Class_7', 'Class_8', 'Class_9'])
# Work on a copy so that `result` itself never gains an 'id' column; this
# keeps the cell re-runnable (inserting an existing column raises).
sub = result.copy()
# Ids continue from 200000, as in the original hard-coded range.
# NOTE(review): assumes the test ids are consecutive and start at 200000.
first_test_id = 200000
sub.insert(0, 'id', np.arange(first_test_id, first_test_id + len(sub)))
print(sub)
csv = 'submission_ovr.csv'
sub.to_csv(csv, index=False)

In [None]:
# Render a link to the submission file written above (`csv` holds its path).
from IPython.display import FileLink
FileLink(csv)

In [None]:
#from scipy.special import softmax
#newresult = softmax(10 * result[['Class_' + str(i) for i in range(1,10)]], axis=1)
#display('rdf', 'result', 'newresult', 'class_probs')
#log_loss(tf.keras.utils.to_categorical((train['target'].str.split('_', expand=True)[1].astype(int) -1), num_classes=9), myresult.to_numpy())
#myresult

In [None]:
#scaler = MinMaxScaler()
#classes = ['Class_' + str(i) for i in range(1,10)]
#result = pd.DataFrame((scaler.fit_transform(result.T)).T, columns=classes)

#display(result[result['Class_5'] > 0].head(20))
#display(class_probs.head(20))
#df['tg'] = all_df['target']
#display(df.iloc[:200000])

In [None]:
#pd.options.display.max_rows = 2000
#train_result = pd.DataFrame()
#result['Class_8'] = 1 - df['86']
#result['Class_6'] = df['86']
#result['Class_2'] = 1 - df['29']
#result['Class_9'] = df['29']
#result['Class_7'] = 1 - df['73']
#result['Class_3'] = df['73']
#result['Class_5'] = 1 - df['54']
#result['Class_4'] = df['54']
#result['Class_1'] = df['101']
#train_result['tg'] = train['target'].apply(lambda x: int(x.split("_")[-1])-1).to_numpy()
#train_result[classes] = result[classes]
#df = df.drop('id')
#train_result.head(50)

In [None]:
#result['tg'] = train['target'].apply(lambda x: int(x.split("_")[-1])-1).to_numpy()
#result.head(50)

In [None]:
#tmp = pd.DataFrame()
#tmp['pred'] = (((rdf[classes])[rdf['tg'] == 0])['Class_1']).astype(float) 
#tmp['tg'] = rdf[rdf['tg'] == 0]['tg'].astype(float) + 1
#(tmp['tg'] - tmp['pred']).describe()

In [None]:
#!pip3 install -U lightautoml

In [None]:
#from lightautoml.automl.presets.tabular_presets import TabularAutoML, TabularUtilizedAutoML
#from lightautoml.tasks import Task
#from sklearn.metrics import log_loss

In [None]:
#N_THREADS = 4 # threads cnt for lgbm and linear models
#N_FOLDS = 5 # folds cnt for AutoML
#RANDOM_STATE = 1 # fixed random state for various reasons
#TEST_SIZE = 0.2 # Test size for metric check
#TIMEOUT =  60 * 60 # Time in seconds for automl run

In [None]:
#automl = TabularUtilizedAutoML(task = Task('multiclass',), 
#                               timeout = TIMEOUT,
#                               cpu_limit = N_THREADS,
#                               reader_params = {'n_jobs': N_THREADS},
#                              )

In [None]:
#target_column = 'tg'

#roles = {
#    'target': target_column,
#}

#lightml_pred = automl.fit_predict(rdf, roles = roles)
#print('lightml_pred:\n{}\nShape = {}'.format(lightml_pred[:10], lightml_pred.shape))

In [None]:
#test_pred = automl.predict(test)