In this notebook I gather three basic ideas from various kernels and apply them with a simple sklearn random forest (RF) model. Thanks to the work in [PetFinder Simple LGBM Baseline](https://www.kaggle.com/skooch/petfinder-simple-lgbm-baseline).
1. Use a regressor instead of a classifier. 
  * That's simply because many standard classifiers give misclassifying 0 as 1 and misclassifying 0 as 4 the same 0/1 penalty.
  * However, the quadratic-weighted kappa (`weights='quadratic'`) makes $0 \rightarrow 1$ less costly than $0 \rightarrow 4$, which is better represented by an RMSE-like error function.
2. Since we use a regressor, we need to define `cv` manually to get proper stratified sampling (sklearn only stratifies the folds automatically for classifiers).
3. Use the `OptimizedRounder` idea that everybody uses to optimize the class boundaries.
  * The natural idea is to round a regressor output of `0.44` to class 0, but it turns out that classifying it as 1 yields a higher $\kappa$-score.
  
  Following those 3 ideas, I increased my $\kappa$-score from `0.310` (RF classifier) to `0.360` (RF regressor + OptimizedRounder + manual cv).

In [None]:
from functools import partial

import numpy as np
import pandas as pd
import scipy as sp
import scipy.optimize  # load the submodule explicitly; `import scipy` alone does not guarantee sp.optimize on older SciPy
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import cohen_kappa_score, confusion_matrix
from sklearn.model_selection import GridSearchCV, StratifiedKFold, cross_val_predict

In [None]:
# Read the training split into a DataFrame.
df = pd.read_csv(filepath_or_buffer='../input/train/train.csv')

In [None]:
# Build the model inputs: X is every column except the listed non-feature
# columns and the target; y is the 'AdoptionSpeed' column, kept as a Series.
dfDropped = df.drop(
    columns=['Name', 'RescuerID', 'Description', 'PetID', 'AdoptionSpeed']
)
y = df["AdoptionSpeed"]
X = dfDropped.values

In [None]:
class OptimizedRounder(object):
    """Map continuous regression outputs onto the ordinal classes 0-4.

    Four cut points split the predictions into five bins. ``fit`` searches,
    with Nelder-Mead, for the cut points that maximise the quadratic-weighted
    Cohen's kappa between the binned predictions and the true labels;
    ``predict`` applies a set of cut points to new predictions.
    """

    def __init__(self):
        # Holds the scipy optimisation result once fit() has run; None before.
        self.coef_ = None

    @staticmethod
    def _apply_thresholds(X, coef):
        """Bin every value of ``X`` into 0..4 using the four cut points ``coef``.

        The rule is first-match, exactly like an if/elif chain:
        ``x < coef[0]`` -> 0, ``coef[0] <= x < coef[1]`` -> 1, ...,
        anything else (including NaN or values no interval claims because the
        cut points are not increasing) -> 4.

        Returns a new array with the dtype of ``X``; ``X`` is not modified.
        """
        values = np.asarray(X)
        conditions = [
            values < coef[0],
            (values >= coef[0]) & (values < coef[1]),
            (values >= coef[1]) & (values < coef[2]),
            (values >= coef[2]) & (values < coef[3]),
        ]
        # np.select picks the first condition that holds for each element,
        # which keeps the elif ordering even for unsorted cut points.
        binned = np.select(conditions, [0, 1, 2, 3], default=4)
        # Keep the input dtype so float predictions give float labels, as the
        # callers (and cohen_kappa_score in _kappa_loss) have always received.
        return binned.astype(values.dtype)

    def _kappa_loss(self, coef, X, y):
        """Objective for the optimiser: negative quadratic-weighted kappa.

        Parameters
        ----------
        coef : sequence of 4 floats
            Candidate cut points.
        X : array-like
            Continuous predictions.
        y : array-like
            True class labels.
        """
        binned = self._apply_thresholds(X, coef)
        # minimize() minimises, so negate the score we want to maximise.
        return -cohen_kappa_score(y, binned, weights='quadratic')

    def fit(self, X, y):
        """Search for the cut points that maximise kappa of ``X`` against ``y``.

        The search starts from the plain rounding boundaries
        ``[0.5, 1.5, 2.5, 3.5]``; the full optimisation result is stored in
        ``self.coef_``.
        """
        loss_partial = partial(self._kappa_loss, X=X, y=y)
        initial_coef = [0.5, 1.5, 2.5, 3.5]
        self.coef_ = sp.optimize.minimize(loss_partial, initial_coef, method='nelder-mead')

    def predict(self, X, coef=None):
        """Bin the predictions ``X`` into classes 0..4.

        Parameters
        ----------
        X : array-like
            Continuous predictions.
        coef : sequence of 4 floats, optional
            Cut points to apply. When omitted, the fitted cut points from
            ``coefficients()`` are used.

        Returns
        -------
        numpy.ndarray
            Class labels, in the dtype of ``X``.

        Raises
        ------
        ValueError
            If ``coef`` is omitted and the rounder has not been fitted.
        """
        if coef is None:
            coef = self.coefficients()
        return self._apply_thresholds(X, coef)

    def coefficients(self):
        """Return the fitted cut points (the ``'x'`` entry of the optimiser result).

        Raises
        ------
        ValueError
            If ``fit`` has not been called yet.
        """
        if self.coef_ is None:
            raise ValueError("OptimizedRounder is not fitted yet; call fit() first.")
        return self.coef_['x']

In [None]:
#from sklearn.model_selection import StratifiedKFold
#def skfold_gen(X,y,n_splits=5, random_state=42, shuffle=True):
#    skf = StratifiedKFold(n_splits=n_splits, random_state=random_state, shuffle=shuffle)
#    for train, test in skf.split(X, y):
#        yield train, test

In [None]:
#clf = GridSearchCV(RandomForestRegressor()
#                   , dict(max_depth=[5,10,15,20], n_estimators=[ 200, 250,300,350,400,450]), cv=skfold_gen(X,y),
#                 scoring='neg_mean_squared_error',verbose=2,n_jobs=3)

#clf.fit(X,y)

In [None]:
# (-1.1598095155985777, {'max_depth': 10, 'n_estimators': 300})
# clf.best_score_,clf.best_params_

In [None]:
# optr.fit(clf.predict(X),y)
# optr.coefficients()
# array([0.44385399, 2.07305103, 2.47256317, 2.93407633])

In [None]:
# Fit the forest with the hyper-parameters recorded from the earlier grid
# search (max_depth=10, n_estimators=300). random_state pins the forest's
# random sampling so Restart & Run All reproduces the same model and
# submission; n_jobs=-1 only parallelises tree building across cores.
rfr = RandomForestRegressor(max_depth=10, n_estimators=300, random_state=42, n_jobs=-1)
rfr.fit(X, y)

In [None]:
# Tune the rounding thresholds on out-of-fold predictions instead of
# rfr.predict(X): the forest was trained on X, so its in-sample predictions
# are typically much closer to y than predictions on unseen data, and
# thresholds fitted to them tend not to transfer to the test set.
# Folds are stratified on the target so each one sees every adoption-speed class.
oof_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
# cross_val_predict fits clones of rfr per fold; the already-fitted rfr is left untouched.
oof_pred = cross_val_predict(rfr, X, y, cv=oof_cv)

optr = OptimizedRounder()
optr.fit(oof_pred, y)

In [None]:
# Display the fitted cut points (the 'x' entry of the stored optimiser result).
optr.coefficients()

In [None]:
# Read the test split and build its feature matrix by dropping the same
# non-feature columns as for training (no 'AdoptionSpeed' is dropped here).
df_t = pd.read_csv('../input/test/test.csv')
X_t = df_t.drop(columns=['Name', 'RescuerID', 'Description', 'PetID']).values

In [None]:
# Sanity check: display the (rows, columns) shape of the test feature matrix.
X_t.shape

In [None]:
import pandas as pd

# Round the forest's test predictions with the fitted cut points, cast the
# labels to int, pair them with each PetID and write the submission file.
test_pred = rfr.predict(X_t)
test_labels = optr.predict(test_pred, optr.coefficients()).astype(int)
submission = pd.DataFrame(dict(PetID=df_t['PetID'], AdoptionSpeed=test_labels))
submission.to_csv('submission.csv', index=False)

In [None]:
# Preview the first lines of the submission file written above (shell escape).
!head submission.csv