In [1]:
import numpy as np
from featurelowrank import FeatureLowRankInnerClassifier
from sklearn.datasets import load_svmlight_file
from sklearn.utils import shuffle
from sklearn.model_selection import GridSearchCV

In [2]:
X, y = load_svmlight_file("janken_20_200_100.svm")
print(X.shape, y.shape)

(19900, 6) (19900,)


In [3]:
n_train = 100
scores = []
for i in range(10):
    X, y = shuffle(X, y, random_state=i)    
    y_binary = np.sign(y*2-1)
    y_binary[y_binary==0] = np.random.randint(0, 2, size=np.sum(y_binary==0))*2-1
    X_train, y_train = X[:n_train], y_binary[:n_train]
    X_test, y_test = X[n_train:], y_binary[n_train:]
    flr = FeatureLowRankInnerClassifier(n_components=2, max_iter=100, random_state=i)
    
    cv = GridSearchCV(flr,
                      param_grid={"alpha": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
                                  "beta": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
                                  "degree": [2],
                                  "fit_linear" : [True, False]}, cv=5)
    cv.fit(X_train, y_train)
    scores += [cv.best_estimator_.score(X_test, y_test)]



In [4]:
print(np.mean(scores), np.std(scores))
print(cv.best_params_)

0.9424595959595958 0.003435980744587455
{'alpha': 0.0001, 'beta': 0.0001, 'degree': 2, 'fit_linear': True}


In [5]:
print(cv.best_estimator_.w_)
print(cv.best_estimator_.U_.T)
print(cv.best_estimator_.V_.T)
print(np.dot(cv.best_estimator_.U_[0].T, cv.best_estimator_.V_[0])-np.dot(cv.best_estimator_.U_[0].T, cv.best_estimator_.V_[0]).T)

[-6.82509418e-04 -2.37909085e-05  8.02868055e-04]
[[[-1.47150994]
  [ 0.02443286]]

 [[ 0.01276801]
  [ 0.44172794]]

 [[-0.0282607 ]
  [-0.03722557]]]
[[[-1.24513576]
  [ 0.00679804]]

 [[-0.00742936]
  [ 0.9061006 ]]

 [[ 0.00546368]
  [ 0.03211559]]]
[[ 0.          0.04596602 -0.04219053]
 [-0.04596602  0.          0.04777627]
 [ 0.04219053 -0.04777627  0.        ]]


# Logistic Loss vs Soft Cross Etropy

In [6]:
from sklearn.metrics import accuracy_score
# cross entropy loss function for evaluation
# lower is better
def cross_entropy(y, y_pred):
    return np.mean(-(y*np.log(y_pred+1e-10)) - (1-y)*np.log(1-y_pred+1e-10))


## Logistic Loss

In [7]:
n_train = 100
scores = []
accs = []
for i in range(10):
    X, y = shuffle(X, y, random_state=i)    
    y_binary = np.sign(y*2-1)
    y_binary[y_binary==0] = np.random.randint(0, 2, size=np.sum(y_binary==0))*2-1
    X_train, y_train = X[:n_train], y_binary[:n_train]
    X_test, y_test = X[n_train:], y[n_train:]
    flr = FeatureLowRankInnerClassifier(n_components=2, max_iter=100, random_state=i, loss='logistic')
    
    cv = GridSearchCV(flr,
                      param_grid={"alpha": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
                                  "beta": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
                                  "degree": [2],
                                  "fit_linear" : [True, False]}, cv=5)
    cv.fit(X_train, y_train)
    scores += [cross_entropy(y_test, cv.best_estimator_.predict_proba(X_test))]
    accs += [accuracy_score(y_binary[n_train:], cv.best_estimator_.predict(X_test))]



Score is not accuracy, cross entropy loss (a.k.a negative log-likelihood).
Lower is better.

In [8]:
print(np.mean(scores), np.std(scores)) # lower is better
print(np.mean(accs), np.std(accs)) # higher is better
print(cv.best_params_)

0.7533820802056935 0.20086777209514287
0.9407020202020202 0.004056876145105843
{'alpha': 0.0001, 'beta': 1, 'degree': 2, 'fit_linear': True}


## Soft Cross Entropy

In [9]:
n_train = 100
scores = []
accs = []
for i in range(10):
    X, y = shuffle(X, y, random_state=i)
    y_binary = np.sign(y*2-1)
    y_binary[y_binary==0] = np.random.randint(0, 2, size=np.sum(y_binary==0))*2-1
    X_train, y_train = X[:n_train], y[:n_train]
    X_test, y_test = X[n_train:], y[n_train:]
    flr = FeatureLowRankInnerClassifier(n_components=2, max_iter=100, random_state=i, loss='soft_cross_entropy')
    
    cv = GridSearchCV(flr,
                      param_grid={"alpha": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
                                  "beta": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
                                  "degree": [2],
                                  "fit_linear" : [True, False]}, cv=5)
    cv.fit(X_train, y_train)
    scores += [cross_entropy(y_test, cv.best_estimator_.predict_proba(X_test))]
    accs += [accuracy_score(y_binary[n_train:], cv.best_estimator_.predict(X_test)*2-1)]



In [10]:
print(np.mean(scores), np.std(scores)) # lower is better
print(np.mean(accs), np.std(accs)) # higher is better
print(cv.best_params_)

0.44457158102260363 0.0004518376742706133
0.9457525252525253 0.0003741807362018453
{'alpha': 10, 'beta': 0.01, 'degree': 2, 'fit_linear': True}


The cross entroy score and accuracy with loss='soft_cross_entropy' were better than those with loss='logistic`!

In [12]:
# if the prediction is 'all instances are positive'
print(cross_entropy(y_test, np.ones(len(y_test))))
# if the prediction is 'all instances are negative'
print(cross_entropy(y_test, np.zeros(len(y_test))))
# if the prediction is 'all instances are draw'
print(cross_entropy(y_test, np.ones(len(y_test))*0.5))

11.517094539444685
11.508756390395774
0.6931471803599454
