In [1]:
import numpy as np
from featurelowrank import FeatureLowRankDistanceClassifier
from sklearn.datasets import load_svmlight_file
from sklearn.utils import shuffle
from sklearn.model_selection import GridSearchCV

In [2]:
# Read the svmlight-format dataset and report the shapes of the feature
# matrix and of the label vector as a quick sanity check.
DATA_PATH = "janken_20_200_100.svm"
X, y = load_svmlight_file(DATA_PATH)
print(X.shape, y.shape)

(19900, 6) (19900,)


In [3]:
# Repeat the experiment on 10 shuffles of the data: tune the hyper-parameters
# with a 5-fold grid search on the first n_train samples, then score the
# refitted best estimator on all remaining samples.
n_train = 100
scores = []
for i in range(10):
    # X and y are rebound here, so every iteration shuffles the ordering left
    # by the previous one.
    X, y = shuffle(X, y, random_state=i)
    # np.sign(y*2-1) gives -1 / 0 / +1; the 0 entries (presumably draws,
    # i.e. y == 0.5 -- TODO confirm) are assigned a random -1/+1 label.
    y_binary = np.sign(y*2-1)
    # Use a generator seeded per iteration instead of the global np.random
    # state, so that re-running the cell reproduces the same labels and scores.
    tie_rng = np.random.RandomState(i)
    is_tie = y_binary == 0
    y_binary[is_tie] = tie_rng.randint(0, 2, size=np.sum(is_tie))*2-1
    X_train, y_train = X[:n_train], y_binary[:n_train]
    X_test, y_test = X[n_train:], y_binary[n_train:]
    flr = FeatureLowRankDistanceClassifier(n_components=2, max_iter=100, random_state=i)

    cv = GridSearchCV(flr,
                      param_grid={"alpha": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
                                  "beta": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
                                  "degree": [2],
                                  "fit_linear" : [True, False]}, cv=5)
    cv.fit(X_train, y_train)
    scores.append(cv.best_estimator_.score(X_test, y_test))



In [4]:
# Mean and standard deviation of the per-split test scores.
score_mean, score_std = np.mean(scores), np.std(scores)
print(score_mean, score_std)
# `cv` is rebound on every loop iteration, so these are the best parameters
# of the last split only.
print(cv.best_params_)

0.9403787878787879 0.004449907684454057
{'alpha': 0.0001, 'beta': 0.01, 'degree': 2, 'fit_linear': True}


In [5]:
# Inspect the fitted parameters of the best estimator from the last split.
best = cv.best_estimator_
print(best.w_)
print(best.U_.T)
print(best.V_.T)
# Product of the first U_ slice (transposed) with the first V_ slice, minus
# its own transpose: what remains is the antisymmetric part (times two).
interaction = np.dot(best.U_[0].T, best.V_[0])
print(interaction - interaction.T)

[ 1.36185906  0.65823736 -1.11126125]
[[[ 0.65773961]
  [-0.50705595]]

 [[-0.32081598]
  [-0.70570382]]

 [[-0.9182258 ]
  [ 0.19383596]]]
[[[ 0.07510999]
  [ 0.35182008]]

 [[ 0.71329728]
  [-0.1828427 ]]

 [[-0.4195444 ]
  [ 0.59008994]]]
[[ 0.          0.8342526  -0.57438704]
 [-0.8342526   0.          0.40857728]
 [ 0.57438704 -0.40857728  0.        ]]


In [6]:
# Fit once on every sample (no train/test split) with only 10 iterations,
# then print the linear weights, U_, and the accumulated gradient norm of U_.
flr = FeatureLowRankDistanceClassifier(n_components=2, max_iter=10, random_state=0)
flr.fit(X, y_binary)
for attr_name in ("w_", "U_", "acc_grad_norm_U_"):
    print(getattr(flr, attr_name))

[-0.14119067 -0.26022017  0.21353816]
[[[-0.49890818  0.35409392 -0.44510649]
  [-1.17531073 -0.49634391  0.79582011]]]
[[[2.05965684e+10 3.92647408e+11 1.62542328e+12]
  [5.89384459e+11 4.40592814e+11 2.30296184e+12]]]




# Logistic Loss vs Soft Cross Entropy

In [7]:
from sklearn.metrics import accuracy_score
def cross_entropy(y, y_pred):
    """Mean binary cross entropy, used here for evaluation (lower is better).

    Parameters
    ----------
    y : array-like
        Target values; the formula weights the two log terms by ``y`` and
        ``1 - y``.
    y_pred : array-like
        Predicted probabilities of the positive class.

    Returns
    -------
    float
        Average of ``-y*log(y_pred) - (1-y)*log(1-y_pred)`` over all entries.
        A constant 1e-10 is added inside each logarithm so that predictions
        of exactly 0 or 1 do not evaluate ``log(0)``.
    """
    eps = 1e-10
    positive_term = y*np.log(y_pred+eps)
    negative_term = (1-y)*np.log(1-y_pred+eps)
    return np.mean(-positive_term - negative_term)

## Logistic Loss

In [8]:
# Logistic-loss experiment over 10 shuffles: train on hard -1/+1 labels, then
# record the cross entropy against the raw labels `y` and the accuracy
# against the binarised labels on the held-out samples.
n_train = 100
scores = []
accs = []
for i in range(10):
    # X and y are rebound here, so every iteration shuffles the ordering left
    # by the previous one.
    X, y = shuffle(X, y, random_state=i)
    # np.sign(y*2-1) gives -1 / 0 / +1; the 0 entries (presumably draws,
    # i.e. y == 0.5 -- TODO confirm) are assigned a random -1/+1 label.
    y_binary = np.sign(y*2-1)
    # Use a generator seeded per iteration instead of the global np.random
    # state, so that re-running the cell reproduces the same labels and scores.
    tie_rng = np.random.RandomState(i)
    is_tie = y_binary == 0
    y_binary[is_tie] = tie_rng.randint(0, 2, size=np.sum(is_tie))*2-1
    X_train, y_train = X[:n_train], y_binary[:n_train]
    # y_test keeps the raw labels: cross_entropy is evaluated against them.
    X_test, y_test = X[n_train:], y[n_train:]
    flr = FeatureLowRankDistanceClassifier(n_components=2, max_iter=100, random_state=i, loss='logistic')

    cv = GridSearchCV(flr,
                      param_grid={"alpha": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
                                  "beta": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
                                  "degree": [2],
                                  "fit_linear" : [True, False]}, cv=5)
    cv.fit(X_train, y_train)
    scores.append(cross_entropy(y_test, cv.best_estimator_.predict_proba(X_test)))
    accs.append(accuracy_score(y_binary[n_train:], cv.best_estimator_.predict(X_test)))



In [10]:
ce_mean, ce_std = np.mean(scores), np.std(scores)
acc_mean, acc_std = np.mean(accs), np.std(accs)
print(ce_mean, ce_std)  # cross entropy: lower is better
print(acc_mean, acc_std)  # accuracy: higher is better
# `cv` is rebound on every loop iteration, so these are the best parameters
# of the last split only.
print(cv.best_params_)

0.8327099657999331 0.6236923288139884
0.9392373737373738 0.003913241995065955
{'alpha': 0.1, 'beta': 1, 'degree': 2, 'fit_linear': True}


## Soft Cross Entropy

In [11]:
# Soft-cross-entropy experiment over 10 shuffles: train directly on the raw
# labels `y`, then record the cross entropy against the raw labels and the
# accuracy against the binarised labels on the held-out samples.
n_train = 100
scores = []
accs = []
for i in range(10):
    # X and y are rebound here, so every iteration shuffles the ordering left
    # by the previous one.
    X, y = shuffle(X, y, random_state=i)
    # y_binary is only the accuracy reference in this cell. np.sign(y*2-1)
    # gives -1 / 0 / +1; the 0 entries (presumably draws, i.e. y == 0.5 --
    # TODO confirm) are assigned a random -1/+1 label.
    y_binary = np.sign(y*2-1)
    # Use a generator seeded per iteration instead of the global np.random
    # state, so that re-running the cell reproduces the same accuracies.
    tie_rng = np.random.RandomState(i)
    is_tie = y_binary == 0
    y_binary[is_tie] = tie_rng.randint(0, 2, size=np.sum(is_tie))*2-1
    X_train, y_train = X[:n_train], y[:n_train]
    X_test, y_test = X[n_train:], y[n_train:]
    flr = FeatureLowRankDistanceClassifier(n_components=2, max_iter=100,
                                           random_state=i,
                                           loss='soft_cross_entropy')
    cv = GridSearchCV(flr,
                      param_grid={"alpha": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
                                  "beta": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
                                  "degree": [2],
                                  "fit_linear" : [True, False]}, cv=5)
    cv.fit(X_train, y_train)
    scores.append(cross_entropy(y_test, cv.best_estimator_.predict_proba(X_test)))
    # predict(...)*2-1 rescales the predictions before comparing with the
    # -1/+1 reference (presumably predict returns 0/1 here -- TODO confirm).
    accs.append(accuracy_score(y_binary[n_train:], cv.best_estimator_.predict(X_test)*2-1))



In [12]:
# First line: cross entropy (lower is better); second line: accuracy
# (higher is better). Each is printed as mean and standard deviation.
for metric_values in (scores, accs):
    print(np.mean(metric_values), np.std(metric_values))
# `cv` is rebound on every loop iteration, so these are the best parameters
# of the last split only.
print(cv.best_params_)

0.4458067487508595 0.0012433461849259999
0.9443030303030303 0.002249805850436123
{'alpha': 0.0001, 'beta': 0.001, 'degree': 2, 'fit_linear': False}


The cross entropy score and accuracy with loss='soft_cross_entropy' were better (lower cross entropy, higher accuracy) than those with loss='logistic'!

In [13]:
# if the prediction is 'all instances are positive'
print(cross_entropy(y_test, np.ones(len(y_test))))
# if the prediction is 'all instances are negative'
print(cross_entropy(y_test, np.zeros(len(y_test))))
# if the prediction is 'all instances are draw'
print(cross_entropy(y_test, np.ones(len(y_test))*0.5))

11.517094539444685
11.508756390395774
0.6931471803599454
