# FATE-Net-Choice

In [1]:
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from csrank import *
from keras.optimizers import SGD

Using TensorFlow backend.


## The medoid problem

In [2]:
from csrank import ChoiceDatasetGenerator

In the medoid problem, the goal of the discrete choice algorithm is to find the most central object of the given set.
This problem is inspired by the task of finding a good representation of the given data using the most central point of the data points.

We will generate a random dataset where each instance contains 30 objects and 2 features for easy plotting.

In [3]:
import logging

# Send every record (DEBUG and above) that reaches the root logger to the
# console, so the progress messages logged while loading data and fitting
# models appear underneath the cells.
rootLogger = logging.getLogger('')
rootLogger.setLevel(logging.DEBUG)
logFormatter = logging.Formatter("%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s]  %(message)s")

# Re-running this cell must not stack a second console handler on the root
# logger (that would print every message twice), so the handler is tagged
# with a name and reused when it is already attached.
consoleHandler = next(
    (h for h in rootLogger.handlers if h.get_name() == "notebook-console"),
    None,
)
if consoleHandler is None:
    consoleHandler = logging.StreamHandler()
    consoleHandler.set_name("notebook-console")
    rootLogger.addHandler(consoleHandler)
consoleHandler.setFormatter(logFormatter)

In [4]:
# Size of every choice instance: number of objects and features per object.
n_objects = 30
n_features = 2
# Number of instances in the train and test splits.
n_train = 10000
n_test = 10000
# Seed handed to the dataset reader as its random_state.
seed = 123

# NOTE(review): the surrounding text talks about generating a random medoid
# dataset, while this cell builds an MNISTChoiceDatasetReader -- confirm which
# data source is intended.
gen = MNISTChoiceDatasetReader(
    n_objects=n_objects,
    n_features=n_features,
    n_train_instances=n_train,
    n_test_instances=n_test,
    dataset_type='unique',
    random_state=seed,
)

2019-01-16 00:49:14,049 [MainThread  ] [INFO ]  Learning Problem: choice_function
2019-01-16 00:49:14,071 [MainThread  ] [INFO ]  Done loading the dataset
2019-01-16 00:49:14,072 [MainThread  ] [INFO ]  Dataset type unique


In [5]:
# Ask the reader for a single train/test split and unpack it into the object
# features (X_*) and the matching choice labels (Y_*) used by every cell below.
X_train, Y_train, X_test, Y_test = gen.get_single_train_test_split()

2019-01-16 00:49:17,481 [MainThread  ] [INFO ]  Unique Dataset
2019-01-16 00:50:15,118 [MainThread  ] [INFO ]  Done


In [6]:
from experiments.util import get_dataset_reader, log_test_train_data, metrics_on_predictions, lp_metric_dict, \
    create_optimizer_parameters
def get_results(model, job_id=None, cluster_id=None):
    """Evaluate ``model`` on the notebook's test split and print its metrics.

    Scores are predicted for the global ``X_test``; if prediction raises, the
    batch size is divided by ten and the prediction is retried until it
    succeeds or the batch size reaches zero. Every metric registered under
    ``lp_metric_dict['choice_function']`` is then computed against the global
    ``Y_test``, logged, and collected into a dict that is printed.

    Parameters
    ----------
    model
        Fitted learner exposing ``predict_scores(X, batch_size=...)`` and
        ``predict_for_scores(scores)``.
    job_id, cluster_id
        Optional identifiers stored (as strings) in the printed results.
        They used to be read from globals that this notebook never defines,
        which made the function fail with a ``NameError``.

    Raises
    ------
    RuntimeError
        If no batch size down to 1 yields a prediction (previously ``None``
        was silently handed on to ``predict_for_scores``).
    """
    # Local import/logger: the notebook never defines a name ``logger``.
    import logging

    logger = logging.getLogger(__name__)

    batch_size = X_test.shape[0]
    s_pred = None
    while s_pred is None and batch_size > 0:
        logger.info("Batch_size {}".format(batch_size))
        try:
            s_pred = model.predict_scores(X_test, batch_size=batch_size)
        except Exception as exc:
            # ``except Exception`` (not a bare ``except``) so that interrupting
            # the kernel still stops the retry loop.
            logger.error("Unexpected Error {}".format(type(exc)))
            batch_size = batch_size // 10
    if s_pred is None:
        raise RuntimeError("Could not predict scores for the test set with any batch size")
    y_pred = model.predict_for_scores(s_pred)

    # NOTE(review): make_ndcg_at_k_loss, get_mean_loss, eval_loss and
    # ERROR_OUTPUT_STRING are not imported in this cell; presumably they come
    # from the star import at the top of the notebook -- confirm.
    results = {'job_id': str(job_id), 'cluster_id': str(cluster_id)}
    for name, evaluation_metric in lp_metric_dict['choice_function'].items():
        # Metrics consume the raw scores unless they are registered as
        # working on the derived predictions.
        predictions = s_pred
        if evaluation_metric in metrics_on_predictions:
            logger.info("Metric on predictions")
            predictions = y_pred
        if "NDCG" in name:
            evaluation_metric = make_ndcg_at_k_loss(k=n_objects)
            predictions = y_pred
        if isinstance(Y_test, dict):
            metric_loss = get_mean_loss(evaluation_metric, Y_test, predictions)
        else:
            metric_loss = eval_loss(evaluation_metric, Y_test, predictions)
        logger.info(ERROR_OUTPUT_STRING % (name, metric_loss))
        if np.isnan(metric_loss):
            results[name] = "\'Infinity\'"
        else:
            results[name] = "{0:.4f}".format(metric_loss)
    print(results)

In [None]:
# Baseline model: a RankSVM choice function, told how many features each
# object has (the size of the last axis of X_train) and fitted on the
# training split.
r = RankSVMChoiceFunction(n_object_features=X_train.shape[-1])
r.fit(X_train, Y_train)

2019-01-16 00:50:15,347 [MainThread  ] [DEBUG]  Creating the Dataset
2019-01-16 00:50:25,428 [MainThread  ] [DEBUG]  Finished the Dataset with instances 1159704
2019-01-16 00:50:25,430 [MainThread  ] [INFO ]  Linear SVC model 
2019-01-16 00:50:31,360 [MainThread  ] [DEBUG]  Finished Creating the model, now fitting started


In [None]:
# Evaluate the fitted RankSVM baseline on the test split and print its metrics.
get_results(r)

Let us plot a random training instance. The Pareto points (the objects with a nonzero label in `Y_train`) are marked as P.

In [None]:
def get_name(d):
    """Return the annotation for one object: "" when ``d`` equals 0, "P" otherwise."""
    return "" if d == 0 else "P"
# Scatter the first two features of one randomly drawn training instance,
# overlay the objects whose label is 1, and write get_name()'s marker next
# to every object.
fig, ax = plt.subplots(figsize=(5, 5))
inst = np.random.choice(n_train)
inst_objects = X_train[inst]
choices = np.where(Y_train[inst] == 1)[0]
ax.scatter(inst_objects[:, 0], inst_objects[:, 1])
ax.scatter(inst_objects[choices, 0], inst_objects[choices, 1])
for i in range(n_objects):
    marker = get_name(int(Y_train[inst, i]))
    # Small offset so the text does not sit on top of the point.
    ax.text(inst_objects[i, 0] + 0.02, inst_objects[i, 1] + 0.02, s=marker)

## The FATE network
The first-aggregate-then-evaluate approach learns an embedding of each object and then aggregates that into a _context_:
\begin{equation}
	\mu_{C(\vec{x})} = \frac{1}{|C(\vec{x})|} \sum_{\vec{y} \in C(\vec{x})} \phi(\vec{y})
\end{equation}
and then scores each object $\vec{x}$ using a generalized utility function $U(\vec{x}, \mu_{C(\vec{x})})$.

In [None]:
# SGD with Nesterov momentum is the optimizer handed to the FATE model.
sgd = SGD(lr=1e-4, momentum=0.9, nesterov=True)
# The model is told how many features each object has (last axis of X_train).
fate = FATEChoiceFunction(n_object_features=X_train.shape[-1], optimizer=sgd)

We will run the training for only 10 epochs to get an idea of the convergence:

In [None]:
# Train FATE on the training split for 10 epochs with verbose output enabled.
fate.fit(X_train, Y_train, verbose=True, epochs=10)

In [None]:
# Raw per-object scores for the test split, then the predictions FATE derives
# from those scores; both are reused by the cells below.
scores = fate.predict_scores(X_test)
y_pred = fate.predict_for_scores(scores)

In [None]:
# Evaluate the FATE model trained above. This cell previously passed `r`,
# which only re-printed the RankSVM baseline's metrics.
get_results(fate)

In [None]:
# Compare FATE's predictions (y_pred from the prediction cell) with the true
# test labels using csrank's f1_measure; the value is shown as the cell output.
from csrank.metrics_np import f1_measure
f1_measure(Y_test, y_pred)

Not converged yet, but let us visualize the scores it assigns to test instances:

In [None]:
# Scatter the first two features of one randomly drawn test instance, overlay
# the objects whose label is 1, and print FATE's score beside every object:
# red text for objects with a truthy label, blue for the rest.
fig, ax = plt.subplots(figsize=(5, 5))
inst = np.random.choice(n_test)
inst_objects = X_test[inst]
choices = np.where(Y_test[inst] == 1)[0]
ax.scatter(inst_objects[:, 0], inst_objects[:, 1])
ax.scatter(inst_objects[choices, 0], inst_objects[choices, 1])
for i in range(n_objects):
    color = 'r' if Y_test[inst, i] else 'b'
    ax.text(inst_objects[i, 0] - 0.2,
            inst_objects[i, 1] - 0.2,
            s='{:.1f}'.format(scores[inst][i]),
            color=color)