# Pointwise Evaluation

### Imports

In [1]:
import os
import sys
from skopt.space import Integer
from skopt.space import Real
from skopt.space import Categorical
sys.path.append(os.path.dirname((os.path.abspath(""))))

In [2]:
from src.pipeline import Pipeline

[nltk_data] Downloading package punkt to /Users/tim/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /Users/tim/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/tim/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/tim/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [3]:
pipeline = Pipeline(
    collection='data/processed/30_5000_1000_collection.pkl',
    queries='data/processed/30_5000_1000_queries.pkl',
    queries_val='data/processed/30_5000_1000_queries_val.pkl',
    queries_test='data/processed/30_5000_1000_queries_test.pkl',
    features='data/processed/30_5000_1000_features.pkl',
    qrels_val='data/processed/30_5000_1000_qrels_val.pkl',
    qrels_test='data/processed/30_5000_1000_qrels_test.pkl',
    features_test='data/processed/30_5000_1000_features_test.pkl',
    features_val='data/processed/30_5000_1000_features_val.pkl',
)

In [4]:
pipeline.features

Unnamed: 0,qID,pID,y,w2v_cosine,w2v_euclidean,w2v_manhattan,w2v_tfidf_cosine,w2v_tfidf_euclidean,w2v_tfidf_manhattan,tfidf_cosine,...,polarity_doc,subjectivity_query,polarity_query,bm25,doc_nouns,doc_adjectives,doc_verbs,query_nouns,query_adjectives,query_verbs
0,603195,7050012,1,0.972107,144.641830,1124.871630,0.938781,2.765727,22.236694,0.537439,...,0.000000,0.00,0.00,-24.655536,23,6,4,3,1,1
1,474183,325505,1,0.971866,131.960266,1033.670312,0.985675,1.360485,11.347487,0.745907,...,0.450000,0.00,0.00,-33.129796,18,9,3,4,0,0
2,320545,1751825,1,0.947701,94.900002,756.378183,0.959522,2.236971,17.352688,0.409509,...,0.500000,0.20,0.20,-16.699603,20,2,14,2,1,1
3,89798,5069949,1,0.972710,161.470459,1273.643564,0.933304,1.714253,13.493497,0.541627,...,0.066667,0.25,0.00,-27.678576,25,10,5,3,1,0
4,1054603,2869106,1,0.965680,155.648453,1216.564726,0.941391,1.799412,14.369308,0.438115,...,0.000000,0.00,0.00,-28.497519,20,9,6,2,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,128401,6127598,0,0.796978,85.670822,678.466760,0.555981,3.027138,24.841764,0.185056,...,-0.520833,0.00,0.00,-8.866170,16,6,13,2,1,0
4996,1044540,4616118,0,0.922095,157.044754,1238.354322,0.603788,2.167866,17.812756,0.140057,...,0.156250,0.00,0.00,-7.852468,25,9,16,0,0,1
4997,486146,1137390,0,0.946438,125.126984,972.330644,0.882998,4.161341,34.815641,0.314505,...,-0.100000,0.10,0.00,-15.909103,12,1,10,2,0,2
4998,532697,5161847,0,0.938939,99.808395,790.453814,0.893834,1.977307,16.122506,0.344173,...,0.284375,0.00,0.00,-16.617979,18,8,9,3,1,0


### Logistic Regression

##### Without Hyperparameter Optimization

In [8]:
pipeline.evaluate(
    model='lr', 
    pca=0
)

MRR: 0.03875800010215545


##### With Hyperparameter Optimization

In [None]:
logistic_regression_search_space: list = []
logistic_regression_search_space.append(Categorical(['l2', 'none'], name='penalty'))
logistic_regression_search_space.append(Real(0.1, 100.0, name='C'))
logistic_regression_search_space.append(Real(1e-6, 0.1, name='tol'))

In [7]:
pipeline.evaluate(
    model='lr', 
    pca=5, 
    search_space=logistic_regression_search_space
)



Best MRR: 0.0004853499004727582
Best Hyperparameters: ['none', 37.07012583093926, 0.05146727586830882]




MRR on test set: 0.0009793445012636875


### Naive Bayes

In [5]:
pipeline.evaluate(
    model='nb', 
    pca=15
)

MRR: 0.021862321092733287


### Multi-layer Perceptron

##### Without Hyperparameter Optimization

In [11]:
pipeline.evaluate(
    model='mlp', 
    pca=0
)



MRR: 0.033422307290518836


##### With Hyperparameter Optimization

In [10]:
mlp_search_space: list = []
mlp_search_space.append(Categorical(['identity', 'logistic', 'tanh', 'relu'], name='activation'))
mlp_search_space.append(Real(1e-6, 0.1, name='alpha'))
mlp_search_space.append(Real(1e-6, 0.1, name='learning_rate_init'))

In [11]:
pipeline.evaluate(
    model='mlp', 
    pca=0,
    search_space=mlp_search_space
)

3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267




3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267




3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267




3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267




3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267




3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267




3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267




3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267




3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267




3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267




3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267




3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267




3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267




3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267




3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267




3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267




3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267




3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
Best MRR: 0.0003063416810727735
Best Hyperparameters: ['identity', 0.1, 1e-06]




3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
3267
MRR on test set: 0.024734659139595713


### Support Vector Machine

##### Without Hyperparameter Optimization

In [8]:
pipeline.evaluate(
    model='svm', 
    pca=0
)

KeyboardInterrupt: 

##### With Hyperparameter Optimization

In [13]:
svm_search_space: list = []
svm_search_space.append(Categorical(['poly', 'rbf', 'sigmoid'], name='kernel'))
svm_search_space.append(Real(0.1, 100.0, name='C'))

In [15]:
pipeline.evaluate(
    model='svm', 
    pca=20,
    search_space=svm_search_space
)



Best MRR: 1.5332357622152552e-05
Best Hyperparameters: ['sigmoid', 45.26733112854406]
MRR on test set: 1.5322789775107556e-05


### Decision Tree

##### Without Hyperparameter Optimization

In [6]:
pipeline.evaluate(
    model='dt', 
    pca=0
)

MRR: 0.002454529837434195


##### With Hyperparameter Optimization

In [18]:
decision_tree_search_space: list = []
decision_tree_search_space.append(Categorical(['gini', 'entropy'], name='criterion'))
decision_tree_search_space.append(Integer(2, 15, name='min_samples_split'))
decision_tree_search_space.append(Integer(1, 10, name='min_samples_leaf'))
decision_tree_search_space.append(Integer(5, 100, name='max_leaf_nodes'))
decision_tree_search_space.append(Integer(10, 50, name='max_depth'))
decision_tree_search_space.append(Real(0.0, 0.2, name='min_weight_fraction_leaf'))

In [19]:
pipeline.evaluate(
    model='dt', 
    pca=0,
    search_space=decision_tree_search_space
)

Best MRR: 1.5334924410731145e-05
Best Hyperparameters: ['entropy', 14, 4, 94, 26, 0.028655692414108577]


TypeError: Object of type int64 is not JSON serializable

### Random Forest

##### Without Hyperparameter Optimization

In [13]:
pipeline.evaluate(
    model='rf', 
    pca=0
)

MRR: 0.018799472370008098


### Adaptive Boosting

##### Without Hyperparameter Optimization

In [9]:
pipeline.evaluate(
    model='ada', 
    pca=10
)

MRR: 0.038329344268102784


### Results Overview

In [7]:
import pandas as pd
from src.utils.utils import load

results = load('data/results/results.pkl')
results

Unnamed: 0,name,model,hyperparameters,pairwise_model,pairwise_k,features,sampling_training,sampling_test,pca,MRR,MAP,nDCG,accuracy,precision,recall,f1,accuracy@50,precision@50,recall@50,f1@50
0,,LogisticRegression(),"{""C"": 1.0, ""class_weight"": null, ""dual"": false...",,,"[""w2v_cosine"", ""w2v_euclidean"", ""w2v_manhattan...",9977,451680,0,0.03806,0.86509,0.859446,0.997135,0.993724,0.268969,0.423351,0.690667,0.993289,0.490608,0.656805
1,,LogisticRegression(),"{""C"": 1.0, ""class_weight"": null, ""dual"": false...",,,"[""pca_comp_0"", ""pca_comp_1"", ""pca_comp_2"", ""pc...",9977,451680,5,0.037385,0.884383,0.875099,0.996925,0.994751,0.214609,0.353051,0.642,0.994751,0.414661,0.585328
2,,LogisticRegression(),"{""C"": 1.0, ""class_weight"": null, ""dual"": false...",,,"[""pca_comp_0"", ""pca_comp_1"", ""pca_comp_2"", ""pc...",9977,451680,20,0.03917,0.869707,0.86706,0.99719,0.994036,0.283126,0.440723,0.712,0.993711,0.524917,0.686957
3,,LogisticRegression(),"{""C"": 1.0, ""class_weight"": null, ""dual"": false...",,,"[""pca_comp_0"", ""pca_comp_1"", ""pca_comp_2"", ""pc...",9977,451680,25,0.038758,0.870135,0.866985,0.997106,0.997831,0.260476,0.413112,0.685333,0.997701,0.479558,0.647761
4,,GaussianNB(),"{""priors"": null, ""var_smoothing"": 1e-09}",,,"[""w2v_cosine"", ""w2v_euclidean"", ""w2v_manhattan...",9977,451680,0,0.036072,0.874882,0.875631,0.997173,0.995943,0.278029,0.434706,0.718,0.995851,0.532741,0.694143
5,,GaussianNB(),"{""priors"": null, ""var_smoothing"": 1e-09}",,,"[""pca_comp_0"", ""pca_comp_1"", ""pca_comp_2"", ""pc...",9977,451680,20,0.021907,0.644065,0.800962,0.99721,0.890432,0.326727,0.478045,0.824,0.886288,0.730028,0.800604
6,,MLPClassifier(),"{""activation"": ""relu"", ""alpha"": 0.0001, ""batch...",,,"[""w2v_cosine"", ""w2v_euclidean"", ""w2v_manhattan...",9977,451680,0,0.033422,0.763474,0.840705,0.99714,0.972112,0.276331,0.430335,0.742,0.971014,0.557007,0.707925
7,,DecisionTreeClassifier(),"{""ccp_alpha"": 0.0, ""class_weight"": null, ""crit...",,,"[""w2v_cosine"", ""w2v_euclidean"", ""w2v_manhattan...",9977,451680,0,0.008408,0.287001,0.547333,0.996444,0.576775,0.340317,0.428063,0.729333,0.558342,0.998051,0.716084
8,,RandomForestClassifier(),"{""bootstrap"": true, ""ccp_alpha"": 0.0, ""class_w...",,,"[""w2v_cosine"", ""w2v_euclidean"", ""w2v_manhattan...",9977,451680,0,0.018799,0.674797,0.791424,0.997452,0.992,0.351076,0.518611,0.859333,0.993056,0.734275,0.84428
9,,AdaBoostClassifier(),"{""algorithm"": ""SAMME.R"", ""base_estimator"": nul...",,,"[""w2v_cosine"", ""w2v_euclidean"", ""w2v_manhattan...",9977,451680,0,0.032657,0.827516,0.854278,0.997219,0.992278,0.291053,0.450088,0.718667,0.991667,0.532438,0.692868
