In [None]:
import requests
import sys
sys.path.append('..')

import numpy as np

# Load judgments & pointwise training set

Load the dataset generated [from the previous section](http://localhost:8888/notebooks/ch10/3.ch10-pairwise-transform.ipynb).

In [None]:
from ltr.judgments import judgments_open

# Arrays written out by the previous notebook section.
pointwise_predictors = np.load('data/pointwise_predictors.npy')
feature_data = np.load('data/feature_data.npy')

# The last two rows of feature_data are read as the per-feature means and
# standard deviations; every row before them is kept as pointwise_features.
pointwise_features, means, std_devs = (
    feature_data[:-2],
    feature_data[-2],
    feature_data[-1],
)

# Pull every normalized judgment from the file into an in-memory list.
with judgments_open('data/normed_judgments.txt') as judg_list:
    normed_judgments = [judgment for judgment in judg_list]

In [None]:
import numpy as np
from ltr.judgments import judgments_from_file, judgments_to_nparray

def pairwise_transform(normed_judgments):
    """Convert graded judgments into pairwise-difference training examples.

    Judgments are grouped by ``qid`` using ``itertools.groupby``, so all
    judgments belonging to one query must be adjacent in ``normed_judgments``.
    Within each query, every ordered pair of judgments with different grades
    produces one example: the feature difference
    ``judgment1.features - judgment2.features`` labelled ``+1`` when judgment1
    has the higher grade and ``-1`` when judgment2 does. Pairs with equal
    grades (including a judgment paired with itself) are skipped.

    Args:
        normed_judgments: iterable of objects exposing ``qid``, ``grade`` and
            ``features`` attributes.

    Returns:
        A ``(features, predictors)`` tuple of numpy arrays: the pairwise
        feature differences and their matching ``+1`` / ``-1`` labels.
    """
    from itertools import groupby

    pointwise_predictors = []
    pointwise_features = []

    # For each query's judgments
    for qid, query_judgments in groupby(normed_judgments, key=lambda j: j.qid):

        # groupby hands back a one-shot iterator, so materialize the group.
        # Each judgment's features are converted to an array exactly once
        # here instead of once per pair inside the nested loops.
        graded = [(j.grade, np.array(j.features)) for j in query_judgments]

        # Examine every ordered pair for this query and store the
        # difference whenever the grades differ.
        for grade1, features1 in graded:
            for grade2, features2 in graded:
                if grade1 > grade2:
                    label = +1   # judgment1 more relevant
                elif grade1 < grade2:
                    label = -1   # judgment2 more relevant
                else:
                    continue
                pointwise_predictors.append(label)
                pointwise_features.append(features1 - features2)

    return np.array(pointwise_features), np.array(pointwise_predictors)

## Listing 10.16

Train the model with the fully transformed dataset

In [None]:
from sklearn import svm
# Fit a linear SVM on the fully transformed dataset loaded above;
# fit() hands back the fitted estimator, so it can be bound directly.
model = svm.LinearSVC(max_iter=10000, verbose=1).fit(pointwise_features,
                                                     pointwise_predictors)
# Show the learned weights.
model.coef_

## Features for evaluating the model on some movies

In [None]:
# If you wanted to confirm Wrath of Khan's features
import requests

logging_solr_query = {
    "fl": "id,title,[features store=movies efi.keywords=\"wrath of khan\"]",
    'q': "id:154",  # the single document whose features are logged
    'rows': 10,
    'wt': 'json'
}

resp = requests.post('http://aips-solr:8983/solr/tmdb/select',
                     data=logging_solr_query)

# Features Solr returns
wok_features = [5.9217176, 3.401492, 1982.0]   # Wrath of Khan
spock_features = [0.0,0.0,1984.0]              # Search For Spock

# Standardize each raw feature with the means / std_devs loaded earlier.
normed_wok_features = [(raw - means[pos]) / std_devs[pos]
                       for pos, raw in enumerate(wok_features)]
normed_spock_features = [(raw - means[pos]) / std_devs[pos]
                         for pos, raw in enumerate(spock_features)]

normed_spock_features

# Taking the model for a test drive...

Here we score a few documents with the model. This code is omitted from the book, but is explored in section 10.6.2.

In [None]:
def score_one(features, model):
    """Score one feature vector with the linear model.

    Multiplies each feature by the coefficient at the same position in the
    first row of ``model.coef_`` and adds the products up, left to right,
    starting from 0.0.
    """
    return sum(
        (value * model.coef_[0][pos].item() for pos, value in enumerate(features)),
        0.0,
    )

def rank(query_judgments, model):
    """Score every judgment and return them ordered best-first.

    Each judgment gets a ``score`` attribute set from ``score_one`` applied
    to its ``features``; the returned list is a new list sorted by that score
    in descending order.
    """
    for judgment in query_judgments:
        judgment.score = score_one(judgment.features, model)

    ordered = list(query_judgments)
    ordered.sort(key=lambda judgment: judgment.score, reverse=True)
    return ordered

# Score Search For Spock from its normalized features
score_one(normed_spock_features, model)

Wrath of Khan should score higher

In [None]:
# Score Wrath of Khan from its normalized features
score_one(normed_wok_features, model)

# Listing 10.17 Test Training Split

In [None]:
import random

# Collect the distinct query ids and shuffle them so the split is random.
all_qids = list(set([j.qid for j in normed_judgments]))
random.shuffle(all_qids)

# NOTE: despite its name, this fraction of the shuffled qids is what gets
# held out as the TEST set by the slices below; the rest is used to train.
proportion_train=0.1

test_train_split_idx = int(len(all_qids) * proportion_train)
test_qids=all_qids[:test_train_split_idx]
train_qids=all_qids[test_train_split_idx:]

# Set copies keep the per-judgment membership checks O(1) instead of
# scanning the qid lists once for every judgment.
train_qid_set = set(train_qids)
test_qid_set = set(test_qids)

# Route each judgment to the split that owns its query id, preserving the
# original judgment order within each split.
train_data = [j for j in normed_judgments if j.qid in train_qid_set]
test_data = [j for j in normed_judgments if j.qid in test_qid_set]

# Listing 10.18 - train on just train data

In [None]:
# Build pairwise examples from the training judgments only.
train_data_features, train_data_predictors = pairwise_transform(train_data)

from sklearn import svm
# Fit a fresh linear SVM on those examples; fit() hands back the fitted
# estimator, so it can be bound directly.
model = svm.LinearSVC(max_iter=10000, verbose=1).fit(train_data_features,
                                                     train_data_predictors)
# Show the learned weights.
model.coef_

## Listing 10.19 - eval model on test data

In [None]:
def eval_model(test_data, model, k=4):
    """Return the mean precision@k of ``model`` over the queries in ``test_data``.

    Judgments are grouped by ``qid`` using ``itertools.groupby``, so all
    judgments belonging to one query must be adjacent in ``test_data``. Each
    group is ranked with ``rank`` and its precision is the number of
    judgments with ``grade == 1`` among the top ``k``, divided by ``k`` (the
    divisor stays ``k`` even when a query has fewer than ``k`` judgments).

    Args:
        test_data: iterable of judgments exposing ``qid``, ``grade`` and
            ``features``.
        model: model passed through to ``rank``.
        k: rank cutoff for the precision calculation; defaults to 4.

    Returns:
        The average per-query precision@k, or 0.0 when ``test_data``
        contains no queries.

    Raises:
        ValueError: if ``k`` is not positive.
    """
    from itertools import groupby

    if k <= 0:
        raise ValueError("k must be a positive integer")

    tot_prec = 0
    num_queries = 0

    for qid, query_judgments in groupby(test_data, key=lambda j: j.qid):
        # groupby yields a one-shot iterator; rank needs a real list.
        ranked = rank(list(query_judgments), model)

        tot_relevant = sum(1 for j in ranked[:k] if j.grade == 1)
        tot_prec += tot_relevant / float(k)
        num_queries += 1

    # An empty test set would otherwise divide by zero.
    if num_queries == 0:
        return 0.0

    return tot_prec / num_queries

# Average precision of the trained model over the held-out test queries
eval_model(test_data, model)

# Listing 10.20 - Solr model

This turns the model into one usable by Solr

In [None]:
import json

# Skeleton of the Solr LTR linear model; the features and their weights are
# filled in by the loop below.
linear_model = {
  "store": "movies",
  "class": "org.apache.solr.ltr.model.LinearModel",
  "name": "movie_model",
  "features": [
  ],
  "params": {
      "weights": {
      }
  }
}

import math
# Feature names in the same positional order as means, std_devs and the
# model's coefficient row.
ftr_names = ['title_bm25', 'overview_bm25', 'release_year']
for idx, ftr_name in enumerate(ftr_names):
    # Each feature is normalized inside Solr with the same mean / standard
    # deviation that were used when the training data was normalized.
    config = {
        "name": ftr_name,
        "norm": {
            "class": "org.apache.solr.ltr.norm.StandardNormalizer",
            "params": {
                "avg": str(means[idx]),
                "std": str(std_devs[idx])
            }
        }
    }
    linear_model['features'].append(config)
    # Convert the numpy scalar to a plain Python float so the payload does
    # not depend on how the JSON encoder treats numpy scalar types.
    linear_model['params']['weights'][ftr_name] = float(model.coef_[0][idx])

model_store_url = 'http://aips-solr:8983/solr/tmdb/schema/model-store'

print("PUT " + model_store_url)
print(json.dumps(linear_model, indent=2))

# Upload the model
requests.put(model_store_url, json=linear_model)

# Listing 10.21 - Solr query w/ model

In [None]:
# The {!ltr ...} clause is sent as the main query (q), so the uploaded model
# does the scoring for the keywords "harry potter".
ltr_query = "{!ltr reRankDocs=60000 model=movie_model efi.keywords=\"harry potter\"}"

request = {
    "fields": ["title", "id", "score"],
    "limit": 5,
    "params": {"q": ltr_query}
}

resp = requests.post('http://aips-solr:8983/solr/tmdb/select', json=request)

resp.json()["response"]["docs"]

# Listing 10.22 - Solr query w/ model and reranking

In [None]:
# Here the {!ltr ...} clause is sent as the rerank query (rq) on top of an
# edismax search for "harry potter" over the title and overview fields.
ltr_rerank_query = "{!ltr reRankDocs=500 model=movie_model efi.keywords=\"harry potter\"}"

request = {
    "fields": ["title", "id", "score"],
    "limit": 5,
    "params": {
        "rq": ltr_rerank_query,
        "qf": "title overview",
        "defType": "edismax",
        "q": "harry potter"
    }
}

resp = requests.post('http://aips-solr:8983/solr/tmdb/select', json=request)

resp.json()["response"]["docs"]