In [None]:
import requests
import json

from ltr.client import OpenSearchClient
# Project client wrapper; it is passed to `rebuild(...)` when the index is built.
client = OpenSearchClient()

# Connection details shared by every raw `requests` call in this notebook.
# The port is fixed here; the host name is taken from the client.
port = 9201
host = client.get_host()

In [None]:
# Fetch the TMDB corpus into data/ and rebuild the `tmdb` index from it.
from ltr import download
from ltr.index import rebuild
from ltr.helpers.movies import indexable_movies

corpus = 'http://es-learn-to-rank.labs.o19s.com/tmdb.json'
download([corpus], dest='data/')

# Turn the downloaded JSON into documents and (re)index them under `tmdb`.
movies = indexable_movies(movies='data/tmdb.json')
rebuild(client, index='tmdb', doc_src=movies)

## Init Default Feature Store
The feature store can be removed by sending a DELETE request to the `_ltr` endpoint.

In [None]:
# Base URL of the LTR plugin endpoint on the configured host/port.
url = 'http://{}:{}/_ltr/'.format(host, port)
print(url)

# Drop the existing default feature store; the Response is shown as cell output.
requests.delete(url)

To initialize the LTR plugin, issue a PUT request to the `_ltr` endpoint.

In [None]:
# Rebuild the `_ltr` base URL here instead of trusting the global `url`:
# later cells rebind `url` to other endpoints, so re-running this cell out of
# order would otherwise PUT to the wrong address.
url = f'http://{host}:{port}/_ltr/'
print(url)

# PUT on `_ltr` initialises the default feature store; the Response is shown
# as the cell output.
requests.put(url)

## Create Feature Set

A feature set can be created by issuing a PUT to `_ltr/_featureset/[featureset_name]`

In [None]:
# Feature set definition: two features, each a `match` query against one field
# (`title`, `overview`) with the `keywords` parameter substituted in through
# the {{keywords}} template placeholder.
feature_set = {
    "featureset": {
        "features": [
            {
                "name": "title_bm25",
                "params": ["keywords"],
                "template": {
                    "match": {
                        "title": "{{keywords}}"
                    }
                }
            },
            {
                "name": "overview_bm25",
                "params": ["keywords"],
                "template": {
                    "match": {
                        "overview": "{{keywords}}"
                    }
                }
            }
        ]
    },
    # NOTE(review): presumably the plugin validates the features by running
    # them against this index with these sample params; confirm in plugin docs.
    "validation": {
        "index": "tmdb",
        "params": {
            "keywords": "rambo"
        }
    }
}

# Build the endpoint from `host`/`port` rather than from the global `url`:
# later cells rebind `url`, so deriving from it made this cell produce a bogus
# address when re-run out of order. On a fresh top-to-bottom run the resulting
# URL is identical to the original.
feature_set_url = f'http://{host}:{port}/_ltr/_featureset/my_feature_set'
print(feature_set_url)
requests.put(feature_set_url, json=feature_set)


## Log Some Judged Queries To Build Training Set

If we have 4 judged documents (7555, 1370, 1369, and 1368) for the keywords `rambo`:

```
doc_id, relevant?, keywords
1368, 1, rambo
1369, 1, rambo
1370, 1, rambo
7555, 0, rambo
```


We need to get the feature values for each row.

To do this, we utilize the logging extension to populate the judgment list with features for training.

In [None]:
# Search body with two filters: an `sltr` query (named "logged_features") that
# references `my_feature_set`, and a `terms` filter restricting results to the
# four judged document ids. The `ext.ltr_log` section refers back to the named
# `sltr` query and labels its log "ltr_features".
search_with_log = {
    "query": {
        "bool": {
            "filter": [
                {
                    "sltr": {
                        "_name": "logged_features",
                        "featureset": "my_feature_set",
                        "params": {"keywords": "rambo"}
                    }
                },
                {
                    "terms": {
                        "_id": ["7555", "1370", "1369", "1368"]
                    }
                }
            ]
        }
    },
    "ext": {
        "ltr_log": {
            "log_specs": {
                "name": "ltr_features",
                "named_query": "logged_features"
            }
        }
    }
}

search_with_log_url = f'http://{host}:{port}/tmdb/_search'
print(search_with_log_url)

# Run the search and pretty-print only the first returned hit.
resp = requests.get(search_with_log_url, json=search_with_log).json()
print(json.dumps(resp['hits']['hits'][0], indent=2))


## Training Set Now...



```
doc_id, relevant?, keywords, title_bm25, overview_bm25
1368, 1, rambo, 0, 11.113943
1369, 1, rambo, 11.657, 10.08
1370, 1, rambo, 9.456, 13.265
7555, 0, rambo, 6.037, 11.114
```



# Train a model

We won't do this here, but if you like you can try out training a model using RankLib.

```
cd notebooks/elasticsearch/tmdb
java -jar data/RankyMcRankFace.jar -train data/title_judgments.txt -save data/model.txt

```

## Uploading a Model
Once features have been logged and training data has been generated, a model can be pushed into OpenSearch.  The following shows what a request to PUT a new model looks like.

In [None]:
# RankLib-format model text (its header names LambdaMART). This string is sent
# verbatim as the "definition" of `my_model` in the create-model request.
# NOTE(review): the header says "No. of trees = 10" but only one <tree> is
# present; presumably trimmed for the demo, confirm.
# NOTE(review): <feature> 1 / 2 presumably refer to the feature set's entries
# by 1-based position (title_bm25, overview_bm25); confirm.
model = """## LambdaMART
## No. of trees = 10
## No. of leaves = 10
## No. of threshold candidates = 256
## Learning rate = 0.1
## Stop early = 100

<ensemble>
	<tree id="1" weight="0.1">
		<split>
			<feature> 2 </feature>
			<threshold> 10.664251 </threshold>
			<split pos="left">
				<feature> 1 </feature>
				<threshold> 0.0 </threshold>
				<split pos="left">
					<output> -1.8305741548538208 </output>
				</split>
				<split pos="right">
					<feature> 2 </feature>
					<threshold> 9.502127 </threshold>
					<split pos="left">
						<feature> 1 </feature>
						<threshold> 7.0849166 </threshold>
						<split pos="left">
							<output> 0.23645669221878052 </output>
						</split>
						<split pos="right">
							<output> 1.7593677043914795 </output>
						</split>
					</split>
					<split pos="right">
						<output> 1.9719607830047607 </output>
					</split>
				</split>
			</split>
			<split pos="right">
				<feature> 2 </feature>
				<threshold> 0.0 </threshold>
				<split pos="left">
					<output> 1.3728954792022705 </output>
				</split>
				<split pos="right">
					<feature> 2 </feature>
					<threshold> 8.602512 </threshold>
					<split pos="left">
						<feature> 1 </feature>
						<threshold> 0.0 </threshold>
						<split pos="left">
							<feature> 2 </feature>
							<threshold> 13.815164 </threshold>
							<split pos="left">
								<output> 1.9401178359985352 </output>
							</split>
							<split pos="right">
								<output> 1.99532949924469 </output>
							</split>
						</split>
						<split pos="right">
							<feature> 1 </feature>
							<threshold> 11.085816 </threshold>
							<split pos="left">
								<output> 2.0 </output>
							</split>
							<split pos="right">
								<output> 1.99308180809021 </output>
							</split>
						</split>
					</split>
					<split pos="right">
						<output> 1.9870178699493408 </output>
					</split>
				</split>
			</split>
		</split>
	</tree>
</ensemble>
"""


# Request body for creating a model from a feature set: the model is named
# `my_model`, typed as a RankLib model, and carries the `model` text defined
# above as its definition.
create_model = {
    "model": {
        "name": "my_model",
        "model": {
            "type": "model/ranklib",
            "definition": model
        }
    }
}

# Use the configured `port` instead of a hard-coded 9201 so this cell follows
# the same host/port settings as the other requests, and give the endpoint its
# own name (like `feature_set_url` / `search_with_log_url`) so the shared
# `url` variable is not rebound here.
create_model_url = f'http://{host}:{port}/_ltr/_featureset/my_feature_set/_createmodel'
print(create_model_url)

# POST the model; the decoded JSON response is shown as the cell output.
requests.post(create_model_url, json=create_model).json()

## Searching with a Model
Now that a model has been uploaded to OpenSearch we can use it to re-rank the results of a query.

In [None]:
# Search body that scores documents with the uploaded `my_model`, passing
# "rambo" as the `keywords` parameter of the `sltr` query.
search = {
    "query": {
        "sltr": {
            "params": {
                "keywords": "rambo"
            },
            "model": "my_model"
        }
    }
}

# Use the configured `port` instead of a hard-coded 9201 so this request goes
# to the same server as the rest of the notebook. The name `url` is kept
# because the following cell prints it.
url = f'http://{host}:{port}/tmdb/_search'
resp = requests.get(url, json=search).json()

In [None]:
# Show the endpoint that was queried, then each hit's title in response order.
print(url)
for doc in resp['hits']['hits']:
    movie = doc['_source']
    print(movie['title'])
