# Sandbox
This notebook covers the main steps in deploying a model.  Experimentation with features is encouraged.  Can you increase the NDCG reported during model training?

## Setup Client

In [None]:
from ltr.client import ElasticClient
# Shared client handle; every later cell passes this object to the ltr helpers.
# NOTE(review): connection settings are whatever ElasticClient() defaults to — confirm in ltr.client.
client = ElasticClient()

## Step 1 - Create a Feature Set

In [None]:
'''
  TASK:
  Experiment with this featureset. The judgments are all title searches, so which features could be useful?
  
  Ideas:
    - Search other fields
    - Phrase matches
    - Fuzzy matches
'''

# Start from a clean slate on the 'tmdb' index before defining features.
# NOTE(review): presumably this clears previously stored LTR state — confirm in ltr.client.
client.reset_ltr(index='tmdb')

# One feature: a `match` query on the title field. The "keywords" param is
# referenced in the template through the {{keywords}} placeholder.
title_match_feature = {
    "name": "title_match",
    "params": ["keywords"],
    "template": {"match": {"title": "{{keywords}}"}},
}

# Add further feature dicts to this list to experiment with the feature set.
config = {"featureset": {"features": [title_match_feature]}}

# Register the feature set under the name 'sandbox'; later cells refer to it by that name.
client.create_featureset(index='tmdb', name='sandbox', ftr_config=config)

## Step 2 - Log Features for Training

In [None]:
from ltr.log import FeatureLogger
from ltr.judgments import judgments_open
from itertools import groupby

# Logger bound to the 'sandbox' feature set on the 'tmdb' index.
ftr_logger = FeatureLogger(client, index='tmdb', feature_set='sandbox')

with judgments_open('data/title_judgments.txt') as judgment_list:
    # groupby only merges *consecutive* items with the same key, so this
    # assumes the judgment file is already ordered by qid — TODO confirm.
    judgments_by_qid = groupby(judgment_list, key=lambda judgment: judgment.qid)
    for qid, query_judgments in judgments_by_qid:
        # Look up the keywords for this query id, then log its judgments.
        query_keywords = judgment_list.keywords(qid)
        ftr_logger.log_for_qid(
            judgments=query_judgments,
            qid=qid,
            keywords=query_keywords,
        )


## Step 3 - Train a Model

In [None]:
'''
  TASK:
  Experiment with the leafs and trees variables: how do they affect NDCG?
  Does a high leaf value increase your NDCG?  What could be the potential downsides?
'''
from ltr.ranklib import train
# Tunable model-size knobs passed straight through to train().
leafs = 20
trees = 20

# Train on the features logged in Step 2 and store the model as 'sandbox'.
trainResponse = train(
    client,
    index='tmdb',
    training_set=ftr_logger.logged,
    metric2t='NDCG@10',
    leafs=leafs,
    trees=trees,
    featureSet='sandbox',
    modelName='sandbox',
)

# Only the first training log is reported below.
trainLog = trainResponse.trainingLogs[0]

print()
print("Impact of each feature on the model")
for feature_id, feature_impact in trainLog.impacts.items():
    # Translate the feature id back to the name declared in `config`.
    feature_name = client.get_feature_name(config, feature_id)
    print("{} - {}".format(feature_name, feature_impact))

# One printed line per entry in the log's `rounds`.
for round_metric in trainLog.rounds:
    print(round_metric)

# The last entry in `rounds` is reported as the final training score.
final_round_metric = trainLog.rounds[-1]
print("Train NDCG@10 %s" % final_round_metric)

## Search

In [None]:
from ltr import search
# Run the query "rambo" using the model named 'sandbox' (the modelName passed
# to train() in Step 3). As the last expression in the cell, the call's
# return value is what the notebook displays.
search(client, "rambo", modelName='sandbox')