In [None]:
# Run ltr's download helper before anything else in the notebook.
# NOTE(review): presumably this fetches the data/ files read by later cells
# (data/blog.jsonl, data/osc_judgments.txt) — confirm against ltr.download.
from ltr import download
# The trailing semicolon keeps the notebook from echoing the call's return value.
download();

In [None]:
import json

# Load the blog corpus: one JSON document per line (JSON Lines).
# - The encoding is pinned to UTF-8 so decoding does not depend on the
#   platform's default locale encoding.
# - Whitespace-only lines (e.g. a stray trailing blank line) are skipped
#   instead of raising json.JSONDecodeError.
with open('data/blog.jsonl', encoding='utf-8') as f:
    articles = [json.loads(line) for line in f if line.strip()]

# Peek at one parsed article to sanity-check the fields that were loaded.
articles[-7]

In [None]:
from ltr.client import ElasticClient

# Single client handle shared by the rest of the notebook: the later cells pass
# it to rebuild, setup, judgments_to_training_set, train and search.
client = ElasticClient()

In [None]:
from ltr.index import rebuild

# Hand the loaded articles to ltr's rebuild for the 'blog' index (doc type 'post').
# NOTE(review): the name suggests an existing 'blog' index is dropped and
# recreated — confirm in ltr.index before running against shared data.
rebuild(
    client,
    index='blog',
    doc_type='post',
    doc_src=articles,
)

In [None]:
from ltr import setup


def _ltr_feature(name, template):
    """Return one feature entry: `template` registered under `name`, taking `keywords`."""
    return {
        "name": name,
        "params": ["keywords"],
        "template": template,
    }


def _as_binary_match(query):
    """Wrap `query` as a constant_score filter with boost 1.0 (match / no-match signal)."""
    return {
        "constant_score": {
            "filter": query,
            "boost": 1.0,
        }
    }


def _recency_step(since):
    """Return one function_score step: weight "100" for posts with post_date >= `since`."""
    return {
        "filter": {
            "range": {
                "post_date": {"gte": since},
            }
        },
        "weight": "100",
    }


# Recency feature: every step whose date filter matches contributes its weight
# and the weights are summed ("score_mode": "sum"). A post newer than 90 days
# satisfies all three filters, one between 90 and 180 days old satisfies two,
# and so on, so the value rises in steps as posts get more recent.
# "boost_mode": "replace" makes that sum the feature value instead of
# combining it with the match_all query score.
_post_date_template = {
    "function_score": {
        "query": {"match_all": {}},
        "boost_mode": "replace",
        "score_mode": "sum",
        "functions": [
            _recency_step(since)
            for since in ("now-180d", "now-360d", "now-90d")
        ],
    }
}

# Feature order is kept exactly as originally declared.
# NOTE(review): presumably this order fixes the feature ordinals in the logged
# training data — confirm before reordering.
_blog_features = [
    _ltr_feature(
        "title_term_match",
        _as_binary_match({"match": {"title": "{{keywords}}"}}),
    ),
    _ltr_feature(
        "content_bm25",
        {"match": {"content": {"query": "{{keywords}}"}}},
    ),
    _ltr_feature(
        "title_phrase_bm25",
        {"match_phrase": {"title": "{{ keywords }}"}},
    ),
    _ltr_feature(
        "title_phrase_match",
        _as_binary_match({"match_phrase": {"title": "{{keywords}}"}}),
    ),
    _ltr_feature("stepwise_post_date", _post_date_template),
    _ltr_feature(
        "category_phrase_bm25",
        {"match_phrase": {"categories": "{{ keywords }}"}},
    ),
    _ltr_feature(
        "excerpt_bm25",
        {"match": {"excerpt": "{{ keywords }}"}},
    ),
    _ltr_feature(
        "excerpt_phrase_bm25",
        {"match_phrase": {"excerpt": "{{ keywords }}"}},
    ),
]

# Full payload: the feature definitions plus a validation section naming the
# 'blog' index and a sample `keywords` value.
config = {
    "featureset": {
        "features": _blog_features,
    },
    "validation": {
        "index": "blog",
        "params": {"keywords": "rambo"},
    },
}

# Register the feature set as 'test'; the later logging and training cells
# refer to it by that name.
setup(client, config=config, index='blog', featureset='test')

In [None]:
from ltr.log import judgments_to_training_set

# Turn the judgment list into a training file using the 'test' feature set
# registered by setup() in the previous cell. The trainingOutFile path is the
# same file the training cell reads as trainingInFile.
trainingSet = judgments_to_training_set(
    client,
    index='blog',
    judgmentInFile='data/osc_judgments.txt',
    trainingOutFile='data/osc_judgments_train.txt',
    featureSet='test',
)

In [None]:
from ltr.train import train

# Train a model named 'test' from the training file written by the previous cell.
# NOTE(review): metric2t presumably names the metric optimised during training
# (NDCG at rank 10) — confirm against ltr.train.
trainLog = train(
    client,
    trainingInFile='data/osc_judgments_train.txt',
    metric2t='NDCG@10',
    featureSet='test',
    index='blog',
    modelName='test',
)

# Report the last entry of the training log's rounds.
print("Train NDCG@10 %s" % trainLog.rounds[-1])

In [None]:
from ltr import search

# Field mapping passed to search() via `fields`.
# NOTE(review): presumably 'title' names the document field used as the result
# heading and 'display_fields' the extra fields shown per hit — confirm in ltr.search.
blog_fields = {
    'title': 'title',
    'display_fields': [
        'url',
        'author',
        'categories',
        'post_date',
    ],
}

# Query the 'blog' index for "beer" using the trained 'test' model.
search(
    client,
    "beer",
    modelName='test',
    index='blog',
    fields=blog_fields,
)