diff --git a/docs/src/apiref.rst b/docs/src/apiref.rst index 29f91d9f47..a7d3b19ded 100644 --- a/docs/src/apiref.rst +++ b/docs/src/apiref.rst @@ -68,6 +68,11 @@ Modules: models/deprecated/keyedvectors models/deprecated/fasttext_wrapper models/base_any2vec + models/experimental/drmm_tks + models/experimental/custom_callbacks + models/experimental/custom_layers + models/experimental/custom_losses + models/experimental/evaluation_metrics similarities/docsim similarities/index sklearn_api/atmodel diff --git a/docs/src/models/experimental/custom_callbacks.rst b/docs/src/models/experimental/custom_callbacks.rst new file mode 100644 index 0000000000..4fdf371992 --- /dev/null +++ b/docs/src/models/experimental/custom_callbacks.rst @@ -0,0 +1,9 @@ +:mod:`models.experimental.custom_callbacks` -- Custom Callbacks for Similarity Learning +======================================================================================= + +.. automodule:: gensim.models.experimental.custom_callbacks + :synopsis: Custom Callbacks for Similarity Learning + :members: + :inherited-members: + :undoc-members: + :show-inheritance: diff --git a/docs/src/models/experimental/custom_layers.rst b/docs/src/models/experimental/custom_layers.rst new file mode 100644 index 0000000000..51cc70f63e --- /dev/null +++ b/docs/src/models/experimental/custom_layers.rst @@ -0,0 +1,9 @@ +:mod:`models.experimental.custom_layers` -- Custom Layers for Similarity Learning +================================================================================= + +.. automodule:: gensim.models.experimental.custom_layers + :synopsis: Custom Layers for Similarity Learning + :members: + :inherited-members: + :undoc-members: + :show-inheritance: diff --git a/docs/src/models/experimental/custom_losses.rst b/docs/src/models/experimental/custom_losses.rst new file mode 100644 index 0000000000..f59afcfaa5 --- /dev/null +++ b/docs/src/models/experimental/custom_losses.rst @@ -0,0 +1,9 @@ +:mod:`models.experimental.custom_losses` -- Loss for Similarity Learning +======================================================================== + +.. automodule:: gensim.models.experimental.custom_losses + :synopsis: Loss functions for Similarity Learning + :members: + :inherited-members: + :undoc-members: + :show-inheritance: diff --git a/docs/src/models/experimental/drmm_tks.rst b/docs/src/models/experimental/drmm_tks.rst new file mode 100644 index 0000000000..d569eac61c --- /dev/null +++ b/docs/src/models/experimental/drmm_tks.rst @@ -0,0 +1,9 @@ +:mod:`models.experimental.drmm_tks` -- Neural Nets for Similarity Learning +========================================================================== + +.. automodule:: gensim.models.experimental.drmm_tks + :synopsis: Neural Network Similarity Learning + :members: + :inherited-members: + :undoc-members: + :show-inheritance: diff --git a/docs/src/models/experimental/evaluation_metrics.rst b/docs/src/models/experimental/evaluation_metrics.rst new file mode 100644 index 0000000000..2d47acd9c7 --- /dev/null +++ b/docs/src/models/experimental/evaluation_metrics.rst @@ -0,0 +1,9 @@ +:mod:`models.experimental.evaluation_metrics` -- Evaluation Metrics for Similarity Learning +=========================================================================================== + +.. 
automodule:: gensim.models.experimental.evaluation_metrics + :synopsis: Evaluation Metrics for Similarity Learning + :members: + :inherited-members: + :undoc-members: + :show-inheritance: diff --git a/gensim/models/__init__.py b/gensim/models/__init__.py index 4114724027..5e48318a9e 100644 --- a/gensim/models/__init__.py +++ b/gensim/models/__init__.py @@ -24,6 +24,7 @@ from . import wrappers # noqa:F401 from . import deprecated # noqa:F401 +from . import experimental # noqa:F401 from gensim import interfaces, utils diff --git a/gensim/models/experimental/UI_Example.ipynb b/gensim/models/experimental/UI_Example.ipynb new file mode 100644 index 0000000000..25c9e0a658 --- /dev/null +++ b/gensim/models/experimental/UI_Example.ipynb @@ -0,0 +1,795 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Getting the dataset\n", + "A script has been provided to download all the datasets required for running the below examples.\n", + "It will dowload and unzip the WikiQA Corpus and the Quora Duplicate Questions dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!python experimental_data/get_data.py" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installing dependencies for running the Similarity Learning task" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n", + "2018-07-06 00:34:19,104 : INFO : 'pattern' package not found; tag filters are not available for English\n" + ] + } + ], + "source": [ + "import os\n", + "import csv\n", + "import re\n", + "from gensim.models.experimental import DRMM_TKS\n", + "from gensim.utils import simple_preprocess" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Format" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We have to provide data in a format which is understood by the model.\n", + "The model understands sentences as a list of words. \n", + "Further, we need to give a :\n", + " 1. Queries List\n", + " 2. Candidate Document List\n", + " 3. Correct Label List\n", + "\n", + "1 is a list of list of words\n", + "2 and 3 is actually a list of list of list of words/ints\n", + "\n", + "Example:\n", + "```\n", + "queries = [\"When was Abraham Lincoln born ?\".split(), \n", + " \"When was the first World War ?\".split()]\n", + "docs = [\n", + "\t\t [\"Abraham Lincoln was the president of the United States of America\".split(),\n", + "\t\t \"He was born in 1809\".split()],\n", + "\t\t [\"The first world war was bad\".split(),\n", + "\t\t \"It was fought in 1914\".split(),\n", + "\t\t \"There were over a million deaths\".split()]\n", + " ]\n", + "labels = [[0,\n", + " 1],\n", + "\t\t [0,\n", + " 1,\n", + " 0]\n", + " ]\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## About the dataset : WikiQA\n", + "\n", + "The WikiQA corpus is a set of question-answer pairs in which for every query there are several candidate documents of which none, one or more documents might be relevant.\n", + "Relevance is purely binary, i.e., 1: relavant, 0: not relevant\n", + "\n", + "Sample data:\n", + "\n", + "QuestionID | Question | DocumentID | DocumentTitle | SentenceID | Sentence | Label\n", + "-- | -- | -- | -- | -- | -- | --\n", + "Q1 | how are glacier caves formed? 
| D1 | Glacier cave | D1-0 | A partly submerged glacier cave on Perito Moreno Glacier . | 0\n", + "Q1 | how are glacier caves formed? | D1 | Glacier cave | D1-1 | The ice facade is approximately 60 m high | 0\n", + "Q1 | how are glacier caves formed? | D1 | Glacier cave | D1-2 | Ice formations in the Titlis glacier cave | 0\n", + "Q1 | how are glacier caves formed? | D1 | Glacier cave | D1-3 | A glacier cave is a cave formed within the ice of a glacier . | 1\n", + "Q1 | how are glacier caves formed? | D1 | Glacier cave | D1-4 | Glacier caves are often called ice caves , but this term is properly used to describe bedrock caves that contain year-round ice. | 0\n", + "Q2 | How are the directions of the velocity and force vectors related in a circular motion | D2 | Circular motion | D2-0 | In physics , circular motion is a movement of an object along the circumference of a circle or rotation along a circular path. | 0\n", + "Q2 | How are the directions of the velocity and force vectors related in a circular motion | D2 | Circular motion | D2-1 | It can be uniform, with constant angular rate of rotation (and constant speed), or non-uniform with a changing rate of rotation. | 0\n", + "Q2 | How are the directions of the velocity and force vectors related in a circular motion | D2 | Circular motion | D2-2 | The rotation around a fixed axis of a three-dimensional body involves circular motion of its parts. | 0\n", + "Q2 | How are the directions of the velocity and force vectors related in a circular motion | D2 | Circular motion | D2-3 | The equations of motion describe the movement of the center of mass of a body. | 0\n", + "Q2 | How are the directions of the velocity and force vectors related in a circular motion | D2 | Circular motion | D2-4 | Examples of circular motion include: an artificial satellite orbiting the Earth at constant height, a stone which is tied to a rope and is being swung in circles, a car turning through a curve in a race track , an electron moving perpendicular to a uniform magnetic field , and a gear turning inside a mechanism. | 0\n", + "Q2 | How are the directions of the velocity and force vectors related in a circular motion | D2 | Circular motion | D2-5 | Since the object's velocity vector is constantly changing direction, the moving object is undergoing acceleration by a centripetal force in the direction of the center of rotation. | 0\n", + "Q2 | How are the directions of the velocity and force vectors related in a circular motion | D2 | Circular motion | D2-6 | Without this acceleration, the object would move in a straight line, according to Newton's laws of motion . | 0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Preprocessing\n", + "We need to take the above text and make it into `queries, docs, labels` form. For this, we will create an iterable object with the below class which will allow the data to be streamed into the model as the need arises." 
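To make the target structure concrete before introducing the streaming class, here is a small, non-streaming sketch. The `group_wikiqa_rows` helper is hypothetical (not part of this PR); it groups already-parsed `(question_id, question, answer, label)` rows into the `queries, docs, labels` form shown earlier, dropping questions that have no relevant answer, which is also what the iterable class below does, only lazily.

```
from collections import OrderedDict
from gensim.utils import simple_preprocess

def group_wikiqa_rows(rows):
    """Group parsed (question_id, question, answer, label) rows into queries/docs/labels."""
    grouped = OrderedDict()
    for qid, question, answer, label in rows:
        entry = grouped.setdefault(qid, {"query": simple_preprocess(question), "docs": [], "labels": []})
        entry["docs"].append(simple_preprocess(answer))
        entry["labels"].append(int(label))

    queries, docs, labels = [], [], []
    for entry in grouped.values():
        if sum(entry["labels"]) > 0:  # keep only questions with at least one relevant answer
            queries.append(entry["query"])
            docs.append(entry["docs"])
            labels.append(entry["labels"])
    return queries, docs, labels
```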
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "class MyWikiIterable:\n", + " \"\"\"\"Yields the next data point in the data set based on the `iter_type`\n", + " \n", + " Based on `iter_type` the object can yield the following:\n", + " 'query' : list of str words\n", + " 'doc' : list of docs\n", + " where a doc is a list of str words\n", + " 'label' : list of int\n", + " The relevance between adjacent queries and docs\n", + " \"\"\"\n", + "\n", + " def __init__(self, iter_type, fpath):\n", + " \"\"\"\n", + " Parameters\n", + " ----------\n", + " iter_type : {'query', 'doc', 'label'}\n", + " the type of iterable to be yielded\n", + " fpath : str\n", + " path to the dataset\n", + " \"\"\"\n", + "\n", + " # To map the `iter_type` to an index\n", + " self.type_translator = {'query': 0, 'doc': 1, 'label': 2}\n", + " self.iter_type = iter_type\n", + "\n", + " with open(fpath, encoding='utf8') as tsv_file:\n", + " tsv_reader = csv.reader(tsv_file, delimiter='\\t', quoting=csv.QUOTE_NONE)\n", + " self.data_rows = []\n", + " self.data_rows = [row for row in tsv_reader]\n", + "\n", + " def preprocess_sent(self, sent):\n", + " \"\"\"Utility function to lower, strip and tokenize each sentence\n", + " Replace this function if you want to handle preprocessing differently\"\"\"\n", + "\n", + " return simple_preprocess(sent)\n", + "\n", + " def __iter__(self):\n", + " # Defining some consants for .tsv reading\n", + " # They represent the columns of the respective values\n", + " QUESTION_ID_INDEX = 0\n", + " QUESTION_INDEX = 1\n", + " ANSWER_INDEX = 5\n", + " LABEL_INDEX = 6\n", + "\n", + "\n", + " # The group of documents and labels that belong to one question\n", + " document_group = []\n", + " label_group = []\n", + "\n", + " # Number of relevant documents per query\n", + " n_relevant_docs = 0\n", + " # Number of filtered docs (query-doc pairs which have zero relevant docs)\n", + " n_filtered_docs = 0\n", + "\n", + " # The data\n", + " queries = []\n", + " docs = []\n", + " labels = []\n", + "\n", + " # The code below goes through the data line by line\n", + " # It checks the current document id with the next document id\n", + " for i, line in enumerate(self.data_rows[1:], start=1):\n", + " if i < len(self.data_rows) - 1: # check if out of bounds might occur\n", + " if self.data_rows[i][QUESTION_ID_INDEX] == self.data_rows[i + 1][QUESTION_ID_INDEX]:\n", + " document_group.append(self.preprocess_sent(self.data_rows[i][ANSWER_INDEX]))\n", + " label_group.append(int(self.data_rows[i][LABEL_INDEX]))\n", + " n_relevant_docs += int(self.data_rows[i][LABEL_INDEX])\n", + " else:\n", + " document_group.append(self.preprocess_sent(self.data_rows[i][ANSWER_INDEX]))\n", + " label_group.append(int(self.data_rows[i][LABEL_INDEX]))\n", + "\n", + " n_relevant_docs += int(self.data_rows[i][LABEL_INDEX])\n", + "\n", + " if n_relevant_docs > 0:\n", + " docs.append(document_group)\n", + " labels.append(label_group)\n", + " queries.append(self.preprocess_sent(self.data_rows[i][QUESTION_INDEX]))\n", + "\n", + " yield [queries[-1], document_group, label_group][self.type_translator[self.iter_type]]\n", + " else:\n", + " n_filtered_docs += 1\n", + "\n", + " n_relevant_docs = 0\n", + " document_group = []\n", + " label_group = []\n", + "\n", + " else:\n", + " # If we are on the last line\n", + " document_group.append(self.preprocess_sent(self.data_rows[i][ANSWER_INDEX]))\n", + " label_group.append(int(self.data_rows[i][LABEL_INDEX]))\n", + " n_relevant_docs += 
int(self.data_rows[i][LABEL_INDEX])\n", + "\n", + "                if n_relevant_docs > 0:\n", + "                    docs.append(document_group)\n", + "                    labels.append(label_group)\n", + "                    queries.append(self.preprocess_sent(self.data_rows[i][QUESTION_INDEX]))\n", + "                    yield [queries[-1], document_group, label_group][self.type_translator[self.iter_type]]\n", + "                else:\n", + "                    n_filtered_docs += 1\n", + "                n_relevant_docs = 0\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we will use this class to create the training data iterables" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "q_iterable = MyWikiIterable('query', os.path.join('experimental_data', 'WikiQACorpus', 'WikiQA-train.tsv'))\n", + "d_iterable = MyWikiIterable('doc', os.path.join('experimental_data', 'WikiQACorpus', 'WikiQA-train.tsv'))\n", + "l_iterable = MyWikiIterable('label', os.path.join('experimental_data', 'WikiQACorpus', 'WikiQA-train.tsv'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will also initialize the validation iterables.\n", + "Note: the path now points to the `dev` split of the corpus." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "q_val_iterable = MyWikiIterable('query', os.path.join('experimental_data', 'WikiQACorpus', 'WikiQA-dev.tsv'))\n", + "d_val_iterable = MyWikiIterable('doc', os.path.join('experimental_data', 'WikiQACorpus', 'WikiQA-dev.tsv'))\n", + "l_val_iterable = MyWikiIterable('label', os.path.join('experimental_data', 'WikiQACorpus', 'WikiQA-dev.tsv'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using word embeddings\n", + "We also need word embeddings for training. For this, we will use GloVe embeddings.\n", + "Luckily, [gensim-data](https://github.com/RaRe-Technologies/gensim-data) provides an easy interface for them.\n", + "\n", + "We will use the [KeyedVectors](https://radimrehurek.com/gensim/models/keyedvectors.html) object that we get from the gensim-data API and pass it as the `word_embedding` parameter of the model."
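If downloading and loading the 300-dimensional vectors is too slow for a first experiment, a smaller gensim-data model can be swapped in. The snippet below assumes the `glove-wiki-gigaword-50` dataset (the same one used in the `drmm_tks` module docstring) is available through gensim-data; switch back to the 300-dimensional vectors to reproduce the numbers shown later.

```
import gensim.downloader as api

# Smaller vectors load much faster; use "glove-wiki-gigaword-300" for the full run.
kv_model = api.load("glove-wiki-gigaword-50")
print(kv_model.vector_size)  # 50
```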
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-07-06 00:34:23,010 : INFO : loading projection weights from /home/aneeshj/gensim-data/glove-wiki-gigaword-300/glove-wiki-gigaword-300.gz\n", + "2018-07-06 00:36:07,145 : INFO : loaded (400000, 300) matrix from /home/aneeshj/gensim-data/glove-wiki-gigaword-300/glove-wiki-gigaword-300.gz\n" + ] + } + ], + "source": [ + "import gensim.downloader as api\n", + "kv_model = api.load(\"glove-wiki-gigaword-300\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training the Model\n", + "Now that we have the preprocessed extracted data and word embeddings, training the model just takes one line:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-07-06 00:36:07,151 : INFO : Starting Vocab Build\n", + "2018-07-06 00:36:08,602 : INFO : Vocab Build Complete\n", + "2018-07-06 00:36:08,603 : INFO : Vocab Size is 18814\n", + "2018-07-06 00:36:08,605 : INFO : Building embedding index using KeyedVector pretrained word embeddings\n", + "2018-07-06 00:36:08,605 : INFO : The embeddings_index built from the given file has 400000 words of 300 dimensions\n", + "2018-07-06 00:36:08,606 : INFO : Building the Embedding Matrix for the model's Embedding Layer\n", + "2018-07-06 00:36:08,836 : INFO : There are 642 words out of 18814 (3.41%) not in the embeddings. Setting them to random\n", + "2018-07-06 00:36:08,836 : INFO : Adding additional words from the embedding file to embedding matrix\n", + "2018-07-06 00:36:10,775 : INFO : Normalizing the word embeddings\n", + "2018-07-06 00:36:59,403 : INFO : Embedding Matrix build complete. 
It now has shape (400644, 300)\n", + "2018-07-06 00:37:06,320 : INFO : Pad word has been set to index 400642\n", + "2018-07-06 00:37:06,815 : INFO : Unknown word has been set to index 400643\n", + "2018-07-06 00:37:06,901 : INFO : Embedding index build complete\n", + "2018-07-06 00:37:22,881 : INFO : Input is an iterable amd will be streamed\n", + "2018-07-06 00:38:24,108 : INFO : __________________________________________________________________________________________________\n", + "2018-07-06 00:38:24,203 : INFO : Layer (type) Output Shape Param # Connected to \n", + "2018-07-06 00:38:24,236 : INFO : ==================================================================================================\n", + "2018-07-06 00:38:24,342 : INFO : query (InputLayer) (None, 200) 0 \n", + "2018-07-06 00:38:24,343 : INFO : __________________________________________________________________________________________________\n", + "2018-07-06 00:38:24,343 : INFO : doc (InputLayer) (None, 200) 0 \n", + "2018-07-06 00:38:24,344 : INFO : __________________________________________________________________________________________________\n", + "2018-07-06 00:38:24,345 : INFO : embedding_1 (Embedding) (None, 200, 300) 120193200 query[0][0] \n", + "2018-07-06 00:38:24,346 : INFO : doc[0][0] \n", + "2018-07-06 00:38:24,346 : INFO : __________________________________________________________________________________________________\n", + "2018-07-06 00:38:24,432 : INFO : dot_1 (Dot) (None, 200, 200) 0 embedding_1[0][0] \n", + "2018-07-06 00:38:24,455 : INFO : embedding_1[1][0] \n", + "2018-07-06 00:38:24,456 : INFO : __________________________________________________________________________________________________\n", + "2018-07-06 00:38:24,478 : INFO : top_k_layer_1 (TopKLayer) (None, 200, 20) 0 dot_1[0][0] \n", + "2018-07-06 00:38:24,479 : INFO : __________________________________________________________________________________________________\n", + "2018-07-06 00:38:24,496 : INFO : dense_2 (Dense) (None, 200, 100) 2100 top_k_layer_1[0][0] \n", + "2018-07-06 00:38:24,517 : INFO : __________________________________________________________________________________________________\n", + "2018-07-06 00:38:24,518 : INFO : dense_3 (Dense) (None, 200, 1) 101 dense_2[0][0] \n", + "2018-07-06 00:38:24,519 : INFO : __________________________________________________________________________________________________\n", + "2018-07-06 00:38:24,553 : INFO : dropout_1 (Dropout) (None, 200, 1) 0 dense_3[0][0] \n", + "2018-07-06 00:38:24,554 : INFO : __________________________________________________________________________________________________\n", + "2018-07-06 00:38:24,554 : INFO : dense_1 (Dense) (None, 200, 1) 301 embedding_1[0][0] \n", + "2018-07-06 00:38:24,555 : INFO : __________________________________________________________________________________________________\n", + "2018-07-06 00:38:24,555 : INFO : reshape_2 (Reshape) (None, 200) 0 dropout_1[0][0] \n", + "2018-07-06 00:38:24,556 : INFO : __________________________________________________________________________________________________\n", + "2018-07-06 00:38:24,557 : INFO : reshape_1 (Reshape) (None, 200) 0 dense_1[0][0] \n", + "2018-07-06 00:38:24,557 : INFO : __________________________________________________________________________________________________\n", + "2018-07-06 00:38:24,558 : INFO : dot_2 (Dot) (None, 1) 0 reshape_2[0][0] \n", + "2018-07-06 00:38:24,575 : INFO : reshape_1[0][0] \n", + "2018-07-06 00:38:24,576 : INFO : 
__________________________________________________________________________________________________\n", + "2018-07-06 00:38:24,577 : INFO : reshape_3 (Reshape) (None, 1) 0 dot_2[0][0] \n", + "2018-07-06 00:38:24,577 : INFO : ==================================================================================================\n", + "2018-07-06 00:38:24,588 : INFO : Total params: 120,195,702\n", + "2018-07-06 00:38:24,588 : INFO : Trainable params: 2,502\n", + "2018-07-06 00:38:24,589 : INFO : Non-trainable params: 120,193,200\n", + "2018-07-06 00:38:24,590 : INFO : __________________________________________________________________________________________________\n", + "2018-07-06 00:38:33,655 : INFO : Found 14 unknown words. Set them to unknown word index : 400643\n", + "2018-07-06 00:38:53,729 : INFO : Found 90 unknown words. Set them to unknown word index : 400643\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/3\n", + "900/900 [==============================] - 85s 95ms/step - loss: 1.0646 - acc: 0.0197\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-07-06 00:41:17,729 : INFO : MAP: 0.55\n", + "2018-07-06 00:41:17,735 : INFO : nDCG@1 : 0.38\n", + "2018-07-06 00:41:17,740 : INFO : nDCG@3 : 0.54\n", + "2018-07-06 00:41:17,746 : INFO : nDCG@5 : 0.60\n", + "2018-07-06 00:41:17,751 : INFO : nDCG@10 : 0.66\n", + "2018-07-06 00:41:17,756 : INFO : nDCG@20 : 0.67\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 2/3\n", + "900/900 [==============================] - 84s 94ms/step - loss: 0.9310 - acc: 0.0321\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-07-06 00:42:46,586 : INFO : MAP: 0.61\n", + "2018-07-06 00:42:46,592 : INFO : nDCG@1 : 0.46\n", + "2018-07-06 00:42:46,597 : INFO : nDCG@3 : 0.61\n", + "2018-07-06 00:42:46,604 : INFO : nDCG@5 : 0.67\n", + "2018-07-06 00:42:46,616 : INFO : nDCG@10 : 0.71\n", + "2018-07-06 00:42:46,621 : INFO : nDCG@20 : 0.72\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 3/3\n", + "900/900 [==============================] - 85s 94ms/step - loss: 0.8035 - acc: 0.1486\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-07-06 00:44:15,788 : INFO : MAP: 0.62\n", + "2018-07-06 00:44:15,793 : INFO : nDCG@1 : 0.46\n", + "2018-07-06 00:44:15,800 : INFO : nDCG@3 : 0.60\n", + "2018-07-06 00:44:15,809 : INFO : nDCG@5 : 0.67\n", + "2018-07-06 00:44:15,815 : INFO : nDCG@10 : 0.71\n", + "2018-07-06 00:44:15,821 : INFO : nDCG@20 : 0.72\n" + ] + } + ], + "source": [ + "# Train the model\n", + "drmm_tks_model = DRMM_TKS(\n", + " queries=q_iterable, docs=d_iterable, labels=l_iterable, word_embedding=kv_model, epochs=3,\n", + " validation_data=[q_val_iterable, d_val_iterable, l_val_iterable], topk=20\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing the model on new data\n", + "\n", + "The testing of the data can be done on completely unseen data using `model.predict(queries, docs)` where\n", + "queries: list of list of words\n", + "docs: list of list of list of words" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "queries = [simple_preprocess(\"how are glacier caves formed\"),\n", + " simple_preprocess(\"What is AWS\")]\n", + "\n", + "docs = [[simple_preprocess(\"A partly submerged glacier cave on Perito Moreno Glacier\"),\n", + " simple_preprocess(\"A glacier cave is 
a cave formed within the ice of a glacier\")],\n", + " [simple_preprocess(\"AWS stands for Amazon Web Services\"),\n", + " simple_preprocess(\"AWS was established in 2001\"),\n", + " simple_preprocess(\"It is a cloud service\")]]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The predict function returns the similarity between a query-document pair in a list format\n", + "\n", + "For example\n", + "```\n", + "queries = [q1, q2]\n", + "docs = [[d1_1, d1_2],\n", + " [d2_1, d2_2, d2_3]]\n", + "\n", + "model.predict(queries, docs)\n", + "\n", + "Output\n", + "------\n", + "q1-d1_1 similarity\n", + "q1-d1_2 similarity\n", + "q2-d2_1 similarity\n", + "q2-d2_2 similarity\n", + "q2-d2_3 similarity\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-07-06 00:46:33,249 : INFO : Found 0 unknown words. Set them to unknown word index : 400643\n", + "2018-07-06 00:46:33,283 : INFO : Found 0 unknown words. Set them to unknown word index : 400643\n", + "2018-07-06 00:46:33,778 : INFO : Predictions in the format query, doc, similarity\n", + "2018-07-06 00:46:33,800 : INFO : ['how', 'are', 'glacier', 'caves', 'formed']\t['partly', 'submerged', 'glacier', 'cave', 'on', 'perito', 'moreno', 'glacier']\t0.75623834\n", + "2018-07-06 00:46:33,801 : INFO : ['how', 'are', 'glacier', 'caves', 'formed']\t['glacier', 'cave', 'is', 'cave', 'formed', 'within', 'the', 'ice', 'of', 'glacier']\t0.88229656\n", + "2018-07-06 00:46:33,802 : INFO : ['what', 'is', 'aws']\t['aws', 'stands', 'for', 'amazon', 'web', 'services']\t0.5922452\n", + "2018-07-06 00:46:33,802 : INFO : ['what', 'is', 'aws']\t['aws', 'was', 'established', 'in']\t0.581025\n", + "2018-07-06 00:46:33,803 : INFO : ['what', 'is', 'aws']\t['it', 'is', 'cloud', 'service']\t0.65737\n" + ] + }, + { + "data": { + "text/plain": [ + "array([[0.75623834],\n", + " [0.88229656],\n", + " [0.5922452 ],\n", + " [0.581025 ],\n", + " [0.65737 ]], dtype=float32)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "drmm_tks_model.predict(queries, docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As can be seen from the logs and results above, within each query-document group, the correct answer has the highest score\n", + "\n", + "For example,\n", + "In the first group\n", + "```\n", + "['how', 'are', 'glacier', 'caves', 'formed'] ['partly', 'submerged', 'glacier', 'cave', 'on', 'perito', 'moreno', 'glacier']\t0.7\n", + "['how', 'are', 'glacier', 'caves', 'formed'] ['glacier', 'cave', 'is', 'cave', 'formed', 'within', 'the', 'ice', 'of', 'glacier']\t0.8\n", + "```\n", + "\n", + "The correct answer, \"glacier cave is cave ...\" has the higher score as compared to the first answer\n", + "The same can be seen for the second part" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing on a test set\n", + "We can pass a whole dataset and get evaluations based on that. 
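The MAP and nDCG numbers reported by the `evaluate` call used below can also be reproduced by hand from `predict` output: regroup the flat scores per query and pass the groups to the `mapk` and `mean_ndcg` helpers exported by `gensim.models.experimental` (this mirrors the bookkeeping done by the validation callback). A rough sketch, reusing `queries` and `docs` from the prediction example above with illustrative relevance labels:

```
import numpy as np
from gensim.models.experimental import mapk, mean_ndcg

labels = [[0, 1], [1, 0, 0]]  # illustrative: one relevant document per query group

scores = drmm_tks_model.predict(queries, docs)  # one score per query-document pair

Y_true, Y_pred, offset = [], [], 0
for doc_group, label_group in zip(docs, labels):
    Y_pred.append(list(np.ravel(scores[offset: offset + len(doc_group)])))
    Y_true.append(label_group)
    offset += len(doc_group)

print("MAP: %.2f" % mapk(Y_true, Y_pred))
print("nDCG@3: %.2f" % mean_ndcg(Y_true, Y_pred, k=3))
```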
Let's try with the test set of WikiQA Corpus" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "q_test_iterable = MyWikiIterable('query', os.path.join( 'experimental_data', 'WikiQACorpus', 'WikiQA-test.tsv'))\n", + "d_test_iterable = MyWikiIterable('doc', os.path.join('experimental_data', 'WikiQACorpus', 'WikiQA-test.tsv'))\n", + "l_test_iterable = MyWikiIterable('label', os.path.join('experimental_data', 'WikiQACorpus', 'WikiQA-test.tsv'))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-07-06 00:48:00,129 : INFO : Found 21 unknown words. Set them to unknown word index : 400643\n", + "2018-07-06 00:48:00,202 : INFO : Found 253 unknown words. Set them to unknown word index : 400643\n", + "2018-07-06 00:48:09,461 : INFO : MAP: 0.60\n", + "2018-07-06 00:48:09,523 : INFO : nDCG@1 : 0.47\n", + "2018-07-06 00:48:09,541 : INFO : nDCG@3 : 0.60\n", + "2018-07-06 00:48:09,567 : INFO : nDCG@5 : 0.66\n", + "2018-07-06 00:48:09,591 : INFO : nDCG@10 : 0.70\n", + "2018-07-06 00:48:09,607 : INFO : nDCG@20 : 0.71\n" + ] + } + ], + "source": [ + "drmm_tks_model.evaluate(q_test_iterable, d_test_iterable, l_test_iterable)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Comparing DRMM TKS with other models\n", + "\n", + "It would be good to get an idea of how our model works against some unsupervised models like word2vec and FastText.\n", + "For this, we will, given a query-document pair, we will get a vector for the query and document. We can get the similarity between them using the cosine similarity between their vectors.\n", + "\n", + "### For word2vec\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "def cosine_similarity(vec1, vec2):\n", + " return np.dot(vec1, vec2)/(np.linalg.norm(vec1)* np.linalg.norm(vec2))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from gensim.models.experimental import mapk, mean_ndcg\n", + "\n", + "def eval_model(queries, docs, labels, model):\n", + " long_doc_list = []\n", + " long_label_list = []\n", + " long_query_list = []\n", + " doc_lens = []\n", + "\n", + " def sent2vec(sentence):\n", + " vec = np.zeros((model.vector_size))\n", + " for word in sentence:\n", + " if word in model:\n", + " vec += model[word]\n", + " return vec/len(sentence)\n", + " \n", + " for query, doc, label in zip(queries, docs, labels):\n", + " i = 0\n", + " for d, l in zip(doc, label):\n", + " if len(d) == 0 or len(query) == 0:\n", + " print(\"skipping query-doc pair due to no words in vocab\")\n", + " continue\n", + " long_query_list.append(sent2vec(query))\n", + " long_doc_list.append(sent2vec(d))\n", + " long_label_list.append(l)\n", + " i += 1\n", + " doc_lens.append(len(doc))\n", + "\n", + " doc_lens = np.array(doc_lens)\n", + "\n", + " predictions = []\n", + " for q, d in zip(long_query_list, long_doc_list):\n", + " predictions.append(cosine_similarity(q, d))\n", + "\n", + " Y_pred = []\n", + " Y_true = []\n", + " offset = 0\n", + "\n", + " for doc_size in doc_lens:\n", + " Y_pred.append(predictions[offset: offset + doc_size])\n", + " Y_true.append(long_label_list[offset: offset + doc_size])\n", + " offset += doc_size\n", + " \n", + " print(\"MAP: %.2f\"% mapk(Y_true, Y_pred))\n", + " for k in [1, 3, 5, 10, 20]:\n", + " 
print(\"nDCG@%d : %.2f \" % (k, mean_ndcg(Y_true, Y_pred, k=k)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "skipping query-doc pair due to no words in vocab\n", + "skipping query-doc pair due to no words in vocab\n", + "MAP: 0.58\n", + "nDCG@1 : 0.43 \n", + "nDCG@3 : 0.60 \n", + "nDCG@5 : 0.66 \n", + "nDCG@10 : 0.70 \n", + "nDCG@20 : 0.71 \n" + ] + } + ], + "source": [ + "eval_model(q_test_iterable, d_test_iterable, l_test_iterable, kv_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's compare that with our model" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-07-06 00:49:11,315 : INFO : Found 21 unknown words. Set them to unknown word index : 400643\n", + "2018-07-06 00:49:11,379 : INFO : Found 253 unknown words. Set them to unknown word index : 400643\n", + "2018-07-06 00:49:21,218 : INFO : MAP: 0.60\n", + "2018-07-06 00:49:21,229 : INFO : nDCG@1 : 0.47\n", + "2018-07-06 00:49:21,246 : INFO : nDCG@3 : 0.60\n", + "2018-07-06 00:49:21,263 : INFO : nDCG@5 : 0.66\n", + "2018-07-06 00:49:21,274 : INFO : nDCG@10 : 0.70\n", + "2018-07-06 00:49:21,286 : INFO : nDCG@20 : 0.71\n" + ] + } + ], + "source": [ + "drmm_tks_model.evaluate(q_test_iterable, d_test_iterable, l_test_iterable)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "While the accuracy isn't any better, it is worse, this is still a Work In Progress and we hope to improve it further soon." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Saving and loading the model\n", + "The trained model can be saved and loaded from memory for future use." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-07-06 00:44:16,527 : INFO : saving DRMM_TKS object under drmm_tks_model, separately None\n", + "2018-07-06 00:44:16,529 : INFO : storing np array 'vectors' to drmm_tks_model.word_embedding.vectors.npy\n", + "2018-07-06 00:45:09,654 : INFO : storing np array 'embedding_matrix' to drmm_tks_model.embedding_matrix.npy\n", + "2018-07-06 00:45:18,682 : INFO : not storing attribute model\n", + "2018-07-06 00:45:18,684 : INFO : not storing attribute _get_pair_list\n", + "2018-07-06 00:45:18,685 : INFO : not storing attribute _get_full_batch_iter\n", + "2018-07-06 00:45:18,687 : INFO : not storing attribute queries\n", + "2018-07-06 00:45:18,688 : INFO : not storing attribute docs\n", + "2018-07-06 00:45:18,690 : INFO : not storing attribute labels\n", + "2018-07-06 00:45:18,691 : INFO : not storing attribute pair_list\n", + "2018-07-06 00:45:36,062 : INFO : saved drmm_tks_model\n" + ] + } + ], + "source": [ + "drmm_tks_model.save('drmm_tks_model')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "del drmm_tks_model\n", + "drmm_tks_model = DRMM_TKS.load('drmm_tks_model')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/gensim/models/experimental/__init__.py b/gensim/models/experimental/__init__.py new file mode 100644 index 0000000000..d9459d307a --- /dev/null +++ b/gensim/models/experimental/__init__.py @@ -0,0 +1,7 @@ +"""This package will host some experimental modules for Similarity Learning""" + +from .drmm_tks import DRMM_TKS # noqa:F401 +from .custom_losses import rank_hinge_loss # noqa:F401 +from .custom_layers import TopKLayer # noqa:F401 +from .custom_callbacks import ValidationCallback # noqa:F401 +from .evaluation_metrics import mean_ndcg, mapk # noqa:F401 diff --git a/gensim/models/experimental/custom_callbacks.py b/gensim/models/experimental/custom_callbacks.py new file mode 100644 index 0000000000..737fa21f02 --- /dev/null +++ b/gensim/models/experimental/custom_callbacks.py @@ -0,0 +1,72 @@ +import logging +try: + from keras.callbacks import Callback + KERAS_AVAILABLE = True +except ImportError: + KERAS_AVAILABLE = False + +logger = logging.getLogger(__name__) +logging.basicConfig( + format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) + + +class ValidationCallback(Callback): + """Callback for providing validation metrics on the model trained so far""" + def __init__(self, test_data): + """ + Parameters + ---------- + test_data : dict + A dictionary which holds the validation data. It consists of the following keys: + - "X1" : numpy array + The queries as a numpy array of shape (n_samples, text_maxlen) + - "X2" : numpy array + The candidate docs as a numpy array of shape (n_samples, text_maxlen) + - "y" : list of int + It is the labels for each of the query-doc pairs as a 1 or 0 with shape (n_samples,) + where 1 : doc is relevant to query, 0 : doc is not relevant to query + - "doc_lengths" : list of int + It contains the length of each document group. 
I.e., the number of candidate documents + that belong to each query. It is needed for calculating the metrics. + + """ + + if not KERAS_AVAILABLE: + raise ImportError("Please install Keras to use this class") + + # Check that test_data is a dictionary with all the right keys + try: + # If an empty dict is passed + if len(test_data.keys()) == 0: + raise ValueError( + "test_data dictionary is empty. It doesn't have the keys: 'X1', 'X2', 'y', 'doc_lengths'" + ) + for key in test_data.keys(): + if key not in ['X1', 'X2', 'y', 'doc_lengths']: + raise ValueError("test_data dictionary doesn't have the keys: 'X1', 'X2', 'y', 'doc_lengths'") + except AttributeError: + raise ValueError("test_data must be a dictionary with the keys: 'X1', 'X2', 'y', 'doc_lengths'") + self.test_data = test_data + + def on_epoch_end(self, epoch, logs={}): + # Import has to be here to prevent a cyclic import + from gensim.models.experimental.evaluation_metrics import mapk, mean_ndcg + X1 = self.test_data["X1"] + X2 = self.test_data["X2"] + y = self.test_data["y"] + doc_lengths = self.test_data["doc_lengths"] + + predictions = self.model.predict(x={"query": X1, "doc": X2}) + + Y_pred = [] + Y_true = [] + offset = 0 + + for doc_size in doc_lengths: + Y_pred.append(predictions[offset: offset + doc_size]) + Y_true.append(y[offset: offset + doc_size]) + offset += doc_size + + logger.info("MAP: %.2f", mapk(Y_true, Y_pred)) + for k in [1, 3, 5, 10, 20]: + logger.info("nDCG@%d : %.2f", k, mean_ndcg(Y_true, Y_pred, k=k)) diff --git a/gensim/models/experimental/custom_layers.py b/gensim/models/experimental/custom_layers.py new file mode 100644 index 0000000000..d1959e4ba2 --- /dev/null +++ b/gensim/models/experimental/custom_layers.py @@ -0,0 +1,42 @@ +try: + from keras.engine.topology import Layer + import keras.backend as K + KERAS_AVAILABLE = True +except ImportError: + KERAS_AVAILABLE = False + +"""Script where all the custom keras layers are kept.""" + + +class TopKLayer(Layer): + """Layer to get top k values from the interaction matrix in drmm_tks model""" + def __init__(self, output_dim, topk, **kwargs): + """ + + Parameters + ---------- + output_dim : tuple of int + The dimension of the tensor after going through this layer. + topk : int + The k topmost values to be returned. 
+ """ + self.output_dim = output_dim + self.topk = topk + super(TopKLayer, self).__init__(**kwargs) + + def build(self, input_shape): + super(TopKLayer, self).build(input_shape) + + def call(self, x): + return K.tf.nn.top_k(x, k=self.topk, sorted=True)[0] + + def compute_output_shape(self, input_shape): + return (input_shape[0], self.output_dim[0], self.topk) + + def get_config(self): + config = { + 'topk': self.topk, + 'output_dim': self.output_dim + } + base_config = super(TopKLayer, self).get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/gensim/models/experimental/custom_losses.py b/gensim/models/experimental/custom_losses.py new file mode 100644 index 0000000000..1ad037b62c --- /dev/null +++ b/gensim/models/experimental/custom_losses.py @@ -0,0 +1,29 @@ +try: + from keras import backend as K + from keras.layers import Lambda + KERAS_AVAILABLE = True +except ImportError: + KERAS_AVAILABLE = False + +"""Script where all the custom loss functions will be defined""" + + +def rank_hinge_loss(y_true, y_pred): + """Loss function for Ranking Similarity Learning tasks + More details here : https://en.wikipedia.org/wiki/Hinge_loss + + Parameters + ---------- + y_true : list of list of int + The true relation between a query and a doc + It can be either 1 : relevant or 0 : not relevant + y_pred : list of list of float + The predicted relation between a query and a doc + """ + if not KERAS_AVAILABLE: + raise ImportError("Please install Keras to use this function") + margin = 1 + y_pos = Lambda(lambda a: a[::2, :], output_shape=(1,))(y_pred) + y_neg = Lambda(lambda a: a[1::2, :], output_shape=(1,))(y_pred) + loss = K.maximum(0., margin + y_neg - y_pos) + return K.mean(loss) diff --git a/gensim/models/experimental/drmm_tks.py b/gensim/models/experimental/drmm_tks.py new file mode 100644 index 0000000000..894dcaa319 --- /dev/null +++ b/gensim/models/experimental/drmm_tks.py @@ -0,0 +1,843 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Author: Aneesh Joshi +# Copyright (C) 2018 RaRe Technologies s.r.o. +# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html + +"""This module makes a trainable and usable model for getting similarity between documents using the DRMM_TKS model. + +Once the model is trained with the query-candidate-relevance data, the model can provide a vector for each new +document which is entered into it. The similarity between any 2 documents can then be measured using the +cosine similarty between the vectors. + +Abbreviations +============= +- DRMM : Deep Relevance Matching Model +- TKS : Top K Solutions + +About DRMM_TKS +============== +This is a variant version of DRMM, which applied topk pooling in the matching matrix. +It has the following steps: + +1. embed queries and docs into embedding vector named `q_embed` and `d_embed` respectively. +2. computing `q_embed` and `d_embed` with element-wise multiplication. +3. computing output of upper layer with dense layer operation. +4. take softmax operation on the output of this layer named `g` and find the k largest entries named `mm_k`. +5. input `mm_k` into hidden layers, with specified length of layers and activation function. +6. compute `g` and `mm_k` with element-wise multiplication. + +On predicting, the model returns the score list between queries and documents. 
+ +The trained model needs to be trained on data in the format: + +>>> from gensim.models.experimental import DRMM_TKS +>>> import gensim.downloader as api +>>> queries = ["When was World War 1 fought ?".lower().split(), "When was Gandhi born ?".lower().split()] +>>> docs = [["The world war was bad".lower().split(), "It was fought in 1996".lower().split()], ["Gandhi was born in" +... "the 18th century".lower().split(), "He fought for the Indian freedom movement".lower().split(), +... "Gandhi was assasinated".lower().split()]] +>>> labels = [[0, 1], [1, 0, 0]] +>>> word_embeddings_kv = api.load('glove-wiki-gigaword-50') +>>> model = DRMM_TKS(queries, docs, labels, word_embedding=word_embeddings_kv, verbose=0) + +Persist a model to disk with : + +>>> from gensim.test.utils import get_tmpfile +>>> file_path = get_tmpfile('DRMM_TKS.model') +>>> model.save(file_path) +>>> model = DRMM_TKS.load(file_path) + +You can also create the modela and train it later : + +>>> model = DRMM_TKS() +>>> model.train(queries, docs, labels, word_embeddings_kv, epochs=12, verbose=0) + +Testing on new data : + +>>> from gensim.test.utils import datapath +>>> model = DRMM_TKS.load(datapath('drmm_tks')) +>>> +>>> queries = ["how are glacier caves formed ?".lower().split()] +>>> docs = [["A partly submerged glacier cave on Perito Moreno Glacier".lower().split(), "glacier cave is cave formed" +... " within the ice of glacier".lower().split()]] +>>> print(model.predict(queries, docs)) +[[0.9915068 ] + [0.99228466]] +>>> print(model.predict([["hello", "world"]], [[["i", "am", "happy"], ["good", "morning"]]])) +[[0.9975487] + [0.999115 ]] + + +More information can be found in: +`Jiafeng Guo, Yixing Fan, Qingyao Ai, W. Bruce Croft "A Deep Relevance Matching Model for Ad-hoc Retrieval" +`_ +`MatchZoo Repository `_ +`Similarity Learning Wikipedia Page `_ + +""" + +import logging +import numpy as np +import hashlib +from numpy import random as np_random +from gensim.models import KeyedVectors +from collections import Counter +from gensim.models.experimental.custom_losses import rank_hinge_loss +from gensim.models.experimental.custom_layers import TopKLayer +from gensim.models.experimental.custom_callbacks import ValidationCallback +from gensim.models.experimental.evaluation_metrics import mapk, mean_ndcg +from sklearn.preprocessing import normalize +from gensim import utils +from collections import Iterable + +try: + import keras.backend as K + from keras import optimizers + from keras.models import load_model + from keras.losses import hinge + from keras.models import Model + from keras.layers import Input, Embedding, Dot, Dense, Reshape, Dropout + KERAS_AVAILABLE = True +except ImportError: + KERAS_AVAILABLE = False + +logger = logging.getLogger(__name__) + + +def _get_full_batch_iter(pair_list, batch_size): + """Provides all the data points int the format: X1, X2, y with + alternate positive and negative examples of `batch_size` in a streamable format. + + Parameters + ---------- + pair_list : iterable list of tuple + See docstring for _get_pair_list for more details + batch_size : int + half the size in which the generator will yield datapoints. The size is doubled since + we include positive and negative examples. 
+ + Yields + ------- + X1 : numpy array of shape (batch_size * 2, text_maxlen) + the queries + X2 : numpy array of shape (batch_size * 2, text_maxlen) + the docs + y : numpy array with {0, 1} of shape (batch_size * 2, 1) + The relation between X1[i] and X2[j] + 1 : X2[i] is relevant to X1[i] + 0 : X2[i] is not relevant to X1[i] + """ + + X1, X2, y = [], [], [] + while True: + for i, (query, pos_doc, neg_doc) in enumerate(pair_list): + X1.append(query) + X2.append(pos_doc) + y.append(1) + X1.append(query) + X2.append(neg_doc) + y.append(0) + if i % batch_size == 0 and i != 0: + yield ({'query': np.array(X1), 'doc': np.array(X2)}, np.array(y)) + X1, X2, y = [], [], [] + + +def _get_pair_list(queries, docs, labels, _make_indexed, is_iterable): + """Yields a tuple with query document pairs in the format + (query, positive_doc, negative_doc) + [(q1, d+, d-), (q2, d+, d-), (q3, d+, d-), ..., (qn, d+, d-)] + where each query or document is a list of int + + Parameters + ---------- + queries : iterable list of list of str + The queries to the model + docs : iterable list of list of list of str + The candidate documents for each query + labels : iterable list of int + The relevance of the document to the query. 1 = relevant, 0 = not relevant + _make_indexed : function + Translates the given sentence as a list of list of str into a list of list of int + based on the model's internal dictionary + is_iterable : bool + Whether the input data is streamable + + Example + ------- + [(['When', 'was', 'Abraham', 'Lincoln', 'born', '?'], + ['He', 'was', 'born', 'in', '1809'], + ['Abraham', 'Lincoln', 'was', 'the', 'president', + 'of', 'the', 'United', 'States', 'of', 'America']), + + (['When', 'was', 'the', 'first', 'World', 'War', '?'], + ['It', 'was', 'fought', 'in', '1914'], + ['There', 'were', 'over', 'a', 'million', 'deaths']), + + (['When', 'was', 'the', 'first', 'World', 'War', '?'], + ['It', 'was', 'fought', 'in', '1914'], + ['The', 'first', 'world', 'war', 'was', 'bad']) + ] + + """ + if is_iterable: + while True: + j=0 + for q, doc, label in zip(queries, docs, labels): + doc, label = (list(t) for t in zip(*sorted(zip(doc, label), reverse=True))) + for item in zip(doc, label): + if item[1] == 1: + for new_item in zip(doc, label): + if new_item[1] == 0: + j+=1 + yield(_make_indexed(q), _make_indexed(item[0]), _make_indexed(new_item[0])) + else: + for q, doc, label in zip(queries, docs, labels): + doc, label = (list(t) for t in zip(*sorted(zip(doc, label), reverse=True))) + for item in zip(doc, label): + if item[1] == 1: + for new_item in zip(doc, label): + if new_item[1] == 0: + yield(_make_indexed(q), _make_indexed(item[0]), _make_indexed(new_item[0])) + + +class DRMM_TKS(utils.SaveLoad): + """Model for training a Similarity Learning Model using the DRMM TKS model. + You only have to provide sentences in the data as a list of words. + """ + + def __init__(self, queries=None, docs=None, labels=None, word_embedding=None, + text_maxlen=200, normalize_embeddings=True, epochs=10, unk_handle_method='random', + validation_data=None, topk=50, target_mode='ranking', verbose=1): + """Initializes the model and trains it + + Parameters + ---------- + queries: iterable list of list of string words, optional + The questions for the similarity learning model. + docs: iterable list of list of list of string words, optional + The candidate answers for the similarity learning model. 
+ labels: iterable list of list of int, optional + Indicates when a candidate document is relevant to a query + - 1 : relevant + - 0 : irrelevant + word_embedding : :class:`~gensim.models.keyedvectors.KeyedVectors`, optional + a KeyedVector object which has the embeddings pre-loaded. + If None, random word embeddings will be used. + text_maxlen : int, optional + The maximum possible length of a query or a document. + This is used for padding sentences. + normalize_embeddings : bool, optional + Whether the word embeddings provided should be normalized. + epochs : int, optional + The number of epochs for which the model should train on the data. + unk_handle_method : {'zero', 'random'}, optional + The method for handling unkown words. + - 'zero' : unknown words are given a zero vector + - 'random' : unknown words are given a uniformly random vector bassed on the word string hash + validation_data: list of the form [test_queries, test_docs, test_labels], optional + where test_queries, test_docs and test_labels are of the same form as + their counter parts stated above. + topk : int, optional + the k topmost values in the interaction matrix between the queries and the docs + target_mode : {'ranking', 'classification'}, optional + the way the model should be trained, either to rank or classify + verbose : {0, 1, 2} + the level of information shared while training + - 0 : silent + - 1 : progress bar + - 2 : one line per epoch + + + Examples + -------- + The trained model needs to be trained on data in the format + + >>> queries = ["When was World War 1 fought ?".lower().split(), "When was Gandhi born ?".lower().split()] + >>> docs = [["The world war was bad".lower().split(), "It was fought in 1996".lower().split()], ["Gandhi was" + ... "born in the 18th century".lower().split(), "He fought for the Indian freedom movement".lower().split(), + ... "Gandhi was assasinated".lower().split()]] + >>> labels = [[0, 1], [1, 0, 0]] + >>> import gensim.downloader as api + >>> word_embeddings_kv = api.load('glove-wiki-gigaword-50') + >>> model = DRMM_TKS(queries, docs, labels, word_embedding=word_embeddings_kv, verbose=0) + """ + self.queries = queries + self.docs = docs + self.labels = labels + self.word_counter = Counter() + self.text_maxlen = text_maxlen + self.topk = topk + self.word_embedding = word_embedding + self.word2index, self.index2word = {}, {} + self.normalize_embeddings = normalize_embeddings + self.model = None + self.epochs = epochs + self.validation_data = validation_data + self.target_mode = target_mode + self.verbose = verbose + self.first_train = True # Whether the model has been trained before + self.needs_vocab_build = True + + # These functions have been defined outside the class and set as attributes here + # so that they can be ignored when saving the model to file + self._get_pair_list = _get_pair_list + self._get_full_batch_iter = _get_full_batch_iter + + if self.target_mode not in ['ranking', 'classification']: + raise ValueError( + "Unkown target_mode %s. 
It must be either 'ranking' or 'classification'" % self.target_mode + ) + + if unk_handle_method not in ['random', 'zero']: + raise ValueError("Unkown token handling method %s" % str(unk_handle_method)) + self.unk_handle_method = unk_handle_method + + if self.queries is not None and self.docs is not None and self.labels is not None: + self.build_vocab(self.queries, self.docs, self.labels, self.word_embedding) + self.train(self.queries, self.docs, self.labels, self.word_embedding, + self.text_maxlen, self.normalize_embeddings, self.epochs, self.unk_handle_method, + self.validation_data, self.topk, self.target_mode, self.verbose) + + def build_vocab(self, queries, docs, labels, word_embedding): + """Indexes all the words and makes an embedding_matrix which + can be fed directly into an Embedding layer + """ + + logger.info("Starting Vocab Build") + + # get all the vocab words + for q in self.queries: + self.word_counter.update(q) + for doc in self.docs: + for d in doc: + self.word_counter.update(d) + for i, word in enumerate(self.word_counter.keys()): + self.word2index[word] = i + self.index2word[i] = word + + self.vocab_size = len(self.word2index) + logger.info("Vocab Build Complete") + logger.info("Vocab Size is %d", self.vocab_size) + + logger.info("Building embedding index using KeyedVector pretrained word embeddings") + if type(self.word_embedding) == KeyedVectors: + kv_model = self.word_embedding + embedding_vocab_size, self.embedding_dim = len(kv_model.vocab), kv_model.vector_size + else: + raise ValueError( + "Unknown value of word_embedding : %s. Must be either a KeyedVector object", + str(word_embedding) + ) + + logger.info( + "The embeddings_index built from the given file has %d words of %d dimensions", + embedding_vocab_size, self.embedding_dim + ) + + logger.info("Building the Embedding Matrix for the model's Embedding Layer") + + # Initialize the embedding matrix + # UNK word gets the vector based on the method + if self.unk_handle_method == 'random': + self.embedding_matrix = np.random.uniform(-0.2, 0.2, (self.vocab_size, self.embedding_dim)) + elif self.unk_handle_method == 'zero': + self.embedding_matrix = np.zeros((self.vocab_size, self.embedding_dim)) + + n_non_embedding_words = 0 + for word, i in self.word2index.items(): + if word in kv_model: + # words not found in keyed vectors will get the vector based on unk_handle_method + self.embedding_matrix[i] = kv_model[word] + else: + if self.unk_handle_method == 'random': + # Creates the same random vector for the given string each time + self.embedding_matrix[i] = self._seeded_vector(word, self.embedding_dim) + n_non_embedding_words += 1 + logger.info( + "There are %d words out of %d (%.2f%%) not in the embeddings. 
Setting them to %s", n_non_embedding_words, + self.vocab_size, n_non_embedding_words * 100 / self.vocab_size, self.unk_handle_method + ) + + # Include embeddings for words in embedding file but not in the train vocab + # It will be useful for embedding words encountered in validation and test set + logger.info( + "Adding additional words from the embedding file to embedding matrix" + ) + + # The point where vocab words end + vocab_offset = self.vocab_size + extra_embeddings = [] + # Take the words in the embedding file which aren't there int the train vocab + for word in list(kv_model.vocab): + if word not in self.word2index: + # Add the new word's vector and index it + extra_embeddings.append(kv_model[word]) + # We also need to keep an additional indexing of these + # words + self.word2index[word] = vocab_offset + vocab_offset += 1 + + # Set the pad and unk word to second last and last index + self.pad_word_index = vocab_offset + self.unk_word_index = vocab_offset + 1 + + if self.unk_handle_method == 'random': + unk_embedding_row = np.random.uniform(-0.2, 0.2, (1, self.embedding_dim)) + elif self.unk_handle_method == 'zero': + unk_embedding_row = np.zeros((1, self.embedding_dim)) + + pad_embedding_row = np.random.uniform(-0.2, + 0.2, (1, self.embedding_dim)) + + if len(extra_embeddings) > 0: + self.embedding_matrix = np.vstack( + [self.embedding_matrix, np.array(extra_embeddings), + pad_embedding_row, unk_embedding_row] + ) + else: + self.embedding_matrix = np.vstack( + [self.embedding_matrix, pad_embedding_row, unk_embedding_row] + ) + + if self.normalize_embeddings: + logger.info("Normalizing the word embeddings") + self.embedding_matrix = normalize(self.embedding_matrix) + + logger.info("Embedding Matrix build complete. It now has shape %s", str(self.embedding_matrix.shape)) + logger.info("Pad word has been set to index %d", self.pad_word_index) + logger.info("Unknown word has been set to index %d", self.unk_word_index) + logger.info("Embedding index build complete") + self.needs_vocab_build = False + + def _string2numeric_hash(self, text): + "Gets a numeric hash for a given string" + return int(hashlib.md5(text.encode()).hexdigest()[:8], 16) + + def _seeded_vector(self, seed_string, vector_size): + """Create one 'random' vector (but deterministic by seed_string)""" + # Note: built-in hash() may vary by Python version or even (in Py3.x) per launch + once = np_random.RandomState(self._string2numeric_hash(seed_string) & 0xffffffff) + return (once.rand(vector_size) - 0.5) / vector_size + + def _make_indexed(self, sentence): + """Gets the indexed version of the sentence based on the self.word2index dict + in the form of a list + + This function should never encounter any OOV words since it only indexes + in vocab words + + Parameters + ---------- + sentence : iterable list of list of str + The sentence to be indexed + + Raises + ------ + ValueError : If the sentence has a lenght more than text_maxlen + """ + + indexed_sent = [] + for word in sentence: + indexed_sent.append(self.word2index[word]) + + if len(indexed_sent) > self.text_maxlen: + raise ValueError( + "text_maxlen: %d isn't big enough. Error at sentence of length %d." 
+ "Sentence is %s" % (self.text_maxlen, len(sentence), sentence) + ) + indexed_sent = indexed_sent + [self.pad_word_index] * (self.text_maxlen - len(indexed_sent)) + return indexed_sent + + def _get_full_batch(self): + """Provides all the data points int the format: X1, X2, y with + alternate positive and negative examples + + Returns + ------- + X1 : numpy array of shape (num_samples, text_maxlen) + the queries + X2 : numpy array of shape (num_samples, text_maxlen) + the docs + y : numpy array with {0, 1} of shape (num_samples,) + The relation between X1[i] and X2[j] + 1 : X2[i] is relevant to X1[i] + 0 : X2[i] is not relevant to X1[i] + """ + X1, X2, y = [], [], [] + for i, (query, pos_doc, neg_doc) in enumerate(self.pair_list): + X1.append(query) + X2.append(pos_doc) + y.append(1) + X1.append(query) + X2.append(neg_doc) + y.append(0) + return np.array(X1), np.array(X2), np.array(y) + + def train(self, queries, docs, labels, word_embedding=None, + text_maxlen=200, normalize_embeddings=True, epochs=10, unk_handle_method='zero', + validation_data=None, topk=20, target_mode='ranking', verbose=1, batch_size=5, steps_per_epoch=900): + """Trains a DRMM_TKS model using specified parameters + + This method is called from on model initialization if the data is provided. + It can also be trained in an online manner or after initialization + """ + + self.queries = queries or self.queries + self.docs = docs or self.docs + self.labels = labels or self.labels + + # This won't change the embedding layer TODO + self.word_embedding = word_embedding or self.word_embedding + self.text_maxlen = text_maxlen or self.text_maxlen + self.normalize_embeddings = normalize_embeddings or self.normalize_embeddings + self.epochs = epochs or self.epochs + self.unk_handle_method = unk_handle_method or self.unk_handle_method + self.validation_data = validation_data or self.validation_data + self.topk = topk or self.topk + self.target_mode = target_mode or self.target_mode + + if verbose != 0: # Check needed since 0 or 2 will always give 2 + self.verbose = verbose or self.verbose + else: + self.verbose = 0 + + if self.queries is None or self.docs is None or self.labels is None: + raise ValueError("queries, docs and labels have to be specified") + # We need to build these each time since any of the parameters can change from each train to trian + if self.needs_vocab_build: + self.build_vocab(self.queries, self.docs, self.labels, self.word_embedding) + + is_iterable = False + if isinstance(self.queries, Iterable) and not isinstance(self.queries, list): + is_iterable = True + logger.info("Input is an iterable amd will be streamed") + + self.pair_list = self._get_pair_list(self.queries, self.docs, self.labels, self._make_indexed, is_iterable) + if is_iterable: + train_generator = self._get_full_batch_iter(self.pair_list, 10) + else: + X1_train, X2_train, y_train = self._get_full_batch() + + if self.first_train: + # The settings below should be set only once + self.model = self._get_keras_model() + optimizer = 'adam' + optimizer = 'adadelta' + optimizer = optimizers.get(optimizer) + learning_rate = 0.0001 + learning_rate = 1 + K.set_value(optimizer.lr, learning_rate) + # either one can be selected. Currently, the choice is manual. 
+ loss = hinge + loss = 'mse' + loss = rank_hinge_loss + self.model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy']) + else: + logger.info("Model will be retrained") + + self.model.summary(print_fn=logger.info) + + # Put the validation data in as a callback + val_callback = None + if self.validation_data is not None: + test_queries, test_docs, test_labels = self.validation_data + + long_doc_list = [] + long_label_list = [] + long_query_list = [] + doc_lens = [] + + for query, doc, label in zip(test_queries, test_docs, test_labels): + i = 0 + for d, l in zip(doc, label): + long_query_list.append(query) + long_doc_list.append(d) + long_label_list.append(l) + i += 1 + doc_lens.append(len(doc)) + + indexed_long_query_list = self._translate_user_data(long_query_list) + indexed_long_doc_list = self._translate_user_data(long_doc_list) + + val_callback = ValidationCallback( + {"X1": indexed_long_query_list, "X2": indexed_long_doc_list, "doc_lengths": doc_lens, + "y": long_label_list} + ) + val_callback = [val_callback] # since `model.fit` requires a list + + # If train is called again, not all values should be reset + if self.first_train is True: + self.first_train = False + + if is_iterable: + self.model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, callbacks=val_callback, + epochs=self.epochs, shuffle=False, ) + else: + self.model.fit(x={"query": X1_train, "doc": X2_train}, y=y_train, batch_size=5, + verbose=self.verbose, epochs=self.epochs, shuffle=False, callbacks=val_callback) + + def _translate_user_data(self, data): + """Translates given user data into an indexed format which the model understands. + If a model is not in the vocabulary, it is assigned the `unk_word_index` which maps + to the unk vector decided by `unk_handle_method` + + Parameters + ---------- + data : list of list of string words + The data to be tranlsated + + Examples + -------- + >>> from gensim.test.utils import datapath + >>> model = DRMM_TKS.load(datapath('drmm_tks')) + >>> + >>> queries = ["When was World War 1 fought ?".split(), "When was Gandhi born ?".split()] + >>> print(model._translate_user_data(queries)) + [[31 1 23 31 4 5 6 30 30 30] + [31 1 31 8 6 30 30 30 30 30]] + """ + translated_data = [] + n_skipped_words = 0 + for sentence in data: + translated_sentence = [] + for word in sentence: + if word in self.word2index: + translated_sentence.append(self.word2index[word]) + else: + # If the key isn't there give it the zero word index + translated_sentence.append(self.unk_word_index) + n_skipped_words += 1 + if len(sentence) > self.text_maxlen: + logger.info( + "text_maxlen: %d isn't big enough. Error at sentence of length %d." + "Sentence is %s", self.text_maxlen, len(sentence), str(sentence) + ) + translated_sentence = translated_sentence + (self.text_maxlen - len(sentence)) * [self.pad_word_index] + translated_data.append(np.array(translated_sentence)) + + logger.info( + "Found %d unknown words. 
Set them to unknown word index : %d", n_skipped_words, self.unk_word_index + ) + return np.array(translated_data) + + def predict(self, queries, docs): + """Predcits the similarity between a query-document pair + based on the trained DRMM TKS model + + Parameters + ---------- + queries : list of list of str + The questions for the similarity learning model + docs : list of list of list of str + The candidate answers for the similarity learning model + + + Examples + -------- + >>> from gensim.test.utils import datapath + >>> model = DRMM_TKS.load(datapath('drmm_tks')) + >>> + >>> queries = ["When was World War 1 fought ?".split(), "When was Gandhi born ?".split()] + >>> docs = [["The world war was bad".split(), "It was fought in 1996".split()], ["Gandhi was born in the 18th" + ... " century".split(), "He fought for the Indian freedom movement".split(), "Gandhi was" + ... " assasinated".split()]] + >>> print(model.predict(queries, docs)) + [[0.9933108 ] + [0.9925415 ] + [0.9827911 ] + [0.99258184] + [0.9960481 ]] + """ + + long_query_list = [] + long_doc_list = [] + for query, doc in zip(queries, docs): + for d in doc: + long_query_list.append(query) + long_doc_list.append(d) + + indexed_long_query_list = self._translate_user_data(long_query_list) + indexed_long_doc_list = self._translate_user_data(long_doc_list) + + predictions = self.model.predict(x={'query': indexed_long_query_list, 'doc': indexed_long_doc_list}) + + logger.info("Predictions in the format query, doc, similarity") + for i, (q, d) in enumerate(zip(long_query_list, long_doc_list)): + logger.info("%s\t%s\t%s", str(q), str(d), str(predictions[i][0])) + + return predictions + + def evaluate(self, queries, docs, labels): + """Evaluates the model and provides the results in terms of metrics (MAP, nDCG) + This should ideally be called on the test set. + + Parameters + ---------- + queries : list of list of str + The questions for the similarity learning model + docs : list of list of list of str + The candidate answers for the similarity learning model + labels : list of list of int + The relevance of the document to the query. 1 = relevant, 0 = not relevant + """ + long_doc_list = [] + long_label_list = [] + long_query_list = [] + doc_lens = [] + for query, doc, label in zip(queries, docs, labels): + i = 0 + for d, l in zip(doc, label): + long_query_list.append(query) + long_doc_list.append(d) + long_label_list.append(l) + i += 1 + doc_lens.append(len(doc)) + indexed_long_query_list = self._translate_user_data(long_query_list) + indexed_long_doc_list = self._translate_user_data(long_doc_list) + predictions = self.model.predict(x={'query': indexed_long_query_list, 'doc': indexed_long_doc_list}) + Y_pred = [] + Y_true = [] + offset = 0 + for doc_size in doc_lens: + Y_pred.append(predictions[offset: offset + doc_size]) + Y_true.append(long_label_list[offset: offset + doc_size]) + offset += doc_size + logger.info("MAP: %.2f", mapk(Y_true, Y_pred)) + for k in [1, 3, 5, 10, 20]: + logger.info("nDCG@%d : %.2f", k, mean_ndcg(Y_true, Y_pred, k=k)) + + def save(self, fname, *args, **kwargs): + """Save the model. + This saved model can be loaded again using :func:`~gensim.models.experimental.drmm_tks.DRMM_TKS.load` + The keras model shouldn't be serialized using pickle or cPickle. So, the non-keras + variables will be saved using gensim's SaveLoad and the keras model will be saved using + the keras save method with ".keras" prefix. 
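+        Note that two files are written to disk, `fname` and `fname + ".keras"`, and both
+        are needed when loading the model back.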
+ + Also see :func:`~gensim.models.experimental.drmm_tks.DRMM_TKS.load` + + Parameters + ---------- + fname : str + Path to the file. + + Examples + -------- + >>> from gensim.test.utils import datapath, get_tmpfile + >>> model = DRMM_TKS.load(datapath('drmm_tks')) + >>> model_save_path = get_tmpfile('drmm_tks_model') + >>> model.save(model_save_path) + """ + # don't save the keras model as it needs to be saved with a keras function + # Also, we can't save iterable properties. So, ignore them. + kwargs['ignore'] = kwargs.get( + 'ignore', ['model', '_get_pair_list', '_get_full_batch_iter', + 'queries', 'docs', 'labels', 'pair_list']) + kwargs['fname_or_handle'] = fname + super(DRMM_TKS, self).save(*args, **kwargs) + self.model.save(fname + ".keras") + + @classmethod + def load(cls, *args, **kwargs): + """Loads a previously saved `DRMM TKS` model. Also see `save()`. + Collects the gensim and the keras models and returns it as on gensim model. + + Parameters + ---------- + fname : str + Path to the saved file. + + Returns + ------- + :obj: `~gensim.models.experimental.DRMM_TKS` + Returns the loaded model as an instance of :class: `~gensim.models.experimental.DRMM_TKS`. + + + Examples + -------- + >>> from gensim.test.utils import datapath, get_tmpfile + >>> model_file_path = datapath('drmm_tks') + >>> model = DRMM_TKS.load(model_file_path) + """ + fname = args[0] + gensim_model = super(DRMM_TKS, cls).load(*args, **kwargs) + keras_model = load_model( + fname + '.keras', custom_objects={'TopKLayer': TopKLayer}) + gensim_model.model = keras_model + gensim_model._get_pair_list = _get_pair_list + gensim_model._get_full_batch_iter = _get_full_batch_iter + return gensim_model + + def _get_keras_model(self, embed_trainable=False, dropout_rate=0.5, hidden_sizes=[100, 1]): + """Builds and returns the keras class for drmm tks model + + About DRMM_TKS + -------------- + This is a variant version of DRMM, which applied topk pooling in the matching matrix. + It has the following steps: + 1. embed queries into embedding vector named 'q_embed' and 'd_embed' respectively + 2. computing 'q_embed' and 'd_embed' with element-wise multiplication + 3. computing output of upper layer with dense layer operation + 4. take softmax operation on the output of this layer named 'g' and find the k largest entries named 'mm_k'. + 5. input 'mm_k' into hidden layers, with specified length of layers and activation function + 6. compute 'g' and 'mm_k' with element-wise multiplication. + + On predicting, the model returns the score list between queries and documents. + + Parameters + ---------- + embed_trainable : bool, optional + Whether the embeddings should be trained + if True, the embeddings are trianed + dropout_rate : float between 0 and 1, optional + The probability of making a neuron dead + Used for regularization. 
+ hidden_sizes : list of int, optional + The list of hidden sizes for the fully connected layers connected to the matching matrix + Example : + hidden_sizes = [10, 20, 30] + will add 3 fully connected layers of 10, 20 and 30 hidden neurons + + """ + + if not KERAS_AVAILABLE: + raise ImportError("Please install Keras to use this model") + + n_layers = len(hidden_sizes) + + query = Input(name='query', shape=(self.text_maxlen,)) + doc = Input(name='doc', shape=(self.text_maxlen,)) + embedding = Embedding(self.embedding_matrix.shape[0], self.embedding_dim, + weights=[self.embedding_matrix], trainable=embed_trainable) + + q_embed = embedding(query) + d_embed = embedding(doc) + + mm = Dot(axes=[2, 2], normalize=True)([q_embed, d_embed]) + + # compute term gating + w_g = Dense(1, activation='softmax')(q_embed) + g = Reshape((self.text_maxlen,))(w_g) + + mm_k = TopKLayer(topk=self.topk, output_dim=( + self.text_maxlen, self.embedding_dim))(mm) + + for i in range(n_layers): + mm_k = Dense(hidden_sizes[i], activation='softplus', kernel_initializer='he_uniform', + bias_initializer='zeros')(mm_k) + + mm_k_dropout = Dropout(rate=dropout_rate)(mm_k) + + mm_reshape = Reshape( + (self.text_maxlen,))(mm_k_dropout) + + mean = Dot(axes=[1, 1])([mm_reshape, g]) + + if self.target_mode == 'classification': + out_ = Dense(2, activation='softmax')(mean) + elif self.target_mode in ['regression', 'ranking']: + out_ = Reshape((1,))(mean) + + model = Model(inputs=[query, doc], outputs=out_) + return model diff --git a/gensim/models/experimental/evaluation_metrics.py b/gensim/models/experimental/evaluation_metrics.py new file mode 100644 index 0000000000..d55f1a09c9 --- /dev/null +++ b/gensim/models/experimental/evaluation_metrics.py @@ -0,0 +1,97 @@ +import numpy as np +import logging + +logger = logging.getLogger(__name__) +logging.basicConfig( + format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO +) + + +def mapk(Y_true, Y_pred): + """Calculates Mean Average Precision(MAP) for a given set of Y_true, Y_pred + + Note: Currently doesn't support mapping at k. Couldn't use only map as it's a + reserved word + + Parameters + ---------- + Y_true : numpy array or list of ints either 1 or 0 + Contains the true, ground truth values of the relevance between a query and document + Y_pred : numpy array or list of floats + Contains the predicted similarity score between a query and document + + Examples + -------- + >>> Y_true = [[0, 1, 0, 1], [0, 0, 0, 0, 1, 0], [0, 1, 0]] + >>> Y_pred = [[0.1, 0.2, -0.01, 0.4], [0.12, -0.43, 0.2, 0.1, 0.99, 0.7], [0.5, 0.63, 0.92]] + >>> print(mapk(Y_true, Y_pred)) + 0.75 + """ + aps = [] + n_skipped = 0 + for y_true, y_pred in zip(Y_true, Y_pred): + # skip datapoints where there is no solution + if np.sum(y_true) < 1: + n_skipped += 1 + continue + + pred_sorted = sorted(zip(y_true, y_pred), key=lambda x: x[1], reverse=True) + avg = 0 + n_relevant = 0 + + for i, val in enumerate(pred_sorted): + if val[0] == 1: + avg += 1. / (i + 1.) 
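+                # 'avg' accumulates the reciprocal of this relevant document's 1-based rank;
+                # the per-query score appended below is avg / n_relevant, i.e. an average of
+                # reciprocal ranks rather than of the precision-at-rank values used in the
+                # textbook definition of average precision.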
+ n_relevant += 1 + + if n_relevant != 0: + ap = avg / n_relevant + aps.append(ap) + return np.mean(np.array(aps)) + + +def mean_ndcg(Y_true, Y_pred, k=10): + """Calculates the mean discounted normalized cumulative gain over all + the entries limited to the integer k + + Parameters + ---------- + Y_true : numpy array or list of ints either 1 or 0 + Contains the true, ground truth values of the relevance between a query and document + Y_pred : numpy array or list of floats + Contains the predicted similarity score between a query and document + + + Examples + -------- + >>> Y_true = [[0, 1, 0, 1], [0, 0, 0, 0, 1, 0], [0, 1, 0]] + >>> Y_pred = [[0.1, 0.2, -0.01, 0.4], [0.12, -0.43, 0.2, 0.1, 0.19, 0.7], [0.5, 0.63, 0.72]] + >>> for k in [1, 3, 5, 10]: + ... print("nDCG@{} is {}".format(k, mean_ndcg(Y_true, Y_pred, k))) + nDCG@1 is 0.3333333333333333 + nDCG@3 is 0.7103099178571526 + nDCG@5 is 0.7103099178571526 + nDCG@10 is 0.7103099178571526 + + """ + ndcgs = [] + n_skipped = 0 + for y_true, y_pred in zip(Y_true, Y_pred): + if np.sum(y_true) < 1: + n_skipped += 1 + continue + pred_sorted = sorted(zip(y_true, y_pred), key=lambda x: x[1], reverse=True) + true_sorted = sorted(zip(y_true, y_pred), key=lambda x: x[0], reverse=True) + pred_sorted = pred_sorted[:k] + true_sorted = true_sorted[:k] + dcg = 0 + for i, val in enumerate(pred_sorted): + if val[0] == 1: + dcg += 1. / np.log2(i + 2) + idcg = 0 + for i, val in enumerate(true_sorted): + if val[0] == 1: + idcg += 1. / np.log2(i + 2) + if idcg != 0: + ndcgs.append(dcg / idcg) + return np.mean(np.array(ndcgs)) diff --git a/gensim/models/experimental/experimental_data/get_data.py b/gensim/models/experimental/experimental_data/get_data.py new file mode 100644 index 0000000000..95b50a74a0 --- /dev/null +++ b/gensim/models/experimental/experimental_data/get_data.py @@ -0,0 +1,89 @@ +""" +Utility script to download the datsets for Similarity Learning +Currently supports: +- WikiQA +- Quora Duplicate Question Pairs +- Glove 6 Billion tokens Word Embeddings + +Example Usage: +To get wikiqa +$ python get_data.py --datafile wikiqa + +To get quoraqp +$ python get_data.py --datafile quoraqp + +To get Glove Word Embeddings +$ python get_data.py --datafile glove +""" +import requests +import argparse +import zipfile +import logging +import os +import gensim.downloader as api + +logger = logging.getLogger(__name__) + +# The urls and filepaths of currently supported files +wikiqa_url, wikiqa_file = "https://download.microsoft.com/download/E/5/F/E5FCFCEE-7005-4814-853D-DAA7C66507E0/", "WikiQACorpus.zip" # noqa +quoraqp_url, quoraqp_file = "http://qim.ec.quoracdn.net/", "quora_duplicate_questions.tsv" + + +def download(url, file_name, output_dir, unzip=False): + """Utility function to download a given file from the given url + Paramters: + --------- + url: str + Url of the file, without the file + + file_name: str + name of the file ahead of the url path + + Example: + url = www.example.com/datasets/ + file_name = example_dataset.zip + """ + logger.info("Downloading %s" % file_name) + req = requests.get(url + file_name) + file_save_path = os.path.join(output_dir, file_name) + try: + with open(file_save_path, "wb") as code: + code.write(req.content) + logger.info("Download of %s complete" % file_name) + except Exception as e: + logger.info(str(e)) + + if unzip: + logger.info("Unzipping %s" % file_name) + with zipfile.ZipFile(file_save_path, "r") as zip_ref: + zip_ref.extractall(path=output_dir) + logger.info("Unzip complete") + + +if __name__ == '__main__': + 
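+    # Example invocation (illustrative): python get_data.py --datafile wikiqa --output_dir ./data
+    # The 'glove' option goes through gensim.downloader, which caches the vectors under
+    # ~/gensim-data instead of --output_dir.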
    logging.basicConfig(
+        format='%(asctime)s : %(threadName)s : %(levelname)s : %(message)s',
+        level=logging.INFO
+    )
+    parser = argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    parser.add_argument('--datafile', default='all',
+                        help='file you want to download. Options: wikiqa, quoraqp, glove, all')
+    parser.add_argument('--output_dir', default='./',
+                        help='the directory where you want to save the data')
+
+    args = parser.parse_args()
+    if args.datafile == 'wikiqa':
+        download(wikiqa_url, wikiqa_file, args.output_dir, unzip=True)
+    elif args.datafile == 'quoraqp':
+        download(quoraqp_url, quoraqp_file, args.output_dir)
+    elif args.datafile == 'glove':
+        api.load('glove-wiki-gigaword-50')
+    elif args.datafile == 'all':
+        logger.info("Downloading all files.")
+        download(wikiqa_url, wikiqa_file, args.output_dir, unzip=True)
+        download(quoraqp_url, quoraqp_file, args.output_dir)
+        api.load('glove-wiki-gigaword-50')
+    else:
+        logger.info("Unknown dataset %s" % args.datafile)
diff --git a/gensim/test/test_data/drmm_tks b/gensim/test/test_data/drmm_tks
new file mode 100644
index 0000000000..4a4355c043
Binary files /dev/null and b/gensim/test/test_data/drmm_tks differ
diff --git a/gensim/test/test_data/drmm_tks.keras b/gensim/test/test_data/drmm_tks.keras
new file mode 100644
index 0000000000..d05d52ed1c
Binary files /dev/null and b/gensim/test/test_data/drmm_tks.keras differ
diff --git a/gensim/test/test_drmm_tks.py b/gensim/test/test_drmm_tks.py
new file mode 100644
index 0000000000..6e5a08b193
--- /dev/null
+++ b/gensim/test/test_drmm_tks.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2010 Radim Rehurek
+# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html
+
+"""
+Automated tests for checking transformation algorithms (the models package).
+"""
+
+import unittest
+import gensim.downloader as api
+from gensim.test.utils import datapath, get_tmpfile
+from gensim.models.experimental import DRMM_TKS
+
+
+class TestDrmmTksModel(unittest.TestCase):
+
+    def testLoadModel(self):
+        model = DRMM_TKS.load(datapath('drmm_tks'))
+        self.assertTrue(model.model is not None)
+        self.assertTrue(model._get_pair_list is not None)
+        self.assertTrue(model._get_full_batch_iter is not None)
+
+    def testSaveModel(self):
+        model = DRMM_TKS.load(datapath('drmm_tks'))
+        model.save(get_tmpfile('temp_drmm_tks_model'))
+
+    def testTrainModel(self):
+        queries = ["When was World War 1 fought ?".lower().split(), "When was Gandhi born ?".lower().split()]
+        docs = [["The world war was bad".lower().split(), "It was fought in 1996".lower().split()], ["Gandhi was born"
+                " in the 18th century".lower().split(), "He fought for the Indian freedom movement".lower().split(),
+                "Gandhi was assassinated".lower().split()]]
+        labels = [[0, 1], [1, 0, 0]]
+        word_embeddings_kv = api.load('glove-wiki-gigaword-50')
+        model = DRMM_TKS(queries, docs, labels, word_embedding=word_embeddings_kv, verbose=0)  # noqa:F841
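Taken together, the patch exposes a small train / predict / evaluate / save / load API. The sketch below is a minimal way to exercise it end to end, assuming Keras is installed and `gensim.downloader` can fetch the `glove-wiki-gigaword-50` vectors; the toy data mirrors `testTrainModel` above and the save path is illustrative.

```python
import gensim.downloader as api
from gensim.models.experimental import DRMM_TKS

queries = ["when was world war 1 fought ?".split(), "when was gandhi born ?".split()]
docs = [
    ["the world war was bad".split(), "it was fought in 1996".split()],
    ["gandhi was born in the 18th century".split(),
     "he fought for the indian freedom movement".split(),
     "gandhi was assassinated".split()],
]
labels = [[0, 1], [1, 0, 0]]

kv = api.load('glove-wiki-gigaword-50')   # pretrained KeyedVectors

# Training runs inside the constructor when queries, docs and labels are passed in
model = DRMM_TKS(queries, docs, labels, word_embedding=kv, verbose=0)

# One similarity score per (query, candidate document) pair
print(model.predict(queries, docs))

# Logs MAP and nDCG@k; ideally call this on held-out data
model.evaluate(queries, docs, labels)

# Writes 'drmm_tks_model' (gensim part) and 'drmm_tks_model.keras' (keras part)
model.save('drmm_tks_model')
reloaded = DRMM_TKS.load('drmm_tks_model')
```

The same `predict` and `evaluate` calls accept unseen queries and candidate documents, since out-of-vocabulary words are mapped to the unknown index chosen by `unk_handle_method`.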