Skip to content

Commit

Permalink
Merge cfd0ef7 into b15a6be
Browse files Browse the repository at this point in the history
  • Loading branch information
crankycoder committed Nov 27, 2018
2 parents b15a6be + cfd0ef7 commit 40d3ddf
Show file tree
Hide file tree
Showing 19 changed files with 822 additions and 938 deletions.
94 changes: 94 additions & 0 deletions API.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
API documentation

# Get addon recommendations

Return a list of recommended addon GUIDs for the client identified by `<hashed_id>`.

**URL** : `/v1/api/recommendations/<hashed_id>/`

**Method** : `POST`

**Auth required** : NO

**Permissions required** : None

**Data constraints**

```json
{
"options": {"promoted": [
["[1 to 30 chars]", Some Number],
["[1 to 30 chars]", Some Number],
]
}
}
```

Note that the only valid key for the top level JSON is `options`.

`options` is always a dictionary of optional values.

To denote no optional data, it is perfectly valid for the JSON data
to have no `options` key or, even simpler, to send no POST data at all.

Each item in the promoted addon GUID list is accompanied by an
integer weight. Promoted addons are returned in order of descending
weight, and any promoted addon, whatever its weight, ranks ahead of
every TAAR recommended addon GUID.

**Data examples**

Partial data is allowed.

```json
{
"options": {"promoted": [
["guid1", 10],
["guid2", 5]
]
}
}
```


## Success Responses

**Condition** : Data provided is valid

**Code** : `200 OK`

**Content example** : Response will reflect a list of addon GUID suggestions.

```json
{
"results": ["taar-guid1", "taar-guid2", "taar-guid3"],
"result_info": []
}
```

## Error Response

**Condition** : If provided data is invalid, e.g. options object is not a dictionary.

**Code** : `400 BAD REQUEST`

**Content example** :

```json
{
"invalid_option": [
"Please provide a dictionary with a `promoted` key mapped to a list of promoted addon GUIDs"
]
}
```

## Notes

* Endpoint will ignore irrelevant and read-only data, such as parameters
or fields that don't exist.
* Endpoint will try to fail gracefully and return an empty list in the
results key if no suggestions can be made.
* The only condition under which the endpoint should return an error code
is when the options data is malformed.



40 changes: 40 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,43 @@ LocaleRecommender:

EnsembleRecommender:
* s3://telemetry-parquet/taar/ensemble/ensemble_weight.json



TAAR breaks out all S3 data load configuration into environment
variables. This ensures that running under test has no chance of
clobbering the production data in the event that a developer has AWS
configuration keys installed locally in `~/.aws/`.

Production environment variables required for TAAR

Collaborative Recommender ::

TAAR_ITEM_MATRIX_BUCKET = "telemetry-public-analysis-2"
TAAR_ITEM_MATRIX_KEY = "telemetry-ml/addon_recommender/item_matrix.json"
TAAR_ADDON_MAPPING_BUCKET = "telemetry-public-analysis-2"
TAAR_ADDON_MAPPING_KEY = "telemetry-ml/addon_recommender/addon_mapping.json"


Ensemble Recommender ::

TAAR_ENSEMBLE_BUCKET = "telemetry-parquet"
TAAR_ENSEMBLE_KEY = "taar/ensemble/ensemble_weight.json"

Hybrid Recommender ::

TAAR_WHITELIST_BUCKET = "telemetry-parquet"
TAAR_WHITELIST_KEY = "telemetry-ml/addon_recommender/only_guids_top_200.json"

Locale Recommender ::

TAAR_LOCALE_BUCKET = "telemetry-parquet"
TAAR_LOCALE_KEY = "taar/locale/top10_dict.json"

Similarity Recommender ::

TAAR_SIMILARITY_BUCKET = "telemetry-parquet"
TAAR_SIMILARITY_DONOR_KEY = "taar/similarity/donors.json"
TAAR_SIMILARITY_LRCURVES_KEY = "taar/similarity/lr_curves.json"

4 changes: 1 addition & 3 deletions prod-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1 @@
certifi==2018.10.15 \
--hash=sha256:339dc09518b07e2fa7eda5450740925974815557727d6bd35d319c1524a04a4c \
--hash=sha256:6d58c986d22b038c8c0df30d639f23a3e6d172a05c3583e766f4c0b785c0986a
certifi==2018.10.15
722 changes: 119 additions & 603 deletions requirements.txt

Large diffs are not rendered by default.

30 changes: 27 additions & 3 deletions taar/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,37 @@ def configure_plugin(app): # noqa: C901
This is a factory function that configures all the routes for
flask given a particular library.
"""
@app.route('/api/recommendations/<uuid:uuid_client_id>/')
def recommendations(uuid_client_id):

@app.route("/v1/api/recommendations/<hashed_client_id>/", methods=["GET", "POST"])
def recommendations(hashed_client_id):
"""Return a list of recommendations provided a telemetry client_id."""
# Use the module global PROXY_MANAGER
global PROXY_MANAGER

try:
promoted_guids = []
if request.method == "POST":
json_data = request.data
# At least Python3.5 returns request.data as bytes
# type instead of a string type.
# Both Python2.7 and Python3.7 return a string type
if type(json_data) == bytes:
json_data = json_data.decode("utf8")

post_data = json.loads(json_data)
promoted_guids = post_data.get("options", {}).get("promoted", [])
if promoted_guids:
promoted_guids.sort(key=lambda x: x[1], reverse=True)
promoted_guids = [x[0] for x in promoted_guids]
except Exception as e:
return app.response_class(
response=json.dumps({"error": "Invalid JSON in POST: {}".format(e)}),
status=400,
mimetype="application/json",
)

# Coerce the uuid.UUID type into a string
client_id = str(uuid_client_id)
client_id = str(hashed_client_id)

branch = request.args.get("branch", "")

Expand Down Expand Up @@ -76,6 +99,7 @@ def recommendations(uuid_client_id):
# Strip out weights from TAAR results to maintain compatibility
# with TAAR 1.0
jdata = {"results": [x[0] for x in recommendations]}
jdata["results"] = (promoted_guids + jdata["results"])[:TAAR_MAX_RESULTS]

response = app.response_class(
response=json.dumps(jdata), status=200, mimetype="application/json"
Expand Down
93 changes: 54 additions & 39 deletions taar/recommenders/collaborative_recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@

from .base_recommender import AbstractRecommender

ITEM_MATRIX_CONFIG = ('telemetry-public-analysis-2', 'telemetry-ml/addon_recommender/item_matrix.json')
ADDON_MAPPING_CONFIG = ('telemetry-public-analysis-2', 'telemetry-ml/addon_recommender/addon_mapping.json')
from .s3config import TAAR_ITEM_MATRIX_BUCKET
from .s3config import TAAR_ITEM_MATRIX_KEY
from .s3config import TAAR_ADDON_MAPPING_BUCKET
from .s3config import TAAR_ADDON_MAPPING_KEY


# http://garage.pimentech.net/libcommonPython_src_python_libcommon_javastringhashcode/
def java_string_hashcode(s):
h = 0
for c in s:
Expand All @@ -33,24 +34,19 @@ class CollaborativeRecommender(AbstractRecommender):
recommender = CollaborativeRecommender()
dists = recommender.recommend(client_info)
"""

def __init__(self, ctx):
self._ctx = ctx

if 'collaborative_addon_mapping' in self._ctx:
self._addon_mapping = self._ctx['collaborative_addon_mapping']
else:
self._addon_mapping = LazyJSONLoader(self._ctx,
ADDON_MAPPING_CONFIG[0],
ADDON_MAPPING_CONFIG[1])
self._addon_mapping = LazyJSONLoader(
self._ctx, TAAR_ADDON_MAPPING_BUCKET, TAAR_ADDON_MAPPING_KEY
)

if 'collaborative_item_matrix' in self._ctx:
self._raw_item_matrix = self._ctx['collaborative_item_matrix']
else:
self._raw_item_matrix = LazyJSONLoader(self._ctx,
ITEM_MATRIX_CONFIG[0],
ITEM_MATRIX_CONFIG[1])
self._raw_item_matrix = LazyJSONLoader(
self._ctx, TAAR_ITEM_MATRIX_BUCKET, TAAR_ITEM_MATRIX_KEY
)

self.logger = self._ctx[IMozLogging].get_logger('taar')
self.logger = self._ctx[IMozLogging].get_logger("taar")

self.model = None
self._build_model()
Expand All @@ -66,45 +62,62 @@ def raw_item_matrix(self):
def _load_json_models(self):
# Download the addon mappings.
if self.addon_mapping is None:
self.logger.error("Cannot download the addon mapping file {} {}".format(*ADDON_MAPPING_CONFIG))
self.logger.error(
"Cannot download the addon mapping file {} {}".format(
TAAR_ADDON_MAPPING_BUCKET, TAAR_ADDON_MAPPING_KEY
)
)

if self.addon_mapping is None:
self.logger.error("Cannot download the model file {} {}".format(*ITEM_MATRIX_CONFIG))
self.logger.error(
"Cannot download the model file {} {}".format(
TAAR_ITEM_MATRIX_BUCKET, TAAR_ITEM_MATRIX_KEY
)
)

def _build_model(self):
if self.raw_item_matrix is None:
return

# Build a dense numpy matrix out of it.
num_rows = len(self.raw_item_matrix)
num_cols = len(self.raw_item_matrix[0]['features'])
num_cols = len(self.raw_item_matrix[0]["features"])

self.model = np.zeros(shape=(num_rows, num_cols))
for index, row in enumerate(self.raw_item_matrix):
self.model[index, :] = row['features']
self.model[index, :] = row["features"]

def can_recommend(self, client_data, extra_data={}):
# We can't recommend if we don't have our data files.
if self.raw_item_matrix is None or self.model is None or self.addon_mapping is None:
if (
self.raw_item_matrix is None
or self.model is None
or self.addon_mapping is None
):
return False

# We only get meaningful recommendation if a client has at least an
# addon installed.
if len(client_data.get('installed_addons', [])) > 0:
if len(client_data.get("installed_addons", [])) > 0:
return True

return False

def recommend(self, client_data, limit, extra_data={}):
# Addons identifiers are stored as positive hash values within the model.
installed_addons_as_hashes =\
[positive_hash(addon_id) for addon_id in client_data.get('installed_addons', [])]
installed_addons_as_hashes = [
positive_hash(addon_id)
for addon_id in client_data.get("installed_addons", [])
]

# Build the query vector by setting the position of the queried addons to 1.0
# and the other to 0.0.
query_vector = np.array([1.0
if (entry.get("id") in installed_addons_as_hashes)
else 0.0 for entry in self.raw_item_matrix])
query_vector = np.array(
[
1.0 if (entry.get("id") in installed_addons_as_hashes) else 0.0
for entry in self.raw_item_matrix
]
)

# Build the user factors matrix.
user_factors = np.matmul(query_vector, self.model)
Expand All @@ -119,28 +132,30 @@ def recommend(self, client_data, limit, extra_data={}):
# filter out legacy addons from the suggestions.
hashed_id = addon.get("id")
str_hashed_id = str(hashed_id)
if (hashed_id in installed_addons_as_hashes or
str_hashed_id not in self.addon_mapping or
self.addon_mapping[str_hashed_id].get("isWebextension", False) is False):
if (
hashed_id in installed_addons_as_hashes
or str_hashed_id not in self.addon_mapping
or self.addon_mapping[str_hashed_id].get("isWebextension", False)
is False
):
continue

dist = np.dot(user_factors_transposed, addon.get('features'))
dist = np.dot(user_factors_transposed, addon.get("features"))
# Read the addon ids from the "addon_mapping" looking it
# up by 'id' (which is an hashed value).
addon_id = self.addon_mapping[str_hashed_id].get("id")
distances[addon_id] = dist

# Sort the suggested addons by their score and return the
# sorted list of addon ids.
sorted_dists = sorted(distances.items(),
key=op.itemgetter(1),
reverse=True)
sorted_dists = sorted(distances.items(), key=op.itemgetter(1), reverse=True)
recommendations = [(s[0], s[1]) for s in sorted_dists[:limit]]

log_data = (client_data['client_id'],
str([r[0] for r in recommendations]))
self.logger.info("collaborative_recommender_triggered, "
"client_id: [%s], "
"guids: [%s]" % log_data)
log_data = (client_data["client_id"], str([r[0] for r in recommendations]))
self.logger.info(
"collaborative_recommender_triggered, "
"client_id: [%s], "
"guids: [%s]" % log_data
)

return recommendations

0 comments on commit 40d3ddf

Please sign in to comment.