This repository has been archived by the owner on Jan 31, 2022. It is now read-only.
/
app.py
128 lines (102 loc) · 4.25 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import hashlib
import os
import logging
from flask import (abort, Flask, session, render_template,
session, redirect, url_for, request,
flash, jsonify)
from flask_session import Session
from urllib import request as request_url
from pathlib import Path
from inference import InferenceWrapper, pass_through
# The Flask application object; the route decorators in this module register on it.
app = Flask(__name__)
# Configure session to use filesystem. Hamel: BOILERPLATE.
# NOTE(review): only SESSION_PERMANENT is set here; no SESSION_TYPE is
# configured in this block, so whether the filesystem backend is actually
# selected depends on Flask-Session's default -- confirm.
app.config["SESSION_PERMANENT"] = False
Session(app)
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
def init_language_model():
    """
    Download the pre-trained language model (if it is not already cached) and
    attach an ``InferenceWrapper`` for it to the Flask app.

    The model is cached at ``./model_files/model.pkl``. When that file is
    missing it is fetched from ``model_url``. The download is written to a
    temporary ``model.pkl.part`` file and only renamed to ``model.pkl`` once
    it has completed, so an interrupted download can never be mistaken for a
    valid cached model on the next start.

    Side effects:
        Sets ``app.inference_wrapper``.

    Raises:
        Any exception raised by ``urlretrieve`` (e.g. ``urllib.error.URLError``)
        when the download fails; the partial file is removed first.
    """
    model_url = 'https://storage.googleapis.com/issue_label_bot/model/lang_model/models_22zkdqlr/trained_model_22zkdqlr.pkl'
    path = Path('./model_files')
    full_path = path/'model.pkl'

    if not full_path.exists():
        print('Loading model.')
        path.mkdir(exist_ok=True)
        partial_path = path/'model.pkl.part'
        try:
            request_url.urlretrieve(model_url, partial_path)
            # Atomic rename: model.pkl only ever appears fully written.
            os.replace(partial_path, full_path)
        finally:
            # After a successful rename the partial file no longer exists;
            # after a failed download this removes the truncated leftovers.
            if partial_path.exists():
                partial_path.unlink()

    # NOTE(review): the model is a .pkl fetched over the network; if
    # InferenceWrapper unpickles it, the download source must be trusted --
    # confirm against the inference module.
    app.inference_wrapper = InferenceWrapper(model_path=path, model_file_name='model.pkl')
    LOG.warning('Finished loading model.')
@app.route("/healthz", methods=["GET"])
def healthz():
    """Health-check route: replies 200 with a ``{'success': True}`` JSON body."""
    body = jsonify({'success': True})
    # NOTE(review): 'ContentType' is not the standard 'Content-Type' header
    # name; it is kept as-is so the response is unchanged.
    extra_headers = {'ContentType': 'application/json'}
    return body, 200, extra_headers
@app.route("/", methods=["GET"])
def index():
    """Landing page: renders the ``index.html`` template."""
    landing_page = render_template("index.html")
    return landing_page
# smee by default sends things to /event_handler route
@app.route("/text", methods=["POST"])
def text():
    """
    Route that allows user to send json with raw text of title and body. This
    route expects a payload to be sent that contains:

    {'title': "some text ...",
     'body': "some text ...."}

    Returns:
        The pooled embedding features serialized as raw bytes
        (the result of ``ndarray.tobytes()``).

    Aborts with HTTP 400 when the request body is not a JSON object or is
    missing the 'title' or 'body' key.
    """
    # Validate up front so a malformed request is answered with a 400 rather
    # than surfacing as an unhandled KeyError/TypeError (HTTP 500).
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'title' not in payload or 'body' not in payload:
        abort(400, description="Expected a JSON object with 'title' and 'body' keys.")

    # pre-process data
    title = payload['title']
    body = payload['body']
    logging.debug("Recieved title=%s body=%s", title, body)
    data = app.inference_wrapper.process_dict({'title': title, 'body': body})
    logging.debug('prediction requested for %s', data)

    # make prediction: you can only return strings with api
    # decode with np.frombuffer(request.content, dtype='<f4')
    pooled = app.inference_wrapper.get_pooled_features(data['text'])
    # tobytes() replaces the deprecated ndarray.tostring() alias (removed in
    # NumPy 2.0); the produced bytes are the same.
    embeddings_str = pooled.detach().numpy().tobytes()

    # For debugging print out hash of the content embeddings. This is to
    # see if they are changing. Only hash when the message will be emitted.
    if logging.getLogger().isEnabledFor(logging.DEBUG):
        digest = hashlib.md5(embeddings_str).hexdigest()
        logging.debug("hash of embeddings %s", digest)

    return embeddings_str
@app.route("/all_issues/<string:owner>/<string:repo>", methods=["POST"])
def fetch_issues(owner, repo):
    """
    Retrieve the embeddings for all the issues of a repo.

    This endpoint is not implemented: every request is answered with
    HTTP 501 (Not Implemented).

    Args:
        owner: repository owner taken from the URL.
        repo: repository name taken from the URL.
    """
    #TODO: finish this
    #TODO(jlewi): I'm not sure we want to finish this method. It might be
    #better if the embedding service wasn't aware of GitHub; i.e. the input
    #was always title and text. Any interactions with GitHub should then
    #happen in microservices in front of this one.
    #
    # The earlier draft returned a NotImplementedError instance, which Flask
    # cannot turn into a response (the client got a 500), and was followed by
    # unreachable checks (app installed on the repo, repo is public) that
    # referenced a helper not defined in this module.
    abort(501, description="Retrieving embeddings for all issues of a repository is not implemented.")
def is_public(owner, repo, timeout=10):
    """
    Verify repo is public.

    Sends an HTTP HEAD request to ``https://github.com/{owner}/{repo}`` and
    reports whether the final response status is 200.

    Args:
        owner: repository owner (user or organisation name).
        repo: repository name.
        timeout: seconds to wait for GitHub before giving up.

    Returns:
        True when the final response status is 200; False for any other
        status, for empty ``owner``/``repo``, and for any network or URL error.

    Note:
        ``urlopen`` follows redirects, so a URL that redirects to a page
        answering 200 is also reported as public.
    """
    # Function-scope stdlib imports: the earlier draft relied on the
    # third-party ``requests`` package, which this module never imports, so
    # the call always raised NameError and the bare ``except`` turned that
    # into a constant False.
    import http.client
    from urllib import parse as url_parse
    from urllib import request as url_request

    if not owner or not repo:
        return False

    # Escape both parts so unusual characters cannot change the URL path.
    owner_part = url_parse.quote(str(owner), safe='')
    repo_part = url_parse.quote(str(repo), safe='')
    head_request = url_request.Request(
        f'https://github.com/{owner_part}/{repo_part}', method='HEAD')

    try:
        with url_request.urlopen(head_request, timeout=timeout) as response:
            return response.status == 200
    except (OSError, ValueError, http.client.HTTPException):
        # URLError/HTTPError (e.g. 404) and timeouts are OSError subclasses.
        return False
if __name__ == "__main__":
    # Script entry point: configure logging, load the model, then serve.
    log_format = ('%(levelname)s|%(asctime)s'
                  '|%(message)s|%(pathname)s|%(lineno)d|')
    logging.basicConfig(
        level=logging.INFO,
        format=log_format,
        datefmt='%Y-%m-%dT%H:%M:%S',
    )

    init_language_model()

    # Only "true" / "t" (case-insensitive) turn the flag on.
    FLASK_DEBUG = os.getenv("FLASK_DEBUG", "false").lower() in ["true", "t"]
    logging.info(f"FLASK_DEBUG={FLASK_DEBUG}")

    if FLASK_DEBUG:
        debug_unsupported = (f"Flask debug mode currently doesn't work with the "
                             f"embedding model. See "
                             f"https://github.com/kubeflow/code-intelligence/pull/77#issuecomment-569105812")
        raise ValueError(debug_unsupported)

    # PORT is passed through exactly as read from the environment.
    app.run(
        debug=FLASK_DEBUG,
        host='0.0.0.0',
        port=os.getenv('PORT'),
        threaded=False,
    )