Added the ability to create a new model from scratch

nasa-jpl-memex · Jun 15, 2017 · e44af8d · e44af8d
1 parent 02cb060
commit e44af8d
Show file tree

Hide file tree

Showing 3 changed files with 25 additions and 6 deletions.
diff --git a/webui/app/__init__.py b/webui/app/__init__.py
@@ -2,6 +2,9 @@
 from flask import Flask
 from app.apis import api
 
+from sklearn.externals import joblib
+import os
+
 # Define the WSGI application object
 app = Flask(__name__,
             static_url_path='',
@@ -18,6 +21,20 @@
 # Register blueprint(s)
 app.register_blueprint(app_module)
 
+# Restore a previously saved model at start-up, if there is one.
+# The presence test uses a path relative to the working directory -- the file
+# that clear_model()/update_model() in app.classifier delete and write -- while
+# the model is read from the upload folder. Loading is therefore also guarded
+# on the upload-folder file existing, so a mismatch between the two locations
+# no longer aborts start-up with an I/O error.
+# NOTE(review): confirm whether both paths are meant to name the same file.
+# joblib.load unpickles its input, so only trusted files may be stored there.
+filename = 'model.pkl'
+if os.path.isfile(filename):
+    model_path = os.path.join(app.root_path, app.config['UPLOAD_FOLDER'], filename)
+    if os.path.isfile(model_path):
+        model = joblib.load(model_path)
+        app.model = model
 
 # Initialize flask-restplus
 api.init_app(app)
diff --git a/webui/app/classifier/__init__.py b/webui/app/classifier/__init__.py
@@ -26,34 +26,38 @@ def load_vocab():
     return keywords
 
 
+def clear_model():
+    """Discard the saved model (pickle file and app attribute); return '0'."""
+    print('clear_model')
+    if os.path.isfile('model.pkl'):
+        os.remove('model.pkl')
+    flask.current_app.model = None
+    return '0'
+
+
 def update_model(annotations):
     global accuracy, splits, iteration
 
-    keywords = getattr(flask.current_app, 'keywords', None)
     url_text = getattr(flask.current_app, 'url_text', None)
 
     clf = MLPClassifier(max_iter=1000, learning_rate='adaptive',)
     count_vect = CountVectorizer(lowercase=True, stop_words='english')
     tfidftransformer = TfidfTransformer()
 
-
     if url_text is None:
         print('An error occurred while accessing the application context variables')
         return '-1'
 
     labeled = np.array(annotations)
     model=getattr(flask.current_app, 'model', None)
 
-
     if model is not None:
         # add the old docs to the new
         prev_url_text=model['url_text']
         prev_labeled=model['labeled']
         url_text=np.append(url_text,prev_url_text,axis=0)
         labeled=np.append(labeled,prev_labeled,axis=0)
 
-
-
     features = count_vect.fit_transform(url_text)
     features=tfidftransformer.fit_transform(features).toarray().astype(np.float64)
 
@@ -69,6 +73,9 @@ def update_model(annotations):
     predicted = clf.predict(features)
     accuracy = (labeled == predicted).sum() / float(len(labeled))
 
+    fname = 'model.pkl'
+    joblib.dump(model, fname)
+
     return str(accuracy)
 
 
@@ -131,7 +138,6 @@ def export_model():
     model['accuracy']=accuracy
 
     fname = 'model.pkl'
-
     joblib.dump(model, fname)
 
     return flask.send_from_directory(directory=flask.current_app.root_path + '/../', filename=fname)

diff --git a/webui/app/controller.py b/webui/app/controller.py
@@ -1,5 +1,6 @@
 from flask import Blueprint, request, render_template, redirect, url_for, send_from_directory
 from app import classifier
+import os
 
 # Define Blueprint(s)
 mod_app = Blueprint('application', __name__, url_prefix='/explorer')
@@ -11,6 +12,11 @@ def index():
     return send_from_directory('static/pages', 'index.html')
 
 
+@mod_app.route('/classify/createnew/', methods=['GET'])
+def create_new_model():  # GET endpoint: resets the classifier via classifier.clear_model()
+    return classifier.clear_model()  # the returned string is used as the response body
+
+
 # POST Requests
 @mod_app.route('/classify/update/', methods=['POST'])
 def build_model():