In [5]:
# test if we can use the Python object

import pickle
import re
import os
from vectorizer import vect
# Load the pickled classifier. The with-block closes the file as soon as
# unpickling finishes instead of leaving the handle open.
# NOTE(review): pickle.load can execute arbitrary code — only load a
# classifier.pkl that you produced yourself.
with open(os.path.join('pkl_objects', 'classifier.pkl'), 'rb') as clf_file:
    clf = pickle.load(clf_file)

In [12]:
import numpy as np
# Map the classifier's integer class ids to readable sentiment names.
label = {0: 'negative', 1: 'positive'}
example = ['I love this movie']

# Vectorize the sample review, then report the predicted class together with
# the largest class probability expressed as a percentage.
X = vect.transform(example)
predicted_class = clf.predict(X)[0]
top_probability = np.max(clf.predict_proba(X) * 100)
print('Prediction:%s\nProbability:%.2f%%' % (label[predicted_class], top_probability))

Prediction:positive
Probability:85.58%


### 设置SQLite 数据库用来数据存储
在这里，我们用 SQLite 数据库收集用户对网络应用预测结果的反馈，之后可以用这些反馈来更新我们的分类模型。  
下面在 movieclassifier 目录下创建数据库 `reviews.sqlite`，并存入两条示例电影评论：  
可以用 Firefox 的 SQLite Manager 插件查看数据库内容：https://addons.mozilla.org/en-US/firefox/addon/sqlite-manager/

In [20]:
import sqlite3
import os

# Create (or reuse) the SQLite file that stores reviews and their labels.
conn = sqlite3.connect('reviews.sqlite')
try:
    c = conn.cursor()
    # IF NOT EXISTS keeps this cell re-runnable: a plain CREATE TABLE raises
    # "table review_db already exists" on the second execution.
    c.execute('CREATE TABLE IF NOT EXISTS review_db'\
             '(review TEXT, sentiment INTEGER, date TEXT)')

    example1 = 'I love this movie'
    example2 = 'I disliked this movie'

    # Seed the two example rows only while the table is still empty, so
    # re-running the cell does not pile up duplicate rows.
    c.execute('SELECT COUNT(*) FROM review_db')
    if c.fetchone()[0] == 0:
        c.execute("INSERT INTO review_db\
         (review,sentiment,date) VALUES\
         (?,?,DATETIME('now'))",(example1,1))
        c.execute("INSERT INTO review_db\
         (review, sentiment, date) VALUES\
         (?,?,DATETIME('now'))",(example2,0))
    conn.commit()
finally:
    # Release the connection even if one of the statements above fails.
    conn.close()

In [21]:
conn = sqlite3.connect('reviews.sqlite')
try:
    c = conn.cursor()
    # Select every review dated between the start of 2015 and the current time.
    c.execute("SELECT * FROM review_db WHERE date\
         BETWEEN '2015-01-01 00:00:00' AND DATETIME('now')")
    results = c.fetchall()
finally:
    # Close the connection even when the query raises (e.g. missing table).
    conn.close()
print(results)

[('I love this movie', 1, '2018-04-09 03:40:00'), ('I disliked this movie', 0, '2018-04-09 03:40:00')]


### 通过Flask来创建一个web application
Flask 是用 Python 编写的微框架（microframework）：它的内核保持精简（kept lean and simple），但很容易通过其他库进行扩展。  
Python 的其他 web 框架还有 Django 等。  
第一个 Flask 应用的目录结构如下：

```
1st_flask_app_1/
    app.py
    templates/
        first_app.html
```

In [None]:
# app.py
from flask import Flask,render_template

# Application object on which the route decorator below registers views.
app = Flask(__name__)


@app.route('/')  # the route decorator makes requests for the URL '/' execute index()
def index():
    """Return the page rendered from the first_app.html template."""
    page = render_template('first_app.html')
    return page


if __name__ == '__main__':
    # Start the server only when this file is executed as a script.
    app.run()

In [1]:
# first_app.html
<!doctype html>
<!-- Static landing page rendered by the index view; it contains no template variables. -->
<html>
    <head>
        <title>First app</title>
    </head>
    <body>
        <div>Hi, this is my first Flask web app!</div>
    </body>
</html>

SyntaxError: invalid syntax (<ipython-input-1-983dbe3c3a91>, line 2)

### Form validation and rendering 表单验证与渲染
利用WTForms库来收集用户的数据

In [None]:
from flask import Flask, render_template, request
from wtforms import Form, TextAreaField, validators
app = Flask(__name__)

class HelloForm(Form):
    """Form with a single required text area, posted as ``sayhello``."""

    sayhello = TextAreaField(label='', validators=[validators.DataRequired()])
    
@app.route('/')
def index():
    """Render first_app.html with a HelloForm built from the request's form data."""
    hello_form = HelloForm(request.form)
    return render_template('first_app.html', form=hello_form)

@app.route('/hello', methods=['POST'])
def hello():
    """Greet the submitter, or show the form again when validation fails."""
    form = HelloForm(request.form)
    submission_ok = request.method == 'POST' and form.validate()
    if not submission_ok:
        # Re-render the form so the template can list the field errors.
        return render_template('first_app.html',form=form)
    name = request.form['sayhello']
    return render_template('hello.html',name=name)

if __name__ == "__main__":
    # Start the server with debug=True only when this file is run directly.
    app.run(debug=True)

In [None]:
# Jinja2  _formhelpers.html
{# render_field(field): emits the field's label in a <dt> and the rendered
   field in a <dd>, followed by a list of the field's errors when it has any.
   Extra keyword arguments passed to the macro are forwarded to the field
   call through kwargs. #}
{% macro render_field(field) %}
    <dt>{{ field.label }}
    <dd>{{ field(**kwargs)|safe }}
    {% if field.errors %}
        <ul class=errors>
        {% for error in field.errors %}
            <li>{{ error }}</li>
        {% endfor %}
        </ul>
    {% endif %}
    </dd>
{% endmacro %}

In [None]:
# style.css — Cascading Style Sheets (CSS): restyles the HTML body element

/* Render all text inside <body> at twice the inherited font size. */
body{
    font-size: 2em;
}


In [None]:
# first_app.html

<!doctype html>
{# Name-entry page: renders form.sayhello through the render_field macro and
   posts the form to /hello. #}
<html>
    <head>
        <title>First app</title>
    <link rel="stylesheet" href="{{ url_for('static',filename='style.css') }}">
    </head>
    <body>
    
{% from "_formhelpers.html" import render_field %}

<div>What's your name?</div>
<form method=post action="/hello">
    <dl>
        {{ render_field(form.sayhello) }}
    </dl>
    <input type=submit value='Say Hello' name='submit_btn'>
</form>
    </body>
</html>

In [None]:
# hello.html

<!doctype html>
{# Greeting page: displays the `name` value supplied by the hello view. #}
<html>
    <head>
        <title>First app</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
    </head>
    <body>
<div>Hello {{ name }}</div>
    </body>
</html>

将movie classifier转到web应用当中

In [2]:
# app.py
from flask import Flask, render_template, request
from wtforms import Form, TextAreaField, validators
import pickle
import sqlite3
import os
import numpy as np
# import HashingVectorizer from local dir
from vectorizer import vect

app = Flask(__name__)

####### Preparing the Classifier
cur_dir = os.path.dirname(__file__)
# Unpickle the trained classifier. The with-block closes the file right after
# loading instead of keeping the handle open for the lifetime of the process.
# NOTE(review): pickle.load can execute arbitrary code — only deploy a
# classifier.pkl that you built yourself.
with open(os.path.join(cur_dir,'pkl_objects/classifier.pkl'),'rb') as model_file:
    clf = pickle.load(model_file)
db = os.path.join(cur_dir, 'reviews.sqlite')

def classify(document):
    """Predict the sentiment of a single review.

    Args:
        document: review text; it is wrapped in a one-element list before
            being handed to ``vect.transform``.

    Returns:
        A ``(label, probability)`` tuple: the label is ``'negative'`` or
        ``'positive'`` and the probability is the largest value returned by
        ``clf.predict_proba`` for the review.
    """
    features = vect.transform([document])
    predicted = clf.predict(features)[0]
    confidence = np.max(clf.predict_proba(features))
    sentiment_names = {0:'negative', 1:'positive'}
    return sentiment_names[predicted], confidence

def train(document,y):
    """Incrementally fit the module-level classifier on one labelled review.

    Args:
        document: review text, vectorized as a single-item batch.
        y: class label passed to ``clf.partial_fit`` for that review.
    """
    single_label = [y]
    features = vect.transform([document])
    clf.partial_fit(features, single_label)
    
def sqlite_entry(path, document,y):
    """Append one labelled review to the ``review_db`` table.

    Args:
        path: filesystem path of the SQLite database.
        document: review text stored in the ``review`` column.
        y: label stored in the ``sentiment`` column.

    The ``date`` column is filled in by SQLite with ``DATETIME('now')``.
    Errors raised by sqlite3 (e.g. a missing table) propagate to the caller.
    """
    conn = sqlite3.connect(path)
    try:
        c = conn.cursor()
        c.execute("INSERT INTO review_db (review, sentiment, date) \
              VALUES (?,?,DATETIME('now'))",(document,y))
        conn.commit()
    finally:
        # Always release the connection, even when the INSERT fails.
        conn.close()

# web part
class ReviewForm(Form):
    """Form with one required text area for a review of at least 15 characters."""

    moviereview = TextAreaField(
        label='',
        validators=[validators.DataRequired(), validators.length(min=15)],
    )
    
@app.route('/')
def index():
    """Render reviewform.html with a ReviewForm built from the request's form data."""
    review_form = ReviewForm(request.form)
    return render_template('reviewform.html',form=review_form)

@app.route('/results',methods=['POST'])
def results():
    """Classify the submitted review and render the prediction page.

    When validation fails, the review form is rendered again with the same
    form object so its errors can be displayed.
    """
    form = ReviewForm(request.form)
    if not (request.method == 'POST' and form.validate()):
        return render_template('reviewform.html', form=form)

    review = request.form['moviereview']
    y, proba = classify(review)
    # The probability is passed to the template as a percentage rounded to two decimals.
    return render_template(
        'results.html',
        content=review,
        prediction=y,
        probability=round(proba*100, 2),
    )

@app.route('/thanks', methods=['POST'])
def feedback():
    """Record the user's verdict on a prediction and update the model.

    Reads the pressed button, the review text and the predicted label from the
    posted form. When the button value is 'Incorrect' the label is flipped
    before the review is used for training and written to the database.
    """
    verdict = request.form['feedback_button']
    review = request.form['review']
    prediction = request.form['prediction']

    # Translate the label text back to its integer class id.
    y = {'negative': 0, 'positive': 1}[prediction]
    if verdict == 'Incorrect':
        # The user disagreed with the prediction: use the opposite class.
        y = 1 - y

    train(review, y)
    sqlite_entry(db, review, y)
    return render_template('thanks.html')

from update import update_model

if __name__ == '__main__':
    # Train the classifier on the stored feedback before serving.
    # update_model names its first parameter db_path, so the path must be
    # passed under that keyword (any other keyword raises TypeError).
    update_model(db_path=db, model=clf, batch_size=10000)
    app.run(debug=True)
    

In [None]:
# reviewform.html

<!doctype html>
{# Review entry page: renders form.moviereview as a 30x10 text area through
   the render_field macro and posts the form to /results. #}
<html>
<head>
    <title>Movie Classification</title>
</head>
    <body>
    
    <h2>Please enter your movie review:</h2>
        
    {% from "_formhelpers.html" import render_field %}
    
<form method=post action="/results">
    <dl>
        {{ render_field(form.moviereview, cols='30', rows='10') }}
    </dl>
    <div>
        <input type=submit value='Submit review' name='submit_btn'>
    </div>
</form>

    </body>
</html>

In [None]:
# results.html

<!doctype html>
{# Prediction page: shows the submitted review (content) with its predicted
   label and probability. The first form posts the pressed feedback button
   together with the prediction and review text (hidden inputs) to /thanks;
   the second form links back to '/'.
   NOTE(review): both wrapper divs below use id='button'; HTML ids are meant
   to be unique within a page. #}
<html>
    <head>
        <title>Movie Classification</title>
    <link rel="stylesheet" href="{{ url_for('static',filename='style.css') }}">
    </head>
    <body>
    
<h3>Your movie review:</h3>
<div>{{ content }}</div>

<h3>Prediction:</h3>
<div>This movie review is <strong>{{ prediction }}</strong>(probability: {{ probability }}%).</div>

<div id='button'>
    <form action="/thanks" method="post">
        <input type=submit value='Correct' name='feedback_button'>
        <input type=submit value='Incorrect' name='feedback_button'>
        <input type=hidden value='{{ prediction }}' name='prediction'>
        <input type=hidden value='{{ content }}' name='review'>
    </form>
</div>

<div id='button'>
    <form action="/">
        <input type=submit value='Submit another review'>
    </form>
</div>

    </body>
</html>

In [None]:
# style.css

/* Constrain the page content to a fixed width. */
body{
    width:600px;
}
/* The templates wrap their buttons in elements with id='button', so select
   by id. A bare `button` selector only matches <button> elements, which the
   templates do not contain, so the padding was never applied. */
#button{
    padding-top: 20px;
}

In [None]:
# thanks.html

<!doctype html>
<!-- Confirmation page rendered by the feedback view; the form links back to '/'. -->
<html>
    <head>
        <title>Movie Classification</title>
</head>
    <body>
    
<h3>Thank you for your feedback!</h3>
<div id='button'>
    <form action="/">
        <input type=submit value='Submit another review'>
    </form>
</div>

    </body>
</html>

### 将web application放到public server
在这里用PythonAnywhere web hosting service,  
https://www.pythonanywhere.com/   

http://luxuriance.pythonanywhere.com/  (这个项目放的地址）

更新 movie review classifier：
应用在线运行时会实时接收反馈数据并更新模型，但如果不把用这些数据训练过的模型保存下来，那么软件崩溃或重启之后，这些更新就会丢失，又要从最初的参数重新开始训练。 
一种选择是每次更新后都重新 pickle 这个 clf 对象；但当用户数量很多时计算效率会很低，而且多个用户同时提供反馈时可能发生冲突，导致模型文件损坏。  
另一种解决方法是，利用 web 端收集到 SQLite 数据库中的反馈数据，在本地机器上更新分类器。  
对应的脚本是 movieclassifier 目录下的 update.py：

In [None]:
import pickle
import sqlite3
import numpy as np
import os

# import HashingVectorizer from local dir
from vectorizer import vect

def update_model(db_path, model, batch_size=10000):
    """Incrementally train ``model`` on every row stored in ``review_db``.

    Rows are read in batches of at most ``batch_size``. For each batch the
    first column (review text) is vectorized with ``vect`` and the second
    column (sentiment) is used as the integer label.

    Args:
        db_path: path of the SQLite database to read.
        model: estimator exposing ``partial_fit(X, y, classes=...)``; it is
            updated in place.
        batch_size: maximum number of rows fetched and fitted per step.

    Returns:
        None.
    """
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        c.execute('SELECT * from review_db')

        # The set of labels is fixed, so build it once outside the loop.
        classes = np.array([0,1])

        results = c.fetchmany(batch_size)
        while results:
            data = np.array(results)
            X = data[:, 0]
            y = data[:, 1].astype(int)

            X_train = vect.transform(X)
            # Train the estimator that was passed in, not a module-level global.
            model.partial_fit(X_train, y, classes=classes)
            # Re-assign the loop variable itself; fetchmany returns an empty
            # list once the cursor is exhausted, which ends the loop.
            results = c.fetchmany(batch_size)
    finally:
        # Release the connection even if reading or fitting fails.
        conn.close()
    return None

if __name__ == '__main__':
    # Retrain and re-pickle only when this file is executed as a script.
    # Importing the module (e.g. ``from update import update_model``) must not
    # rewrite classifier.pkl as a side effect.
    cur_dir = os.path.dirname(__file__)
    model_path = os.path.join(cur_dir,'pkl_objects','classifier.pkl')

    # NOTE(review): pickle.load can execute arbitrary code — only load a
    # classifier.pkl that you produced yourself.
    with open(model_path,'rb') as model_file:
        clf = pickle.load(model_file)
    db = os.path.join(cur_dir, 'reviews.sqlite')

    update_model(db, clf, batch_size=10000)

    # Persist the classifier; the with-block flushes and closes the file even
    # if pickling fails part-way.
    with open(model_path,'wb') as model_file:
        pickle.dump(clf, model_file, protocol=4)