/
sentiment.py
143 lines (113 loc) · 4.4 KB
/
sentiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import os
import re
import datetime
import time
import logging as log
from datetime import datetime
from google.appengine.ext.webapp import template
from google.appengine.ext import webapp
from google.appengine.ext.webapp import util
from models import *
from plugins.base import SENTIMENTS, init_plugins
import random
class MainHandler(webapp.RequestHandler):
    """Request handler that runs sentiment analysis over unanalyzed records.

    Query parameters read from the request:
      show_output -- any non-empty value makes the handler write a
                     per-record HTML report instead of the word "Done".
      random_vals -- any non-empty value replaces the "+" analysis with a
                     random integer in [-20, 20] for records that pass
                     the "X" filter.
    """

    def get(self):
        """Analyze every record whose ``analyzed`` flag is False.

        For each record the "X" operator is applied first; only when it
        returns a positive value is the record scored (with the "+"
        operator, or randomly when ``random_vals`` is set).  A non-zero
        score is stored as a Sentiment and folded into the company's
        running totals.  Every visited record is marked as analyzed.
        """
        show_output = bool(self.request.get('show_output'))
        recs = Record.all().filter('analyzed =', False)
        log.debug("%s records to analyze", recs.count())
        random_vals = bool(self.request.get('random_vals'))

        # Report buckets; only filled when show_output is set.
        false_term_lines = []
        neg_val_lines = []
        pos_val_lines = []
        no_val_lines = []

        # Wall-clock time: the log message below reports elapsed seconds.
        started = time.time()
        for rec in recs:
            # Bind once so every update and the final put() act on the
            # same company object.
            company = rec.company
            init_plugins(rec.text)

            # First filter out businesses that do not qualify.
            value, modtext = analyze("X", rec.text, company.name)
            line = company.name + ' | ' + rec.text

            # Continue with the analysis only if the filter result is > 0.
            if value > 0:
                if random_vals:
                    value = random.randint(-20, 20)
                    modtext = rec.text
                else:
                    value, modtext = analyze("+", rec.text, company.name)
                line = line + ' | ' + modtext + ' | ' + str(value)

                # Only create a Sentiment for a useful (non-zero) value.
                if value != 0:
                    log.debug("text %s", modtext)
                    log.debug("value %s", value)
                    sent = Sentiment(company=company, text=modtext,
                                     value=float(value), record=rec)
                    sent.put()
                    company.total_value += float(value)
                    company.sentiment_count += 1
                    company.average_value = (
                        company.total_value / company.sentiment_count)
                    company.put()
                    log.debug("Text recorded %s with a rating %s",
                              modtext, value)

                if show_output:
                    if value > 0:
                        pos_val_lines.append(line + "<BR/>")
                    elif value < 0:
                        neg_val_lines.append(line + "<BR/>")
                    else:
                        no_val_lines.append(line + "<BR/>")
            elif show_output:
                false_term_lines.append(line + "<BR/>")

            rec.analyzed = True
            rec.put()
        log.info("Sentiment analysis took %d seconds", time.time() - started)

        out = self.response.out
        if not show_output:
            out.write("Done")
            return

        # NOTE(review): company names and record text are written into the
        # HTML without escaping -- confirm they are trusted, or escape them.
        # Each bucket is joined into one string; writing the list object
        # itself would emit its repr (brackets, quotes and commas).
        out.write("<B>False Terms</B></br/>")
        out.write("".join(false_term_lines))
        out.write("</br/>")
        out.write("<B>No Values</B></br/>")
        out.write("".join(no_val_lines))
        out.write("</br/>")
        out.write("<B>Positive Sentiments</B></br/>")
        out.write("".join(pos_val_lines))
        out.write("</br/>")
        out.write("<B>Negative Sentiments</B></br/>")
        out.write("".join(neg_val_lines))
#inspired from Calculator plugin
class end_token(object):
    """Sentinel token type marking the end of a tokenized program."""

    # Presumably the "left binding power" of the top-down parser this code
    # is modelled on; nothing in this file reads it.
    lbp = 0
# Parsing and expressions:
# http://effbot.org/zone/simple-top-down-parsing.htm#function-calls
def tokenize(program):
    """Yield one token instance per operator in *program*, then an end_token.

    The program is scanned for runs of digits, the two-character operator
    ``**`` and any other single character, skipping leading whitespace
    before each match.  Each operator found in SENTIMENTS is instantiated
    and yielded.

    Raises:
        SyntaxError: when a matched piece of text is not a key of
            SENTIMENTS (this includes any run of digits).
    """
    for number, operator in re.findall(r"\s*(?:(\d+)|(\*\*|.))", program):
        if operator in SENTIMENTS:
            yield SENTIMENTS[operator]()
        else:
            # A run of digits is captured in `number` and leaves `operator`
            # empty; report whichever text was actually matched so the
            # message never reads "unknown operator: ''".
            raise SyntaxError("unknown operator: %r" % (operator or number))
    yield end_token()
def analyze(program, text, company_name):
    """Apply the first operator of *program* to *text*.

    Only the first token produced by ``tokenize(program)`` is used; any
    further operators in *program* are ignored.

    Args:
        program: operator string handed to tokenize().
        text: the text passed to the token's ``led`` method.
        company_name: the company name passed to the token's ``led`` method.

    Returns:
        A ``(value, modtext)`` tuple: the result of
        ``token.led(text, company_name)`` and the token's ``modtext``
        attribute read after that call.

    Raises:
        SyntaxError: propagated from tokenize() for an unknown operator.
    """
    # The builtin next() replaces the generator's `.next` attribute and
    # avoids shadowing the builtin with a local.  The token is kept local:
    # nothing in this module reads it as a module-level global.
    token = next(tokenize(program))
    log.debug("send text for analysis: %s", text)
    value = token.led(text, company_name)
    # Return the sentiment value and the modified text to the caller.
    return value, token.modtext
def main():
    """Build the WSGI application for this module and run it."""
    # Single route: GET /sentiment is served by MainHandler.
    routes = [('/sentiment', MainHandler)]
    util.run_wsgi_app(webapp.WSGIApplication(routes, debug=True))


# Run the application when this file is executed as a script.
if __name__ == '__main__':
    main()