Skip to content

Commit

Permalink
Merge pull request #22 from van51/master
Browse files Browse the repository at this point in the history
Added lang_detect backend for the demo
  • Loading branch information
Soeren Sonnenburg committed Sep 12, 2013
2 parents 9db74ef + dc93f60 commit 408e1ed
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 1 deletion.
67 changes: 67 additions & 0 deletions demos/application/LanguageClassifier.py
@@ -0,0 +1,67 @@
from modshogun import MulticlassLibLinear
from numpy import array

import json
import sys
import gzip as gz
import pickle as pkl

# Gzip-compressed pickle that load_classifier() reads when no other model
# is requested.
default_filepath = "data/lang_detection/default.svm.gz"

# Maps the numeric class index predicted by the classifier to a
# human-readable language name; the position in the tuple is the index.
id_to_lang = dict(enumerate(
    ("English", "Greek", "German", "Spanish", "Italian")))

class LanguageClassifier:
    """Classify a piece of text into one of the languages in ``id_to_lang``.

    The model is held in ``self.svm`` and must be loaded with either
    ``load_classifier()`` (gzip-compressed pickle) or ``load_svm()``
    (Shogun ASCII serialization) before ``classify_doc()`` is called.
    """

    def __init__(self):
        # No model is available until one of the load_* methods is called.
        self.svm = None

    def load_classifier(self, filepath=None):
        """Load a gzip-compressed pickled classifier into ``self.svm``.

        filepath -- path of the gzip file to read; when omitted, the
                    module-level ``default_filepath`` is used.

        Any error raised while opening or unpickling propagates to the
        caller; the stream is closed in either case.
        """
        if filepath is None:
            filepath = default_filepath

        # NOTE: unpickling can execute arbitrary code, so this must only
        # ever be pointed at trusted model files.
        gz_stream = gz.open(filepath, 'rb')
        try:
            self.svm = pkl.load(gz_stream)
        finally:
            # Close the stream even when unpickling fails so the file
            # handle is not leaked.
            gz_stream.close()

    def load_svm(self, filepath):
        """Load a MulticlassLibLinear model from a Shogun ASCII file.

        filepath -- path of the serialized model, opened for reading.
        """
        from modshogun import SerializableAsciiFile

        print("Attempting to load a multiclass liblinear svm from \"" +
              filepath + "\"")
        self.svm = MulticlassLibLinear()
        loader = SerializableAsciiFile(filepath, "r")
        self.svm.load_serializable(loader)
        print("Svm succesfully loaded")

    def classify_doc(self, doc):
        """Return the language name predicted for the string ``doc``.

        The document is tokenized into n-grams, hashed into a
        ``2 ** num_bits``-style feature space by HashedDocDotFeatures and
        fed to the loaded model; the first predicted label is translated
        through ``id_to_lang``.

        Raises KeyError if the predicted label is not in ``id_to_lang``.
        """
        from modshogun import StringCharFeatures, RAWBYTE
        from modshogun import HashedDocDotFeatures
        from modshogun import NGramTokenizer
        from modshogun import MulticlassLabels

        docs = [doc]
        string_feats = StringCharFeatures(docs, RAWBYTE)
        tokenizer = NGramTokenizer(4)
        normalize = True
        num_bits = 18

        # NOTE(review): the trailing 3 and 2 are presumably the n-gram and
        # skip settings of HashedDocDotFeatures -- confirm against the
        # Shogun signature. They must match the values used at training.
        hashed_doc_feats = HashedDocDotFeatures(num_bits, string_feats,
                                                tokenizer, normalize, 3, 2)

        labels = self.svm.apply(hashed_doc_feats).get_labels()

        # id_to_lang is keyed by int; cast so the lookup does not depend on
        # the numeric type of the label returned by the model.
        return id_to_lang[int(labels[0])]

if __name__=='__main__':
    # Interactive command-line demo: load a model, then classify sentences
    # read from stdin until the user types "!quit" or stdin is exhausted.
    lc = LanguageClassifier()
    if len(sys.argv)==1:
        # No argument: use the bundled gzip-compressed pickled model.
        lc.load_classifier()
    else:
        # First argument: path to a Shogun ASCII-serialized model.
        lc.load_svm(sys.argv[1])

    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    while True:
        print("Enter a sentence to classify or type \"!quit\" to quit")
        try:
            sentence = read_line()
        except EOFError:
            # End of input (Ctrl-D or piped stdin ran out): quit cleanly
            # instead of dying with a traceback.
            break
        if sentence=='!quit':
            break

        lang = lc.classify_doc(sentence)
        print("Your sentence \"" + sentence +"\" was classified as : " + lang)
4 changes: 4 additions & 0 deletions demos/application/__init__.py
@@ -1,4 +1,8 @@
from Ai import Ai
from LanguageClassifier import LanguageClassifier

# Module-level Ai instance, created when the package is imported and
# initialised through its load_classifier() method.
ai = Ai()
ai.load_classifier()

# Module-level LanguageClassifier instance, created the same way;
# load_classifier() is called with no arguments.
lc = LanguageClassifier()
lc.load_classifier()
30 changes: 30 additions & 0 deletions demos/application/lang_detect.py
@@ -0,0 +1,30 @@
from django.http import HttpResponse, Http404
from django.template import RequestContext
from django.shortcuts import render_to_response

def handler(request):
    """Dispatch a request for the language detection demo.

    GET requests are answered by ``entrance``; every other HTTP method
    is passed to ``recognize``.
    """
    view = entrance if request.method == 'GET' else recognize
    return view(request)

def entrance(request):
    """Render the landing page of the language detection demo."""
    template_vars = dict(title='Language Detection Demo')
    # Disabled properties kept from the original; they are not passed to
    # the template:
    #'template': {'type': 'drawing'},
    #'panels': [
    #    {
    #        'panel_name': 'preview',
    #        'panel_label': 'Preview'}]}
    page_context = RequestContext(request)
    return render_to_response("application/lang_detect.html",
                              template_vars,
                              context_instance=page_context)

def recognize(request):
    """Classify the text posted by the client and return the result as JSON.

    Expects ``request.POST['text']`` to hold a JSON-encoded string. On
    success the response body is ``{"predict": <language name>}``.

    Raises Http404 if the text is missing, is not valid JSON or cannot be
    classified; the traceback is printed for debugging first.
    """
    # Neither name is imported at the top of this module, so without these
    # imports every request failed with a NameError that the handler below
    # turned into a 404.
    import json
    # Shared classifier instance created in application/__init__.py.
    from application import lc

    try:
        text = json.loads(request.POST['text'])
        lang = lc.classify_doc(text)
        return HttpResponse(json.dumps({'predict': lang}))
    except Exception:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit are not converted into a 404.
        import traceback
        print(traceback.format_exc())
        raise Http404
1 change: 1 addition & 0 deletions shogun_demo/urls.py
Expand Up @@ -15,6 +15,7 @@
url(r'^dimred/tapkee/promoters.json', 'dimred.tapkee.promoters'),

url(r'^application/ocr/', 'application.ocr.handler'),
url(r'^application/ld/', 'application.lang_detect.handler'),

url(r'^misc/kernel_matrix/', 'misc.kernel_matrix.handler'),
url(r'^misc/tree/', 'misc.tree.handlers'),
Expand Down

0 comments on commit 408e1ed

Please sign in to comment.