Permalink
Browse files

Merge branch 'search'

* search:
  Turning off auto-indexing for starred stories until it's ready to go.
  Adding elasticsearch fabfile config.
  Updating search for saved stories to search fuzzy queries. Connected to UI.
  Making poor progress on search. Can't even query yet.
  Adding Search app and object, used to index stories and then search them. Needs views, ui, and support for saved stories.
  • Loading branch information...
2 parents 833edd1 + 3449e22 commit d5c6475fca32db3cb017a781630952b330299c7e @samuelclay committed Dec 21, 2012
View
@@ -18,7 +18,7 @@
from django.core.validators import email_re
from django.core.mail import EmailMultiAlternatives
from django.contrib.sites.models import Site
-from mongoengine.queryset import OperationError, Q
+from mongoengine.queryset import OperationError
from apps.recommendations.models import RecommendedFeed
from apps.analyzer.models import MClassifierTitle, MClassifierAuthor, MClassifierFeed, MClassifierTag
from apps.analyzer.models import apply_classifier_titles, apply_classifier_feeds
@@ -29,6 +29,7 @@
from apps.reader.forms import SignupForm, LoginForm, FeatureForm
from apps.rss_feeds.models import MFeedIcon
from apps.statistics.models import MStatistics
+from apps.search.models import SearchStarredStory
try:
from apps.rss_feeds.models import Feed, MFeedPage, DuplicateFeed, MStory, MStarredStory
except:
@@ -635,11 +636,11 @@ def load_starred_stories(request):
if page: offset = limit * (page - 1)
if query:
+ results = SearchStarredStory.query(user.pk, query)
+ story_ids = [result.db_id for result in results]
mstories = MStarredStory.objects(
- Q(user_id=user.pk) &
- (Q(story_title__icontains=query) |
- Q(story_content__icontains=query) |
- Q(story_author_name__icontains=query))
+ user_id=user.pk,
+ id__in=story_ids
).order_by('-starred_date')[offset:offset+limit]
else:
mstories = MStarredStory.objects(
View
@@ -20,6 +20,7 @@
from mongoengine.queryset import OperationError, Q
from mongoengine.base import ValidationError
from apps.rss_feeds.tasks import UpdateFeeds, PushFeeds
+from apps.search.models import SearchStarredStory
from utils import json_functions as json
from utils import feedfinder, feedparser
from utils import urlnorm
@@ -1499,6 +1500,18 @@ def save(self, *args, **kwargs):
self.story_original_content_z = zlib.compress(self.story_original_content)
self.story_original_content = None
super(MStarredStory, self).save(*args, **kwargs)
+
+ # self.index_for_search()
+
+    def index_for_search(self):
+        """Push this starred story into the SearchStarredStory search index.
+
+        Decompresses the stored story content and hands the title, content,
+        author, date, owning user id and the string form of this document's
+        id to SearchStarredStory.index().
+        """
+        # story_content_z may be unset (presumably for a story saved without
+        # content -- confirm against save()); zlib.decompress(None) would
+        # raise, so index an empty body instead of failing.
+        story_content = ''
+        if self.story_content_z:
+            story_content = zlib.decompress(self.story_content_z)
+
+        SearchStarredStory.index(user_id=self.user_id,
+                                 story_id=self.story_guid,
+                                 story_title=self.story_title,
+                                 story_content=story_content,
+                                 story_author=self.story_author_name,
+                                 story_date=self.story_date,
+                                 db_id=str(self.id))
@property
def guid_hash(self):
View
No changes.
View
@@ -0,0 +1,85 @@
+import pyes
+from django.conf import settings
+
+class SearchStarredStory:
+    """Elasticsearch index of starred (saved) stories.
+
+    All methods are classmethods that talk to Elasticsearch through the
+    shared ``ES`` connection. Documents are stored in the
+    "starred-stories-index" index under the "starred-stories-type" type.
+    """
+
+    ES = pyes.ES(settings.ELASTICSEARCH_HOSTS)
+    name = "starred-stories"
+
+    # Fields tried, in this order, with a fuzzy query when the plain query
+    # string matches nothing.
+    FUZZY_FIELDS = ('title', 'content', 'author')
+
+    @classmethod
+    def _index_name(cls):
+        """Return the name of the Elasticsearch index used by this class."""
+        return "%s-index" % cls.name
+
+    @classmethod
+    def _type_name(cls):
+        """Return the name of the Elasticsearch document type used by this class."""
+        return "%s-type" % cls.name
+
+    @classmethod
+    def create_elasticsearch_mapping(cls):
+        """Create the index and install the field mapping for starred stories."""
+        cls.ES.create_index(cls._index_name())
+        mapping = {
+            'title': {
+                'boost': 2.0,
+                'index': 'analyzed',
+                'store': 'yes',
+                'type': 'string',
+                "term_vector" : "with_positions_offsets"
+            },
+            'content': {
+                'boost': 1.0,
+                'index': 'analyzed',
+                'store': 'yes',
+                'type': 'string',
+                "term_vector" : "with_positions_offsets"
+            },
+            'author': {
+                'boost': 1.0,
+                'index': 'analyzed',
+                'store': 'yes',
+                'type': 'string',
+            },
+            'db_id': {
+                'index': 'not_analyzed',
+                'store': 'yes',
+                'type': 'string',
+            },
+            'feed_id': {
+                'store': 'yes',
+                'type': 'integer'
+            },
+            'date': {
+                'store': 'yes',
+                'type': 'date',
+            },
+            'user_ids': {
+                'index': 'not_analyzed',
+                'store': 'yes',
+                'type': 'integer',
+                'index_name': 'user_id'
+            }
+        }
+        cls.ES.put_mapping(cls._type_name(), {'properties': mapping}, [cls._index_name()])
+
+    @classmethod
+    def index(cls, user_id, story_id, story_title, story_content, story_author, story_date, db_id):
+        """Index one starred story.
+
+        ``db_id`` is the string id of the starred-story database record; it
+        is stored in the document and also used as the Elasticsearch
+        document id. ``story_id`` (the story guid) is accepted for interface
+        compatibility but is not used as the document id: the same story
+        starred by two users shares one guid, so keying on it made the
+        second user's document overwrite the first.
+        """
+        doc = {
+            "content": story_content,
+            "title": story_title,
+            "author": story_author,
+            "date": story_date,
+            "user_ids": user_id,
+            "db_id": db_id,
+        }
+        cls.ES.index(doc, cls._index_name(), cls._type_name(), db_id)
+
+    @classmethod
+    def query(cls, user_id, text):
+        """Search the starred stories of ``user_id`` for ``text``.
+
+        Runs a query-string search first; if it matches nothing, falls back
+        to fuzzy queries on each of FUZZY_FIELDS in turn and returns the
+        first non-empty result set (or the last, empty one).
+
+        Every search is restricted to this class's index and type and
+        filtered on the owning user, so other users' stories and unrelated
+        indices on the same cluster never show up in the results.
+
+        NOTE(review): no explicit result size is requested, so the server's
+        default page size applies -- confirm this is enough for callers that
+        paginate over the returned ids.
+        """
+        index_name = cls._index_name()
+        type_name = cls._type_name()
+        owner_filter = pyes.filters.TermFilter('user_ids', user_id)
+
+        def search(base_query):
+            scoped = pyes.query.FilteredQuery(base_query, owner_filter)
+            return cls.ES.search(scoped, indices=[index_name], doc_types=[type_name])
+
+        # Make recently indexed stories visible; only this index needs it.
+        cls.ES.refresh([index_name])
+
+        results = search(pyes.query.StringQuery(text))
+        for field in cls.FUZZY_FIELDS:
+            if results.total:
+                break
+            results = search(pyes.query.FuzzyQuery(field, text))
+
+        return results
View
@@ -0,0 +1,16 @@
+"""
+This file demonstrates writing tests using the unittest module. These will pass
+when you run "manage.py test".
+
+Replace this with more appropriate tests for your application.
+"""
+
+from django.test import TestCase
+
+
+class SimpleTest(TestCase):
+    """Placeholder test case created with the app skeleton."""
+
+    def test_basic_addition(self):
+        """Check that adding one and one gives two."""
+        expected = 2
+        self.assertEqual(1 + 1, expected)
View
@@ -0,0 +1 @@
+# Create your views here.
View
@@ -637,6 +637,7 @@ def setup_db_firewall():
sudo('ufw allow from 199.15.248.0/21 to any port 28017') # MongoDB web
sudo('ufw allow from 199.15.248.0/21 to any port 6379 ') # Redis
sudo('ufw allow from 199.15.248.0/21 to any port 11211 ') # Memcached
+ sudo('ufw allow from 199.15.248.0/21 to any port 9200 ') # Elasticsearch
# EC2
sudo('ufw allow proto tcp from 54.242.38.48 to any port 5432,27017,6379,11211')
@@ -744,7 +745,18 @@ def setup_db_mdadm():
sudo("mdadm --examine --scan | sudo tee -a /etc/mdadm/mdadm.conf")
sudo("echo '/dev/md0 /srv/db xfs rw,nobarrier,noatime,nodiratime,noauto 0 0' | sudo tee -a /etc/fstab")
sudo("sudo update-initramfs -u -v -k `uname -r`")
+
+def setup_elasticsearch():
+    """Install a Java runtime and the Elasticsearch .deb package on the host.
+
+    The package is downloaded into <VENDOR_PATH>/elasticsearch and installed
+    with dpkg.
+    """
+    ES_VERSION = "0.20.1"
+    es_package = 'elasticsearch-%s.deb' % ES_VERSION
+    es_dir = os.path.join(env.VENDOR_PATH, 'elasticsearch')
+
+    sudo('apt-get update')
+    sudo('apt-get install openjdk-7-jre -y')
+
+    # Create and enter the same directory: the download previously ran in
+    # "elasticsearch-<version>", which was never created. -p keeps the task
+    # re-runnable when the directory already exists.
+    run('mkdir -p %s' % es_dir)
+    with cd(es_dir):
+        run('wget http://download.elasticsearch.org/elasticsearch/elasticsearch/%s' % es_package)
+        sudo('dpkg -i %s' % es_package)
+
# ================
# = Setup - Task =
# ================
@@ -86,6 +86,8 @@ REDIS = {
'host': '127.0.0.1',
}
+ELASTICSEARCH_HOSTS = ['127.0.0.1:9200']
+
BACKED_BY_AWS = {
'pages_on_s3': False,
'icons_on_s3': False,
@@ -1313,6 +1313,7 @@
this.switch_taskbar_view(this.story_view);
this.setup_mousemove_on_views();
this.make_feed_title_in_stories();
+ this.hide_stories_error();
this.model.fetch_starred_stories(1, _.bind(this.post_open_starred_stories, this),
this.show_stories_error, true);
View
@@ -233,6 +233,7 @@
'apps.push',
'apps.social',
'apps.oauth',
+ 'apps.search',
'apps.categories',
'south',
'utils',
@@ -415,6 +416,12 @@ def allow_syncdb(self, db, model):
'host': 'db01',
}
+# =================
+# = Elasticsearch =
+# =================
+
+ELASTICSEARCH_HOSTS = ['db02:9200']
+
# ===============
# = Social APIs =
# ===============

0 comments on commit d5c6475

Please sign in to comment.