Permalink
Browse files

initial commit

  • Loading branch information...
0 parents commit c3b69f36e33beaec900e187ce9b2e29650edd6bc @paltman paltman committed May 23, 2010
@@ -0,0 +1 @@
+Patrick Altman <paltman@gmail.com>
@@ -0,0 +1,28 @@
+Copyright (c) 2010, Patrick Altman <paltman@gmail.com>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+ * Neither the name of the author nor the names of other
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
No changes.
No changes.
No changes.
No changes.
@@ -0,0 +1,20 @@
+django-pdf
+==========
+
+`django-pdf` is an app that takes user uploads of PDF documents, stores them
+on S3, and converts each page of the PDF into its own PNG image.
+
+Non Python Requirements
+-----------------------
+
+In order to accomplish this processing, Amazon AWS is used via the `boto`
+library. In addition, to provide a better user experience, celery/rabbitmq are
+used to facilitate background transfer of PDF uploads to S3 instead of holding
+the request/response cycle open during transfer.
+
+`boto` and `celery` will be installed through normal pip processing of the
+included requirements.txt file. However, you will need to install rabbitmq
+in order for uploads to get processed.
+
+In addition, you will need an Amazon AWS account with access to EC2 enabled.
No changes.
No changes.
No changes.
@@ -0,0 +1,6 @@
+from django.contrib import admin
+
+from pdf.models import Document
+
+
+admin.site.register(Document)
@@ -0,0 +1,31 @@
+import os
+
+from django import forms
+from django.utils.translation import ugettext_lazy as _
+
+from pdf.models import Document
+
+
+class DocumentValidationError(forms.ValidationError):
+ def __init__(self):
+ msg = _(u'Only PDF files are valid uploads.')
+ super(DocumentValidationError, self).__init__(msg)
+
+
+class DocumentField(forms.FileField):
+ """A validating PDF document upload field"""
+
+ def clean(self, data, initial=None):
+ f = super(DocumentField, self).clean(data, initial)
+ ext = os.path.splitext(f.name)[1][1:].lower()
+ if ext == 'pdf' and f.content_type == 'application/pdf':
+ return f
+ raise DocumentValidationError()
+
+
+# ModelForm for Document that exposes only the name and the uploaded file;
+# local_document is overridden with DocumentField, the PDF-validating field.
+class DocumentForm(forms.ModelForm):
+ local_document = DocumentField()
+
+ class Meta:
+ model = Document
+ fields = ('name', 'local_document')
@@ -0,0 +1,100 @@
+import os
+import uuid
+import simplejson
+
+from datetime import datetime
+
+import boto
+
+from django.conf import settings
+from django.contrib.auth.models import User
+from django.core.urlresolvers import reverse
+from django.db import models
+from django.utils.translation import ugettext_lazy as _
+
+
+# (stored one-letter code, translated label) pairs; used as the ``choices``
+# of ``Document.status``.
+DOCUMENT_STATES = (
+ ('U', _('Uploaded')),
+ ('S', _('Stored Remotely')),
+ ('Q', _('Queued')),
+ ('P', _('Processing')),
+ ('F', _('Finished')),
+ ('E', _('Processing Error')))
+
+
+class Document(models.Model):
+ """
+ A simple model which stores data about an uploaded document.
+ """
+ user = models.ForeignKey(User, verbose_name=_('user'))
+ name = models.CharField(_("Title"), max_length=100)
+ uuid = models.CharField(_('Unique Identifier'), max_length=36)
+ local_document = models.FileField(_("Local Document"), null=True, blank=True, upload_to=settings.PDF_UPLOAD_PATH)
+ remote_document = models.URLField(_("Remote Document"), null=True, blank=True)
+ status = models.CharField(_("Remote Processing Status"), default='U', max_length=1, choices=DOCUMENT_STATES)
+ exception = models.TextField(_("Processing Exception"), null=True, blank=True)
+ pages = models.IntegerField(_("Number of Pages in Document"), null=True, blank=True)
+
+ date_uploaded = models.DateTimeField(_("Date Uploaded"))
+ date_stored = models.DateTimeField(_("Date Stored Remotely"), null=True, blank=True)
+ date_queued = models.DateTimeField(_("Date Queued"), null=True, blank=True)
+ date_process_start = models.DateTimeField(_("Date Process Started"), null=True, blank=True)
+ date_process_end = models.DateTimeField(_("Date Process Completed"), null=True, blank=True)
+ date_exception = models.DateTimeField(_("Date of Exception"), null=True, blank=True)
+
+ date_created = models.DateTimeField(_("Date Created"), default=datetime.utcnow)
+
+ class Meta:
+ verbose_name = _('document')
+ verbose_name_plural = _('documents')
+
+ def __unicode__(self):
+ return unicode(_("%s's uploaded document." % self.user))
+
+ def get_detail_url(self):
+ return reverse("pdf_detail", kwargs={'uuid': self.uuid})
+
+ @property
+ def page_images(self):
+ if self.remote_document is None:
+ return []
+ base = self.remote_document.replace(os.path.basename(self.remote_document), '')
+ images = []
+ if self.pages == 1:
+ images = ["%spage.png" % base, ]
+ if self.pages > 1:
+ images = ["%spage-%s.png" % (base, x) for x in range(0, self.pages)]
+ return images
+
+ def save(self, **kwargs):
+ if self.id is None:
+ self.uuid = str(uuid.uuid4())
+ super(Document, self).save(**kwargs)
+
+ @staticmethod
+ def process_response(data):
+ c = boto.connect_s3(settings.PDF_AWS_KEY, settings.PDF_AWS_SECRET)
+ key = c.get_bucket(data['bucket']).get_key(data['key'])
+ if key is not None:
+ response_data = simplejson.loads(key.get_contents_as_string())
+ doc = Document.objects.get(uuid=response_data['uuid'])
+ status = response_data['status']
+ now = response_data.get("now", None)
+ if now is not None:
+ now = datetime.strptime(now, "%Y-%m-%d %H:%M:%S")
+ if status == 'E':
+ doc.status = "E"
+ doc.exception = response_data.get('exception', None)
+ doc.date_exception = now
+ if status == 'F':
+ if doc.status != 'E':
+ doc.status = 'F'
+ doc.date_process_end = now
+ doc.pages = response_data.get("pages", None)
+ if status == 'P':
+ if doc.status not in ('E', 'F'):
+ doc.status = 'P'
+ doc.date_process_start = now
+ doc.save()
+ return True
+ return False
Oops, something went wrong.

0 comments on commit c3b69f3

Please sign in to comment.