Permalink
Browse files

Version 0.3.9-beta changes:

* Module 'utils.py' improved to have two main new functions: memoize and run_under_process
  - 'memoize' is a decorator that stores function results in memory and reuses them when
    the function is called again with the same arguments
  - 'run_under_process' is a decorator that runs a function under a multiprocessing.Process
    instance, which means you make better use of multi-core servers to generate reports

* To make the 'run_under_process' decorator easier to use, we created a new method
  on the geraldo.base.Report class: 'generate_under_process_by', which stores the generation
  output in a temp file, using a process, and loads that file into the file-like object.
  • Loading branch information...
marinho committed Dec 21, 2009
1 parent 3b4d4b7 commit 6e2ae2f57d64b631378b22bd981f840612dbda8c
Showing with 330 additions and 31 deletions.
  1. +2 −0 .gitignore
  2. +11 −0 CHANGES
  3. +1 −1 geraldo/__init__.py
  4. +48 −7 geraldo/base.py
  5. +126 −0 geraldo/tests/20-utilities.txt
  6. +84 −0 geraldo/tests/21-in-multiprocessing.txt
  7. +58 −23 geraldo/utils.py
View
@@ -12,6 +12,8 @@ build
dist
Geraldo.egg-info
*.egg
.DS_Store
*$py.class
site/fonts
site/layout
View
11 CHANGES
@@ -1,3 +1,14 @@
2009-12-20: Version 0.3.9-beta
---------------------------------
* Module 'utils.py' improved to have two main new functions: memoize and run_under_process
- 'memoize' is a decorator that stores function results in memory and reuses them when
the function is called again with the same arguments
- 'run_under_process' is a decorator that runs a function under a multiprocessing.Process
instance, which means you make better use of multi-core servers to generate reports
* To make the 'run_under_process' decorator easier to use, we created a new method
on the geraldo.base.Report class: 'generate_under_process_by', which stores the generation
output in a temp file, using a process, and loads that file into the file-like object.
2009-11-05: Version 0.3.7-stable
---------------------------------
* Fixing problem with memory consuming when generating reports.
View
@@ -31,7 +31,7 @@
- tests - a package with automated doc tests.
"""
VERSION = (0, 3, 8, 'stable')
VERSION = (0, 3, 9, 'beta')
def get_version():
return '%d.%d.%d-%s'%VERSION
View
@@ -5,16 +5,13 @@
from reportlab.lib.colors import black
from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_RIGHT
from utils import calculate_size, get_attr_value
from utils import calculate_size, get_attr_value, landscape, format_date
from exceptions import EmptyQueryset, ObjectNotFound, ManyObjectsFound,\
AttributeNotFound, NotYetImplemented
BAND_WIDTH = 'band-width'
BAND_HEIGHT = 'band-height'
def landscape(page_size):
return page_size[1], page_size[0]
class GeraldoObject(object):
"""Base class inherited by all report classes, including band, subreports,
groups, graphics and widgets.
@@ -30,7 +27,7 @@ def __init__(self, *kwargs):
if 'name' in kwargs:
self.name = kwargs.pop('name')
def destroy(self): # XXX
def destroy(self):
try:
children = self.get_children()
except (NotYetImplemented, AttributeError):
@@ -186,7 +183,7 @@ def get_objects_list(self):
if not self.queryset:
return []
return [object for object in self.queryset]
return list(self.queryset)
def format_date(self, date, expression):
"""Use a date format string method to return formatted datetime.
@@ -195,7 +192,8 @@ def format_date(self, date, expression):
this (until we find a better and agnosthic solution).
Please don't hack this method up. Just override it on your report class."""
return date.strftime(expression)
return format_date(date, expression)
def get_children(self):
ret = []
@@ -310,11 +308,54 @@ def generate_by(self, generator_class, *args, **kwargs):
if not self.print_if_empty and not self.queryset:
raise EmptyQueryset("This report doesn't accept empty queryset")
# TODO: use multiprocessing
# Initialize generator instance
generator = generator_class(self, *args, **kwargs)
return generator.execute()
def generate_under_process_by(self, generator_class, *args, **kwargs):
    """Generate the report through 'generate_by' wrapped in 'run_under_process'.

    The generation is delegated to a nested function decorated with
    utils.run_under_process, so it can run in a separate process, lowering
    memory consumption and making better use of multi-core servers.

    This only works well when the report is generated into a destination
    file or file-like object (i.e. an HttpResponse on Django).

    Arguments:
    - generator_class: generator class handed over to 'generate_by'
    - *args, **kwargs: forwarded untouched to 'generate_by', except for
      'filename': when it is not a string it is treated as a file-like
      object; the report is then generated into a private temporary file
      whose content is copied to that object afterwards.

    Returns None, because a Process has no return value to hand back.
    """
    import os
    import shutil
    import tempfile

    from utils import run_under_process

    filelike = None
    temp_dir = None

    # A file-like object cannot be shared with another process, so the
    # child writes to a real file path instead. The file lives in a freshly
    # created private directory (mkdtemp), which avoids guessable names and
    # collisions in the shared temp directory.
    if 'filename' in kwargs and not isinstance(kwargs['filename'], basestring):
        filelike = kwargs.pop('filename')
        temp_dir = tempfile.mkdtemp(prefix='geraldo-')
        kwargs['filename'] = os.path.join(temp_dir, 'report.out')

    @run_under_process
    def generate_report(report, generator_class, *args, **kwargs):
        # Generate report into the destination given by 'filename'
        report.generate_by(generator_class, *args, **kwargs)

    try:
        # NOTE(review): this assumes 'run_under_process' blocks until the
        # generation has finished -- confirm against geraldo.utils.
        generate_report(self, generator_class, *args, **kwargs)

        if filelike is not None:
            # Binary mode: the output (e.g. a PDF) must be copied
            # byte-for-byte, without newline translation.
            fp = open(kwargs['filename'], 'rb')
            try:
                filelike.write(fp.read())
            finally:
                fp.close()
    finally:
        # Always remove the temporary directory, even when generation or
        # copying fails, so no files are left behind.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
def get_page_rect(self):
"""Calculates a dictionary with page dimensions inside the margins
and returns. It is used to make page borders."""
@@ -0,0 +1,126 @@
UTILITIES
=========
The tests on this document are about new changes we have done to optimize
Geraldo's performance, scalability or just refactoring.
Memoize
-------
Memoize is a decorator that stores in memory the returned value for
a set of arguments. The next time the function is called with the same
arguments, that stored value is returned without running the function code.
In the future we want to use memcached to store these values.
>>> from geraldo.utils import memoize
>>> @memoize
... def capitalize(word):
... print 'running...'
... return word.capitalize()
>>> capitalize('tarsila')
running...
'Tarsila'
Pay attention to 'running...' not appearing
>>> capitalize('tarsila')
'Tarsila'
MultiProcessing
---------------
A decorator to encapsulate functions that will run under a separate process.
The use of the multiprocessing library (available on Python 2.6, but installable
on 2.4 or higher) is the best way to work with multiple processes on Python.
It is faster and more efficient than threading and will reduce memory
consumption, especially on report generation.
>>> import os
>>> from geraldo.utils import run_under_process
>>> from tempfile import gettempdir
>>> @run_under_process
... def create_file(filename, content):
... fp = file(filename, 'w')
... fp.write(content)
... fp.close()
>>> filename = os.path.join(gettempdir(), 'testing-process.txt')
>>> create_file(filename, 'Test')
>>> fp = file(filename)
>>> fp.read()
'Test'
>>> fp.close()
Getting Attribute Values
------------------------
To retrieve an attribute value from an object, we have the function 'get_attr_value',
which is able to get children attributes and also run simple methods (with no
arguments).
>>> from geraldo.utils import get_attr_value
An attribute
>>> class Word(object): the_word = 'test'
>>> word = Word()
>>> get_attr_value(word, 'the_word')
'test'
A method of attribute
>>> get_attr_value(word, 'the_word.upper')
'TEST'
A method...
>>> word = 'Test'
>>> get_attr_value(word, 'upper')
'TEST'
Default date/time formatting function
-------------------------------------
A function that formats date/time values, using memoize optimization.
>>> import datetime
>>> from geraldo.utils import format_date
>>> some_day = datetime.date(2008,10,1)
>>> some_time = datetime.datetime(2008,10,1,10,30,1)
>>> format_date(some_day, '%d/%m/%Y')
'01/10/2008'
>>> format_date(some_time, '%d/%m/%Y %H:%M:%S')
'01/10/2008 10:30:01'
Landscape function
------------------
Just a simple and friendly way to switch a page size height/width tuple.
>>> from geraldo.utils import landscape, A4
>>> landscape(A4) == (A4[1], A4[0])
True
Calculating sizes
-----------------
A function that calculates a dimension, which can be an expression in a string
or just a simple value.
>>> from geraldo.utils import calculate_size, cm
>>> calculate_size(10*cm) == calculate_size('10*cm')
True
@@ -0,0 +1,84 @@
GENERATING IN MULTIPROCESSING
=============================
This is exactly the same report we made for the "without-Django" test. But this one is going
to be run under an independent Process, from the multiprocessing library.
It is important to be aware of the following:
- multiprocessing works on Python 2.4 or higher - and is a builtin package on 2.6
- you must use 'generate_under_process_by' instead of 'generate_by'
- you can do it without 'generate_under_process_by', using geraldo.utils.run_under_process
decorator, if you prefer do it manually
- the method 'generate_under_process_by' is not the best solution ever. You must be
aware of what kind of report generation you are doing, and how the server is configured, but
in most cases it will work well
- the decorator 'run_under_process' will work only if geraldo.utils.DISABLE_MULTIPROCESSING
is False
>>> import os
>>> cur_dir = os.path.dirname(os.path.abspath(__file__))
>>> from geraldo.utils import A4, cm, TA_CENTER, TA_RIGHT
>>> from geraldo import Report, ReportBand, Label, ObjectValue, SystemField,\
... FIELD_ACTION_COUNT, BAND_WIDTH
Report class
>>> class SimpleListReport(Report):
... title = 'Demonstration without Django'
...
... class band_page_header(ReportBand):
... height = 1.3*cm
... elements = [
... SystemField(expression='%(report_title)s', top=0.1*cm, left=0, width=BAND_WIDTH,
... style={'fontName': 'Helvetica-Bold', 'fontSize': 14, 'alignment': TA_CENTER}),
... Label(text="ID", top=0.8*cm, left=0),
... Label(text="Name", top=0.8*cm, left=3*cm),
... ]
... borders = {'bottom': True}
...
... class band_page_footer(ReportBand):
... height = 0.5*cm
... elements = [
... Label(text='Created with Geraldo Reports', top=0.1*cm, left=0),
... SystemField(expression='Page # %(page_number)d of %(page_count)d', top=0.1*cm,
... width=BAND_WIDTH, style={'alignment': TA_RIGHT}),
... ]
... borders = {'top': True}
...
... class band_detail(ReportBand):
... height = 0.5*cm
... elements = [
... ObjectValue(attribute_name='id', top=0, left=0),
... ObjectValue(attribute_name='name', top=0, left=3*cm),
... ]
>>> class MyObject(object):
... def __init__(self, **kwargs):
... for k,v in kwargs.items():
... setattr(self, k, v)
>>> objects_list = [
... MyObject(id=1, name='Rio de Janeiro'),
... MyObject(id=2, name='New York'),
... MyObject(id=3, name='Paris'),
... MyObject(id=4, name='London'),
... MyObject(id=5, name='Tokyo'),
... MyObject(id=6, name='Moscow'),
... MyObject(id=7, name='Beijing'),
... MyObject(id=8, name='Hamburg'),
... MyObject(id=9, name='New Delhi'),
... MyObject(id=10, name='Jakarta'),
... ]
>>> report = SimpleListReport(queryset=objects_list)
PDF generation
>>> from geraldo.generators import PDFGenerator
>>> report.generate_under_process_by(PDFGenerator, filename=os.path.join(cur_dir, 'output/generated-in-multiprocessing.pdf'))
Oops, something went wrong.

0 comments on commit 6e2ae2f

Please sign in to comment.