Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 894 lines (753 sloc) 37.66 kB
99a5c0c @slinkp Adding copyrights and GPL v3 everywhere
slinkp authored
1 # Copyright 2007,2008,2009,2011 Everyblock LLC, OpenPlans, and contributors
2 #
3 # This file is part of ebpub
4 #
5 # ebpub is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
9 #
10 # ebpub is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with ebpub. If not, see <http://www.gnu.org/licenses/>.
17 #
18
5c9826f initial import
Don Kukral authored
19 from django.contrib.gis.db import models
20 from django.contrib.gis.db.models import Count
ff0a9b6 @slinkp Change newsitem_detail URL: remove useless date arguments.
slinkp authored
21 from django.core import urlresolvers
5c9826f initial import
Don Kukral authored
22 from django.db import connection, transaction
23 from ebpub.streets.models import Block
24 from ebpub.utils.text import slugify
de7c1fe @slinkp Yet more de-hardcoded URLs.
slinkp authored
25
5c9826f initial import
Don Kukral authored
26 import datetime
27
0eb2538 @slinkp Monkeypatches that need settings can't be imported before settings ar…
slinkp authored
28 # Need these monkeypatches for "natural key" support during fixture load/dump.
29 import ebpub.monkeypatches
30 ebpub.monkeypatches.patch_once()
3b0dc05 @slinkp Update stuff using Query.extra_where to use QuerySet.extra(where...).…
slinkp authored
31
9e0becb @slinkp Deleted SchemaInfo, moved all that metadata into Schema. Observed per…
slinkp authored
# Allowed values for Schema.update_frequency, as (stored value, label)
# pairs; the stored value and the label are the same string.
FREQUENCY_CHOICES = [
    (label, label)
    for label in (
        'Hourly',
        'Throughout the day',
        'Daily',
        'Twice a week',
        'Weekly',
        'Twice a month',
        'Monthly',
        'Quarterly',
        'Sporadically',
        'No longer updated',
    )
]
34
0eb2538 @slinkp Monkeypatches that need settings can't be imported before settings ar…
slinkp authored
35
36
5c9826f initial import
Don Kukral authored
def field_mapping(schema_id_list):
    """
    Given a list of schema IDs, returns a dictionary of dictionaries: each
    schema ID maps to a dictionary of that schema's SchemaField
    name -> real_name.

    A schema ID with no SchemaFields does not appear in the result.

    Example return value:
        {1: {u'crime_type': 'varchar01', u'crime_date': 'date01'},
         2: {u'permit_number': 'varchar01', 'to_date': 'date01'},
        }
    """
    # Each row is a dict such as
    # {'schema': 1, 'name': u'crime_type', 'real_name': u'varchar01'}.
    rows = SchemaField.objects.filter(schema__id__in=schema_id_list).values(
        'schema', 'name', 'real_name')
    mapping = {}
    for row in rows:
        per_schema = mapping.setdefault(row['schema'], {})
        per_schema[row['name']] = row['real_name']
    return mapping
52
baec0a7 @slinkp Use natural keys for serialization wherever possible; it makes fixtur…
slinkp authored
53
5c9826f initial import
Don Kukral authored
class SchemaManager(models.Manager):
    """
    Manager for Schema: natural-key lookup by slug, plus a base queryset
    that defers the descriptive metadata columns.
    """

    def get_by_natural_key(self, slug):
        """Return the Schema with the given slug (its natural key)."""
        return self.get(slug=slug)

    def get_query_set(self):
        """
        Return the base queryset with the metadata fields deferred, so
        they are only loaded when first accessed.
        """
        # NOTE(review): presumably deferred because these text columns are
        # bulky and rarely needed -- confirm against the commit that added it.
        deferred_fields = (
            'short_description',
            'summary',
            'source',
            'grab_bag_headline',
            'grab_bag',
            'short_source',
            'update_frequency',
            'intro',
        )
        base_qs = super(SchemaManager, self).get_query_set()
        return base_qs.defer(*deferred_fields)
70
71
baec0a7 @slinkp Use natural keys for serialization wherever possible; it makes fixtur…
slinkp authored
class SchemaPublicManager(SchemaManager):
    """
    Manager that only returns Schemas whose ``is_public`` flag is True.
    """

    def get_query_set(self):
        """Return SchemaManager's queryset restricted to public Schemas."""
        # super() must name *this* class. Naming SchemaManager here skipped
        # SchemaManager.get_query_set() entirely, so the public manager
        # silently lost the defer() of the metadata fields.
        return super(SchemaPublicManager, self).get_query_set().filter(is_public=True)
76
class Schema(models.Model):
    """
    A type of NewsItem.

    Each NewsItem has exactly one Schema; the Schema's associated
    SchemaFields describe the attributes of NewsItems of that type.

    To get every NewsItem of a Schema, use the usual reverse relation
    (http://docs.djangoproject.com/en/dev/topics/db/queries/#backwards-related-objects):
    schema.newsitem_set.all()
    """

    # --- Identity and naming -------------------------------------------
    name = models.CharField(max_length=32)
    plural_name = models.CharField(max_length=32)
    # 'a' or 'an'
    indefinite_article = models.CharField(max_length=2)
    slug = models.CharField(max_length=32, unique=True)

    # --- Dates -----------------------------------------------------------
    # The earliest available NewsItem.pub_date for this Schema.
    min_date = models.DateField()
    last_updated = models.DateField()
    # Human-readable name for the NewsItem.item_date field.
    date_name = models.CharField(max_length=32, default='Date')
    date_name_plural = models.CharField(max_length=32, default='Dates')

    # --- Display / behaviour flags ---------------------------------------
    # Bigger number is more important.
    importance = models.SmallIntegerField(default=0)
    is_public = models.BooleanField(db_index=True, default=False)
    is_special_report = models.BooleanField(default=False)
    # Whether RSS feed should collapse many of these into one.
    can_collapse = models.BooleanField(default=False)
    # Whether a newsitem_detail page exists for NewsItems of this Schema.
    has_newsitem_detail = models.BooleanField(default=False)
    # Whether aggregate charts are allowed for this Schema.
    allow_charting = models.BooleanField(default=False)
    # Whether attributes should be preloaded for NewsItems of this
    # Schema, in the list view.
    uses_attributes_in_list = models.BooleanField(default=False)
    # Number of records to show on place_overview.
    number_in_overview = models.SmallIntegerField(default=5)

    # --- Metadata fields moved from SchemaInfo ---------------------------
    short_description = models.TextField(blank=True, default='')
    summary = models.TextField(blank=True, default='')
    source = models.TextField(blank=True, default='')
    grab_bag_headline = models.CharField(max_length=128, blank=True, default='')
    # TODO: what does this field mean?
    grab_bag = models.TextField(blank=True, default='')
    short_source = models.CharField(max_length=128, blank=True, default='')
    update_frequency = models.CharField(
        max_length=64, blank=True, default='', choices=FREQUENCY_CHOICES)
    intro = models.TextField(blank=True, default='')

    # ``objects`` is declared first; ``public_objects`` only returns
    # Schemas with is_public=True.
    objects = SchemaManager()
    public_objects = SchemaPublicManager()

    class Meta:
        ordering = ('name',)

    def __unicode__(self):
        return self.name

    def natural_key(self):
        """Return the one-element natural key: the slug."""
        key = (self.slug,)
        return key

    def url(self):
        """Return the site-relative URL path for this Schema."""
        return '/%s/' % self.slug

    def icon_slug(self):
        """
        Return the slug used to pick an icon: 'special-report' for
        special reports, otherwise this Schema's own slug.
        """
        return 'special-report' if self.is_special_report else self.slug
baec0a7 @slinkp Use natural keys for serialization wherever possible; it makes fixtur…
slinkp authored
145
class SchemaFieldManager(models.Manager):
    """Manager for SchemaField that adds natural-key lookup."""

    def get_by_natural_key(self, schema_slug, real_name):
        """
        Return the SchemaField whose Schema has slug ``schema_slug`` and
        whose real_name is ``real_name``.
        """
        lookup = {'schema__slug': schema_slug, 'real_name': real_name}
        return self.get(**lookup)
150
5c9826f initial import
Don Kukral authored
class SchemaField(models.Model):
    """
    The meaning of one Attribute field for one Schema type.
    """
    schema = models.ForeignKey(Schema)
    name = models.CharField(max_length=32)
    # Column name in the Attribute model. 'varchar01', 'varchar02', etc.
    real_name = models.CharField(max_length=10)
    # Human-readable name, for presentation.
    pretty_name = models.CharField(max_length=32)
    # Plural human-readable name.
    pretty_name_plural = models.CharField(max_length=32)
    # Whether to display value on the public site.
    display = models.BooleanField(default=True)
    # Whether the value is a foreign key to Lookup.
    is_lookup = models.BooleanField(default=False)
    is_filter = models.BooleanField(default=False)
    # Whether schema_detail displays a chart for this field; also see
    # "trends" tabs on place_overview.html.
    is_charted = models.BooleanField(default=False)
    display_order = models.SmallIntegerField(default=10)
    # Whether the value is searchable by content.
    is_searchable = models.BooleanField(default=False)

    objects = SchemaFieldManager()

    class Meta(object):
        unique_together = (('schema', 'real_name'),)
        ordering = ('pretty_name',)

    def __unicode__(self):
        return u'%s - %s' % (self.schema, self.name)

    def natural_key(self):
        """Return (slug of the owning Schema, real_name)."""
        return (self.schema.slug, self.real_name)

    @property
    def slug(self):
        """The field name with underscores replaced by hyphens."""
        return '-'.join(self.name.split('_'))

    @property
    def datatype(self):
        """
        The real_name minus its last two characters, e.g. 'varchar' for
        'varchar01'.
        """
        return self.real_name[:-2]

    def is_type(self, *data_types):
        """
        Returns True if this SchemaField is of *any* of the given data types.

        Allowed values are 'varchar', 'date', 'time', 'datetime', 'bool', 'int'.
        """
        own_type = self.datatype
        return own_type in data_types

    def is_many_to_many_lookup(self):
        """
        Returns True if this SchemaField is a many-to-many lookup: a
        lookup whose datatype is not 'int'.
        """
        return self.is_lookup and not self.is_type('int')

    def all_lookups(self):
        """
        Return this field's Lookup objects, ordered by name.

        Raises ValueError if this SchemaField is not a lookup.
        """
        if self.is_lookup:
            return Lookup.objects.filter(schema_field__id=self.id).order_by('name')
        raise ValueError('SchemaField.all_lookups() can only be called if is_lookup is True')

    def browse_by_title(self):
        "Returns FOO in 'Browse by FOO', for this SchemaField."
        if not self.is_type('bool'):
            return self.pretty_name
        return u'whether they %s' % self.pretty_name_plural

    def smart_pretty_name(self):
        """
        Returns the pretty name for this SchemaField: the plural form for
        many-to-many lookups, the singular form otherwise.
        """
        use_plural = self.is_many_to_many_lookup()
        return self.pretty_name_plural if use_plural else self.pretty_name
220
baec0a7 @slinkp Use natural keys for serialization wherever possible; it makes fixtur…
slinkp authored
221
class LocationTypeManager(models.Manager):
    """Manager for LocationType that adds natural-key lookup by slug."""

    def get_by_natural_key(self, slug):
        """Return the LocationType with the given slug."""
        match = self.get(slug=slug)
        return match
225
5c9826f initial import
Don Kukral authored
class LocationType(models.Model):
    """
    A category of Location, identified by a unique slug.
    """
    # e.g., "Ward" or "Congressional District"
    name = models.CharField(max_length=255)
    # e.g., "Wards"
    plural_name = models.CharField(max_length=64)
    # e.g., "Chicago" or "U.S.A."
    scope = models.CharField(max_length=64)
    slug = models.CharField(max_length=32, unique=True)
    # Whether this is displayed on location_type_list. XXX unused??
    is_browsable = models.BooleanField()
    # Whether this is used to display aggregates, shows up in
    # 'nearby locations', etc.
    is_significant = models.BooleanField()

    objects = LocationTypeManager()

    class Meta:
        ordering = ('name',)

    def __unicode__(self):
        return u'%s, %s' % (self.name, self.scope)

    def natural_key(self):
        """Return the one-element natural key: the slug."""
        key = (self.slug,)
        return key

    def url(self):
        """Return the site-relative URL path for this location type."""
        return '/locations/%s/' % self.slug
247
248
class LocationManager(models.GeoManager):
    """Geo-enabled manager for Location that adds natural-key lookup."""

    def get_by_natural_key(self, slug, location_type_slug):
        """
        Return the Location identified by its natural key.

        ``slug`` is the Location's own slug and ``location_type_slug`` is
        the slug of its LocationType, in the same order that
        Location.natural_key() returns them.
        """
        # The location type must be matched on its *own* slug. Reusing
        # ``slug`` here only worked when both slugs happened to be equal.
        return self.get(slug=slug, location_type__slug=location_type_slug)
252
5c9826f initial import
Don Kukral authored
class Location(models.Model):
    """
    A named place of some LocationType, with optional geometry.

    The slug is unique per location type.
    """
    # e.g., "35th Ward"
    name = models.CharField(max_length=255)
    normalized_name = models.CharField(max_length=255, db_index=True)
    slug = models.CharField(max_length=32, db_index=True)
    location_type = models.ForeignKey(LocationType)
    location = models.GeometryField(null=True)
    centroid = models.PointField(blank=True, null=True)
    display_order = models.SmallIntegerField()
    city = models.CharField(max_length=255)
    source = models.CharField(max_length=64)
    # In square meters.
    area = models.FloatField(blank=True, null=True)
    # From the 2000 Census.
    population = models.IntegerField(blank=True, null=True)
    user_id = models.IntegerField(blank=True, null=True)
    is_public = models.BooleanField()
    description = models.TextField(blank=True)
    creation_date = models.DateTimeField(blank=True, null=True)
    last_mod_date = models.DateTimeField(blank=True, null=True)

    objects = LocationManager()

    class Meta:
        unique_together = (('slug', 'location_type'),)
        ordering = ('slug',)

    def __unicode__(self):
        return self.name

    def natural_key(self):
        """Return (own slug, slug of the location type)."""
        return (self.slug, self.location_type.slug)

    def url(self):
        """Return the site-relative URL path for this location."""
        parts = (self.location_type.slug, self.slug)
        return '/locations/%s/%s/' % parts

    def rss_url(self):
        """Return the location URL prefixed with '/rss'."""
        return '/rss%s' % self.url()

    def alert_url(self):
        """Return the location URL with 'alerts/' appended."""
        return '%salerts/' % self.url()

    def edit_url(self):
        """Return the site-relative URL path for editing this location."""
        parts = (self.location_type.slug, self.slug)
        return '/locations/%s/edit/%s/' % parts

    @property
    def pretty_name(self):
        # Location objects get a "pretty_name" attribute for
        # interoperability with Block objects. (Parts of our app accept
        # either a Block or Location.)
        return self.name

    @property
    def is_custom(self):
        """True when this location's type has the slug 'custom'."""
        type_slug = self.location_type.slug
        return type_slug == 'custom'
764e1b9 @slinkp Use modern @property syntax; simplify SchemaField.is_type()
slinkp authored
303
5c9826f initial import
Don Kukral authored
304
class AttributesDescriptor(object):
    """
    This class provides the functionality that makes the attributes available
    as `attributes` on a model instance.

    Reading returns an AttributeDict that is built once and cached on the
    instance as ``_attributes_cache``. Assigning a dictionary writes the
    values for all of the schema's fields to the Attribute table in one
    statement.
    """
    def __get__(self, instance, instance_type=None):
        """
        Return the (cached) AttributeDict for ``instance``.

        Raises AttributeError when accessed on the class rather than on an
        instance.
        """
        if instance is None:
            raise AttributeError("%s must be accessed via instance" % self.__class__.__name__)
        if not hasattr(instance, '_attributes_cache'):
            # A schema without SchemaFields simply gets an empty mapping.
            select_dict = field_mapping([instance.schema_id]).get(instance.schema_id, {})
            instance._attributes_cache = AttributeDict(instance.id, instance.schema_id, select_dict)
        return instance._attributes_cache

    def __set__(self, instance, value):
        """
        Store the dictionary ``value`` as the attributes of ``instance``.

        Keys are SchemaField names. Fields missing from ``value`` are
        written as NULL, and keys that are not SchemaField names are
        ignored. Raises ValueError if ``value`` is not a dictionary.
        """
        if instance is None:
            raise AttributeError("%s must be accessed via instance" % self.__class__.__name__)
        if not isinstance(value, dict):
            raise ValueError('Only a dictionary is allowed')
        # Use the same tolerant lookup as __get__: a schema with no
        # SchemaFields used to raise KeyError here.
        mapping = list(field_mapping([instance.schema_id]).get(instance.schema_id, {}).items())
        if not mapping:
            # No columns to write; an UPDATE/INSERT with an empty column
            # list would be invalid SQL.
            return
        real_names = [real_name for name, real_name in mapping]
        values = [value.get(name, None) for name, real_name in mapping]
        table = Attribute._meta.db_table
        cursor = connection.cursor()
        # Table and column names come from the model and from SchemaField
        # rows and are interpolated; the values go through bound parameters.
        cursor.execute("""
            UPDATE %s
            SET %s
            WHERE news_item_id = %%s
            """ % (table, ','.join(['%s=%%s' % real_name for real_name in real_names])),
            values + [instance.id])
        # If no records were updated, that means the DB doesn't yet have a
        # row in the attributes table for this news item. Do an INSERT.
        if cursor.rowcount < 1:
            cursor.execute("""
                INSERT INTO %s (news_item_id, schema_id, %s)
                VALUES (%%s, %%s, %s)""" % (table, ','.join(real_names), ','.join(['%s' for real_name in real_names])),
                [instance.id, instance.schema_id] + values)
        transaction.commit_unless_managed()
        # Drop any AttributeDict built before this write, so the next read
        # reloads from the database instead of serving stale values.
        if hasattr(instance, '_attributes_cache'):
            del instance._attributes_cache
340
class AttributeDict(dict):
    """
    A dictionary-like object that serves as a wrapper around attributes for a
    given NewsItem.

    Values are loaded from the Attribute table lazily, on the first read
    access, and kept afterwards. Assigning an item writes that single
    value straight to the database.
    """
    def __init__(self, news_item_id, schema_id, mapping):
        dict.__init__(self)
        self.news_item_id = news_item_id
        self.schema_id = schema_id
        self.mapping = mapping # name -> real_name dictionary
        self.cached = False

    def __do_query(self):
        """Load the attribute values from the database, at most once."""
        if not self.cached:
            attr_values = Attribute.objects.filter(news_item__id=self.news_item_id).extra(select=self.mapping).values(*self.mapping.keys())
            # Rarely, we might have added the first SchemaField for this
            # Schema *after* the NewsItem was scraped. In that case
            # attr_values will be empty list.
            if attr_values:
                # self.update is the inherited dict.update, so this does
                # not go through the overridden __setitem__ below.
                self.update(attr_values[0])
            self.cached = True

    # Every read accessor triggers the lazy load first. Membership tests,
    # iteration, len() and values() used to skip it and so saw an empty
    # dictionary until some other accessor had been called.

    def keys(self, *args, **kwargs):
        self.__do_query()
        return dict.keys(self, *args, **kwargs)

    def values(self, *args, **kwargs):
        self.__do_query()
        return dict.values(self, *args, **kwargs)

    def items(self, *args, **kwargs):
        self.__do_query()
        return dict.items(self, *args, **kwargs)

    def get(self, *args, **kwargs):
        self.__do_query()
        return dict.get(self, *args, **kwargs)

    def __getitem__(self, name):
        self.__do_query()
        return dict.__getitem__(self, name)

    def __contains__(self, name):
        self.__do_query()
        return dict.__contains__(self, name)

    def __iter__(self):
        self.__do_query()
        return dict.__iter__(self)

    def __len__(self):
        self.__do_query()
        return dict.__len__(self)

    def __setitem__(self, name, value):
        """
        Write ``value`` for the attribute ``name`` to the database, then
        store it locally. Raises KeyError if ``name`` is not in the mapping.
        """
        # TODO: refactor, code overlaps largely with AttributeDescriptor.__set__
        cursor = connection.cursor()
        real_name = self.mapping[name]
        cursor.execute("""
            UPDATE %s
            SET %s = %%s
            WHERE news_item_id = %%s
            """ % (Attribute._meta.db_table, real_name), [value, self.news_item_id])
        # If no records were updated, that means the DB doesn't yet have a
        # row in the attributes table for this news item. Do an INSERT.
        if cursor.rowcount < 1:
            cursor.execute("""
                INSERT INTO %s (news_item_id, schema_id, %s)
                VALUES (%%s, %%s, %%s)""" % (Attribute._meta.db_table, real_name),
                [self.news_item_id, self.schema_id, value])
        transaction.commit_unless_managed()
        dict.__setitem__(self, name, value)
397
class NewsItemQuerySet(models.query.GeoQuerySet):
    """
    GeoQuerySet for NewsItems, with helpers that filter and aggregate on
    the per-item values stored in the db_attribute table.
    """

    def prepare_attribute_qs(self):
        """
        Return a clone of this queryset joined to db_attribute on the
        news item id.
        """
        qs = self._clone()
        already_joined = 'db_attribute' in qs.query.extra_tables
        if not already_joined:
            qs = qs.extra(tables=('db_attribute',))
        # extra_where went away in Django 1.1.
        # QuerySet.extra(where=...) seems to be the correct replacement as per
        # http://docs.djangoproject.com/en/dev/ref/models/querysets/
        return qs.extra(where=('db_newsitem.id = db_attribute.news_item_id',))

    def by_attribute(self, schema_field, att_value, is_lookup=False):
        """
        Returns a QuerySet of NewsItems whose attribute value for the given
        SchemaField is att_value. If att_value is a list, this will do the
        equivalent of an "OR" search, returning all NewsItems that have an
        attribute value in the att_value list.

        This handles many-to-many lookups correctly behind the scenes.

        If is_lookup is True, then att_value is treated as the 'code' of a
        Lookup object, and the Lookup's ID will be retrieved for use in the
        query.

        Raises ValueError for a non-integer value on a many-to-many field,
        or when None is combined with other values.
        """
        qs = self.prepare_attribute_qs()
        column = str(schema_field.real_name)
        if not isinstance(att_value, (list, tuple)):
            att_value = [att_value]

        if is_lookup:
            att_value = Lookup.objects.filter(schema_field__id=schema_field.id, code__in=att_value)
            if not att_value:
                # If the lookup values don't exist, then there aren't any
                # NewsItems with this attribute value. Note that we aren't
                # using QuerySet.none() here, because we want the result to
                # be a NewsItemQuerySet, and none() returns a normal QuerySet.
                return qs.extra(where=('1=0',))
            att_value = [lookup.id for lookup in att_value]

        if schema_field.is_many_to_many_lookup():
            # We have to use a regular expression search to look for all rows
            # with the given att_value *somewhere* in the column. The [[:<:]]
            # thing is a word boundary.
            for value in att_value:
                if not str(value).isdigit():
                    raise ValueError('Only integer strings allowed for att_value in many-to-many SchemaFields')
            alternatives = '|'.join([str(val) for val in att_value])
            condition = "db_attribute.%s ~ '[[:<:]]%s[[:>:]]'" % (column, alternatives)
            return qs.extra(where=(condition,))

        if None in att_value:
            if att_value != [None]:
                raise ValueError('by_attribute() att_value list cannot have more than one element if it includes None')
            return qs.extra(where=("db_attribute.%s IS NULL" % column,))

        placeholders = ','.join(['%s' for val in att_value])
        condition = "db_attribute.%s IN (%s)" % (column, placeholders)
        return qs.extra(where=(condition,), params=tuple(att_value))

    def date_counts(self):
        """
        Returns a dictionary mapping {item_date: count}.
        """
        # TODO: values + annotate doesn't seem to play nice with GeoQuerySet
        # at the moment. This is the changeset where it broke:
        # http://code.djangoproject.com/changeset/10326
        from django.db.models.query import QuerySet
        rows = QuerySet.values(self, 'item_date').annotate(count=models.Count('id'))
        counts = {}
        for row in rows:
            counts[row['item_date']] = row['count']
        return counts

    def top_lookups(self, schema_field, count):
        """
        Returns a list of {lookup, count} dictionaries representing the top
        Lookups for this QuerySet.
        """
        column = "db_attribute." + str(schema_field.real_name)
        if schema_field.is_many_to_many_lookup():
            # First prepare a subquery to get a *single* count of
            # attribute rows that match each relevant m2m lookup
            # value. It's very important to get a single row here or
            # else we get a DataBaseError with "more than one row
            # returned by a subquery used as an expression". (Bug #146)
            subquery = self.prepare_attribute_qs()
            subquery = subquery.filter(schema__id=schema_field.schema_id)
            # This is a regex search for the lookup id.
            subquery = subquery.extra(where=[column + " ~ ('[[:<:]]' || db_lookup.id || '[[:>:]]')"])
            # We want to count the current queryset and get a single
            # row for injecting into the subsequent Lookup query, but
            # we don't want Django's aggregation support to
            # automatically group by fields that aren't relevant and
            # would cause multiple rows as a result. So we call
            # `values()' on a field that we're already filtering by,
            # in this case, schema, as essentially a harmless identity
            # function.
            # See http://docs.djangoproject.com/en/dev/topics/db/aggregation/#values
            subquery = subquery.values('schema')
            # Fix #146: Having any `ORDER BY foo` in this subquery causes
            # Django to also add a `GROUP BY foo`, which potentially
            # returns multiple rows. So, remove the ordering.
            subquery = subquery.order_by()
            subquery = subquery.annotate(count=Count('schema'))
            # Unusual: We don't run the subquery, we just stuff its SQL
            # into our Lookup qs.
            qs = Lookup.objects.filter(schema_field__id=schema_field.id)
            qs = qs.extra(select={'lookup_id': 'id', 'item_count': subquery.values('count').query})
        else:
            # Counts of attribute rows matching each relevant Lookup.
            # Much easier when is_many_to_many_lookup == False :-)
            qs = self.prepare_attribute_qs().extra(select={'lookup_id': column})
            qs.query.group_by = [column]
            qs = qs.values('lookup_id').annotate(item_count=Count('id'))

        qs = qs.values('lookup_id', 'item_count').order_by('-item_count')

        # Keep only rows with a non-zero count, then take the top ``count``.
        ranked = []
        for row in qs:
            if row['item_count']:
                ranked.append((row['lookup_id'], row['item_count']))
        ranked = ranked[:count]

        lookups_by_id = Lookup.objects.in_bulk([pair[0] for pair in ranked])
        return [{'lookup': lookups_by_id[pair[0]], 'count': pair[1]}
                for pair in ranked
                if None not in pair]

    def text_search(self, schema_field, query):
        """
        Returns a QuerySet of NewsItems whose attribute for
        a given schema field matches a text search query.
        """
        qs = self.prepare_attribute_qs()
        query = query.lower()
        condition = "db_attribute." + str(schema_field.real_name) + " ILIKE %s"
        pattern = "%%%s%%" % query
        return qs.extra(where=(condition,), params=(pattern,))
528
class NewsItemManager(models.GeoManager):
    """
    Manager for NewsItem. Its querysets are NewsItemQuerySets, and the
    queryset's helper methods are also callable directly on the manager.
    """
    def get_query_set(self):
        """Return a NewsItemQuerySet for this manager's model."""
        return NewsItemQuerySet(self.model)

    # Each method below forwards its arguments unchanged to the method of
    # the same name on a fresh NewsItemQuerySet.

    def by_attribute(self, *args, **kwargs):
        queryset = self.get_query_set()
        return queryset.by_attribute(*args, **kwargs)

    def text_search(self, *args, **kwargs):
        queryset = self.get_query_set()
        return queryset.text_search(*args, **kwargs)

    def date_counts(self, *args, **kwargs):
        queryset = self.get_query_set()
        return queryset.date_counts(*args, **kwargs)

    def top_lookups(self, *args, **kwargs):
        queryset = self.get_query_set()
        return queryset.top_lookups(*args, **kwargs)
544
class NewsItem(models.Model):
    """
    Lowest common denominator metadata for News-like things.

    self.schema and self.attributes are used for extended metadata;
    If all you want is to examine the attributes, self.attributes
    can be treated like a dict.
    (Internally it's a bit complicated. See the Schema, SchemaField, and
    Attribute models, plus AttributeDescriptor, for how it all works.)

    NewsItems have several distinct notions of location:

    * The NewsItemLocation model is for fast lookups of NewsItems to
      all Locations where the .location fields overlap. This is set
      by a sql trigger whenever self.location changes; not set by any
      python code. Used in various views for filtering.

    * self.location is typically a point, and is used in views for
      filtering newsitems. Theoretically (untested!!) could also be a
      GeometryCollection, for news items that mention multiple
      places. This is typically set during scraping, by geocoding if
      not provided in the source data.

    * self.location_object is a Location and a) is usually Null in
      practice, and b) is only needed by self.location_url(), so we
      can link back to a location view from a newsitem view. It would
      be set during scraping. (Example use case: NYC crime
      aggregates, where there's no location or address data for the
      "news item" other than which precinct it occurs in.
      eg. http://nyc.everyblock.com/crime/by-date/2010/8/23/3364632/ )

    * self.block is optionally one Block. Also set during
      scraping/geocoding. So far can't find anything that actually
      uses these.

    """

    # We don't have a natural_key() method because we don't know for
    # sure that anything other than ID will be unique.

    schema = models.ForeignKey(Schema)
    title = models.CharField(max_length=255)
    description = models.TextField()
    url = models.TextField(blank=True)
    pub_date = models.DateTimeField(db_index=True)  # TODO: default to now()
    item_date = models.DateField(db_index=True)  # TODO: default to now()
    location = models.GeometryField(blank=True, null=True, spatial_index=True)
    location_name = models.CharField(max_length=255)
    location_object = models.ForeignKey(Location, blank=True, null=True)
    block = models.ForeignKey(Block, blank=True, null=True)
    objects = NewsItemManager()
    attributes = AttributesDescriptor()  # Treat it like a dict.

    class Meta:
        ordering = ('title',)

    def __unicode__(self):
        # Fall back to a placeholder so untitled items still render
        # as a non-empty (and therefore clickable) label.
        return self.title or 'Untitled News Item'

    def item_url(self):
        """Return the site-relative URL of this item's detail page."""
        return urlresolvers.reverse('ebpub-newsitem-detail', args=[self.schema.slug, self.id], kwargs={})

    def item_url_with_domain(self):
        """Return the detail page URL prefixed with http:// and settings.EB_DOMAIN."""
        from django.conf import settings
        return 'http://%s%s' % (settings.EB_DOMAIN, self.item_url())

    def item_date_url(self):
        """
        Return the schema filter URL restricted to this item's item_date.

        The by-date filter always takes a range, so the same day is
        given as both the start and the end of the range.
        """
        item_date = self.item_date
        day = '%s-%s-%s' % (item_date.year, item_date.month, item_date.day)
        # TODO: factor out URL generation. #69
        return '/%s/filter/by-date=%s,%s/' % (self.schema.slug, day, day)

    def location_url(self):
        """Return the URL of self.location_object, or None if there isn't one."""
        if self.location_object_id is not None:
            return self.location_object.url()
        return None

    def attributes_for_template(self):
        """
        Return a list of AttributeForTemplate objects for this NewsItem. The
        objects are ordered by SchemaField.display_order.

        Returns an empty list if the schema has no SchemaFields, or if
        this NewsItem has no Attribute row.
        """
        fields = SchemaField.objects.filter(schema__id=self.schema_id).select_related().order_by('display_order')
        if not fields:
            return []

        try:
            attribute_row = Attribute.objects.filter(news_item__id=self.id).values(*[f.real_name for f in fields])[0]
        except (IndexError, KeyError):
            # Indexing an empty queryset raises IndexError (not KeyError),
            # which is what happens when there is no Attribute row.
            # KeyError is still caught for backward compatibility.
            return []
        return [AttributeForTemplate(f, attribute_row) for f in fields]
5c9826f initial import
Don Kukral authored
638
de7c1fe @slinkp Yet more de-hardcoded URLs.
slinkp authored
639
5c9826f initial import
Don Kukral authored
class AttributeForTemplate(object):
    """
    Pairs one SchemaField with the corresponding raw value from a
    NewsItem's attribute row, in a form convenient for templates.

    After construction, self.values is a list: the Lookup instances the
    raw value refers to (for lookup fields), or a one-element list
    holding the raw value itself (for everything else).
    """

    def __init__(self, schema_field, attribute_row):
        """
        schema_field: the SchemaField being displayed.
        attribute_row: a dict-like mapping of attribute column name
        to value; it is indexed by schema_field.real_name.
        """
        self.sf = schema_field
        self.raw_value = attribute_row[schema_field.real_name]
        self.schema_slug = schema_field.schema.slug
        self.is_lookup = schema_field.is_lookup
        self.is_filter = schema_field.is_filter
        if not self.is_lookup:
            self.values = [self.raw_value]
        elif self.raw_value is None or self.raw_value == '':
            # Attribute columns are nullable; an unset lookup (NULL or
            # empty string) simply has no values.
            self.values = []
        elif self.sf.is_many_to_many_lookup():
            # Many-to-many lookups are stored as comma-separated Lookup ids.
            try:
                id_values = [int(i) for i in self.raw_value.split(',')]
            except ValueError:
                self.values = []
            else:
                lookups = Lookup.objects.in_bulk(id_values)
                self.values = [lookups[i] for i in id_values]
        else:
            self.values = [Lookup.objects.get(id=self.raw_value)]

    def _format_raw_value(self):
        """Return the display form of a non-lookup raw value."""
        raw = self.raw_value
        if isinstance(raw, (datetime.date, datetime.time)):
            # Imported here so that Django's formatting machinery is only
            # needed when there is actually a date/time to format.
            from django.utils.dateformat import format, time_format
            # datetime must be tested before date: it is a subclass of date.
            if isinstance(raw, datetime.datetime):
                return format(raw, 'F j, Y, P')
            if isinstance(raw, datetime.date):
                return format(raw, 'F j, Y')
            return time_format(raw, 'P')
        if raw is True:
            return 'Yes'
        if raw is False:
            return 'No'
        if raw is None:
            return 'N/A'
        return raw

    def value_list(self):
        """
        Returns a list of {value, url, description} dictionaries
        representing each value for this attribute.

        'url' is None unless the field is a filter; 'description' is
        None unless the field is a lookup with a non-empty description.
        """
        if self.is_lookup:
            values = [(val and val.name) or 'None' for val in self.values]
            descriptions = [(val and val.description) or None for val in self.values]
        else:
            values = [self._format_raw_value()]
            descriptions = [None]

        if self.is_filter:
            from ebpub.db.schemafilters import SchemaFilterChain
            chain = SchemaFilterChain(schema=self.sf.schema)
            chain.base_url = self.sf.schema.url()
            if self.is_lookup:
                urls = [chain.replace(self.sf, look).make_url() if look else None
                        for look in self.values]
            else:
                urls = [chain.replace(self.sf, self.raw_value).make_url()]
        else:
            # One placeholder per value: zip() stops at the shortest
            # input, so a single [None] would drop all but the first
            # value of a multi-valued lookup.
            urls = [None] * len(values)

        return [{'value': value, 'url': url, 'description': description}
                for value, url, description in zip(values, urls, descriptions)]
700
class Attribute(models.Model):
    """
    Extended metadata for NewsItems.

    One row holds all of the extra metadata for a single NewsItem.
    The column names are deliberately generic (varchar01, int03, ...);
    to find out what a column means for a given NewsItem, consult the
    SchemaFields of that NewsItem's Schema.
    """
    # One-to-one with NewsItem; the news item's id doubles as our primary key.
    news_item = models.OneToOneField(NewsItem, primary_key=True, unique=True)
    schema = models.ForeignKey(Schema)

    # All data-type field names must end in two digits, because the code assumes this.

    # Short strings.
    varchar01 = models.CharField(max_length=255, blank=True, null=True)
    varchar02 = models.CharField(max_length=255, blank=True, null=True)
    varchar03 = models.CharField(max_length=255, blank=True, null=True)
    varchar04 = models.CharField(max_length=255, blank=True, null=True)
    varchar05 = models.CharField(max_length=255, blank=True, null=True)

    # Dates.
    date01 = models.DateField(blank=True, null=True)
    date02 = models.DateField(blank=True, null=True)
    date03 = models.DateField(blank=True, null=True)
    date04 = models.DateField(blank=True, null=True)
    date05 = models.DateField(blank=True, null=True)

    # Times.
    time01 = models.TimeField(blank=True, null=True)
    time02 = models.TimeField(blank=True, null=True)

    # Datetimes.
    datetime01 = models.DateTimeField(blank=True, null=True)
    datetime02 = models.DateTimeField(blank=True, null=True)
    datetime03 = models.DateTimeField(blank=True, null=True)
    datetime04 = models.DateTimeField(blank=True, null=True)

    # Nullable booleans.
    bool01 = models.NullBooleanField(blank=True)
    bool02 = models.NullBooleanField(blank=True)
    bool03 = models.NullBooleanField(blank=True)
    bool04 = models.NullBooleanField(blank=True)
    bool05 = models.NullBooleanField(blank=True)

    # Integers.
    int01 = models.IntegerField(blank=True, null=True)
    int02 = models.IntegerField(blank=True, null=True)
    int03 = models.IntegerField(blank=True, null=True)
    int04 = models.IntegerField(blank=True, null=True)
    int05 = models.IntegerField(blank=True, null=True)
    int06 = models.IntegerField(blank=True, null=True)
    int07 = models.IntegerField(blank=True, null=True)

    # Long text.
    text01 = models.TextField(blank=True, null=True)

    def __unicode__(self):
        label = u'Attributes for news item %s'
        return label % self.news_item_id
746
class LookupManager(models.Manager):
    """Manager for Lookup: natural-key support plus get-or-create by code."""

    def get_by_natural_key(self, slug, schema_field__slug,
                           schema_field__real_name):
        """
        Return the Lookup identified by the tuple that
        Lookup.natural_key() produces.

        Despite its (unchanged, for compatibility) name, the second
        argument is the slug of the SchemaField's *Schema*: that is
        what natural_key() emits, so that is what we must filter on.
        """
        return self.get(slug=slug,
                        schema_field__schema__slug=schema_field__slug,
                        schema_field__real_name=schema_field__real_name)

    def get_or_create_lookup(self, schema_field, name, code=None, description='', make_text_slug=True, logger=None):
        """
        Returns the Lookup instance matching the given SchemaField, name and
        Lookup.code, creating it (with the given name/code/description) if it
        doesn't already exist.

        If make_text_slug is True, then a slug will be created from the given
        name. If it's False, then the slug will be the Lookup's ID.

        Slugs longer than 32 characters and names longer than 255
        characters are trimmed to fit; when a name is trimmed and no
        description was given, the full name is kept as the description.

        If logger is given, creations are reported via logger.info() and
        trimming via logger.warning().
        """
        def log_info(message):
            if logger is not None:
                logger.info(message)

        def log_warn(message):
            if logger is not None:
                logger.warning(message)

        code = code or name  # code defaults to name if it wasn't provided
        try:
            return Lookup.objects.get(schema_field__id=schema_field.id, code=code)
        except Lookup.DoesNotExist:
            pass

        if make_text_slug:
            slug = slugify(name)
            if len(slug) > 32:
                log_warn("Trimming slug %r to %r in order to fit 32-char limit." % (slug, slug[:32]))
                slug = slug[:32]
        else:
            # To avoid integrity errors in the slug when creating the Lookup,
            # use a temporary dummy slug that's guaranteed not to be in use.
            # We'll change it back immediately afterward.
            slug = '__3029j3f029jf029jf029__'
        if len(name) > 255:
            old_name = name
            name = name[:250] + '...'
            # Save the full name in the description.
            if not description:
                description = old_name
            log_warn("Trimming name %r to %r in order to fit 255-char limit." % (old_name, name))
        obj = Lookup(schema_field_id=schema_field.id, name=name, code=code, slug=slug, description=description)
        obj.save()
        if not make_text_slug:
            # Set the slug to the ID.
            obj.slug = obj.id
            obj.save()
        log_info('Created %s %r' % (schema_field.name, name))
        return obj
802
class Lookup(models.Model):
    """
    One of the possible values of a lookup-type SchemaField.
    """
    schema_field = models.ForeignKey(SchemaField)
    name = models.CharField(max_length=255)
    # `code` is the optional internal code to use during retrieval.
    # Example: when scraping Chicago crimes, the crime type code from
    # the data source is what we match against in this table. `name`
    # can't serve that purpose, because it has been massaged into a
    # "prettier" format than the one the data source uses.
    code = models.CharField(max_length=255, blank=True)
    slug = models.CharField(max_length=32, db_index=True)
    description = models.TextField(blank=True)

    objects = LookupManager()

    class Meta:
        unique_together = (('slug', 'schema_field'),)
        ordering = ('slug',)

    def natural_key(self):
        """Return (slug, schema slug, schema field real_name) for serialization."""
        field = self.schema_field
        return (self.slug, field.schema.slug, field.real_name)

    def __unicode__(self):
        parts = (self.schema_field, self.name)
        return u'%s - %s' % parts
827
class NewsItemLocation(models.Model):
    """Links one NewsItem to one Location; each pair may appear only once."""
    news_item = models.ForeignKey(NewsItem)
    location = models.ForeignKey(Location)

    class Meta:
        unique_together = (('news_item', 'location'),)

    def __unicode__(self):
        pair = (self.news_item, self.location)
        return u'%s - %s' % pair
837
class AggregateBaseClass(models.Model):
    """Abstract base for the aggregate models: a total for one schema."""
    schema = models.ForeignKey(Schema)
    total = models.IntegerField()

    class Meta:
        abstract = True
844
class AggregateAll(AggregateBaseClass):
    """Total items in the schema."""
848
class AggregateDay(AggregateBaseClass):
    """Total items in the schema with item_date on the given day."""
    date_part = models.DateField(db_index=True)
852
class AggregateLocation(AggregateBaseClass):
    """Total items in the schema in location, summed over the last 30 days."""
    location_type = models.ForeignKey(LocationType)
    location = models.ForeignKey(Location)
857
class AggregateLocationDay(AggregateBaseClass):
    """Total items in the schema in location with item_date on the given day."""
    location_type = models.ForeignKey(LocationType)
    location = models.ForeignKey(Location)
    date_part = models.DateField(db_index=True)
863
class AggregateFieldLookup(AggregateBaseClass):
    """Total items in the schema with schema_field's value = lookup."""
    schema_field = models.ForeignKey(SchemaField)
    lookup = models.ForeignKey(Lookup)
868
class SearchSpecialCase(models.Model):
    """
    A unique search query string together with an optional redirect
    target, title and body.
    """
    query = models.CharField(max_length=64, unique=True)
    redirect_to = models.CharField(max_length=255, blank=True)
    title = models.CharField(max_length=128, blank=True)
    body = models.TextField(blank=True)

    def __unicode__(self):
        text = self.query
        return text
877
class DataUpdate(models.Model):
    """Keeps track of each time we update our data."""
    schema = models.ForeignKey(Schema)
    # When the scraper/importer started running.
    update_start = models.DateTimeField()
    # When the scraper/importer finished.
    update_finish = models.DateTimeField()
    num_added = models.IntegerField()
    num_changed = models.IntegerField()
    num_deleted = models.IntegerField()
    num_skipped = models.IntegerField()
    got_error = models.BooleanField()

    def __unicode__(self):
        details = (self.schema.name, self.update_start)
        return u'%s started on %s' % details

    def total_time(self):
        """Return how long the update ran: update_finish minus update_start."""
        finished = self.update_finish
        started = self.update_start
        return finished - started
Something went wrong with that request. Please try again.