/
managers.py
1105 lines (878 loc) · 42.3 KB
/
managers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""Managers for reviewboard.diffviewer.models."""
import bz2
import gc
import hashlib
import logging
from functools import partial
from django.conf import settings
from django.db import models, reset_queries, connection, connections
from django.db.models import Count, Q
from django.db.utils import IntegrityError
from django.utils.translation import gettext as _
from reviewboard.diffviewer.commit_utils import get_file_exists_in_history
from reviewboard.diffviewer.differ import DiffCompatVersion
from reviewboard.diffviewer.diffutils import check_diff_size
from reviewboard.diffviewer.filediff_creator import create_filediffs
logger = logging.getLogger(__name__)
class FileDiffManager(models.Manager):
    """A manager for FileDiff objects.

    This contains utility methods for locating FileDiffs that haven't been
    migrated to use RawFileDiffData.
    """

    # Maximum number of objects fetched from the database per query while
    # migrating (see _iter_batches()). Batches yielded to callers are
    # carved out of each fetch of this size.
    MIGRATE_OBJECT_LIMIT = 200
def unmigrated(self):
"""Query FileDiffs that store their own diff content.
This will return FileDiffs that were created prior to Review Board 1.7.
Returns:
django.db.models.query.QuerySet:
A queryset for filtering FileDiffs that have not been migrated to
use some form of deduplicated diff storage mechanism.
"""
return self.filter(
Q(diff_hash_id__isnull=True) &
Q(legacy_diff_hash_id__isnull=True) &
Q(parent_diff_hash_id__isnull=True) &
Q(legacy_parent_diff_hash_id__isnull=True))
    def get_migration_counts(self):
        """Return the number of items that need to be migrated.

        The result is a dictionary containing a breakdown of the various
        counts, and the total count of all items for display.

        Returns:
            dict:
            A dictionary of counts. This will contain the following keys:

            ``filediffs``:
                The number of unmigrated FileDiff objects.

            ``legacy_file_diff_data``:
                The number of unmigrated LegacyFileDiffData objects.

            ``total_count``:
                The total count of objects to migrate.

            ``warning``:
                A warning string if the legacy count is only an estimate
                (MySQL/InnoDB), or ``None``.
        """
        # Imported here to avoid a circular import with the models module.
        from reviewboard.diffviewer.models import LegacyFileDiffData

        unmigrated_filediffs_count = self.unmigrated().count()
        legacy_fdd_count = None
        warning = None

        legacy_fdd_cnx = connections[LegacyFileDiffData.objects.db]

        if legacy_fdd_cnx.vendor == 'mysql':
            # On MySQL, computing the number of LegacyFileDiffData objects
            # can be very slow when using InnoDB tables.
            #
            # Unlike MyISAM, which keeps a total row count for easy
            # reference, InnoDB requires walking the index (stored in a
            # BTree) to get the total row count, and for this table, that's
            # expensive.
            #
            # A large part of the reason why is because we made a design
            # error early on and used SHA1 values as the primary key. This
            # makes for a larger, expensive index, and MySQL seems to hate
            # this.
            #
            # So the workaround is that we're just going to ask MySQL for
            # its last-known row count. For MyISAM, this is going to be an
            # exact value. For InnoDB, it might be an estimate.
            #
            # The MySQL documentation says that the estimate may be off by
            # as much as 40%-50%. For this reason, we'll warn the user, but
            # it won't actually harm the migration process. The total count
            # is purely informative.
            #
            # Note that running `ANALYZE TABLE diffviewer_filediffdata`
            # before the migration will sync up the values, but that operation
            # may take some time.
            table_name = LegacyFileDiffData._meta.db_table

            try:
                cursor = legacy_fdd_cnx.cursor()
                cursor.execute('SHOW TABLE STATUS WHERE Name="%s"'
                               % table_name)
                result = cursor.fetchone()

                # We have to fetch these by index. These should be stable.
                # Index 1 is the storage engine ("Engine"), index 4 is the
                # stored row count ("Rows").
                table_type = result[1]
                legacy_fdd_count = result[4]

                if legacy_fdd_count == 0:
                    # This might be correct, but since 0 means "we're done,"
                    # we want to be sure. Let's force a query below.
                    legacy_fdd_count = None

                if table_type == 'InnoDB':
                    warning = _(
                        'The diff migration count is just an estimate. The '
                        '%s table is backed by InnoDB, which does not '
                        'provide up-to-date row counts, and querying can be '
                        'too slow for this table. This will only affect '
                        'progress notification and will not otherwise impact '
                        'diff migration.'
                    ) % table_name
            except Exception as e:
                # Something went wrong. We're going to fall back on
                # calculating from the database, though it will be slow.
                logger.exception('Unable to fetch information on the %s '
                                 'table: %s',
                                 table_name, e)
                logger.warning('Calculating the number of diffs in the '
                               '%s table. This may take a while...',
                               table_name)

        # Either we're not on MySQL, the fast path failed, or it reported a
        # suspicious 0. Fall back to an exact (but possibly slow) COUNT(*).
        if legacy_fdd_count is None:
            legacy_fdd_count = LegacyFileDiffData.objects.count()

        return {
            'filediffs': unmigrated_filediffs_count,
            'legacy_file_diff_data': legacy_fdd_count,
            'total_count': unmigrated_filediffs_count + legacy_fdd_count,
            'warning': warning,
        }
def migrate_all(self, batch_done_cb=None, counts=None, batch_size=40,
max_diffs=None):
"""Migrate diff content in FileDiffs to use RawFileDiffData.
This will run through all unmigrated FileDiffs and migrate them,
condensing their storage needs and removing the content from
FileDiffs.
This will return a dictionary with the result of the process.
Args:
batch_done_cb (callable, optional):
A function to call after each batch of objects has been
processed. This can be used for progress notification.
This should be in the form of:
.. code-block:: python
def on_batch_done(total_diffs_migrated=None,
total_count=None, **kwargs):
...
Note that ``total_count`` may be ``None``.
counts (dict, optional):
A dictionary of counts for calculations.
The only value used is ``total_count``, which would be
a total number of objects being processed.
This is only used for reporting to ``batch_done_cb``. If
not provided, and ``batch_done_cb`` *is* provided, then
this method will query the counts itself.
batch_size (int, optional):
The number of objects to process in each batch.
max_diffs (int, optional):
The maximum number of diffs to migrate.
"""
from reviewboard.diffviewer.models import LegacyFileDiffData
assert batch_done_cb is None or callable(batch_done_cb)
total_diffs_migrated = 0
total_diff_size = 0
total_bytes_saved = 0
unmigrated_filediffs = self.unmigrated()
legacy_data_items = LegacyFileDiffData.objects.all()
if counts is not None:
total_count = counts.get('total_count')
else:
total_count = self.get_migration_counts()
if max_diffs is not None:
if total_count is None:
total_count = max_diffs
else:
total_count = min(total_count, max_diffs)
migration_tasks = (
(self._migrate_filediffs, unmigrated_filediffs),
(self._migrate_legacy_fdd, legacy_data_items),
)
for migrate_func, queryset in migration_tasks:
for batch_info in migrate_func(queryset=queryset,
batch_size=batch_size,
max_diffs=max_diffs):
total_diffs_migrated += batch_info[0]
total_diff_size += batch_info[1]
total_bytes_saved += batch_info[2]
if batch_done_cb is not None:
batch_done_cb(total_diffs_migrated=total_diffs_migrated,
total_count=total_count)
if max_diffs is not None:
max_diffs -= batch_info[0]
if max_diffs is not None and max_diffs <= 0:
break
# Call batch_done_cb one more time, using the finalized total count
# which may differ from the original count due to a bad total row
# count estimate or a too-large max_diffs.
if batch_done_cb is not None:
batch_done_cb(total_diffs_migrated=total_diffs_migrated,
total_count=total_diffs_migrated)
return {
'diffs_migrated': total_diffs_migrated,
'old_diff_size': total_diff_size,
'new_diff_size': total_diff_size - total_bytes_saved,
'bytes_saved': total_bytes_saved,
}
    def _migrate_legacy_fdd(self, queryset, batch_size,
                            max_diffs=None):
        """Migrate data from LegacyFileDiffData to RawFileDiffData.

        This will go through every
        :py:class:`~reviewboard.diffviewer.models.LegacyFileDiffData` and
        convert them to
        :py:class:`~reviewboard.diffviewer.models.RawFileDiffData` entries,
        removing the old versions. All associated FileDiffs are then updated to
        point to the new RawFileDiffData entry instead of the old
        LegacyFileDiffData.

        Args:
            queryset (django.db.models.query.QuerySet):
                The queryset for retrieving
                :py:class:`~reviewboard.diffviewer.models.LegacyFileDiffData`
                objects.

            batch_size (int):
                The number of objects to process in each batch.

            max_diffs (int, optional):
                The maximum number of diffs to migrate. This may be ``None``,
                in which case all diffs will be migrated.

        Yields:
            tuple:
            A tuple containing the following items:

            1. The size of the batch.
            2. The total number of bytes of diff data from the old legacy
               entries in this batch.
            3. The total number of bytes saved during this migration.
            4. A list of all legacy hashes that were migrated for diffs.
            5. A list of all legacy hashes that were migrated for parent diffs.
            6. A list of all legacy hashes that were migrated for diffs and
               parent diffs.
        """
        from reviewboard.diffviewer.models import RawFileDiffData

        cursor = connection.cursor()

        # Annotate each legacy entry with how many FileDiffs reference it as
        # a diff and as a parent diff, so we know which hash transitions to
        # perform below without extra per-row queries.
        queryset = queryset.annotate(
            num_filediffs=Count('filediffs'),
            num_parent_filediffs=Count('parent_filediffs'))

        for batch in self._iter_batches(queryset=queryset,
                                        batch_size=batch_size,
                                        max_diffs=max_diffs):
            batch_total_diff_size = 0
            batch_total_bytes_saved = 0
            raw_fdds = []
            all_diff_hashes = []
            filediff_hashes = []
            parent_filediff_hashes = []

            for legacy_fdd in batch:
                # Build (but don't save) the replacement entry; the whole
                # batch is bulk-created below.
                raw_fdd = RawFileDiffData.objects.create_from_legacy(
                    legacy_fdd, save=False)
                raw_fdds.append(raw_fdd)

                binary_hash = legacy_fdd.binary_hash

                old_diff_size = len(legacy_fdd.get_binary_base64())
                batch_total_diff_size += old_diff_size
                batch_total_bytes_saved += old_diff_size - len(raw_fdd.binary)

                # Update all associated FileDiffs to use the new objects
                # instead of the old ones.
                if legacy_fdd.num_filediffs > 0:
                    filediff_hashes.append(binary_hash)

                if legacy_fdd.num_parent_filediffs > 0:
                    parent_filediff_hashes.append(binary_hash)

                all_diff_hashes.append(binary_hash)

            try:
                # Attempt to create all the entries we want in one go.
                RawFileDiffData.objects.bulk_create(raw_fdds)
            except IntegrityError:
                # One or more entries in the batch conflicted with an existing
                # entry, meaning it was already created. We'll just need to
                # operate on the contents of this batch one-by-one.
                for raw_fdd in raw_fdds:
                    try:
                        raw_fdd.save()
                    except IntegrityError:
                        raw_fdd = RawFileDiffData.objects.get(
                            binary_hash=raw_fdd.binary_hash)

                        # This was already in the database, so we didn't have
                        # to write new data. That means we get to reclaim
                        # its size in the amount of bytes saved.
                        batch_total_bytes_saved += len(raw_fdd.binary)

            # Repoint the referencing FileDiffs at the new RawFileDiffData
            # rows, then delete the legacy rows for this batch.
            if filediff_hashes:
                self._transition_hashes(cursor, 'diff_hash', filediff_hashes)

            if parent_filediff_hashes:
                self._transition_hashes(cursor, 'parent_diff_hash',
                                        parent_filediff_hashes)

            queryset.filter(pk__in=all_diff_hashes).delete()

            yield (len(batch), batch_total_diff_size,
                   batch_total_bytes_saved, filediff_hashes,
                   parent_filediff_hashes, all_diff_hashes)
def _migrate_filediffs(self, queryset, batch_size, max_diffs=None):
"""Migrate old diff data from a FileDiff into a RawFileDiffData.
Args:
queryset (django.db.models.query.QuerySet):
The queryset for retrieving
:py:class:`~reviewboard.diffviewer.models.FileDiff` objects.
batch_size (int):
The number of objects to process in each batch.
max_diffs (int, optional):
The maximum number of diffs to migrate. This may be ``None``,
in which case all diffs will be migrated.
Yields:
tuple:
A tuple containing the following items:
1. The size of the batch.
2. The total number of bytes of diff data from the old legacy
entries in this batch.
3. The total number of bytes saved during this migration.
"""
for batch in self._iter_batches(queryset=queryset,
batch_size=batch_size,
max_diffs=max_diffs):
batch_total_diff_size = 0
batch_total_bytes_saved = 0
for filediff in batch:
diff_size = len(filediff.get_diff64_base64())
parent_diff_size = len(filediff.get_parent_diff64_base64())
batch_total_diff_size += diff_size + parent_diff_size
diff_hash_is_new, parent_diff_hash_is_new = \
filediff._migrate_diff_data(recalculate_counts=False)
if diff_size > 0:
batch_total_bytes_saved += diff_size
if diff_hash_is_new:
# This is a new entry, so we have to subtract the
# new storage size. This *could* be larger than the
# original diff, but will usually be smaller.
batch_total_bytes_saved -= \
len(filediff.diff_hash.binary)
if parent_diff_size > 0:
batch_total_bytes_saved += parent_diff_size
if diff_hash_is_new:
# This is a new entry, so we have to subtract the
# new storage size. This *could* be larger than the
# original diff, but will usually be smaller.
batch_total_bytes_saved -= \
len(filediff.parent_diff_hash.binary)
yield len(batch), batch_total_diff_size, batch_total_bytes_saved
    def _iter_batches(self, queryset, batch_size, max_diffs=None,
                      object_limit=MIGRATE_OBJECT_LIMIT):
        """Iterate through items in a queryset, yielding batches.

        This will gather up to a specified number of items from a
        queryset at a time, process them into batches of a specified
        size, and yield them.

        After each set of objects fetched from the database, garbage
        collection will be forced and stored queries reset, in order to
        reduce memory usage.

        Args:
            queryset (django.db.models.query.QuerySet):
                The queryset to execute for fetching objects.

            batch_size (int):
                The maximum number of objects to yield per batch.

            max_diffs (int, optional):
                The maximum number of diffs to migrate. This may be ``None``,
                in which case all diffs will be migrated.

            object_limit (int, optional):
                The maximum number of objects to fetch from the database per
                query.

        Yields:
            list of django.db.models.Model:
            A batch of items to process. This will never be larger than
            ``batch_size``.
        """
        batch = []
        total_processed = 0

        # We're going to iterate until we've exhausted the available objects
        # from the database for the provided query, or hit the maximum number
        # of diffs that were requested.
        while max_diffs is None or total_processed < max_diffs:
            # Every time we work on a batch, we're re-querying the list of
            # objects. This result from the query is expected not to have any
            # previously-processed objects from a yielded batch. It may,
            # however, have objects we've previously seen that haven't been
            # yielded in a batch yet. That's why we're indexing from the
            # length of the batch to the object limit (or to the requested
            # max_diffs, whichever is smaller).
            limit = object_limit
            processed = 0

            if max_diffs is not None:
                limit = min(limit, max_diffs - total_processed)

            batch_len = len(batch)

            for item in queryset[batch_len:limit].iterator():
                batch.append(item)
                processed += 1

                if len(batch) == batch_size:
                    yield batch
                    batch = []

            total_processed += processed

            # Do all we can to limit the memory usage by resetting any
            # stored queries (if DEBUG is True), and force garbage
            # collection of anything we may have from processing an object.
            reset_queries()
            gc.collect()

            if processed < limit:
                # We've processed all items in the database. We're done.
                #
                # NOTE(review): when `batch` carries leftovers into this
                # iteration, at most ``limit - batch_len`` items can be
                # fetched, so ``processed < limit`` holds even if the
                # database has more rows. This relies on callers using a
                # batch_size that divides the fetch limit (as migrate_all's
                # defaults of 40 and 200 do) — TODO confirm intent.
                break

        if batch:
            yield batch
    def _transition_hashes(self, cursor, hash_field_name, diff_hashes):
        """Transitions FileDiff-associated hashes to RawFileDiffData.

        This queries all FileDiffs and RawFileDiffData entries referencing
        the given list of diff hashes, and updates the FileDiffs to point
        to those instead of the formerly-associated LegacyFileDiffDatas.

        Args:
            cursor:
                A database cursor used for the vendor-specific raw UPDATE
                queries.

            hash_field_name (unicode):
                The FileDiff field to transition (``diff_hash`` or
                ``parent_diff_hash``).

            diff_hashes (list of unicode):
                The legacy binary hashes (SHA1 hex digests) to transition.
                These are interpolated directly into SQL below; they are
                generated internally from hashlib digests, not taken from
                user input.
        """
        from reviewboard.diffviewer.models import RawFileDiffData

        legacy_hash_field_name = 'legacy_%s' % hash_field_name

        # Since this is a pretty complex operation, we're going to sanity-check
        # results on DEBUG setups, to help catch issues that might come up as
        # this code changes.
        if settings.DEBUG:
            old_filediff_info = dict(
                (filediff.pk, getattr(filediff, legacy_hash_field_name).pk)
                for filediff in self.filter(**{
                    legacy_hash_field_name + '__in': diff_hashes,
                })
            )
        else:
            old_filediff_info = None

        # If the database supports joins on updates, then we can craft
        # a query that will massively speed up the diff transition time.
        # Otherwise, we need to fall back on doing a select and then an
        # update per result.
        #
        # The queries are different between databases (yay standards), so
        # we can't be smart and do this in a generic way. We have to check
        # the database types.
        if connection.vendor == 'mysql':
            cursor.execute(
                'UPDATE %(filediff_table)s'
                '  INNER JOIN %(raw_fdd_table)s raw_fdd'
                '    ON raw_fdd.binary_hash = '
                '       %(filediff_table)s.%(hash_field_name)s_id'
                '  SET'
                '    raw_%(hash_field_name)s_id = raw_fdd.id,'
                '    %(hash_field_name)s_id = NULL'
                '  WHERE raw_fdd.binary_hash IN (%(diff_hashes)s)'
                % {
                    'filediff_table': self.model._meta.db_table,
                    'raw_fdd_table': RawFileDiffData._meta.db_table,
                    'hash_field_name': hash_field_name,
                    'diff_hashes': ','.join(
                        "'%s'" % diff_hash
                        for diff_hash in diff_hashes
                    ),
                })
        elif connection.vendor == 'postgresql':
            cursor.execute(
                'UPDATE %(filediff_table)s'
                '  SET'
                '    raw_%(hash_field_name)s_id = raw_fdd.id,'
                '    %(hash_field_name)s_id = NULL'
                '  FROM %(raw_fdd_table)s raw_fdd'
                '  WHERE'
                '    raw_fdd.binary_hash IN (%(diff_hashes)s) AND'
                '    raw_fdd.binary_hash = '
                '        %(hash_field_name)s_id'
                % {
                    'filediff_table': self.model._meta.db_table,
                    'raw_fdd_table': RawFileDiffData._meta.db_table,
                    'hash_field_name': hash_field_name,
                    'diff_hashes': ','.join(
                        "'%s'" % diff_hash
                        for diff_hash in diff_hashes
                    ),
                })
        else:
            # Fallback for databases without join-on-update support
            # (e.g. SQLite): one UPDATE per RawFileDiffData entry.
            raw_fdds = RawFileDiffData.objects.filter(
                binary_hash__in=diff_hashes).only('pk', 'binary_hash')

            for raw_fdd in raw_fdds:
                self.filter(**{
                    legacy_hash_field_name: raw_fdd.binary_hash
                }).update(**{
                    hash_field_name: raw_fdd.pk,
                    legacy_hash_field_name: None
                })

        if settings.DEBUG:
            # Verify that every FileDiff we touched now points at a
            # RawFileDiffData entry with the same hash it used to reference.
            new_filediff_info = dict(
                (filediff.pk, getattr(filediff, hash_field_name).binary_hash)
                for filediff in self.filter(pk__in=old_filediff_info.keys())
            )

            assert old_filediff_info == new_filediff_info
class RawFileDiffDataManager(models.Manager):
    """A custom manager for RawFileDiffData.

    This provides conveniences for creating an entry based on a
    LegacyFileDiffData object.
    """

    def process_diff_data(self, data):
        """Return diff content and a compression flag for storage.

        The data is bzip2-compressed, and the compressed form is used only
        if it is actually smaller than the original. Otherwise the raw
        content is stored uncompressed.

        Args:
            data (bytes):
                The raw diff content to process.

        Returns:
            tuple:
            A 2-tuple of the content to store and the compression flag
            (``COMPRESSION_BZIP2`` or ``None``).
        """
        bzipped = bz2.compress(data, 9)

        if len(bzipped) < len(data):
            return bzipped, self.model.COMPRESSION_BZIP2

        return data, None

    def get_or_create_from_data(self, data):
        """Return or create a new stored entry for diff data.

        Args:
            data (bytes):
                The diff data to store or return an entry for.

        Returns:
            reviewboard.diffviewer.models.raw_file_diff_data.RawFileDiffData:
            The entry for the diff data.

        Raises:
            TypeError:
                The data passed in was not a bytes string.
        """
        if not isinstance(data, bytes):
            raise TypeError(
                'RawFileDiffData.objects.get_or_create_data expects a '
                'bytes value, not %s'
                % type(data))

        stored_data, compression = self.process_diff_data(data)

        # The lookup key is the SHA1 of the *uncompressed* content, so
        # identical diffs deduplicate regardless of compression outcome.
        return self.get_or_create(
            binary_hash=self._hash_hexdigest(data),
            defaults={
                'binary': stored_data,
                'compression': compression,
            })

    def create_from_legacy(self, legacy, save=True):
        """Return a RawFileDiffData built from a LegacyFileDiffData.

        Args:
            legacy (reviewboard.diffviewer.models.LegacyFileDiffData):
                The legacy entry to convert.

            save (bool, optional):
                Whether to save the new entry to the database.

        Returns:
            reviewboard.diffviewer.models.raw_file_diff_data.RawFileDiffData:
            The new (possibly unsaved) entry.
        """
        stored_data, compression = self.process_diff_data(legacy.binary)

        raw_fdd = self.model(binary_hash=legacy.binary_hash,
                             binary=stored_data,
                             compression=compression)
        raw_fdd.extra_data = legacy.extra_data

        if save:
            raw_fdd.save()

        return raw_fdd

    def _hash_hexdigest(self, diff):
        """Return the SHA1 hex digest of the given diff content."""
        return hashlib.sha1(diff).hexdigest()
class BaseDiffManager(models.Manager):
    """A base manager class for creating models out of uploaded diffs"""

    def create_from_data(self, *args, **kwargs):
        """Create a model instance from data.

        Subclasses must implement this method.

        See :py:meth:`create_from_upload` for the fields that will be passed to
        this method.

        Returns:
            django.db.models.Model:
            The model instance.
        """
        raise NotImplementedError

    def create_from_upload(self, repository, diff_file, parent_diff_file=None,
                           request=None, validate_only=False, **kwargs):
        """Create a model instance from a form upload.

        This parses a diff and optional parent diff covering one or more
        files, validates, and constructs either a
        :py:class:`~reviewboard.diffviewer.models.diffset.DiffSet` or a
        :py:class:`~reviewboard.diffviewer.models.diffcommit.DiffCommit`
        and the child :py:class:`FileDiffs
        <reviewboard.diffviewer.models.filediff.FileDiff>` representing the
        diff.

        This can optionally validate the diff without saving anything to the
        database. In this case, no value will be returned. Instead, callers
        should take any result as success.

        Args:
            repository (reviewboard.scmtools.models.Repository):
                The repository the diff applies to.

            diff_file (django.core.files.uploadedfile.UploadedFile):
                The diff file uploaded in the form.

            parent_diff_file (django.core.files.uploadedfile.UploadedFile,
                              optional):
                The parent diff file uploaded in the form.

            request (django.http.HttpRequest, optional):
                The current HTTP request, if any. This will result in better
                logging.

            validate_only (bool, optional):
                Whether to just validate and not save. If ``True``, then this
                won't populate the database at all and will return ``None``
                upon success. This defaults to ``False``.

            **kwargs (dict):
                Additional keyword arguments to pass to
                :py:meth:`create_from_data`.

                See :py:meth:`~DiffSetManager.create_from_data` and
                :py:meth:`DiffCommitManager.create_from_data` for acceptable
                keyword arguments.

        Returns:
            reviewboard.diffviewer.models.diffset.DiffSet:
            The resulting DiffSet stored in the database, if processing
            succeeded and ``validate_only=False``.

        Raises:
            reviewboard.diffviewer.errors.DiffParserError:
                There was an error parsing the main diff or parent diff.

            reviewboard.diffviewer.errors.DiffTooBigError:
                The diff file was too big to be uploaded, based on the
                configured maximum diff size in settings.

            reviewboard.diffviewer.errors.EmptyDiffError:
                The provided diff file did not contain any file changes.

            reviewboard.scmtools.core.FileNotFoundError:
                A file specified in the diff could not be found in the
                repository.

            reviewboard.scmtools.core.SCMError:
                There was an error talking to the repository when validating
                the existence of a file.

            reviewboard.scmtools.git.ShortSHA1Error:
                A SHA1 specified in the diff was in the short form, which
                could not be used to look up the file. This is applicable only
                to Git.
        """
        # This may raise DiffTooBigError, before any file is read.
        check_diff_size(diff_file, parent_diff_file)

        parent_diff_name = None
        parent_diff_contents = None

        if parent_diff_file:
            parent_diff_name = parent_diff_file.name
            parent_diff_contents = parent_diff_file.read()

        return self.create_from_data(
            repository=repository,
            diff_file_name=diff_file.name,
            diff_file_contents=diff_file.read(),
            parent_diff_file_name=parent_diff_name,
            parent_diff_file_contents=parent_diff_contents,
            request=request,
            validate_only=validate_only,
            **kwargs)
class DiffCommitManager(BaseDiffManager):
    """A custom manager for DiffCommit objects.

    This includes utilities for creating diffsets based on the data from form
    uploads, webapi requests, and upstream repositories.
    """

    def by_diffset_ids(self, diffset_ids):
        """Return the commits grouped by DiffSet IDs.

        Args:
            diffset_ids (list of int):
                The primary keys of the DiffSets to retrieve commits for.

        Returns:
            dict:
            A mapping of :py:class:`~reviewboard.diffviewer.models.diffset.
            DiffSet` primary keys to lists of
            :py:class:`DiffCommits <reviewboard.diffviewer.models.diffcommit.
            DiffCommit>`.
        """
        # Seed the mapping so every requested DiffSet appears, even if it
        # has no commits.
        grouped = {}

        for diffset_pk in diffset_ids:
            grouped[diffset_pk] = []

        for commit in self.filter(diffset_id__in=diffset_ids):
            grouped[commit.diffset_id].append(commit)

        return grouped

    def create_from_data(self,
                         repository,
                         diff_file_name,
                         diff_file_contents,
                         parent_diff_file_name,
                         parent_diff_file_contents,
                         diffset,
                         commit_id,
                         parent_id,
                         commit_message,
                         author_name,
                         author_email,
                         author_date,
                         validation_info=None,
                         request=None,
                         committer_name=None,
                         committer_email=None,
                         committer_date=None,
                         base_commit_id=None,
                         check_existence=True,
                         validate_only=False):
        """Create a DiffCommit from raw diff data.

        Args:
            repository (reviewboard.scmtools.models.Repository):
                The repository the diff was posted against.

            diff_file_name (unicode):
                The name of the diff file.

            diff_file_contents (bytes):
                The contents of the diff file.

            parent_diff_file_name (unicode):
                The name of the parent diff file.

            parent_diff_file_contents (bytes):
                The contents of the parent diff file.

            diffset (reviewboard.diffviewer.models.diffset.DiffSet):
                The DiffSet to attach the created DiffCommit to.

            commit_id (unicode):
                The unique identifier of the commit.

            parent_id (unicode):
                The unique identifier of the parent commit.

            commit_message (unicode):
                The commit message.

            author_name (unicode):
                The author's name.

            author_email (unicode):
                The author's e-mail address.

            author_date (datetime.datetime):
                The date and time that the commit was authored.

            validation_info (dict, optional):
                A dictionary of parsed validation information from the
                :py:class:`~reviewboard.webapi.resources.validate_diffcommit.
                ValidateDiffCommitResource`.

            request (django.http.HttpRequest, optional):
                The HTTP request from the client.

            committer_name (unicode, optional):
                The committer's name.

            committer_email (unicode, optional):
                The committer's e-mail address.

            committer_date (datetime.datetime, optional):
                The date and time that the commit was committed.

            base_commit_id (unicode, optional):
                The ID of the commit that the diff is based upon. This is
                needed by some SCMs or hosting services to properly look up
                files, if the diffs represent blob IDs instead of commit IDs
                and the service doesn't support those lookups.

            check_existence (bool, optional):
                Whether or not existence checks should be performed against
                the upstream repository.

            validate_only (bool, optional):
                Whether to just validate and not save. If ``True``, then this
                won't populate the database at all and will return ``None``
                upon success. This defaults to ``False``.

        Returns:
            reviewboard.diffviewer.models.diffcommit.DiffCommit:
            The created model instance.
        """
        commit_fields = {
            'filename': diff_file_name,
            'diffset': diffset,
            'commit_id': commit_id,
            'parent_id': parent_id,
            'author_name': author_name,
            'author_email': author_email,
            'author_date': author_date,
            'commit_message': commit_message,
            'committer_name': committer_name,
            'committer_email': committer_email,
            'committer_date': committer_date,
        }
        diffcommit = self.model(**commit_fields)

        # The commit must exist in the database before FileDiffs can point
        # at it, so save it first unless we're only validating.
        if not validate_only:
            diffcommit.save()

        def get_file_exists(*args, **kwargs):
            # File existence is checked against the accumulated per-commit
            # validation info, falling back to the repository.
            return get_file_exists_in_history(
                *args,
                validation_info=validation_info or {},
                repository=repository,
                parent_id=parent_id,
                **kwargs)

        create_filediffs(
            get_file_exists=get_file_exists,
            diff_file_contents=diff_file_contents,
            parent_diff_file_contents=parent_diff_file_contents,
            repository=repository,
            request=request,
            basedir='',
            base_commit_id=base_commit_id,
            diffset=diffset,
            diffcommit=diffcommit,
            validate_only=validate_only,
            check_existence=check_existence)

        if validate_only:
            return None

        return diffcommit
class DiffSetManager(BaseDiffManager):
"""A custom manager for DiffSet objects.
This includes utilities for creating diffsets based on the data from form
uploads, webapi requests, and upstream repositories.
"""
def create_from_data(self,
repository,
diff_file_name,
diff_file_contents,
parent_diff_file_name=None,
parent_diff_file_contents=None,
diffset_history=None,
basedir=None,
request=None,
base_commit_id=None,
check_existence=True,
validate_only=False,
**kwargs):
"""Create a DiffSet from raw diff data.
This parses a diff and optional parent diff covering one or more files,
validates, and constructs :py:class:`DiffSets
<reviewboard.diffviewer.models.diffset.DiffSet>` and
:py:class:`FileDiffs <reviewboard.diffviewer.models.filediff.FileDiff>`
representing the diff.
This can optionally validate the diff without saving anything to the
database. In this case, no value will be returned. Instead, callers
should take any result as success.
Args:
repository (reviewboard.scmtools.models.Repository):
The repository the diff applies to.
diff_file_name (unicode):
The filename of the main diff file.
diff_file_contents (bytes):
The contents of the main diff file.
parent_diff_file_name (unicode, optional):
The filename of the parent diff, if one is provided.
parent_diff_file_contents (bytes, optional):
The contents of the parent diff, if one is provided.
diffset_history (reviewboard.diffviewer.models.diffset_history.
DiffSetHistory, optional):
The history object to associate the DiffSet with. This is
not required if using ``validate_only=True``.
basedir (unicode, optional):
The base directory to prepend to all file paths in the diff.
request (django.http.HttpRequest, optional):
The current HTTP request, if any. This will result in better
logging.