Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 640 lines (526 sloc) 20.229 kb
101930a More Py3 fixups: created setup3lib, added pyversion_patching to build…
Alex Stewart authored
1 """ Patch utility to apply unified diffs
2
3 Brute-force line-by-line non-recursive parsing
4
5 Copyright (c) 2008-2010 anatoly techtonik
6 Available under the terms of MIT license
7
8 NOTE: This version has been patched by Alex Stewart <alex@foogod.com> for
9 Python 3.x support and other misc fixups.
10
11 Project home: http://code.google.com/p/python-patch/
12
13
14 $Id: patch.py 92 2010-07-02 06:04:57Z techtonik $
15 $HeadURL: http://python-patch.googlecode.com/svn/trunk/patch.py $
16 """
17
18 __author__ = "techtonik.rainforce.org"
19 __version__ = "10.04-2.pAS1"
20
21 import copy
22 import logging
23 import re
24 from logging import debug, info, warning
25 import sys
26
27 try:
28 # cStringIO doesn't support unicode in 2.5
29 from StringIO import StringIO
30 except ImportError:
31 # StringIO has been renamed to 'io' in 3.x
32 from io import StringIO
33
34 from os.path import exists, isfile, abspath
35 from os import unlink
36
# Keep a reference to the builtin before it is shadowed below.
_open = open

if sys.version_info >= (3,):
    def open(filename, mode='r'):
        """Open *filename* without newline translation (Python 3.x).

        Text modes are opened with ``newline=''`` so that lines keep their
        original ``\\n`` / ``\\r\\n`` / ``\\r`` terminators.  Binary modes are
        passed through untouched, because the builtin rejects a ``newline``
        argument for them.
        """
        if 'b' in mode:
            return _open(filename, mode)
        return _open(filename, mode, newline='')
else:
    def open(filename, mode='r'):
        """Open *filename* without newline translation (Python 2.x).

        Binary mode is forced so that line terminators reach the caller
        unchanged; the flag is only added when it is not already present.
        """
        if 'b' not in mode:
            mode += 'b'
        return _open(filename, mode)

    # Python 3.x has changed iter.next() to be next(iter) instead, so for
    # backwards compatibility, we'll just define a next() function under 2.x
    def next(iter):
        """Return the next item of iterator *iter* (2.x stand-in for the builtin)."""
        return iter.next()
52
53
#------------------------------------------------
# Logging is controlled by "python_patch" logger

# When true, extra per-hunk statistics are sent to debug().
debugmode = False

logger = logging.getLogger("python_patch")
loghandler = logging.StreamHandler()
logger.addHandler(loghandler)

# Module-level shortcuts; these rebind the names imported from `logging`
# so that every message goes through the "python_patch" logger.
debug = logger.debug
info = logger.info
warning = logger.warning

# If called as a library, don't log info/debug messages by default.
logger.setLevel(logging.WARNING)

#------------------------------------------------

# constants for patch types

DIFF = PLAIN = "plain"
HG = MERCURIAL = "mercurial"
SVN = SUBVERSION = "svn"
77
78
def fromfile(filename):
    """ Parse patch file and return Patch() object

    The file is closed even when parsing raises.
    """
    info("reading patch from file %s" % filename)
    fp = open(filename, "r")
    try:
        return Patch(fp)
    finally:
        fp.close()
87
88
def fromstring(s):
    """ Wrap text string *s* in a StringIO stream and return the Patch()
        object parsed from it
    """
    stream = StringIO(s)
    return Patch(stream)
93
94
95
class HunkInfo(object):
    """ Parsed hunk data container (hunk starts with @@ -R +R @@) """

    def __init__(self):
        self.startsrc = None    #: line count starts with 1
        self.linessrc = None
        self.starttgt = None
        self.linestgt = None
        self.invalid = False
        self.text = []          #: raw hunk lines, including the leading marker

    def copy(self):
        """ Return an independent copy of this hunk.

        The `text` list is duplicated so that appending lines to the copy
        (or to the original) does not affect the other object.
        """
        clone = copy.copy(self)
        clone.text = list(self.text)
        return clone
119
120
121
class Patch(object):
    """ Unified diff parsed into per-file tables of hunks.

    `source`, `target`, `hunks` and `hunkends` are parallel lists with one
    entry per patched file; they are None until parse() has been called.
    """

    # @@ -start[,count] +start[,count] hunk header
    _RE_HUNK_HEAD = re.compile(r"^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?")
    # any line that may legally appear inside a hunk body
    _RE_HUNK_LINE = re.compile(r"^[- \+\\]")
    # todo: support spaces in filenames
    _RE_SOURCE_NAME = re.compile(r"^--- ([^\t]+)")
    _RE_TARGET_NAME = re.compile(r"^\+\+\+ ([^\t]+)")

    def __init__(self, stream=None):
        """ Create a patch, parsing `stream` right away when it is given. """

        # define Patch data members
        # table with a row for every source file

        #: list of source filenames
        self.source = None
        self.target = None
        #: list of lists of hunks
        self.hunks = None
        #: file endings statistics for every hunk
        self.hunkends = None
        #: headers for each file
        self.header = None

        #: patch type - one of constants
        self.type = None

        if stream:
            self.parse(stream)

    def copy(self):
        """ Return a shallow copy of the patch (tables are shared). """
        return copy.copy(self)

    def parse(self, stream):
        """ parse unified diff

        `stream` is any iterable of text lines.  The parser is a line-by-line
        state machine; exactly one of the state flags below is active at a
        time and a single line may be examined by several states in a row.
        """
        self.header = []

        self.source = []
        self.target = []
        self.hunks = []
        self.hunkends = []

        # possible file regions that direct the parser flow
        headscan = True     # scanning header before the patch body
        filenames = False   # lines starting with --- and +++
        hunkhead = False    # @@ -R +R @@ sequence
        hunkbody = False    #
        hunkskip = False    # skipping invalid hunk mode

        lineends = dict(lf=0, crlf=0, cr=0)
        nextfileno = 0
        nexthunkno = 0      #: even if index starts with 0 user messages number hunks from 1

        # hunkinfo holds parsed values, hunkactual - calculated
        hunkinfo = HunkInfo()
        hunkactual = dict(linessrc=None, linestgt=None)

        fe = enumerate(stream)
        for lineno, line in fe:

            # read out header
            if headscan:
                header = ''
                try:
                    while not line.startswith("--- "):
                        header += line
                        lineno, line = next(fe)
                except StopIteration:
                    # this is actually a loop exit
                    continue
                self.header.append(header)

                headscan = False
                # switch to filenames state
                filenames = True

            # hunkskip and hunkbody code skipped until definition of hunkhead is parsed
            if hunkbody:
                # process line first
                if self._RE_HUNK_LINE.match(line):
                    # gather stats about line endings
                    if line.endswith("\r\n"):
                        self.hunkends[nextfileno-1]["crlf"] += 1
                    elif line.endswith("\n"):
                        self.hunkends[nextfileno-1]["lf"] += 1
                    elif line.endswith("\r"):
                        self.hunkends[nextfileno-1]["cr"] += 1

                    if line.startswith("-"):
                        hunkactual["linessrc"] += 1
                    elif line.startswith("+"):
                        hunkactual["linestgt"] += 1
                    elif not line.startswith("\\"):
                        hunkactual["linessrc"] += 1
                        hunkactual["linestgt"] += 1
                    hunkinfo.text.append(line)
                    # todo: handle \ No newline cases
                else:
                    warning("invalid hunk no.%d at %d for target file %s" % (nexthunkno, lineno+1, self.target[nextfileno-1]))
                    # add hunk status node; HunkInfo is an object, so the
                    # flag is an attribute, not a mapping key
                    badhunk = hunkinfo.copy()
                    badhunk.invalid = True
                    self.hunks[nextfileno-1].append(badhunk)
                    # switch to hunkskip state
                    hunkbody = False
                    hunkskip = True

                # check exit conditions
                if hunkactual["linessrc"] > hunkinfo.linessrc or hunkactual["linestgt"] > hunkinfo.linestgt:
                    warning("extra hunk no.%d lines at %d for target %s" % (nexthunkno, lineno+1, self.target[nextfileno-1]))
                    # add hunk status node
                    badhunk = hunkinfo.copy()
                    badhunk.invalid = True
                    self.hunks[nextfileno-1].append(badhunk)
                    # switch to hunkskip state
                    hunkbody = False
                    hunkskip = True
                elif hunkinfo.linessrc == hunkactual["linessrc"] and hunkinfo.linestgt == hunkactual["linestgt"]:
                    self.hunks[nextfileno-1].append(hunkinfo.copy())
                    # switch to hunkskip state
                    hunkbody = False
                    hunkskip = True

                    # detect mixed window/unix line ends
                    ends = self.hunkends[nextfileno-1]
                    if ((ends["cr"] != 0) + (ends["crlf"] != 0) + (ends["lf"] != 0)) > 1:
                        warning("inconsistent line ends in patch hunks for %s" % self.source[nextfileno-1])
                    if debugmode:
                        debuglines = dict(ends)
                        debuglines.update(file=self.target[nextfileno-1], hunk=nexthunkno)
                        debug("crlf: %(crlf)d lf: %(lf)d cr: %(cr)d\t - file: %(file)s hunk: %(hunk)d" % debuglines)

            if hunkskip:
                if self._RE_HUNK_HEAD.match(line):
                    # switch to hunkhead state
                    hunkskip = False
                    hunkhead = True
                elif line.startswith("--- "):
                    # switch to filenames state
                    hunkskip = False
                    filenames = True
                    if debugmode and len(self.source) > 0:
                        debug("- %2d hunks for %s" % (len(self.hunks[nextfileno-1]), self.source[nextfileno-1]))

            if filenames:
                if line.startswith("--- "):
                    # a source name is already recorded for this file index
                    if nextfileno < len(self.source):
                        warning("skipping invalid patch for %s" % self.source[nextfileno])
                        del self.source[nextfileno]
                        # double source filename line is encountered
                        # attempt to restart from this second line
                    match = self._RE_SOURCE_NAME.match(line)
                    if match:
                        self.source.append(match.group(1).strip())
                    else:
                        warning("skipping invalid filename at line %d" % lineno)
                        # switch back to headscan state
                        filenames = False
                        headscan = True
                elif not line.startswith("+++ "):
                    if nextfileno < len(self.source):
                        # drop the orphaned source name so that source and
                        # target tables stay aligned
                        warning("skipping invalid patch with no target for %s" % self.source[nextfileno])
                        del self.source[nextfileno]
                    else:
                        # this should be unreachable
                        warning("skipping invalid target patch")
                    filenames = False
                    headscan = True
                else:
                    if nextfileno < len(self.target):
                        warning("skipping invalid patch - double target at line %d" % lineno)
                        del self.source[nextfileno]
                        del self.target[nextfileno]
                        nextfileno -= 1
                        # double target filename line is encountered
                        # switch back to headscan state
                        filenames = False
                        headscan = True
                    else:
                        match = self._RE_TARGET_NAME.match(line)
                        if not match:
                            warning("skipping invalid patch - no target filename at line %d" % lineno)
                            # switch back to headscan state
                            filenames = False
                            headscan = True
                        else:
                            self.target.append(match.group(1).strip())
                            nextfileno += 1
                            # switch to hunkhead state
                            filenames = False
                            hunkhead = True
                            nexthunkno = 0
                            self.hunks.append([])
                            self.hunkends.append(lineends.copy())
                            continue

            if hunkhead:
                match = self._RE_HUNK_HEAD.match(line)
                if not match:
                    if not self.hunks[nextfileno-1]:
                        warning("skipping invalid patch with no hunks for file %s" % self.target[nextfileno-1])
                    # switch to headscan state
                    hunkhead = False
                    headscan = True
                    continue
                else:
                    hunkinfo.startsrc = int(match.group(1))
                    # a missing count means exactly one line
                    hunkinfo.linessrc = 1
                    if match.group(3):
                        hunkinfo.linessrc = int(match.group(3))
                    hunkinfo.starttgt = int(match.group(4))
                    hunkinfo.linestgt = 1
                    if match.group(6):
                        hunkinfo.linestgt = int(match.group(6))
                    hunkinfo.invalid = False
                    hunkinfo.text = []

                    hunkactual["linessrc"] = hunkactual["linestgt"] = 0

                    # switch to hunkbody state
                    hunkhead = False
                    hunkbody = True
                    nexthunkno += 1
                    continue

        if not hunkskip:
            # parse() only knows the stream; report its name when it has one
            warning("patch file incomplete - %s" % getattr(stream, "name", "<stream>"))
            # sys.exit(?)
        else:
            # duplicated message when an eof is reached
            if debugmode and len(self.source) > 0:
                debug("- %2d hunks for %s" % (len(self.hunks[nextfileno-1]), self.source[nextfileno-1]))

        info("total files: %d total hunks: %d" % (len(self.source), sum([len(hset) for hset in self.hunks])))

    def _validate_hunks(self, filename, hunks):
        """ Compare source-side lines of `hunks` with the content of `filename`.

        Returns a tuple (validhunks, canpatch): the number of hunks whose
        context and removed lines were all found at the expected position,
        and whether every hunk matched so that the file may be rewritten.
        `hunks` must not be empty.
        """
        validhunks = 0
        canpatch = False
        hunkno = 0
        hunk = hunks[hunkno]
        hunkfind = []
        hunklineno = 0

        f2fp = open(filename)
        try:
            for lineno, line in enumerate(f2fp):
                if lineno+1 < hunk.startsrc:
                    continue
                elif lineno+1 == hunk.startsrc:
                    hunkfind = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " -"]
                    hunklineno = 0

                    # todo \ No newline at end of file

                lastline = hunk.startsrc + len(hunkfind) - 1

                # check hunks in source file, the last hunk line included
                if lineno+1 <= lastline:
                    if line.rstrip("\r\n") == hunkfind[hunklineno]:
                        hunklineno += 1
                    else:
                        debug("hunk no.%d doesn't match source file %s" % (hunkno+1, filename))
                        # file may be already patched, but we will check other hunks anyway
                        hunkno += 1
                        if hunkno < len(hunks):
                            hunk = hunks[hunkno]
                            # forget lines of the failed hunk until the
                            # start of the next one is reached
                            hunkfind = []
                            continue
                        else:
                            break

                # check if processed line is the last line
                if lineno+1 == lastline:
                    debug("file %s hunk no.%d -- is ready to be patched" % (filename, hunkno+1))
                    hunkno += 1
                    validhunks += 1
                    if hunkno < len(hunks):
                        hunk = hunks[hunkno]
                        hunkfind = []
                    elif validhunks == len(hunks):
                        # patch file
                        canpatch = True
                        break
            else:
                if hunkno < len(hunks):
                    warning("premature end of source file %s at hunk %d" % (filename, hunkno+1))
        finally:
            f2fp.close()

        return validhunks, canpatch

    def apply(self):
        """ apply parsed patch

        Every file is validated first and rewritten only when all of its
        hunks match.  The original is moved to `<name>.orig` while the new
        content is written, and the backup is removed on success.
        """

        total = len(self.source)
        for fileno, filename in enumerate(self.source):

            f2patch = filename
            if not exists(f2patch):
                f2patch = self.target[fileno]
                if not exists(f2patch):
                    warning("source/target file does not exist\n--- %s\n+++ %s" % (filename, f2patch))
                    continue
            if not isfile(f2patch):
                warning("not a file - %s" % f2patch)
                continue
            filename = f2patch

            info("processing %d/%d:\t %s" % (fileno+1, total, filename))

            hunks = self.hunks[fileno]
            if not hunks:
                # parse() keeps an entry for files whose hunks were all skipped
                warning("no hunks to apply for %s" % filename)
                continue

            # validate before patching
            validhunks, canpatch = self._validate_hunks(filename, hunks)

            if validhunks < len(hunks):
                if self._match_file_hunks(filename, hunks):
                    warning("already patched %s" % filename)
                else:
                    warning("source file is different - %s" % filename)
            if canpatch:
                backupname = filename + ".orig"
                if exists(backupname):
                    warning("can't backup original file to %s - aborting" % backupname)
                else:
                    import shutil
                    shutil.move(filename, backupname)
                    if self.write_hunks(backupname, filename, hunks):
                        info("successfully patched %s" % filename)
                        unlink(backupname)
                    else:
                        warning("error patching file %s" % filename)
                        shutil.copy(filename, filename + ".invalid")
                        warning("invalid version is saved to %s" % (filename + ".invalid"))
                        # todo: proper rejects
                        shutil.move(backupname, filename)

        # todo: check for premature eof

    def can_patch(self, filename):
        """ Check if specified filename can be patched. Returns None if file can
        not be found among source filenames. False if patch can not be applied
        clearly. True otherwise.

        NOTE(review): the check is delegated to _match_file_hunks(), which
        compares the file with the *target* side of the hunks - confirm that
        this is the intended meaning of "can be patched".

        :returns: True, False or None
        """
        idx = self._get_file_idx(filename, source=True)
        if idx is None:
            return None
        return self._match_file_hunks(filename, self.hunks[idx])

    def _match_file_hunks(self, filepath, hunks):
        """ Return True when the file at `filepath` contains the target side
        (context and added lines) of every hunk at the expected position.
        """
        matched = True

        class NoMatch(Exception):
            pass

        fp = open(abspath(filepath))
        try:
            lineno = 1
            line = fp.readline()
            try:
                for hno, h in enumerate(hunks):
                    # skip to first line of the hunk
                    while lineno < h.starttgt:
                        if not len(line):  # eof
                            debug("check failed - premature eof before hunk: %d" % (hno+1))
                            raise NoMatch
                        line = fp.readline()
                        lineno += 1
                    for hline in h.text:
                        # removed lines and "\ No newline at end of file"
                        # markers have no counterpart in the patched file
                        if hline.startswith("-") or hline.startswith("\\"):
                            continue
                        if not len(line):
                            debug("check failed - premature eof on hunk: %d" % (hno+1))
                            # todo: \ No newline at the end of file
                            raise NoMatch
                        if line.rstrip("\r\n") != hline[1:].rstrip("\r\n"):
                            debug("file is not patched - failed hunk: %d" % (hno+1))
                            raise NoMatch
                        line = fp.readline()
                        lineno += 1
            except NoMatch:
                matched = False
                # todo: display failed hunk, i.e. expected/found
        finally:
            fp.close()
        return matched

    def patch_stream(self, instream, hunks):
        """ Generator that yields stream patched with hunks iterable

        Converts lineends in hunk lines to the best suitable format
        autodetected from input
        """

        # todo: At the moment substituted lineends may not be the same
        #       at the start and at the end of patching. Also issue a
        #       warning/throw about mixed lineends (is it really needed?)

        hunks = iter(hunks)

        srclineno = 1

        lineends = {'\n': 0, '\r\n': 0, '\r': 0}

        def get_line():
            """
            local utility function - return line from source stream
            collecting line end statistics on the way
            """
            line = instream.readline()
            # 'U' mode works only with text files
            if line.endswith("\r\n"):
                lineends["\r\n"] += 1
            elif line.endswith("\n"):
                lineends["\n"] += 1
            elif line.endswith("\r"):
                lineends["\r"] += 1
            return line

        for hno, h in enumerate(hunks):
            debug("hunk %d" % (hno+1))
            # skip to line just before hunk starts
            while srclineno < h.startsrc:
                yield get_line()
                srclineno += 1

            for hline in h.text:
                # todo: check \ No newline at the end of file
                if hline.startswith("\\"):
                    # marker line - it does not stand for a source line
                    continue
                if hline.startswith("-"):
                    # removed line: consume it from the source, emit nothing
                    get_line()
                    srclineno += 1
                    continue
                if not hline.startswith("+"):
                    # context line: consume the source line it replaces
                    get_line()
                    srclineno += 1
                line2write = hline[1:]
                # detect if line ends are consistent in source file
                if sum([bool(lineends[x]) for x in lineends]) == 1:
                    newline = [x for x in lineends if lineends[x] != 0][0]
                    yield line2write.rstrip("\r\n") + newline
                else:  # newlines are mixed
                    yield line2write

        for line in instream:
            yield line

    def write_hunks(self, srcname, tgtname, hunks):
        """ Write the content of `srcname` patched with `hunks` to `tgtname`.

        Both files are closed even when patching raises.  Returns True.
        """
        src = open(srcname, "r")
        try:
            tgt = open(tgtname, "w")
            try:
                debug("processing target file %s" % tgtname)
                tgt.writelines(self.patch_stream(src, hunks))
            finally:
                tgt.close()
        finally:
            src.close()
        return True

    def _get_file_idx(self, filename, source=None):
        """ Detect index of given filename within patch.

        :param filename:
        :param source: search filename among sources (True),
                       targets (False), or both (None)
        :returns: int or None
        """
        filename = abspath(filename)
        if source in (None, True):
            for i, fnm in enumerate(self.source):
                if filename == abspath(fnm):
                    return i
        if source in (None, False):
            for i, fnm in enumerate(self.target):
                if filename == abspath(fnm):
                    return i
        return None
596
597
598
599
if __name__ == "__main__":
    from optparse import OptionParser
    from os.path import exists
    import sys

    # command line: [options] unipatch-file
    opt = OptionParser(usage="%prog [options] unipatch-file",
                       version="python-patch %s" % __version__)
    opt.add_option("-d", "--debug", action="store_true", dest="debugmode",
                   help="Print debugging messages")
    opt.add_option("-q", "--quiet", action="store_true", dest="quiet",
                   help="Only print messages on warning/error")
    options, args = opt.parse_args()

    # without a patch file there is nothing to do but show usage
    if not args:
        opt.print_version()
        opt.print_help()
        sys.exit()

    debugmode = options.debugmode
    patchfile = args[0]
    if not (exists(patchfile) and isfile(patchfile)):
        sys.exit("patch file does not exist - %s" % patchfile)

    # choose verbosity: --debug wins over --quiet, default is INFO
    logformat = "%(message)s"
    if debugmode:
        loglevel = logging.DEBUG
        logformat = "%(levelname)8s %(message)s"
    elif options.quiet:
        loglevel = logging.WARN
    else:
        loglevel = logging.INFO
    logger.setLevel(loglevel)
    loghandler.setFormatter(logging.Formatter(logformat))

    patch = fromfile(patchfile)
    #pprint(patch)
    patch.apply()

    # todo: document and test line ends handling logic - patch.py detects proper line-endings
    #       for inserted hunks and issues a warning if patched file has inconsistent line ends
Something went wrong with that request. Please try again.