# difflib — Helpers for computing deltas

* https://docs.python.org/3/library/difflib.html
* https://pymotw.com/3/difflib/

In [1]:
import difflib

The difflib module contains tools for computing and working with differences between sequences. It is especially useful for comparing text, and includes functions that produce reports using several common difference formats.

In [2]:
text1 = """Lorem ipsum dolor sit amet, consectetuer adipiscing
elit. Integer eu lacus accumsan arcu fermentum euismod. Donec
pulvinar porttitor tellus. Aliquam venenatis. Donec facilisis
pharetra tortor.  In nec mauris eget magna consequat
convalis. Nam sed sem vitae odio pellentesque interdum. Sed
consequat viverra nisl. Suspendisse arcu metus, blandit quis,
rhoncus ac, pharetra eget, velit. Mauris urna. Morbi nonummy
molestie orci. Praesent nisi elit, fringilla ac, suscipit non,
tristique vel, mauris. Curabitur vel lorem id nisl porta
adipiscing. Suspendisse eu lectus. In nunc. Duis vulputate
tristique enim. Donec quis lectus a justo imperdiet tempus."""

text1_lines = text1.splitlines()

text2 = """Lorem ipsum dolor sit amet, consectetuer adipiscing
elit. Integer eu lacus accumsan arcu fermentum euismod. Donec
pulvinar, porttitor tellus. Aliquam venenatis. Donec facilisis
pharetra tortor. In nec mauris eget magna consequat
convalis. Nam cras vitae mi vitae odio pellentesque interdum. Sed
consequat viverra nisl. Suspendisse arcu metus, blandit quis,
rhoncus ac, pharetra eget, velit. Mauris urna. Morbi nonummy
molestie orci. Praesent nisi elit, fringilla ac, suscipit non,
tristique vel, mauris. Curabitur vel lorem id nisl porta
adipiscing. Duis vulputate tristique enim. Donec quis lectus a
justo imperdiet tempus.  Suspendisse eu lectus. In nunc."""

text2_lines = text2.splitlines()

In [3]:
d = difflib.Differ()

In [6]:
text1_lines

['Lorem ipsum dolor sit amet, consectetuer adipiscing',
 'elit. Integer eu lacus accumsan arcu fermentum euismod. Donec',
 'pulvinar porttitor tellus. Aliquam venenatis. Donec facilisis',
 'pharetra tortor.  In nec mauris eget magna consequat',
 'convalis. Nam sed sem vitae odio pellentesque interdum. Sed',
 'consequat viverra nisl. Suspendisse arcu metus, blandit quis,',
 'rhoncus ac, pharetra eget, velit. Mauris urna. Morbi nonummy',
 'molestie orci. Praesent nisi elit, fringilla ac, suscipit non,',
 'tristique vel, mauris. Curabitur vel lorem id nisl porta',
 'adipiscing. Suspendisse eu lectus. In nunc. Duis vulputate',
 'tristique enim. Donec quis lectus a justo imperdiet tempus.']

In [7]:
text2_lines

['Lorem ipsum dolor sit amet, consectetuer adipiscing',
 'elit. Integer eu lacus accumsan arcu fermentum euismod. Donec',
 'pulvinar, porttitor tellus. Aliquam venenatis. Donec facilisis',
 'pharetra tortor. In nec mauris eget magna consequat',
 'convalis. Nam cras vitae mi vitae odio pellentesque interdum. Sed',
 'consequat viverra nisl. Suspendisse arcu metus, blandit quis,',
 'rhoncus ac, pharetra eget, velit. Mauris urna. Morbi nonummy',
 'molestie orci. Praesent nisi elit, fringilla ac, suscipit non,',
 'tristique vel, mauris. Curabitur vel lorem id nisl porta',
 'adipiscing. Duis vulputate tristique enim. Donec quis lectus a',
 'justo imperdiet tempus.  Suspendisse eu lectus. In nunc.']

In [4]:
diff = d.compare(text1_lines, text2_lines)

In [5]:
print('\n'.join(diff))

  Lorem ipsum dolor sit amet, consectetuer adipiscing
  elit. Integer eu lacus accumsan arcu fermentum euismod. Donec
- pulvinar porttitor tellus. Aliquam venenatis. Donec facilisis
+ pulvinar, porttitor tellus. Aliquam venenatis. Donec facilisis
?         +

- pharetra tortor.  In nec mauris eget magna consequat
?                 -

+ pharetra tortor. In nec mauris eget magna consequat
- convalis. Nam sed sem vitae odio pellentesque interdum. Sed
?                 - --

+ convalis. Nam cras vitae mi vitae odio pellentesque interdum. Sed
?               +++ +++++   +

  consequat viverra nisl. Suspendisse arcu metus, blandit quis,
  rhoncus ac, pharetra eget, velit. Mauris urna. Morbi nonummy
  molestie orci. Praesent nisi elit, fringilla ac, suscipit non,
  tristique vel, mauris. Curabitur vel lorem id nisl porta
- adipiscing. Suspendisse eu lectus. In nunc. Duis vulputate
- tristique enim. Donec quis lectus a justo imperdiet tempus.
+ adipiscing. Duis vulputate tristique enim. Donec 

In [8]:
diff = difflib.unified_diff(text1_lines, text2_lines, lineterm='')
print('\n'.join(diff))

--- 
+++ 
@@ -1,11 +1,11 @@
 Lorem ipsum dolor sit amet, consectetuer adipiscing
 elit. Integer eu lacus accumsan arcu fermentum euismod. Donec
-pulvinar porttitor tellus. Aliquam venenatis. Donec facilisis
-pharetra tortor.  In nec mauris eget magna consequat
-convalis. Nam sed sem vitae odio pellentesque interdum. Sed
+pulvinar, porttitor tellus. Aliquam venenatis. Donec facilisis
+pharetra tortor. In nec mauris eget magna consequat
+convalis. Nam cras vitae mi vitae odio pellentesque interdum. Sed
 consequat viverra nisl. Suspendisse arcu metus, blandit quis,
 rhoncus ac, pharetra eget, velit. Mauris urna. Morbi nonummy
 molestie orci. Praesent nisi elit, fringilla ac, suscipit non,
 tristique vel, mauris. Curabitur vel lorem id nisl porta
-adipiscing. Suspendisse eu lectus. In nunc. Duis vulputate
-tristique enim. Donec quis lectus a justo imperdiet tempus.
+adipiscing. Duis vulputate tristique enim. Donec quis lectus a
+justo imperdiet tempus.  Suspendisse eu lectus. In nunc.


In [15]:
print('\n'.join(difflib.context_diff(text1_lines, text2_lines)))

*** 

--- 

***************

*** 1,11 ****

  Lorem ipsum dolor sit amet, consectetuer adipiscing
  elit. Integer eu lacus accumsan arcu fermentum euismod. Donec
! pulvinar porttitor tellus. Aliquam venenatis. Donec facilisis
! pharetra tortor.  In nec mauris eget magna consequat
! convalis. Nam sed sem vitae odio pellentesque interdum. Sed
  consequat viverra nisl. Suspendisse arcu metus, blandit quis,
  rhoncus ac, pharetra eget, velit. Mauris urna. Morbi nonummy
  molestie orci. Praesent nisi elit, fringilla ac, suscipit non,
  tristique vel, mauris. Curabitur vel lorem id nisl porta
! adipiscing. Suspendisse eu lectus. In nunc. Duis vulputate
! tristique enim. Donec quis lectus a justo imperdiet tempus.
--- 1,11 ----

  Lorem ipsum dolor sit amet, consectetuer adipiscing
  elit. Integer eu lacus accumsan arcu fermentum euismod. Donec
! pulvinar, porttitor tellus. Aliquam venenatis. Donec facilisis
! pharetra tortor. In nec mauris eget magna consequat
! convalis. Nam cras vitae mi v

In [16]:
from difflib import SequenceMatcher

In [17]:
def show_results(match):
    print('  a    = {}'.format(match.a))
    print('  b    = {}'.format(match.b))
    print('  size = {}'.format(match.size))
    i, j, k = match
    print('  A[a:a+size] = {!r}'.format(A[i:i + k]))
    print('  B[b:b+size] = {!r}'.format(B[j:j + k]))

In [18]:
A = " abcd"
B = "abcd abcd"

print('A = {!r}'.format(A))
print('B = {!r}'.format(B))

A = ' abcd'
B = 'abcd abcd'


In [19]:
print('\nWithout junk detection:')
s1 = SequenceMatcher(None, A, B)
match1 = s1.find_longest_match(0, len(A), 0, len(B))
show_results(match1)


Without junk detection:
  a    = 0
  b    = 4
  size = 5
  A[a:a+size] = ' abcd'
  B[b:b+size] = ' abcd'


In [20]:
print('\nTreat spaces as junk:')
s2 = SequenceMatcher(lambda x: x == " ", A, B)
match2 = s2.find_longest_match(0, len(A), 0, len(B))
show_results(match2)


Treat spaces as junk:
  a    = 1
  b    = 0
  size = 4
  A[a:a+size] = 'abcd'
  B[b:b+size] = 'abcd'



## Compare arbitary types

In [24]:
s1 = [1, 2, 3, 5, 6, 4]
s2 = [2, 3, 5, 4, 6, 1]

print('Initial data:')
print('s1 =', s1)
print('s2 =', s2)
print('s1 == s2:', s1 == s2)
print()

Initial data:
s1 = [1, 2, 3, 5, 6, 4]
s2 = [2, 3, 5, 4, 6, 1]
s1 == s2: False



In [25]:
matcher = difflib.SequenceMatcher(None, s1, s2)
for tag, i1, i2, j1, j2 in reversed(matcher.get_opcodes()):

    if tag == 'delete':
        print('Remove {} from positions [{}:{}]'.format(
            s1[i1:i2], i1, i2))
        print('  before =', s1)
        del s1[i1:i2]

    elif tag == 'equal':
        print('s1[{}:{}] and s2[{}:{}] are the same'.format(
            i1, i2, j1, j2))

    elif tag == 'insert':
        print('Insert {} from s2[{}:{}] into s1 at {}'.format(
            s2[j1:j2], j1, j2, i1))
        print('  before =', s1)
        s1[i1:i2] = s2[j1:j2]

    elif tag == 'replace':
        print(('Replace {} from s1[{}:{}] '
               'with {} from s2[{}:{}]').format(
                   s1[i1:i2], i1, i2, s2[j1:j2], j1, j2))
        print('  before =', s1)
        s1[i1:i2] = s2[j1:j2]

    print('   after =', s1, '\n')

print('s1 == s2:', s1 == s2)

Replace [4] from s1[5:6] with [1] from s2[5:6]
  before = [1, 2, 3, 5, 6, 4]
   after = [1, 2, 3, 5, 6, 1] 

s1[4:5] and s2[4:5] are the same
   after = [1, 2, 3, 5, 6, 1] 

Insert [4] from s2[3:4] into s1 at 4
  before = [1, 2, 3, 5, 6, 1]
   after = [1, 2, 3, 5, 4, 6, 1] 

s1[1:4] and s2[0:3] are the same
   after = [1, 2, 3, 5, 4, 6, 1] 

Remove [1] from positions [0:1]
  before = [1, 2, 3, 5, 4, 6, 1]
   after = [2, 3, 5, 4, 6, 1] 

s1 == s2: True


In [26]:
a = "qabxcd"
b = "abycdf"
s = SequenceMatcher(None, a, b)
for tag, i1, i2, j1, j2 in s.get_opcodes():
    print('{:7}   a[{}:{}] --> b[{}:{}] {!r:>8} --> {!r}'.format(
        tag, i1, i2, j1, j2, a[i1:i2], b[j1:j2]))

delete    a[0:1] --> b[0:0]      'q' --> ''
equal     a[1:3] --> b[0:2]     'ab' --> 'ab'
replace   a[3:4] --> b[2:3]      'x' --> 'y'
equal     a[4:6] --> b[3:5]     'cd' --> 'cd'
insert    a[6:6] --> b[5:6]       '' --> 'f'


In [35]:
a = '<a href="#">Link</a>'
b = '<a href="http://localhost/">Link2</a>'

d = difflib.HtmlDiff()
print()

%html d.make_table(a, b)

%html?




UsageError: Line magic function `%html` not found (But cell magic `%%html` exists, did you mean that instead?).
