⊕ [6.3. difflib — Helpers for computing deltas — Python 3.6.8 documentation](https://docs.python.org/3.6/library/difflib.html)


In [6]:
from difflib import SequenceMatcher, context_diff
# Compare two four-character strings with no junk filter.
s = SequenceMatcher(isjunk=None, a="abcd", b="bcde")
# Collect the three similarity figures in call order, then print them
# space-separated on one line.
similarity_scores = (s.ratio(), s.quick_ratio(), s.real_quick_ratio())
print(*similarity_scores)

0.75 0.75 1.0


In [7]:
import sys
# Two four-line "files"; every line keeps its trailing newline so the diff
# output can be written to stdout verbatim.
s1 = 'bacon\neggs\nham\nguido\n'.splitlines(keepends=True)
s2 = 'python\neggy\nhamster\nguido\n'.splitlines(keepends=True)
# Emit the context diff one line at a time (a loop, so the cell shows no Out[] value).
for diff_line in context_diff(s1, s2, fromfile='before.py', tofile='after.py'):
    sys.stdout.write(diff_line)

*** before.py
--- after.py
***************
*** 1,4 ****
! bacon
! eggs
! ham
  guido
--- 1,4 ----
! python
! eggy
! hamster
  guido


In [8]:
# Import the difflib helpers by name instead of with a wildcard, so it is
# clear where get_close_matches, ndiff and Differ (used in this cell and the
# cells below) come from and nothing else leaks into the notebook namespace.
from difflib import Differ, get_close_matches, ndiff

# Rank the candidate words by similarity to the misspelling 'appel'.
get_close_matches('appel', ['ape', 'apple', 'peach', 'puppy'])

['apple', 'ape']

In [9]:
import keyword

# Look up the Python keyword closest to the misspelling 'wheel'.
# (Pasted '>>>' prompts removed so the cell is plain, valid Python.)
get_close_matches('wheel', keyword.kwlist)

['while']

In [10]:
# Look up the Python keyword closest to 'accept'.
# (Pasted '>>>' prompt removed so the cell is plain, valid Python.)
get_close_matches('accept', keyword.kwlist)

['except']

In [11]:
# The two line sequences to compare; each entry keeps its newline.
before_lines = ['one\n', 'two\n', 'three\n']
after_lines = ['ore\n', 'tree\n', 'emu\n']
# ndiff yields the delta lazily; printing consumes the generator.
diff = ndiff(before_lines, after_lines)
print(*diff, sep='', end="")

- one
?  ^
+ ore
?  ^
- two
- three
?  -
+ tree
+ emu


In [12]:
# Compare "abxcd" with "abcd" and list the runs the two strings share.
# (Pasted '>>>' prompts removed so the cell is plain, valid Python.)
s = SequenceMatcher(None, "abxcd", "abcd")
s.get_matching_blocks()

[Match(a=0, b=0, size=2), Match(a=3, b=2, size=2), Match(a=5, b=4, size=0)]

In [13]:
a, b = "qabxcd", "abycdf"
s = SequenceMatcher(None, a, b)
# One row per edit operation: the tag, the slice bounds in a and b, and the
# text each slice selects.
row_template = '{:7}   a[{}:{}] --> b[{}:{}] {!r:>8} --> {!r}'
for opcode in s.get_opcodes():
    tag, i1, i2, j1, j2 = opcode
    print(row_template.format(*opcode, a[i1:i2], b[j1:j2]))

delete    a[0:1] --> b[0:0]      'q' --> ''
equal     a[1:3] --> b[0:2]     'ab' --> 'ab'
replace   a[3:4] --> b[2:3]      'x' --> 'y'
equal     a[4:6] --> b[3:5]     'cd' --> 'cd'
insert    a[6:6] --> b[5:6]       '' --> 'f'


In [14]:
# Compare two declarations, passing a junk predicate that flags blanks.
s = SequenceMatcher(
    isjunk=lambda ch: ch == " ",
    a="private Thread currentThread;",
    b="private volatile Thread currentThread;",
)
similarity = s.ratio()
# Show the score to three decimal places.
print(round(similarity, 3))

0.866


In [15]:
from pprint import pprint

# Original text: four numbered lines, each indented by two spaces and
# terminated by a newline.
text1 = [
    '  1. Beautiful is better than ugly.\n',
    '  2. Explicit is better than implicit.\n',
    '  3. Simple is better than complex.\n',
    '  4. Complex is better than complicated.\n',
]
# Revised text: line 2 dropped, line 3 re-spaced, line 4 reworded, line 5 added.
text2 = [
    '  1. Beautiful is better than ugly.\n',
    '  3.   Simple is better than complex.\n',
    '  4. Complicated is better than complex.\n',
    '  5. Flat is better than nested.\n',
]

# Materialise the delta as a list so it can be inspected here and reused later.
d = Differ()
result = [*d.compare(text1, text2)]

pprint(result)

['    1. Beautiful is better than ugly.\n',
 '-   2. Explicit is better than implicit.\n',
 '-   3. Simple is better than complex.\n',
 '+   3.   Simple is better than complex.\n',
 '?     ++\n',
 '-   4. Complex is better than complicated.\n',
 '?            ^                     ---- ^\n',
 '+   4. Complicated is better than complex.\n',
 '?           ++++ ^                      ^\n',
 '+   5. Flat is better than nested.\n']


In [16]:
import sys

# Write the delta lines held in `result` straight to stdout; each line
# already ends with its own newline.
# (Pasted '>>>' prompts removed so the cell is plain, valid Python.)
sys.stdout.writelines(result)

    1. Beautiful is better than ugly.
-   2. Explicit is better than implicit.
-   3. Simple is better than complex.
+   3.   Simple is better than complex.
?     ++
-   4. Complex is better than complicated.
?            ^                     ---- ^
+   4. Complicated is better than complex.
?           ++++ ^                      ^
+   5. Flat is better than nested.
