In [1]:
import difflib

import sys
! {sys.executable} -m pip install -q uxdiff tabulate
import uxdiff
import tabulate

In [2]:
# Two small sample sequences of 3-field records (str, str, int).
# They share some records exactly, differ slightly in others, and each
# contains records the other lacks, so every kind of diff result shows up.
seq1 = [
    ("M27AZ9VW", "01", 1),
    ("7AMJ6KBEJ", "01", 50),
    ("7AMJ6KBEJ", "01", 200),
    ("7AMJ6KBEJ", "02", 300),
    ("M27AZ9VW", "01", 400),
    ("AME4BRWJ", "01", 500),
]
seq2 = [
    ("AME4BRWJ", "10", 500),
    ("M27AZ9VW", "01", 400),
    ("7AMJ6KBEJ", "01", 345),
    ("7AMJ6KBEJ", "01", 150),
]

In [3]:
# Sort both lists in place so the comparisons below run on ordered data.
# Sorting is idempotent, so re-running this cell is harmless.
for seq in (seq1, seq2):
    seq.sort()

[difflib.Differ](https://docs.python.org/3/library/difflib.html#differ-objects)

In [4]:
# Baseline: the standard library's line-oriented differ applied to the tuples.
list(difflib.Differ().compare(seq1, seq2))

["+ ('7AMJ6KBEJ', '01', 150)",
 "+ ('7AMJ6KBEJ', '01', 345)",
 "+ ('AME4BRWJ', '10', 500)",
 "- ('7AMJ6KBEJ', '01', 50)",
 "- ('7AMJ6KBEJ', '01', 200)",
 "- ('7AMJ6KBEJ', '02', 300)",
 "- ('AME4BRWJ', '01', 500)",
 "- ('M27AZ9VW', '01', 1)",
 "  ('M27AZ9VW', '01', 400)"]

`uxdiff.Differ` compares any two sequences.

Each item of the sequence must be hashable and iterable (tuple, str, etc.).

In [5]:
# Diff with uxdiff's default parameters.
# The None/True/False values are filtered out so only the diff tuples remain
# (presumably compare() yields them as markers between records -- confirm
# against the uxdiff documentation).
diffs2 = [
    entry
    for entry in uxdiff.Differ().compare(seq1, seq2)
    if entry not in (None, True, False)
]
diffs2

[(('>', None, None, 0, ('7AMJ6KBEJ', '01', 150)), None),
 (('>', None, None, 1, ('7AMJ6KBEJ', '01', 345)), None),
 (('>', None, None, 2, ('AME4BRWJ', '10', 500)), None),
 (('<', 0, ('7AMJ6KBEJ', '01', 50), None, None), None),
 (('<', 1, ('7AMJ6KBEJ', '01', 200), None, None), None),
 (('<', 2, ('7AMJ6KBEJ', '02', 300), None, None), None),
 (('<', 3, ('AME4BRWJ', '01', 500), None, None), None),
 (('<', 4, ('M27AZ9VW', '01', 1), None, None), None),
 ((' ', 5, ('M27AZ9VW', '01', 400), 3, ('M27AZ9VW', '01', 400)), None)]

Meaning of each code:

| Code | Meaning |
| --- | --- |
| `<` | unique to sequence 1 |
| `>` | unique to sequence 2 |
| ` ` | common to both sequences |
| `\|` | different for both sequences |

The result is easier to read when formatted as an HTML table.

In [6]:
def tableview(diffs):
    """Format uxdiff comparison entries as an HTML table.

    Args:
        diffs: Iterable of entries ``(summary, detail)``. ``summary`` is
            indexed as ``(code, idx1, item1, idx2, item2)``; ``detail`` is
            either falsy (e.g. ``None``) or a list of
            ``(code, seq1_part, seq2_part)`` rows.

    Returns:
        The result of ``tabulate.tabulate`` using the ``'unsafehtml'``
        format, with columns idx1, seq1, code, idx2, seq2, detail. A
        non-empty ``detail`` is rendered as a nested table in the last
        column; otherwise that cell is an empty string.
    """
    rows = []
    for entry in diffs:
        summary = entry[0]
        # Reorder so the code sits between the two sides of the comparison.
        left_idx, left_item = summary[1], summary[2]
        code = summary[0]
        right_idx, right_item = summary[3], summary[4]

        detail = entry[1]
        if detail:
            # 'unsafehtml' leaves markup unescaped, which is what lets this
            # nested table render inside the outer table's cell.
            detail_cell = tabulate.tabulate(
                detail,
                headers=["code", "seq1", "seq2"],
                tablefmt='unsafehtml')
        else:
            detail_cell = ''

        rows.append(
            (left_idx, left_item, code, right_idx, right_item, detail_cell))

    return tabulate.tabulate(
        rows,
        headers=["idx1", "seq1", "code", "idx2", "seq2", "detail"],
        tablefmt='unsafehtml')

In [7]:
# Render the default-parameter diff as an HTML table.
tableview(diffs2)

idx1,seq1,code,idx2,seq2,detail
,,>,0.0,"('7AMJ6KBEJ', '01', 150)",
,,>,1.0,"('7AMJ6KBEJ', '01', 345)",
,,>,2.0,"('AME4BRWJ', '10', 500)",
0.0,"('7AMJ6KBEJ', '01', 50)",<,,,
1.0,"('7AMJ6KBEJ', '01', 200)",<,,,
2.0,"('7AMJ6KBEJ', '02', 300)",<,,,
3.0,"('AME4BRWJ', '01', 500)",<,,,
4.0,"('M27AZ9VW', '01', 1)",<,,,
5.0,"('M27AZ9VW', '01', 400)",,3.0,"('M27AZ9VW', '01', 400)",


Adjust the diff with parameters such as `cutoff` and `fuzzy`.

In [8]:
# Same comparison with cutoff=0.5 and fuzzy=0.2: records that only partly
# match are now paired up (code '|') with a per-field detail list instead of
# being reported as unrelated '<' / '>' records.
diffs3 = [
    entry
    for entry in uxdiff.Differ(cutoff=0.5, fuzzy=0.2).compare(seq1, seq2)
    if entry not in (None, True, False)
]
diffs3

[(('|', 0, ('7AMJ6KBEJ', '01', 50), 0, ('7AMJ6KBEJ', '01', 150)),
  [(' ', ('7AMJ6KBEJ', '01'), ('7AMJ6KBEJ', '01')), ('!', (50,), (150,))]),
 (('|', 1, ('7AMJ6KBEJ', '01', 200), 1, ('7AMJ6KBEJ', '01', 345)),
  [(' ', ('7AMJ6KBEJ', '01'), ('7AMJ6KBEJ', '01')), ('!', (200,), (345,))]),
 (('<', 2, ('7AMJ6KBEJ', '02', 300), None, None), None),
 (('|', 3, ('AME4BRWJ', '01', 500), 2, ('AME4BRWJ', '10', 500)),
  [(' ', ('AME4BRWJ',), ('AME4BRWJ',)),
   ('!', ('01',), ('10',)),
   (' ', (500,), (500,))]),
 (('<', 4, ('M27AZ9VW', '01', 1), None, None), None),
 ((' ', 5, ('M27AZ9VW', '01', 400), 3, ('M27AZ9VW', '01', 400)), None)]

In [9]:
# Render the cutoff/fuzzy diff; paired rows carry a nested detail table.
tableview(diffs3)

idx1,seq1,code,idx2,seq2,detail
code,seq1,seq2,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
code,seq1,seq2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
code,seq1,seq2,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3
0,"('7AMJ6KBEJ', '01', 50)",|,0.0,"('7AMJ6KBEJ', '01', 150)","code seq1 seq2 ('7AMJ6KBEJ', '01')('7AMJ6KBEJ', '01') ! (50,) (150,)"
code,seq1,seq2,,,
,"('7AMJ6KBEJ', '01')","('7AMJ6KBEJ', '01')",,,
!,"(50,)","(150,)",,,
1,"('7AMJ6KBEJ', '01', 200)",|,1.0,"('7AMJ6KBEJ', '01', 345)","code seq1 seq2 ('7AMJ6KBEJ', '01')('7AMJ6KBEJ', '01') ! (200,) (345,)"
code,seq1,seq2,,,
,"('7AMJ6KBEJ', '01')","('7AMJ6KBEJ', '01')",,,
!,"(200,)","(345,)",,,
2,"('7AMJ6KBEJ', '02', 300)",<,,,
3,"('AME4BRWJ', '01', 500)",|,2.0,"('AME4BRWJ', '10', 500)","code seq1 seq2 ('AME4BRWJ',)('AME4BRWJ',) ! ('01',) ('10',) (500,) (500,)"

code,seq1,seq2
,"('7AMJ6KBEJ', '01')","('7AMJ6KBEJ', '01')"
!,"(50,)","(150,)"

code,seq1,seq2
,"('7AMJ6KBEJ', '01')","('7AMJ6KBEJ', '01')"
!,"(200,)","(345,)"

code,seq1,seq2
,"('AME4BRWJ',)","('AME4BRWJ',)"
!,"('01',)","('10',)"
,"(500,)","(500,)"


Meaning of each code in the `detail` column:

| Code | Meaning |
| --- | --- |
| `-` | unique to sequence 1 |
| `+` | unique to sequence 2 |
| ` ` | common to both sequences |
| `!` | different for both sequences |

In [10]:
# String items work too. With cutoff=0.7, "eggs"/"eggy" are reported as a
# changed pair, while "ham"/"hamster" are reported as unrelated items.
tableview([
    entry
    for entry in uxdiff.Differ(cutoff=0.7).compare(
        ["bacon", "eggs", "ham", "guido"],
        ["python", "eggy", "hamster", "guido", "van"],
    )
    if entry not in (None, True, False)
])

idx1,seq1,code,idx2,seq2,detail
code,seq1,seq2,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,bacon,<,,,
,,>,0.0,python,
1,eggs,|,1.0,eggy,code seq1 seq2 egg egg ! s y
code,seq1,seq2,,,
,egg,egg,,,
!,s,y,,,
2,ham,<,,,
,,>,2.0,hamster,
3,guido,,3.0,guido,
,,>,4.0,van,

code,seq1,seq2
,egg,egg
!,s,y
