In [1]:
# load data
import json

# Decode explicitly as UTF-8 (the interchange encoding for JSON) so the read
# does not depend on the platform's default locale encoding.
with open("./tests/pairs_20230731.json", "r", encoding="utf-8") as f:
    data = json.load(f)
# Number of loaded records; left as the last expression so it is displayed.
len(data)

1954

In [2]:
# inspect a pair of compound lists
from google.protobuf import json_format
from ord_schema.proto import reaction_pb2
from ord_diff.schema import MDictDiff, MDictListDiff, MessageType

# pick one record; each record unpacks into an id and two reaction strings
i = 22
reaction_id, reaction_string_1, reaction_string_2 = data[i]

# parse both strings into `Reaction` protobuf messages
reaction_1 = json_format.Parse(reaction_string_1, reaction_pb2.Reaction())
reaction_2 = json_format.Parse(reaction_string_2, reaction_pb2.Reaction())

# flatten the components of every reaction input into one list per reaction
compound_list_1 = [
    compound
    for reaction_input in reaction_1.inputs.values()
    for compound in reaction_input.components
]
compound_list_2 = [
    compound
    for reaction_input in reaction_2.inputs.values()
    for compound in reaction_input.components
]

# diff the two compound lists and summarise the counts it reports
diff = MDictListDiff.from_message_list_pair(
    m1_list=compound_list_1,
    m2_list=compound_list_2,
    message_type=MessageType.COMPOUND,
)
print("excess compounds in list #2:", diff.n_excess)
print("absent compounds in list #1:", diff.n_absent)
print("altered compounds (based on list #1):", diff.n_changed)

excess compounds in list #2: 0
absent compounds in list #1: 0
altered compounds (based on list #1): 2


In [3]:
# tabulate the compound-list diff computed in the previous cell
from ord_diff.report import report_diff_list

df = report_diff_list(
    diff,
    message_type=MessageType.COMPOUND,
)
# last expression: display the report table
df

Unnamed: 0,from,path,change_type,leaf_type,pair_index
0,m1,identifiers.0.type,,CompoundLeafType.identifiers,0
1,m1,identifiers.0.value,,CompoundLeafType.identifiers,0
2,m1,amount.mass.value,,CompoundLeafType.amount,0
3,m1,amount.mass.units,,CompoundLeafType.amount,0
4,m1,reactionRole,,CompoundLeafType.reaction_role,0
5,m1,identifiers.0.type,,CompoundLeafType.identifiers,1
6,m1,identifiers.0.value,,CompoundLeafType.identifiers,1
7,m1,amount.volume.value,,CompoundLeafType.amount,1
8,m1,amount.volume.units,,CompoundLeafType.amount,1
9,m1,reactionRole,DeltaType.ALTERATION,CompoundLeafType.reaction_role,1


In [4]:
# we can also directly compare two reactions
# this may not be very useful: the keys in `Reaction.inputs` are arbitrarily assigned
from ord_diff.report import report_diff

diff = MDictDiff.from_message_pair(
    reaction_1,
    reaction_2,
    message_type=MessageType.REACTION,
)
df = report_diff(
    diff,
    message_type=MessageType.REACTION,
)
# last expression: display the report table
df

Unnamed: 0,from,path,change_type
0,m1,inputs.m1_m4.components.0.identifiers.0.type,DeltaType.REMOVAL
1,m1,inputs.m1_m4.components.0.identifiers.0.value,DeltaType.REMOVAL
2,m1,inputs.m1_m4.components.0.amount.mass.value,DeltaType.REMOVAL
3,m1,inputs.m1_m4.components.0.amount.mass.units,DeltaType.REMOVAL
4,m1,inputs.m1_m4.components.0.reactionRole,DeltaType.REMOVAL
5,m1,inputs.m1_m4.components.1.identifiers.0.type,DeltaType.REMOVAL
6,m1,inputs.m1_m4.components.1.identifiers.0.value,DeltaType.REMOVAL
7,m1,inputs.m1_m4.components.1.amount.volume.value,DeltaType.REMOVAL
8,m1,inputs.m1_m4.components.1.amount.volume.units,DeltaType.REMOVAL
9,m1,inputs.m1_m4.components.1.reactionRole,DeltaType.REMOVAL


In [5]:
# compare two workup lists
workups_1 = reaction_1.workups
workups_2 = reaction_2.workups
diff = MDictListDiff.from_message_list_pair(
    workups_1,
    workups_2,
    message_type=MessageType.REACTION_WORKUP,
)

df = report_diff_list(
    diff,
    message_type=MessageType.REACTION_WORKUP,
)
# print the raw workups of both reactions for comparison with the report
print(workups_1)
print(workups_2)
# last expression: display the report table
df


[type: CUSTOM
, type: FILTRATION
]
[type: CUSTOM
, type: FILTRATION
]


Unnamed: 0,from,path,change_type,pair_index
0,m1,type,,0
1,m1,type,,1
