# Semantic Similarity for JSONs

This script is designed to collect **semantic similarity statistics for JSON objects en masse**. This forms a crucial step of our TMK TTM pipeline evaluation - comparing semantic similarity to expert-developed models (treated as a first order approximation of a ground truth).

The script leverages embeddings to compare values recursively, whether they are scalars, arrays, or nested dictionaries, and produces similarity scores at different levels (per-field, symmetric dict comparison, and overall JSON similarity).

In the interest of transparency, this notebook has been written as a **literate program**, with sufficient explanations to cover everything that is going on.

## Key Features
- Recursively compares JSON structures (scalars, arrays, dicts).
- Provides both asymmetric (per-field) and symmetric similarity measures.
- Computes overall similarity by flattening JSONs into sorted strings.

## Dependencies, Part 1
- [**`sentence-transformers`**](https://www.sbert.net/)
  Required for generating embeddings and computing semantic similarity.

## Usage
Follow the usage instructions to replicate. **First**: Install `sentence-transformers`

In [None]:
!pip install sentence-transformers

## Preliminaries
The blocks below simply set up the similarity evaluations and run a toy example as a sanity check.

In [None]:
# OS utilities
import os
# For serialisation and deserialisation of JSON objects
import json
# For semantic embeddings and similarity calculations
from sentence_transformers import SentenceTransformer, util

# Load the pre-trained 'all-MiniLM-L6-v2' sentence-transformer (MiniLM variant, efficient and lightweight).
# It is created once at module level; embed() below reuses this single instance for every call.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Turn text into a dense embedding using the shared sentence-transformer model
def embed(text):
  '''
  Encode `text` with the module-level `model`.
  The result is requested as a tensor (convert_to_tensor=True).
  '''
  embedding = model.encode(text, convert_to_tensor=True)
  return embedding

# Cosine similarity of two embeddings, unwrapped into a plain Python float
def cosine(a, b):
  '''
  Return util.cos_sim(a, b) converted to a float.
  '''
  similarity = util.cos_sim(a, b)
  return float(similarity)

# Function to compare two values (scalars, dicts, arrays) recursively
def compare_values(v1, v2):
  '''
  Recursively compare two values (scalars, dicts, arrays).

  - Two dicts are delegated to overall_field_similarity.
  - Two lists/tuples are treated as order-independent collections: every
    item is matched with its best-scoring counterpart on the other side,
    the best scores are averaged per direction (v1→v2 and v2→v1), and the
    two directional averages are averaged again (symmetric score).
  - Anything else (including mismatched kinds, e.g. a dict against a
    string) is converted with str() and compared via embedding cosine.

  Returns a float similarity score. Identical inputs score 1.0; because
  the scalar fallback is a cosine similarity, scores for unrelated text
  can be slightly below 0.
  '''
  # Handle dicts: if both values are dictionaries, compare them field by field
  if isinstance(v1, dict) and isinstance(v2, dict):
    return overall_field_similarity(v1, v2)

  # Handle arrays (lists or tuples), treating them as order-independent collections
  if isinstance(v1, (list, tuple)) and isinstance(v2, (list, tuple)):
    # Two empty collections are identical. Without this guard they would
    # score 0.0 and drag down the score of otherwise identical parents.
    if not v1 and not v2:
      return 1.0
    # Exactly one side is empty: nothing can be matched in either direction
    if not v1 or not v2:
      return 0.0

    # Score every pair once; rows index items of v1, columns items of v2.
    # The comparison is symmetric, so both directions can share this table.
    scores = [[compare_values(item1, item2) for item2 in v2] for item1 in v1]

    # v1→v2: best match for each item of v1 (row maxima), averaged
    forward = sum(max(row) for row in scores) / len(v1)
    # v2→v1: best match for each item of v2 (column maxima), averaged
    backward = sum(max(column) for column in zip(*scores)) / len(v2)

    # Symmetric: average both directional similarities
    return (forward + backward) / 2.0

  # Fallback: if values are neither dicts nor arrays, convert to strings and compare via embeddings
  return cosine(embed(str(v1)), embed(str(v2)))

# Per-field comparison of dicts: one similarity score per key rather than a single overall score
def per_field_similarity(j1, j2):
  '''
  Score every field that occurs in either dict.

  A key that is missing on one side is compared against the empty string.
  Returns a dict mapping field name -> similarity score.
  '''
  # Union of the keys of both dicts
  all_fields = set(j1.keys()).union(j2.keys())
  return {
    field: compare_values(j1.get(field, ''), j2.get(field, ''))
    for field in all_fields
  }

# Symmetric version: returns a single averaged similarity score across all fields
def overall_field_similarity(j1, j2):
  '''
  Symmetric similarity score for two dicts.

  Every key present in either dict contributes one score; a key missing
  on one side is compared against the empty string. The result is the
  unweighted mean of the per-field scores.

  Two empty dicts are identical and therefore score 1.0.
  '''
  # Collect all keys present in either dict
  keys = set(j1.keys()).union(j2.keys())
  # No fields on either side: the dicts are equal, so do not report 0.0
  if not keys:
    return 1.0
  # One score per field, defaulting to the empty string for a missing key
  sims = [compare_values(j1.get(key, ''), j2.get(key, '')) for key in keys]
  # Return average similarity across all fields
  return sum(sims) / len(sims)

# Whole-document comparison: embed each JSON as one canonical string
def overall_similarity(j1, j2):
  '''
  Compare entire JSONs as flattened strings.

  Both objects are serialised with sorted keys (so key order does not
  matter) and without ASCII escaping, then embedded and compared by cosine.
  '''
  dump_options = {'ensure_ascii': False, 'sort_keys': True}
  # Canonical text form of each object
  serialised = [json.dumps(obj, **dump_options) for obj in (j1, j2)]
  # One embedding per serialised object, in the same order
  vec1, vec2 = [embed(text) for text in serialised]
  return cosine(vec1, vec2)

# ---- Example ----
# Toy input 1: nested tag lists plus a small metadata dict
json1 = dict(
  tags=[['foo', 'bar'], ['baz']],
  meta=dict(author='Jane Doe', year=2021),
)

# Toy input 2: same shape, with reordered/extra tags and slightly different metadata
json2 = dict(
  tags=['allan', ['baz'], ['bar', 'foo']],
  meta=dict(author='J. Doe', year=2020),
)

# Sanity check: run all three measures on the toy pair
# 1) flattened whole-document similarity
print('Overall similarity:', overall_similarity(json1, json2))
# 2) one score per top-level field (dict of field -> similarity)
print('Per-field similarity:', per_field_similarity(json1, json2))
# 3) single score averaged across the top-level fields
print('Dict symmetric similarity:', overall_field_similarity(json1, json2))


## Dependencies, Part 2
- The script assumes the JSON files are placed in the folders named by the directory variables below. The easiest way to upload a large number of files is to upload a zip archive and extract it, which is what the cells below do.

In [None]:
!unzip FallVsGem.zip

In [None]:
!unzip GemSession1vs2.zip

In [None]:
!unzip GemRawVsRefined.zip

# TMK JSON Comparison

Here, we **compare Task, Method, and Knowledge JSON files across two directories**.
It loads JSON content, computes semantic similarity statistics, and prints results in a human-readable format.

## Features
- Loads JSON files safely with UTF-8 encoding.
- Compares files across matching subfolders in two directories.
- Calculates:
  - **Overall similarity** (flattened JSONs).
  - **Per-field similarity** (asymmetric).
  - **Dict-symmetric similarity** (averaged across fields).
- Prints results clearly for each subfolder and file.

## Dependencies
- Python standard libraries: `os`, `json`.
- External dependency: **sentence-transformers** (for embeddings and similarity).

## Usage
1. Set the directory paths (e.g. `Fall25`, `Gem`, `GemSession1`).
2. Call `compare_TMKs(dir1, dir2)` to generate results.
3. Use `print_results(results)` for formatted output.

This provides a straightforward way to **collect and review semantic similarity statistics for TMK JSONs *en masse***.

In [None]:
# Read one JSON document from disk
def load_json(path):
  '''
  Return the parsed content of the UTF-8 encoded JSON file at `path`.
  '''
  # Read the whole file first, then parse the text
  with open(path, mode='r', encoding='utf-8') as handle:
    raw_text = handle.read()
  return json.loads(raw_text)

# Function to compare Task, Method, and Knowledge JSON files across two directories
def compare_TMKs(dir1, dir2, filenames=('Task.json', 'Method.json', 'Knowledge.json')):
  '''
  Compare same-named JSON files in matching subfolders of two directories.

  dir1, dir2: paths of the two top-level directories. Subfolder names are
    taken from dir1 and looked up under the same name in dir2.
  filenames: names of the files to compare inside each matching subfolder.

  Returns a dict: subfolder -> filename -> {'overall', 'per_field',
  'dict_symmetric'}. Entries of dir1 that are not directories, or that have
  no directory of the same name in dir2, are skipped entirely. A subfolder
  present on both sides with no file in common maps to an empty dict.
  '''
  # Dictionary to store results for each subfolder
  results = {}

  # Iterate through entries of the first directory; sorted so that the
  # processing order does not depend on the filesystem's listing order
  for subfolder in sorted(os.listdir(dir1)):
    # Construct paths to the corresponding subfolders in both directories
    path1, path2 = os.path.join(dir1, subfolder), os.path.join(dir2, subfolder)

    # Skip unless the entry is a directory on BOTH sides
    if not os.path.isdir(path1) or not os.path.isdir(path2):
      continue

    # Initialise results for this subfolder
    results[subfolder] = {}

    # Iterate through the specified filenames (Task, Method, Knowledge)
    for fname in filenames:
      # Construct full paths to the files in both directories
      f1, f2 = os.path.join(path1, fname), os.path.join(path2, fname)

      # Only compare when the file is present on both sides
      if not (os.path.isfile(f1) and os.path.isfile(f2)):
        continue

      print(f'Processing {subfolder}: {fname}')
      # Load JSON content from both files
      j1, j2 = load_json(f1), load_json(f2)

      # Compute similarity statistics and store them
      results[subfolder][fname] = {
        'overall': overall_similarity(j1, j2),          # Flattened JSON similarity
        'per_field': per_field_similarity(j1, j2),      # One score per top-level field
        'dict_symmetric': overall_field_similarity(j1, j2) # Mean score across fields
      }

  # Return the dictionary of results
  return results

# Alternative run 1 (disabled): compare the 'Fall25' and 'Gem' directories
# fall_dir = 'Fall25'
# gem_dir = 'Gem'
# results = compare_TMKs(fall_dir, gem_dir)

# Alternative run 2 (disabled): compare two session directories
# NOTE(review): both variables below name 'GemSession1'; the second one
# presumably should name the second session's folder - confirm before enabling.
# sesh1_dir = 'GemSession1'
# sesh2_dir = 'GemSession1'
# Run comparison and store results
# results = compare_TMKs(sesh1_dir, sesh2_dir)

# Active run: compare the 'GemRaw' and 'GemRefined' directories.
# `results` is consumed by print_results(results) in the next cell.
raw_dir = 'GemRaw'
refined_dir = 'GemRefined'
results = compare_TMKs(raw_dir, refined_dir)

In [None]:
# Pretty-printer for the nested results dict produced by compare_TMKs
def print_results(results):
  '''
  Print the similarity scores of every subfolder/file in `results`.

  Subfolders without any compared file are skipped. For each file the
  overall, per-field and dict-symmetric scores are printed on separate
  lines, followed by a blank line.
  '''
  # (label, key) pairs for the lines printed with the default line ending
  plain_rows = (('Overall:', 'overall'), ('Per-Field:', 'per_field'))

  for subfolder, files in results.items():
    # Nothing was compared in this subfolder: print neither header nor rows
    if not files:
      continue

    # Header for the subfolder, followed by an empty line
    print(f'--- {subfolder} ---\n')

    for fname, sims in files.items():
      for label, key in plain_rows:
        print(fname, label, sims[key])
      # Last row of a file gets an extra newline to separate it from the next file
      print(fname, 'Dict-Symmetric:', sims['dict_symmetric'], end='\n\n')

# Display the `results` dict in the human-readable layout defined by print_results
print_results(results)
# print(results)  # Alternative: print raw results dictionary directly


## Hacker Heaven
The part below is not strictly necessary; we use it as a convenient shorthand to checkpoint the results above as strings.
**Why**: Running the similarity computation takes a long time. Saving the printed `results` output as a string effectively persists it.

In [None]:
# Hackish!
# We could directly process the `results` dict above,
# but resorting to checkpointing a string to avoid rerunning the loooong semantic similarity pipeline again
fall_vs_gem = '''--- CommonsenseReasoning_v0 ---

Task.json Overall: 0.6476955413818359
Task.json Per-Field: {'tasks': 0.16233648904652165, 'model': 1.0}
Task.json Dict-Symmetric: 0.5811682445232609

Method.json Overall: 0.4847952723503113
Method.json Per-Field: {'assertions': 0.057030193507671356, 'mechanisms': 0.08258705586194992, 'methods': 0.12785858990481266, 'model': 1.0}
Method.json Dict-Symmetric: 0.31686895981860846

Knowledge.json Overall: 0.6162564754486084
Knowledge.json Per-Field: {'triples': 0.09007951617240906, 'model': 1.0, 'concepts': 0.42795910150637706, 'relations': 0.28492992477996726, 'assertions': 0.08744019269943237, 'instances': 0.4941899258815122}
Knowledge.json Dict-Symmetric: 0.39743311017328303

--- Logic_v0 ---

Task.json Overall: 0.8277547359466553
Task.json Per-Field: {'tasks': 0.18396625600843913, 'model': 1.0}
Task.json Dict-Symmetric: 0.5919831280042196

Method.json Overall: 0.8702910542488098
Method.json Per-Field: {'methods': 0.27743084109428473, 'model': 1.0}
Method.json Dict-Symmetric: 0.6387154205471424

Knowledge.json Overall: 0.9093342423439026
Knowledge.json Per-Field: {'triples': 0.0723961666226387, 'model': 1.0, 'concepts': 0.4990130471113401, 'relations': 0.26596947363577783, 'assertions': 0.06056133285164833, 'instances': 0.4153020685791904}
Knowledge.json Dict-Symmetric: 0.3855403481334325

--- CBR_v0 ---

Task.json Overall: 0.8404492139816284
Task.json Per-Field: {'tasks': 0.30223766781337974, 'model': 1.0}
Task.json Dict-Symmetric: 0.6511188339066899

Method.json Overall: 0.8886269330978394
Method.json Per-Field: {'methods': 0.18905411996243018, 'model': 1.0}
Method.json Dict-Symmetric: 0.5945270599812151

Knowledge.json Overall: 0.8096709251403809
Knowledge.json Per-Field: {'model': 1.0, 'concepts': 0.37237340958700293, 'relations': 0.33325539218882716, 'assertions': 0.21521462608749667, 'instances': 0.13203567375118536}
Knowledge.json Dict-Symmetric: 0.41057582032290246

--- LearningByCorrectingMistakes_V0 ---

Task.json Overall: 0.832068920135498
Task.json Per-Field: {'tasks': 0.25527854919404025, 'model': 1.0}
Task.json Dict-Symmetric: 0.6276392745970201

Method.json Overall: 0.9224183559417725
Method.json Per-Field: {'methods': 0.2941076296922024, 'model': 1.0}
Method.json Dict-Symmetric: 0.6470538148461011

Knowledge.json Overall: 0.7235510349273682
Knowledge.json Per-Field: {'Assertions': 0.07258566468954086, 'Instances': 0.04830748215317726, 'concepts': 0.20660338724652927, 'model': 1.0, 'relations': 0.06529095768928528, 'Relations': 0.11508551985025406, 'instances': 0.0456581749022007, 'assertions': 0.12473531812429428}
Knowledge.json Dict-Symmetric: 0.20978331308191023

--- MeansEndAnalysis ---

Task.json Overall: 0.6522446870803833
Task.json Per-Field: {'tasks': 0.17001028084972253, 'model': 1.0}
Task.json Dict-Symmetric: 0.5850051404248613

Method.json Overall: 0.6079543828964233
Method.json Per-Field: {'methods': 0.10736260693520308, 'model': 1.0}
Method.json Dict-Symmetric: 0.5536813034676016

Knowledge.json Overall: 0.5251463651657104
Knowledge.json Per-Field: {'triples': 0.06096800044178963, 'model': 1.0, 'concepts': 0.31591672821059114, 'relations': 0.21198192089796067, 'assertions': 0.0931779146194458, 'instances': 0.33122166444857915}
Knowledge.json Dict-Symmetric: 0.3355443714363944

--- GenerateAndTest_v0 ---

Task.json Overall: 0.7876792550086975
Task.json Per-Field: {'tasks': 0.1983915893168795, 'model': 1.0}
Task.json Dict-Symmetric: 0.5991957946584398

Method.json Overall: 0.8761395215988159
Method.json Per-Field: {'methods': 0.25017813544487055, 'model': 1.0}
Method.json Dict-Symmetric: 0.6250890677224352

Knowledge.json Overall: 0.7958899736404419
Knowledge.json Per-Field: {'Assertions': 0.06668424606323242, 'model': 1.0, 'concepts': 0.2937023502257135, 'relations': 0.38197350377283457, 'assertions': 0.0963362529873848, 'instances': 0.3342375706467363}
Knowledge.json Dict-Symmetric: 0.3621556539493169

--- Classification_V0 ---

Task.json Overall: 0.8388476967811584
Task.json Per-Field: {'tasks': 0.3223594778128678, 'model': 1.0}
Task.json Dict-Symmetric: 0.6611797389064339

Method.json Overall: 0.9201768040657043
Method.json Per-Field: {'methods': 0.36075331077460365, 'model': 1.0}
Method.json Dict-Symmetric: 0.6803766553873019

Knowledge.json Overall: 0.7740333676338196
Knowledge.json Per-Field: {'Assertions': 0.04503213241696358, 'Instances': 0.06209949404001236, 'concepts': 0.2906369284261018, 'model': 1.0, 'relations': 0.040084198117256165, 'Relations': 0.06189362704753876, 'instances': 0.03751624375581741, 'assertions': 0.016099683940410614}
Knowledge.json Dict-Symmetric: 0.1941702884680126

--- ConstraintPropagation_v0 ---

Task.json Overall: 0.7881226539611816
Task.json Per-Field: {'tasks': 0.16573002533211062, 'model': 1.0}
Task.json Dict-Symmetric: 0.5828650126660553

Method.json Overall: 0.8833690285682678
Method.json Per-Field: {'methods': 0.19587581875731258, 'model': 1.0}
Method.json Dict-Symmetric: 0.5979379093786563

Knowledge.json Overall: 0.8591853976249695
Knowledge.json Per-Field: {'concepts': 0.5611145779490471, 'relations': 0.04940391331911087, 'assertions': 0.010218758136034012, 'instances': 0.3506832469018206, 'model': 1.0}
Knowledge.json Dict-Symmetric: 0.3942840992612025

--- Configuration_v0 ---

Task.json Overall: 0.7493712306022644
Task.json Per-Field: {'tasks': 0.07545361912343651, 'model': 1.0}
Task.json Dict-Symmetric: 0.5377268095617183

Method.json Overall: 0.9040258526802063
Method.json Per-Field: {'methods': 0.20865548087638758, 'model': 1.0}
Method.json Dict-Symmetric: 0.6043277404381938

Knowledge.json Overall: 0.7493516206741333
Knowledge.json Per-Field: {'concepts': 0.4878064171100656, 'relations': 0.03558094799518585, 'assertions': -0.010192383080720901, 'instances': 0.1962143410411146, 'model': 1.0}
Knowledge.json Dict-Symmetric: 0.341881864613129

--- AdvancedTopics_v0 ---

Task.json Overall: 0.8848377466201782
Task.json Per-Field: {'tasks': 0.24247537330873978, 'model': 1.0}
Task.json Dict-Symmetric: 0.6212376866543698

Method.json Overall: 0.9027212858200073
Method.json Per-Field: {'methods': 0.2388575258509151, 'model': 1.0}
Method.json Dict-Symmetric: 0.6194287629254576

Knowledge.json Overall: 0.6725671291351318
Knowledge.json Per-Field: {'concepts': 0.36676407728236227, 'relations': 0.030717073008418083, 'assertions': 0.07234449684619904, 'instances': 0.05694669112563133, 'Assertions': 0.08472730219364166, 'Instances': 0.057168349623680115, 'model': 1.0}
Knowledge.json Dict-Symmetric: 0.23838114143999034

--- MetaReasoning_v0 ---

Task.json Overall: 0.8705343008041382
Task.json Per-Field: {'tasks': 0.24631139524376056, 'model': 1.0}
Task.json Dict-Symmetric: 0.6231556976218803

Method.json Overall: 0.9328383803367615
Method.json Per-Field: {'methods': 0.22961368576750119, 'model': 1.0}
Method.json Dict-Symmetric: 0.6148068428837505

Knowledge.json Overall: 0.6306660175323486
Knowledge.json Per-Field: {'concepts': 0.4450383672490716, 'relations': 0.23629748972598463, 'assertions': 0.17493115018541905, 'instances': 0.06636354327201843, 'model': 1.0}
Knowledge.json Dict-Symmetric: 0.38452611008649873

--- ExplanationBasedLearning_V0 ---

Task.json Overall: 0.7537803053855896
Task.json Per-Field: {'tasks': 0.2214094022899945, 'model': 1.0}
Task.json Dict-Symmetric: 0.6107047011449973

Method.json Overall: 0.8820009827613831
Method.json Per-Field: {'methods': 0.2260767374194114, 'model': 1.0}
Method.json Dict-Symmetric: 0.6130383687097057

Knowledge.json Overall: 0.7492403984069824
Knowledge.json Per-Field: {'Assertions': 0.09152036160230637, 'Instances': 0.0497569665312767, 'concepts': 0.3781932262445076, 'model': 1.0, 'relations': 0.08382445573806763, 'Relations': 0.04377033933997154, 'instances': 0.055502213537693024, 'assertions': 0.07520635426044464}
Knowledge.json Dict-Symmetric: 0.22222173965678343

--- Planning_v0 ---

Task.json Overall: 0.9378424882888794
Task.json Per-Field: {'tasks': 0.2515954463881263, 'model': 1.0}
Task.json Dict-Symmetric: 0.6257977231940631

Method.json Overall: 0.8664980530738831
Method.json Per-Field: {'methods': 0.3231190338059284, 'model': 1.0}
Method.json Dict-Symmetric: 0.6615595169029642

Knowledge.json Overall: 0.7083539962768555
Knowledge.json Per-Field: {'concepts': 0.4131838246869544, 'relations': 0.3147808107081801, 'assertions': 0.10905894637107849, 'instances': 0.34570513842388606, 'model': 1.0}
Knowledge.json Dict-Symmetric: 0.4365457440380198

--- Scripts_v0 ---

Task.json Overall: 0.7953665256500244
Task.json Per-Field: {'tasks': 0.250931899413921, 'model': 1.0}
Task.json Dict-Symmetric: 0.6254659497069606

Method.json Overall: 0.8722612857818604
Method.json Per-Field: {'methods': 0.2364580807223802, 'model': 1.0}
Method.json Dict-Symmetric: 0.6182290403611901

Knowledge.json Overall: 0.7707914113998413
Knowledge.json Per-Field: {'Assertions': 0.09574715793132782, 'Instances': 0.14678864181041718, 'concepts': 0.4286331813935322, 'model': 1.0, 'relations': 0.06513336300849915, 'Relations': 0.0715184211730957, 'instances': 0.06332151591777802, 'assertions': 0.07555661350488663}
Knowledge.json Dict-Symmetric: 0.2433373618424421

--- Diagnosis_V0 ---

Task.json Overall: 0.9129812121391296
Task.json Per-Field: {'tasks': 0.320335695256723, 'model': 1.0}
Task.json Dict-Symmetric: 0.6601678476283614

Method.json Overall: 0.9316421151161194
Method.json Per-Field: {'methods': 0.358930254778205, 'model': 1.0}
Method.json Dict-Symmetric: 0.6794651273891025

Knowledge.json Overall: 0.8031787872314453
Knowledge.json Per-Field: {'Assertions': 0.07041376829147339, 'Instances': 0.06757915019989014, 'concepts': 0.2900547048840154, 'model': 1.0, 'relations': 0.055266667157411575, 'Relations': 0.09242470562458038, 'instances': 0.08482105284929276, 'assertions': 0.0950935035943985}
Knowledge.json Dict-Symmetric: 0.21945669407513277

--- SemanticNetworks_v0 ---

Task.json Overall: 0.5904308557510376
Task.json Per-Field: {'tasks': 0.2064295993342158, 'model': 1.0}
Task.json Dict-Symmetric: 0.6032147996671079

Method.json Overall: 0.7561803460121155
Method.json Per-Field: {'methods': 0.25341531939326717, 'model': 1.0}
Method.json Dict-Symmetric: 0.6267076596966336

Knowledge.json Overall: 0.4867182970046997
Knowledge.json Per-Field: {'Assertions': 0.09661585092544556, 'Instances': 0.05846691131591797, 'concepts': 0.32491330923695194, 'model': 1.0, 'relations': 0.01429422851651907, 'Relations': 0.06383401155471802, 'instances': 0.025365319103002548, 'assertions': 0.018457645550370216}
Knowledge.json Dict-Symmetric: 0.20024340952536568

--- VersionSpaces_v0 ---

Task.json Overall: 0.8305861949920654
Task.json Per-Field: {'tasks': 0.18067790284751614, 'model': 1.0}
Task.json Dict-Symmetric: 0.590338951423758

Method.json Overall: 0.8694285750389099
Method.json Per-Field: {'methods': 0.17123906195204996, 'model': 1.0}
Method.json Dict-Symmetric: 0.585619530976025

Knowledge.json Overall: 0.6890217661857605
Knowledge.json Per-Field: {'Assertions': 0.0966392233967781, 'model': 1.0, 'concepts': 0.17960195182822644, 'relations': 0.2604839069147905, 'assertions': 0.044869497418403625, 'instances': 0.23686941216389337}
Knowledge.json Dict-Symmetric: 0.30307733195368197

--- Understanding_v0 ---

Task.json Overall: 0.7406308650970459
Task.json Per-Field: {'tasks': 0.20571807612219664, 'model': 1.0}
Task.json Dict-Symmetric: 0.6028590380610983

Method.json Overall: 0.8565651774406433
Method.json Per-Field: {'methods': 0.22019976089738777, 'model': 1.0}
Method.json Dict-Symmetric: 0.6100998804486939

Knowledge.json Overall: 0.7281324863433838
Knowledge.json Per-Field: {'Assertions': 0.055253710597753525, 'model': 1.0, 'concepts': 0.18502795879418654, 'relations': 0.1995844989626979, 'assertions': 0.1257828176021576, 'instances': 0.22391337198504843}
Knowledge.json Dict-Symmetric: 0.2982603929903073

--- IncrementalConceptLearning_v0 ---

Task.json Overall: 0.857932448387146
Task.json Per-Field: {'tasks': 0.28459260760488, 'model': 1.0}
Task.json Dict-Symmetric: 0.64229630380244

Method.json Overall: 0.878852367401123
Method.json Per-Field: {'methods': 0.2700740698787606, 'model': 1.0}
Method.json Dict-Symmetric: 0.6350370349393804

Knowledge.json Overall: 0.47346535325050354
Knowledge.json Per-Field: {'Assertions': 0.10275313258171082, 'model': 1.0, 'concepts': 0.13328381486159438, 'relations': 0.26291421583543223, 'assertions': 0.02961903251707554, 'instances': 0.17988128206716514}
Knowledge.json Dict-Symmetric: 0.284741912977163

--- Frames_v0 ---

Task.json Overall: 0.6982267498970032
Task.json Per-Field: {'tasks': 0.15037653409776144, 'model': 1.0}
Task.json Dict-Symmetric: 0.5751882670488807

Method.json Overall: 0.8236693739891052
Method.json Per-Field: {'methods': 0.2879519967513391, 'model': 1.0}
Method.json Dict-Symmetric: 0.6439759983756695

Knowledge.json Overall: 0.8127628564834595
Knowledge.json Per-Field: {'concepts': 0.5122229657756786, 'relations': 0.42637101027648894, 'assertions': 0.06773791462182999, 'instances': 0.25881878628085053, 'model': 1.0}
Knowledge.json Dict-Symmetric: 0.4530301353909696

--- ProductionSystems_v0 ---

Task.json Overall: 0.855392336845398
Task.json Per-Field: {'tasks': 0.21848930695227214, 'model': 1.0}
Task.json Dict-Symmetric: 0.6092446534761361

Method.json Overall: 0.8928221464157104
Method.json Per-Field: {'methods': 0.26359209818707063, 'model': 1.0}
Method.json Dict-Symmetric: 0.6317960490935353

Knowledge.json Overall: 0.833121120929718
Knowledge.json Per-Field: {'Assertions': 0.06928500533103943, 'model': 1.0, 'concepts': 0.2701621327549219, 'relations': 0.39173462140994764, 'assertions': 0.09190809726715088, 'instances': 0.3198724789727552}
Knowledge.json Dict-Symmetric: 0.3571603892893025

--- AnalogicalReasoning_v0 ---

Task.json Overall: 0.829503059387207
Task.json Per-Field: {'tasks': 0.32336703230190783, 'model': 1.0}
Task.json Dict-Symmetric: 0.6616835161509539

Method.json Overall: 0.8896791338920593
Method.json Per-Field: {'methods': 0.28656907226481626, 'model': 1.0}
Method.json Dict-Symmetric: 0.6432845361324081

Knowledge.json Overall: 0.6203112602233887
Knowledge.json Per-Field: {'concepts': 0.2165529354531882, 'relations': 0.03972479701042175, 'assertions': 0.04874660074710846, 'instances': 0.04995458200573921, 'model': 1.0, 'Triples': 0.047787755727767944, 'Relations': 0.04100964590907097, 'Assertions': 0.04004677012562752, 'Instances': 0.08865267038345337}
Knowledge.json Dict-Symmetric: 0.17471952859581974

--- RecordingCases_v0 ---

Task.json Overall: 0.7520459890365601
Task.json Per-Field: {'tasks': 0.2837537294697194, 'model': 1.0}
Task.json Dict-Symmetric: 0.6418768647348597

Method.json Overall: 0.8559592366218567
Method.json Per-Field: {'methods': 0.27695760689583565, 'model': 1.0}
Method.json Dict-Symmetric: 0.6384788034479179

Knowledge.json Overall: 0.7547467947006226
Knowledge.json Per-Field: {'triples': 0.03063281439244747, 'model': 1.0, 'concepts': 0.24671389707364139, 'relations': 0.4574677422642708, 'assertions': 0.17173454957082868, 'instances': 0.2822028943124996}
Knowledge.json Dict-Symmetric: 0.3647919829356147'''

In [None]:
sesh1_vs_sesh2 = '''--- MetaReasoning ---

Task.json Overall: 1.0
Task.json Per-Field: {'tasks': 0.800000070532163, 'model': 0.9999998807907104}
Task.json Dict-Symmetric: 0.8999999756614367

Method.json Overall: 1.0
Method.json Per-Field: {'methods': 0.9933334326744079, 'model': 1.0}
Method.json Dict-Symmetric: 0.9966667163372039

Knowledge.json Overall: 1.0000001192092896
Knowledge.json Per-Field: {'relations': 1.0000000132454767, 'instances': 1.0000000596046448, 'model': 1.0, 'concepts': 1.000000011920929, 'assertions': 1.0}
Knowledge.json Dict-Symmetric: 1.00000001695421

--- Logic ---

Task.json Overall: 0.9999999403953552
Task.json Per-Field: {'tasks': 0.8000000556310018, 'model': 0.9999998807907104}
Task.json Dict-Symmetric: 0.8999999682108561

Method.json Overall: 1.0
Method.json Per-Field: {'methods': 0.9933333933353423, 'model': 1.0}
Method.json Dict-Symmetric: 0.9966666966676712

Knowledge.json Overall: 1.0
Knowledge.json Per-Field: {'relations': 1.000000069538752, 'instances': 1.0000000943740208, 'model': 1.0, 'concepts': 1.0000000596046448, 'assertions': 1.0000000596046448}
Knowledge.json Dict-Symmetric: 1.0000000566244125

--- Classification ---

Task.json Overall: 1.0000001192092896
Task.json Per-Field: {'tasks': 0.8000000814596812, 'model': 0.9999998807907104}
Task.json Dict-Symmetric: 0.8999999811251957

Method.json Overall: 1.0
Method.json Per-Field: {'methods': 0.9933334124883015, 'model': 1.0}
Method.json Dict-Symmetric: 0.9966667062441508

Knowledge.json Overall: 1.0
Knowledge.json Per-Field: {'relations': 1.0000000662273831, 'instances': 1.000000068119594, 'model': 1.0, 'concepts': 1.0000000715255737, 'assertions': 1.00000003973643}
Knowledge.json Dict-Symmetric: 1.000000049121796

--- Planning ---

Task.json Overall: 1.0
Task.json Per-Field: {'tasks': 0.8000000437100728, 'model': 0.9999998807907104}
Task.json Dict-Symmetric: 0.8999999622503916

Method.json Overall: 1.000000238418579
Method.json Per-Field: {'methods': 0.9933334323300256, 'model': 1.0}
Method.json Dict-Symmetric: 0.9966667161650128

Knowledge.json Overall: 1.0
Knowledge.json Per-Field: {'relations': 1.000000129143397, 'instances': 1.000000109275182, 'model': 1.0, 'concepts': 1.0000000894069672, 'assertions': 1.0000000794728596}
Knowledge.json Dict-Symmetric: 1.0000000814596812

--- Diagnosis ---

Task.json Overall: 1.0
Task.json Per-Field: {'tasks': 0.8000000516573588, 'model': 0.9999998807907104}
Task.json Dict-Symmetric: 0.8999999662240346

Method.json Overall: 1.0
Method.json Per-Field: {'methods': 0.993333406355646, 'model': 1.0}
Method.json Dict-Symmetric: 0.996666703177823

Knowledge.json Overall: 1.0
Knowledge.json Per-Field: {'relations': 1.0000000794728596, 'instances': 1.0000000953674317, 'model': 1.0, 'concepts': 1.0000000596046448, 'assertions': 1.0000000794728596}
Knowledge.json Dict-Symmetric: 1.0000000627835592

--- Scripts ---

Task.json Overall: 1.0
Task.json Per-Field: {'tasks': 0.8000000675519309, 'model': 0.9999998807907104}
Task.json Dict-Symmetric: 0.8999999741713207

Method.json Overall: 1.0000001192092896
Method.json Per-Field: {'methods': 0.9933333932028876, 'model': 1.0}
Method.json Dict-Symmetric: 0.9966666966014438

Knowledge.json Overall: 1.0
Knowledge.json Per-Field: {'relations': 1.0000000278155008, 'instances': 1.0000000562932756, 'model': 1.0, 'concepts': 1.0000000596046448, 'assertions': 1.0000001192092896}
Knowledge.json Dict-Symmetric: 1.0000000525845423

--- ConstraintPropagation ---

Task.json Overall: 1.0000001192092896
Task.json Per-Field: {'tasks': 0.8000000635782878, 'model': 0.9999998807907104}
Task.json Dict-Symmetric: 0.8999999721844991

Method.json Overall: 1.0000001192092896
Method.json Per-Field: {'methods': 0.9933334002097448, 'model': 1.0}
Method.json Dict-Symmetric: 0.9966667001048723

Knowledge.json Overall: 1.0
Knowledge.json Per-Field: {'relations': 1.0000000529819066, 'instances': 1.000000102179391, 'model': 1.0, 'concepts': 1.0000001192092896, 'assertions': 0.9999999403953552}
Knowledge.json Dict-Symmetric: 1.0000000429531883

--- Frames ---

Task.json Overall: 1.000000238418579
Task.json Per-Field: {'tasks': 0.8000000715255737, 'model': 0.9999998807907104}
Task.json Dict-Symmetric: 0.8999999761581421

Method.json Overall: 0.9999999403953552
Method.json Per-Field: {'methods': 0.9933334013356102, 'model': 1.0}
Method.json Dict-Symmetric: 0.9966667006678052

Knowledge.json Overall: 1.0
Knowledge.json Per-Field: {'relations': 1.0000000546375911, 'instances': 1.0000000782310963, 'model': 1.0, 'concepts': 1.0000000476837159, 'assertions': 1.0000000794728596}
Knowledge.json Dict-Symmetric: 1.0000000520050525

--- CaseBasedReasoning ---

Task.json Overall: 1.0000001192092896
Task.json Per-Field: {'tasks': 0.8000000596046447, 'model': 0.9999998807907104}
Task.json Dict-Symmetric: 0.8999999701976775

Method.json Overall: 1.000000238418579
Method.json Per-Field: {'methods': 0.9933334050765112, 'model': 1.0}
Method.json Dict-Symmetric: 0.9966667025382556

Knowledge.json Overall: 1.0
Knowledge.json Per-Field: {'relations': 1.0000000645716987, 'instances': 1.0000001043081284, 'model': 1.0, 'concepts': 1.0000000655651093, 'assertions': 1.0000001192092896}
Knowledge.json Dict-Symmetric: 1.0000000707308452'''

In [None]:
raw_vs_refined = '''--- Planning ---

Task.json Overall: 1.0
Task.json Per-Field: {'model': 1.0, 'tasks': 0.8000000357627869}
Task.json Dict-Symmetric: 0.9000000178813934

Method.json Overall: 0.9373612403869629
Method.json Per-Field: {'methods': 0.7218209965546719, 'model': 1.0}
Method.json Dict-Symmetric: 0.8609104982773359

Knowledge.json Overall: 0.87784743309021
Knowledge.json Per-Field: {'relations': 0.4190778953023255, 'concepts': 0.5191506831308028, 'assertions': 0.10905894637107849, 'instances': 0.47548044586488813, 'triples': 0.09631796181201935, 'model': 1.0}
Knowledge.json Dict-Symmetric: 0.4365143220801857

--- Logic ---

Task.json Overall: 0.9856604933738708
Task.json Per-Field: {'model': 1.0, 'tasks': 0.7796977771299759}
Task.json Dict-Symmetric: 0.889848888564988

Method.json Overall: 0.9508006572723389
Method.json Per-Field: {'methods': 0.6693888446490561, 'model': 1.0}
Method.json Dict-Symmetric: 0.8346944223245281

Knowledge.json Overall: 0.9455723762512207
Knowledge.json Per-Field: {'relations': 1.0000000049670537, 'concepts': 0.7637254431939278, 'assertions': 0.9999999602635702, 'instances': 0.9442672530810039, 'model': 1.0}
Knowledge.json Dict-Symmetric: 0.941598532301111

--- Classification ---

Task.json Overall: 1.0
Task.json Per-Field: {'model': 1.0, 'tasks': 0.8000000347693761}
Task.json Dict-Symmetric: 0.900000017384688

Method.json Overall: 0.9732804894447327
Method.json Per-Field: {'methods': 0.5552266603628144, 'model': 1.0}
Method.json Dict-Symmetric: 0.7776133301814072

Knowledge.json Overall: 0.8800891041755676
Knowledge.json Per-Field: {'relations': 0.8764857782257929, 'concepts': 0.9120013180587974, 'assertions': 0.6765211621920267, 'instances': 0.6488825847817244, 'model': 1.0}
Knowledge.json Dict-Symmetric: 0.8227781686516682

--- Frames ---

Task.json Overall: 0.6491957902908325
Task.json Per-Field: {'model': 1.0, 'tasks': 0.1436644364998395}
Task.json Dict-Symmetric: 0.5718322182499198

Method.json Overall: 0.7984248399734497
Method.json Per-Field: {'methods': 0.33168469045301485, 'model': 1.0}
Method.json Dict-Symmetric: 0.6658423452265074

Knowledge.json Overall: 0.9609134197235107
Knowledge.json Per-Field: {'relations': 0.8660441771859215, 'concepts': 0.7573215042240917, 'assertions': 1.0000000794728596, 'instances': 0.8790854718536139, 'model': 1.0}
Knowledge.json Dict-Symmetric: 0.9004902465472974

--- Diagnosis ---

Task.json Overall: 0.9140486717224121
Task.json Per-Field: {'model': 1.0, 'tasks': 0.3714115837664933}
Task.json Dict-Symmetric: 0.6857057918832467

Method.json Overall: 0.9316421151161194
Method.json Per-Field: {'model': 1.0, 'methods': 0.35648576050294023}
Method.json Dict-Symmetric: 0.6782428802514702

Knowledge.json Overall: 0.8228217959403992
Knowledge.json Per-Field: {'assertions': 0.0950935035943985, 'instances': 0.08482105284929276, 'relations': 0.055266667157411575, 'Triples': 0.09227254986763, 'Relations': 0.08393321931362152, 'concepts': 0.5662353567779064, 'Assertions': 0.03338649123907089, 'Instances': 0.06165877729654312, 'model': 1.0}
Knowledge.json Dict-Symmetric: 0.23029640201065277'''

# Result String Processing

The last step is to **parse similarity results from text output and summarise them statistically**.
It extracts values using regular expressions, flattens per-field scores, and computes mean and standard deviation for each metric.

## Features
- Captures **Overall**, **Dict-Symmetric**, and **Per-Field** similarity values from raw text.
- Safely evaluates per-field dictionaries using `ast.literal_eval`.
- Aggregates values across multiple files (`Task.json`, `Method.json`, `Knowledge.json`).
- Produces summary statistics (mean and standard deviation).
- Prints results in a clear tabular format.

## Dependencies
- Python standard libraries: `re`, `ast`, `statistics`.

## Usage
1. Pass raw result text to `process_result_string(text)`.
2. Call `print_summary(summary)` to display formatted statistics.

This provides a concise way to **aggregate and review semantic similarity statistics across multiple JSON comparisons**, collecting metrics per component (`Task`, `Method`, and `Knowledge` across lessons).

In [None]:
# Import regular expressions, safe evaluation of Python literals, and statistics functions
import re, ast, statistics

# Regex fragment for one score as Python prints a float: optional sign, digits
# with an optional fraction, optional exponent. A bare [0-9.]+ would skip
# negative cosine similarities and truncate values such as 5e-05 to "5".
_NUMBER = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'

# Compile regex patterns to capture similarity results from text output
overall_pattern = re.compile(r'(\w+\.json) Overall: (' + _NUMBER + r')')
perfield_pattern = re.compile(r'(\w+\.json) Per-Field: ({.*})')
dictsym_pattern = re.compile(r'(\w+\.json) Dict-Symmetric: (' + _NUMBER + r')')

# Metrics tracked per file, and the files that always appear in the summary
_METRICS = ("Overall", "Per-Field", "Dict-Symmetric")
_DEFAULT_FILES = ("Task.json", "Method.json", "Knowledge.json")

# Build an empty metric -> values mapping for a single file
def _empty_metrics():
  return {metric: [] for metric in _METRICS}

# Raw values collected by the most recent process_result_string call
data = {name: _empty_metrics() for name in _DEFAULT_FILES}

# Function to process raw result strings and extract similarity values
def process_result_string(text):
  """Parse similarity results out of `text` and summarise them per file.

  Lines of the form "<name>.json Overall: <float>",
  "<name>.json Per-Field: {<dict literal>}" and
  "<name>.json Dict-Symmetric: <float>" are collected. Per-field dicts are
  flattened, so every field score contributes one value to "Per-Field".

  Returns a dict mapping file name -> metric -> {"Mean": ..., "SD": ...},
  where SD is the population standard deviation. Task.json, Method.json and
  Knowledge.json are always present as keys; a metric with no values is
  omitted. Any other "<name>.json" found in the text gets its own entry.

  Raises ValueError or SyntaxError (from ast.literal_eval) if a Per-Field
  payload is not a valid Python literal.
  """
  # Start every call from an empty store: otherwise values from an earlier
  # call would be pooled into this call's statistics.
  data.clear()
  data.update({name: _empty_metrics() for name in _DEFAULT_FILES})

  # Extract and store overall similarity values
  for match in overall_pattern.finditer(text):
    file, val = match.groups()
    data.setdefault(file, _empty_metrics())["Overall"].append(float(val))

  # Extract and store per-field similarity values
  for match in perfield_pattern.finditer(text):
    file, dict_str = match.groups()
    pf = ast.literal_eval(dict_str)  # Safely parse dict string
    # Flatten all per-field values into one list
    data.setdefault(file, _empty_metrics())["Per-Field"].extend(pf.values())

  # Extract and store dict-symmetric similarity values
  for match in dictsym_pattern.finditer(text):
    file, val = match.groups()
    data.setdefault(file, _empty_metrics())["Dict-Symmetric"].append(float(val))

  # Summarise results with mean and (population) standard deviation
  summary = {}
  for file, vals in data.items():
    summary[file] = {
      metric: {"Mean": statistics.mean(arr), "SD": statistics.pstdev(arr)}
      for metric, arr in vals.items()
      if arr  # statistics.mean raises on an empty list
    }

  return summary

# Function to print summary statistics in tabular format
def print_summary(summary):
  """Print `summary` as a fixed-width text table on standard output.

  `summary` maps file name -> metric name -> a dict holding "Mean" and "SD".
  One row is printed per (file, metric) pair with both figures rounded to
  four decimal places, and a blank line is printed after each file's rows
  (also when the file has no metrics at all).
  """
  # Header row followed by a horizontal rule
  print(f"{'File':^16}{'Metric':^16}{'Mean':^8}{'SD':^8}")
  print("-" * 48)
  for file in summary:
    for metric, stats in summary[file].items():
      # Render both figures with four decimals before padding them
      mean, sd = (format(stats[key], ".4f") for key in ("Mean", "SD"))
      print(f"{file:<16} {metric:<16}{mean:<8}{sd:<8}")
    # Blank separator line between files
    print()

# Driver: choose which comparison to summarise by leaving exactly one call
# uncommented. Whatever is assigned to `summary` last is what gets printed.
# NOTE(review): fall_vs_gem, sesh1_vs_sesh2 and raw_vs_refined are presumably
# the raw result strings defined in earlier cells - confirm before switching.

# Process results for Fall vs Gem (Raw) comparison
# summary = process_result_string(fall_vs_gem)

# Process results for session comparison
# summary = process_result_string(sesh1_vs_sesh2)

# Process results for Gem Raw vs Refined comparison
summary = process_result_string(raw_vs_refined)

# Print formatted summary table
print_summary(summary)