# Analysis for synthetic languages

In [None]:
import copy
import itertools
import os
import glob
import time
import sys
import pickle

import numpy as np
import pandas as pd
import json
import seaborn as sns
from collections import defaultdict
from joblib import Parallel, delayed
from tqdm.notebook import tqdm
from numpyencoder import NumpyEncoder

# Put the parent directory on the import path so the project's own modules
# (imported right below) can be re-used from this notebook.
module_path = os.path.abspath("..")
if module_path not in sys.path:
    sys.path.append(module_path)

from tpg.dataset import ProgressiveDataset
from tpg.utils.npmi import (
    compute_compositional_ngrams_positionals_npmi,
    compute_compositional_ngrams_integers_npmi,
    compute_non_compositional_npmi,
)
from tpg.utils.dict_utils import default_to_regular

In [None]:
# Seaborn theme: pastel colour palette on a white grid; the active palette is
# stored in `palette`.
sns.set(palette="pastel")
sns.set_style("whitegrid")
palette = sns.color_palette()

import matplotlib.pylab as pylab

# Font sizes for legend, axis labels/titles and tick labels. They are applied
# after the seaborn calls above.
params = {
    "legend.title_fontsize": "32",
    "legend.fontsize": "24",
    "axes.labelsize": "32",
    "axes.titlesize": "32",
    "xtick.labelsize": "22",
    "ytick.labelsize": "26",
}
pylab.rcParams.update(params)

In [None]:
# `top_n` values swept by the NPMI computations further down
top_ns = [1, 2, 3, 5, 10, 15]
# NPMI thresholds 0.1, 0.2, ..., 0.9 that the scores are compared against
confidences = [tenth / 10 for tenth in range(1, 10)]

## Create Synthetic Language to test measures

In [None]:
# Generator shared by every synthetic-language cell below; their output
# therefore depends on the cells being executed in order.
rng = np.random.default_rng(seed=42)

# Dataset whose samples are turned into synthetic exchanges by the cells below
example_ds = ProgressiveDataset(
    seed=42,
    dataset_size=10000,
    num_points=60,
    num_distractors=4,
    sequence_window=True,
    sequence_window_size=2,
    repeat_chance=0,
    use_random=True,
    generate_special=None,
)

### Synthetic Compositional Position Variant

In [None]:
cut_inputs = []
messages = []
guesses = []
target_ids = []

# Draw 60 distinct 2-grams over the symbols 2..25; the integers found in an
# observation index into this table. Symbol 1 is kept for the position marker.
random_tuples_sp = rng.choice(
    list(itertools.product(range(2, 26), repeat=2)), size=60, replace=False
)

for sample in example_ds:
    observation = sample[1]
    # The guess is always the target, so this language is 100% accurate
    target_ids.append(sample[3])
    guesses.append(sample[3])
    cut_inputs.append(observation)

    # Position-variant positionals, restricted to l1 and r1:
    #   l1 -> marker 1 in the FIRST slot, followed by the neighbour's 2-gram
    #   r1 -> the neighbour's 2-gram, followed by marker 1 in the LAST slot
    # One draw per sample picks the preferred side.
    prefers_left = rng.random() < 0.5
    if prefers_left and observation[0] == -1:
        # Target is the first element, nothing to its left: fall back to r1
        describe_left, neighbour = False, observation[1]
    elif not prefers_left and observation[4] == -1:
        # Target is the last element, nothing to its right: fall back to l1
        describe_left, neighbour = True, observation[3]
    else:
        # Position of the -1 that marks the target
        hole = np.where(observation == -1)[0][0]
        describe_left = prefers_left
        neighbour = observation[hole - 1] if describe_left else observation[hole + 1]

    ngram = random_tuples_sp[neighbour]
    message = np.concatenate(([1], ngram) if describe_left else (ngram, [1]))
    messages.append(message)


exchange_dict = {
    f"exchange_{i}": {
        "cut_inputs": cut_inputs[i],
        "message": messages[i],
        "guess": guesses[i],
        "target_id": target_ids[i],
    }
    for i in range(len(example_ds))
}

out_path = os.path.join(
    "./data/synthetic-logs/", "run-test-synthetic_pos-interactions.json"
)
with open(out_path, "w") as f:
    json.dump(exchange_dict, f, cls=NumpyEncoder)

### Synthetic Compositional Position Invariant

In [None]:
cut_inputs = []
messages = []
guesses = []
target_ids = []

# Draw 60 distinct 2-grams over the symbols 3..25; the integers found in an
# observation index into this table. Symbols 1 and 2 are kept for the markers.
random_tuples_spi = rng.choice(
    list(itertools.product(range(3, 26), repeat=2)), size=60, replace=False
)

for sample in example_ds:
    observation = sample[1]
    # The guess is always the target, so this language is 100% accurate
    target_ids.append(sample[3])
    guesses.append(sample[3])
    cut_inputs.append(observation)

    # Position-invariant positionals, restricted to l1 and r1:
    #   l1 -> marker 1, r1 -> marker 2; the marker may sit before or after
    #   the neighbour's 2-gram.
    # First draw: preferred side. Second draw (below): marker placement.
    prefers_left = rng.random() < 0.5
    if prefers_left and observation[0] == -1:
        # Target is the first element, nothing to its left: fall back to r1
        marker, neighbour = 2, observation[1]
    elif not prefers_left and observation[4] == -1:
        # Target is the last element, nothing to its right: fall back to l1
        marker, neighbour = 1, observation[3]
    else:
        # Position of the -1 that marks the target
        hole = np.where(observation == -1)[0][0]
        if prefers_left:
            marker, neighbour = 1, observation[hole - 1]
        else:
            marker, neighbour = 2, observation[hole + 1]

    if rng.random() < 0.5:
        message = np.concatenate((random_tuples_spi[neighbour], [marker]))
    else:
        message = np.concatenate(([marker], random_tuples_spi[neighbour]))
    messages.append(message)


exchange_dict = {
    f"exchange_{i}": {
        "cut_inputs": cut_inputs[i],
        "message": messages[i],
        "guess": guesses[i],
        "target_id": target_ids[i],
    }
    for i in range(len(example_ds))
}

out_path = os.path.join(
    "./data/synthetic-logs/", "run-test-synthetic_posinv-interactions.json"
)
with open(out_path, "w") as f:
    json.dump(exchange_dict, f, cls=NumpyEncoder)

### Synthetic Compositional w Specials

In [None]:
cut_inputs = []
messages = []
guesses = []
target_ids = []

# Draw 60 distinct 2-grams over the symbols 4..25; the integers found in an
# observation index into this table. Symbol 4 is the l1 marker and 0..3 are
# used by the special messages.
random_tuples_spp = rng.choice(
    list(itertools.product(range(4, 26), repeat=2)), size=60, replace=False
)
# One fixed message per target position, indexed by the position of -1.
# Entry 2 is never sent: a target in the middle gets a compositional message.
specials = [[0, 0, 0], [1, 1, 1], [-1, -1, -1], [2, 2, 2], [3, 3, 3]]

for sample in example_ds:
    observation = sample[1]
    # The guess is always the target, so this language is 100% accurate
    target_ids.append(sample[3])
    guesses.append(sample[3])
    cut_inputs.append(observation)

    if observation[2] == -1:
        # Target in the middle: describe its left neighbour (l1) with marker 4
        # placed either after or before the neighbour's 2-gram.
        neighbour = observation[1]
        if rng.random() < 0.5:
            message = np.concatenate((random_tuples_spp[neighbour], [4]))
        else:
            message = np.concatenate(([4], random_tuples_spp[neighbour]))
    else:
        # Any other target position is announced by its dedicated special message
        hole = np.where(observation == -1)[0][0]
        message = list(specials[hole])

    messages.append(message)

exchange_dict = {
    f"exchange_{i}": {
        "cut_inputs": cut_inputs[i],
        "message": messages[i],
        "guess": guesses[i],
        "target_id": target_ids[i],
    }
    for i in range(len(example_ds))
}
out_path = os.path.join(
    "./data/synthetic-logs/", "run-test-synthetic_pos_spec-interactions.json"
)
with open(out_path, "w") as f:
    json.dump(exchange_dict, f, cls=NumpyEncoder)

### Synthetic Non-Compositional with Specials

In [None]:
cut_inputs = []
messages = []
guesses = []
target_ids = []

# Draw 60 distinct 3-grams over the symbols 4..25; each is used as a whole
# (non-compositional) message for the integer that indexes it.
random_tuples_nc = rng.choice(
    list(itertools.product(range(4, 26), repeat=3)), size=60, replace=False
)
# One fixed message per target position, indexed by the position of -1.
# Entry 2 is never sent: a target in the middle gets an integer message.
specials = [[0, 0, 0], [1, 1, 1], [-1, -1, -1], [2, 2, 2], [3, 3, 3]]

for sample in example_ds:
    observation = sample[1]
    # The guess is always the target, so this language is 100% accurate
    target_ids.append(sample[3])
    guesses.append(sample[3])
    cut_inputs.append(observation)

    if observation[2] == -1:
        # Target in the middle: send the 3-gram assigned to its left neighbour
        message = random_tuples_nc[observation[1]].copy()
    else:
        # Any other target position is announced by its dedicated special message
        hole = np.where(observation == -1)[0][0]
        message = list(specials[hole])

    messages.append(message)

exchange_dict = {
    f"exchange_{i}": {
        "cut_inputs": cut_inputs[i],
        "message": messages[i],
        "guess": guesses[i],
        "target_id": target_ids[i],
    }
    for i in range(len(example_ds))
}
out_path = os.path.join(
    "./data/synthetic-logs/", "run-test-synthetic_nc_spec-interactions.json"
)
with open(out_path, "w") as f:
    json.dump(exchange_dict, f, cls=NumpyEncoder)

## Load synthetic languages

In [None]:
# glob returns files in arbitrary order; sort so the match_<n> numbering built
# from this list is reproducible across runs and machines.
all_files = sorted(glob.glob(os.path.join("./data/synthetic-logs/", "*.json")))
li = []
params = []
for filename in tqdm(all_files):
    # File names look like "run-<run_id>-<architecture>-interactions.json".
    # Parse the base name only, so hyphens in the directory part of the path
    # cannot shift the fields.
    name_parts = os.path.basename(filename).split("-")
    run_id = name_parts[1]
    architecture = name_parts[2]
    params.append([run_id, architecture])
    df = pd.read_json(filename, orient="index")
    # JSON stores the arrays as plain lists; turn every cell back into an ndarray
    for k in [
        "cut_inputs",
        "message",
        "guess",
        "target_id",
    ]:
        df[k] = df[k].apply(lambda x: np.array(x))
    li.append(df)

In [None]:
# One record per loaded log file, keyed "match_<n>" in load order
matches = {
    f"match_{x}": {"run_id": run_id, "architecture": architecture}
    for x, (run_id, architecture) in zip(range(len(li)), params)
}

In [None]:
# Copy every DataFrame column into its match record as one stacked ndarray
for record, frame in zip(matches.values(), li):
    for col in frame.columns:
        record[col] = np.array(list(frame[col]))

# The DataFrames are no longer needed once their columns are copied
del li

In [None]:
# Per match: number of exchanges and the fraction where the guess equals the target
for record in tqdm(matches.values()):
    predicted = record["guess"].flatten()
    expected = record["target_id"].flatten()
    n_samples = len(expected)
    n_correct = sum(predicted == expected)
    record["test_len"] = n_samples
    record["test_acc"] = n_correct / n_samples

In [None]:
def compute_trwd_stats(match_to_compute) -> (dict, dict):
    """Collect per-message usage statistics for one match.

    ``match_to_compute`` is read through the keys ``"test_len"``,
    ``"message"``, ``"target_id"``, ``"guess"`` and ``"cut_inputs"``; the last
    four are indexed by exchange number.

    For every exchange the message is stringified and used as key. Under that
    key the function tracks:

    * ``"total"`` / ``"correct"``: how often the message was sent and how
      often the guess matched the first target id;
    * ``"obs_neighbours"``: for each relative position (``"l1"``..``"l4"``,
      ``"r1"``..``"r4"``) a count of the values seen at that offset from the
      first ``-1`` within the first five entries of the observation;
    * ``"begin"``, ``"begin+1"``, ``"end-1"``, ``"end"``: how often the ``-1``
      sat at index 0, 1, 3 or 4 (index 2 has no label);
    * ``"indices"`` / ``"indices_unq"``: an index list and its value counts.
      NOTE(review): nothing in this function appends to ``"indices"``, so
      ``"indices_unq"`` is always empty here.

    Returns the nested per-message dictionary and a dictionary with the total
    number of observations per labelled position.
    """
    window = 5
    edge_labels = {0: "begin", 1: "begin+1", 3: "end-1", 4: "end"}

    tpg_dict = defaultdict(
        lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
    )
    obs_counts_dict = dict.fromkeys(["begin", "begin+1", "end-1", "end"], 0)

    for row in range(match_to_compute["test_len"]):
        entry = tpg_dict[f'{match_to_compute["message"][row]}']
        # Plain counters/lists must exist before use, otherwise the nested
        # defaultdict would create a dict for them.
        for field, initial in (("total", 0), ("correct", 0), ("indices", [])):
            if field not in entry:
                entry[field] = initial

        entry["total"] += 1
        if match_to_compute["target_id"][row][0] == match_to_compute["guess"][row]:
            entry["correct"] += 1

        s_obs = match_to_compute["cut_inputs"][row]
        # First position among the five slots that holds the -1 placeholder
        hole = next((pos for pos in range(window) if s_obs[pos] == -1), None)
        if hole is None:
            continue

        neighbours = entry["obs_neighbours"]
        # Walk outwards from the placeholder: left side first, then right side
        for dist in range(1, hole + 1):
            neighbours[f"l{dist}"][f"{s_obs[hole - dist]}"] += 1
        for dist in range(1, window - hole):
            neighbours[f"r{dist}"][f"{s_obs[hole + dist]}"] += 1

        label = edge_labels.get(hole)
        if label is not None:
            if label not in entry:
                entry[label] = 0
            entry[label] += 1
            obs_counts_dict[label] += 1

    for entry in tpg_dict.values():
        values, counts = np.unique(np.array(entry["indices"]), return_counts=True)
        entry["indices_unq"] = dict(zip(values, counts))
    return tpg_dict, obs_counts_dict

In [None]:
start_time = time.perf_counter()

# One job per match; results come back in the iteration order of `matches`
jobs = (
    delayed(compute_trwd_stats)(match_to_compute=record)
    for record in matches.values()
)
results = Parallel(n_jobs=4, verbose=10)(jobs)

# Store independent copies of both return values on each match record
for record, (tpg_stats, obs_counts) in zip(matches.values(), results):
    record["tpg_stats"] = copy.deepcopy(tpg_stats)
    record["obs_counts"] = copy.deepcopy(obs_counts)

finish_time = time.perf_counter()
print(f"Computing stats finished in {finish_time-start_time} seconds")
del results

In [None]:
# Calculate the normalised pointwise mutual information for non-compositional messages.
# Some of the divisions hit zeros/NaNs, so the corresponding floating-point
# warnings are silenced for this block only. `np.errstate` restores the previous
# settings on exit, even if the computation raises.
with np.errstate(divide="ignore", invalid="ignore"):
    # Check for the top_n integer/pos combinations
    for top_n in top_ns:
        for match in tqdm(matches):
            non_compositional_npmi_dict = compute_non_compositional_npmi(
                matches[match], top_n
            )
            matches[match][f"nc_npmi_{top_n}"] = non_compositional_npmi_dict

In [None]:
# Generate all n-grams of length 1, 2 and 3 over the symbols 0..25.
# Each key is the space-separated symbols (e.g. "16 20") and maps to the
# n-gram length, so a key can be looked up inside a stringified message
# such as "[1 16 20]".
n_grams = {
    " ".join(map(str, symbols)): size
    for size in (1, 2, 3)
    for symbols in itertools.product(range(26), repeat=size)
}

In [None]:
start_time = time.perf_counter()

for top_n in top_ns:
    # One job per match; results come back in the iteration order of `matches`
    results = Parallel(n_jobs=4, verbose=10)(
        delayed(compute_compositional_ngrams_integers_npmi)(
            match=record, n_grams=n_grams, top_n=top_n
        )
        for record in matches.values()
    )

    for record, outcome in zip(matches.values(), results):
        record[f"ngram_npmi_integers_{top_n}"] = copy.deepcopy(outcome[0])
        # The second return value is stored once, from the first top_n that runs
        if "ngrams_pruned" not in record:
            record["ngrams_pruned"] = copy.deepcopy(outcome[1])

finish_time = time.perf_counter()
print(f"Computing stats finished in {finish_time-start_time} seconds")
del results

In [None]:
start_time = time.perf_counter()

# Sweep every (confidence, top_n) pair; each result is stored on the match
# record under "ngram_npmi_positionals_<top_n>_<confidence>".
for confidence in confidences:
    for top_n in top_ns:
        results = Parallel(n_jobs=1, verbose=10)(
            delayed(compute_compositional_ngrams_positionals_npmi)(
                match=record,
                n_grams=n_grams,
                confidence=confidence,
                top_n=top_n,
                scale=10,
            )
            for record in matches.values()
        )

        result_key = f"ngram_npmi_positionals_{top_n}_{confidence}"
        for record, outcome in zip(matches.values(), results):
            record[result_key] = copy.deepcopy(outcome)

finish_time = time.perf_counter()
print(f"Computing metrics finished in {finish_time-start_time} seconds")
del results

In [None]:
# Find all the messages that are non-compositional
nc_dicts = {}
special_positions = ["begin", "begin+1", "end-1", "end"]


def _message_tokens(message_str):
    """Split a stringified message such as "[1 16 20]" into its symbol strings.

    Brackets are removed and repeated spaces are tolerated.
    """
    return message_str.replace("[", "").replace("]", "").split()


def _message_array(message_str):
    """Parse a stringified message back into a fresh int8 array."""
    return np.fromstring(
        message_str.replace("[", "").replace("]", "").strip(),
        sep=" ",
        dtype=np.int8,
    )


for top_n in tqdm(top_ns):
    for confidence in confidences:
        non_compositional_message_translation_dict = {}
        for match in matches:
            non_compositional_identified = []
            translation = {
                "arch": matches[match]["architecture"],
                "run_id": matches[match]["run_id"],
                # format is {special_position: [messages]}
                "positional_messages": {x: [] for x in special_positions},
                # format is {position: {integer: [messages]}}
                "other_messages": defaultdict(lambda: defaultdict(list)),
            }
            non_compositional_message_translation_dict[match] = translation
            non_compositional_npmi_dict = matches[match][f"nc_npmi_{top_n}"]

            for msg in non_compositional_npmi_dict:
                msg_scores = non_compositional_npmi_dict[msg]

                # Messages whose NPMI with a special position reaches the threshold
                for special in special_positions:
                    if msg_scores[special] >= confidence:
                        non_compositional_identified.append(msg)
                        translation["positional_messages"][special].append(
                            _message_array(msg)
                        )
                        matches[match]["non_compositional_emerged"] = 1

                # Messages whose NPMI with (position, integers) reaches the threshold
                for pos in msg_scores:
                    if pos in special_positions:
                        continue
                    if msg_scores[pos]["npmi"] >= confidence:
                        for x in msg_scores[pos]["ints"]:
                            translation["other_messages"][pos][int(x)].append(
                                _message_array(msg)
                            )

            # A positional message of the form "s s s" counts as *reserved* when
            # its symbol s occurs in at most two of the distinct messages used
            # in this match.
            for msg in non_compositional_identified:
                msg_c = _message_tokens(msg)
                if not (msg_c[0] == msg_c[1] == msg_c[2]):
                    continue

                # Compare whole tokens, so that symbol "1" is not found inside
                # "11". The previous `msg_c[0].join(" ")` evaluated to a single
                # space and therefore matched every message.
                symbol = msg_c[0]
                count = sum(
                    1
                    for msg1 in matches[match]["tpg_stats"]
                    if symbol in _message_tokens(msg1)
                )
                if count <= 2:
                    matches[match]["non_compositional_reserved_emerged"] = 1

        nc_dicts[
            f"topn_{top_n}-confidence_{confidence}"
        ] = non_compositional_message_translation_dict

In [None]:
# Find all n-grams that may represent some integers or positions
c_dicts = {}
for top_n in tqdm(top_ns):
    for confidence in confidences:
        compositional_message_translation_dict = {}
        for match in matches:
            translation = {
                "arch": matches[match]["architecture"],
                "run_id": matches[match]["run_id"],
                "positional_ngrams": defaultdict(
                    lambda: defaultdict(list)
                ),  # format is {requested_pos_reference: {needed_pos: [ngrams]}}
                "integer_ngrams": defaultdict(
                    lambda: defaultdict(list)
                ),  # format is {requested_int_reference: {needed_pos: [ngrams]}}
            }
            compositional_message_translation_dict[match] = translation
            ngram_npmi_integers_dict = matches[match][f"ngram_npmi_integers_{top_n}"]
            ngram_npmi_positionals_dict = matches[match][
                f"ngram_npmi_positionals_{top_n}_{confidence}"
            ]

            if len(ngram_npmi_positionals_dict.keys()) > 1:
                matches[match]["compositional_emerged"] = 1

            # n-grams whose NPMI with some integers exceeds the threshold
            for ngram, per_pos in ngram_npmi_integers_dict.items():
                # split() without arguments drops empty tokens from repeated spaces
                ngram_np = np.array(ngram.split(), dtype=np.uint8)
                for pos, score in per_pos.items():
                    if len(score) == 0:
                        continue
                    if score["value"] > confidence:
                        for x in score["integers"]:
                            translation["integer_ngrams"][pos][int(x)].append(
                                ngram_np
                            )

            # n-grams whose NPMI with a referent position exceeds the threshold
            for ngram, per_pos in ngram_npmi_positionals_dict.items():
                # Parsed the same way as the integer n-grams above, so stray
                # spaces cannot produce an empty token here either.
                ngram_np = np.array(ngram.split(), dtype=np.uint8)
                for pos, referents in per_pos.items():
                    if len(referents) == 0:
                        continue
                    for referent_pos, score in referents.items():
                        if score > confidence:
                            translation["positional_ngrams"][pos][
                                referent_pos
                            ].append(ngram_np)
        c_dicts[
            f"topn_{top_n}-confidence_{confidence}"
        ] = compositional_message_translation_dict

### Save the data
This data is used to debug, as it is very clean.

In [None]:
# Convert the nested defaultdicts with the project helper, then pickle the result
matches_dict = default_to_regular(matches)
with open("matches_debug.pickle", "wb") as out_file:
    pickle.dump(matches_dict, out_file, pickle.HIGHEST_PROTOCOL)

In [None]:
# Convert the nested defaultdicts with the project helper, then pickle the result
nc_dicts = default_to_regular(nc_dicts)
with open("dictionary_nc_debug.pickle", "wb") as out_file:
    pickle.dump(nc_dicts, out_file, pickle.HIGHEST_PROTOCOL)

In [None]:
# Convert the nested defaultdicts with the project helper, then pickle the result
c_dicts = default_to_regular(c_dicts)
with open("dictionary_c_debug.pickle", "wb") as out_file:
    pickle.dump(c_dicts, out_file, pickle.HIGHEST_PROTOCOL)