# BEN Tutorial

This tutorial is copied almost directly from the documentation. It is recommended that
you use it only for reference, or to validate that the module is operating properly
without having to copy and paste the examples yourself.


Note: You will have to download the relevant files in order to run this.

In [1]:
from gerrytools.ben import *

### Compression

In [2]:
# Run ben in "encode" mode on the JSON Lines example. No output_file_path is
# given; later cells read "./small_example.jsonl.ben", which is presumably the
# file written by this call (TODO confirm the default output name).
ben(
    mode="encode",
    input_file_path="./small_example.jsonl",
)

Running container ben_runner
Pulling Docker image mgggdev/replicate:v0.2
Running in encode mode
Encoding line: 6
Done!


In [3]:
# Run ben in "x-encode" mode on the .ben file. The later "x-decode" cell reads
# "./small_example.jsonl.xben", which is presumably the file written by this
# call (TODO confirm the default output name).
ben(
    mode="x-encode",
    input_file_path="./small_example.jsonl.ben",
)

Running container ben_runner
Pulling Docker image mgggdev/replicate:v0.2
Running in xencode mode
Encoding line: 6
Done!


In [4]:
# Run ben in "decode" mode on the .ben file, this time with an explicit
# output_file_path so the original "./small_example.jsonl" is not reused.
ben(
    mode="decode",
    input_file_path="./small_example.jsonl.ben",
    output_file_path="./re_small_example.jsonl",
)

Running container ben_runner
Pulling Docker image mgggdev/replicate:v0.2
Running in decode mode
Decoding sample: 6
Done!


In [5]:
# Run ben in "x-decode" mode on the .xben file, with an explicit
# output_file_path that differs from the one used by the "decode" cell.
ben(
    mode="x-decode",
    input_file_path="./small_example.jsonl.xben",
    output_file_path="./re_small_example_v2.jsonl",
)

Running container ben_runner
Pulling Docker image mgggdev/replicate:v0.2
Running in x-decode mode
Decoding sample: 6
Done!


In [6]:
# Run ben in "xz-compress" mode on the original JSON Lines example.
# NOTE(review): the recorded output of this cell stops at an interactive
# "already exists, do you want to overwrite it?" prompt because the output file
# was already present. Remove stale outputs before re-running, or confirm
# whether ben() offers a non-interactive overwrite option.
ben(
    mode="xz-compress",
    input_file_path="./small_example.jsonl",
    output_file_path="./compressed_small_example.jsonl.xz",
)

Running container ben_runner
Pulling Docker image mgggdev/replicate:v0.2
Running in xz compress mode
File "/home/ben/io/compressed_small_example.jsonl.xz" already exists, do you want to overwrite it? (y/[n]): 

In [7]:
# Run ben in "xz-decompress" mode on the .xz file named in the "xz-compress" cell.
# NOTE(review): the recorded output of this cell also stops at an interactive
# overwrite prompt because the output file was already present. Remove stale
# outputs before re-running, or confirm whether ben() offers a non-interactive
# overwrite option.
ben(
    mode="xz-decompress",
    input_file_path="./compressed_small_example.jsonl.xz",
    output_file_path="./decompressed_small_example.jsonl",
)

Running container ben_runner
Pulling Docker image mgggdev/replicate:v0.2
Running in xz decompress mode
File "/home/ben/io/decompressed_small_example.jsonl" already exists, do you want to overwrite it? (y/[n]): 


### Improving Compression via Relabeling

In [8]:
# Run ben in "decode" mode on the .xben chain file. The next cell reads
# "100k_CO_chain.jsonl.ben", which is presumably the file written by this call
# (TODO confirm the default output name).
ben(
    mode="decode",
    input_file_path="100k_CO_chain.jsonl.xben"
)

Running container ben_runner
Pulling Docker image mgggdev/replicate:v0.2
Running in decode mode
Decoding sample: 100002
Done!


In [9]:
# Canonicalize the assignment vectors in the .ben chain file. A later cell reads
# "100k_CO_chain_canonicalized_assignments.jsonl.ben", which is presumably the
# file written by this call (TODO confirm the default output name).
canonicalize_ben_file(
    input_file_path="100k_CO_chain.jsonl.ben"
)

Pulling Docker image mgggdev/replicate:v0.2
Running container reben_runner
Canonicalizing assignment vectors in ben file.
Relabeling line: 100002
Done!


In [10]:
# Relabel the dual graph JSON by the "GEOID20" key. The next cell reads
# "CO_small_sorted_by_GEOID20_map.json", which is presumably one of the files
# written by this call (TODO confirm the default output names).
relabel_json_file_by_key(
    dual_graph_path="CO_small.json",
    key="GEOID20",
    # verbose=False is set because this tutorial runs in a Jupyter Notebook; drop it to use the function's default elsewhere
    verbose=False
)

Pulling Docker image mgggdev/replicate:v0.2
Running container reben_runner


In [11]:
# Relabel the canonicalized .ben file using the GEOID20 map file. The next cell
# reads "100k_CO_chain_canonicalized_assignments_sorted_by_GEOID20.jsonl.ben",
# which is presumably the file written by this call (TODO confirm).
relabel_ben_file_with_map(
    input_file_path="100k_CO_chain_canonicalized_assignments.jsonl.ben",
    map_file_path="CO_small_sorted_by_GEOID20_map.json"
)

Pulling Docker image mgggdev/replicate:v0.2
Running container reben_runner
Relabeling ben file according to map file /home/ben/io/CO_small_sorted_by_GEOID20_map.json
Relabeling line: 100002
Done!


In [12]:
# Run ben in "x-encode" mode on the canonicalized, GEOID20-sorted .ben file,
# the last step of this relabeling section.
ben(
    mode="x-encode",
    input_file_path="100k_CO_chain_canonicalized_assignments_sorted_by_GEOID20.jsonl.ben"
)

Running container ben_runner
Pulling Docker image mgggdev/replicate:v0.2
Running in xencode mode
Encoding line: 100002
Done!


### Parsing Forest Recom and SMC Output

In [13]:
import json

# Peek at the third line (index 2) of the atlas output and print its
# "levels in graph" entry; nothing is printed if the file has fewer lines.
with open("./NC_pct21/42_atlas_gamma0.0_10.jsonl") as atlas_file:
    for line_index, raw_line in enumerate(atlas_file):
        if line_index != 2:
            continue
        record = json.loads(raw_line)
        print(record["levels in graph"])
        break

['county', 'prec_id']


In [14]:
# Run msms_parse in "standard_jsonl" mode on the atlas output. The region and
# subregion values are the two "levels in graph" entries printed by the
# previous cell ('county' and 'prec_id').
msms_parse(
    mode="standard_jsonl",
    region="county",
    subregion="prec_id",
    dual_graph_path="./NC_pct21.json",
    input_file_path="./NC_pct21/42_atlas_gamma0.0_10.jsonl",
    output_file_path="./NC_pct21/42_atlas_gamma0.0_10_standardized.jsonl"
)

Pulling Docker image mgggdev/replicate:v0.2
Running container parse_msms_runner
Reading dual-graph shapefile
Processing sample 10
Done!


In [15]:
# Run smc_parse in "standard_jsonl" mode on the SMC assignments CSV, with a
# .jsonl output path in the same directory.
smc_parse(
    mode="standard_jsonl",
    input_file_path="./4x4_grid/SMC_42_29_assignments.csv",
    output_file_path="./4x4_grid/SMC_42_29.jsonl"
)

Pulling Docker image mgggdev/replicate:v0.2
Running container parse_msms_runner
Processing sample 29
Done!


### Replaying a Chain

In [18]:
from gerrychain import Graph, Partition
from gerrychain.updaters import Tally

graph = Graph.from_json("CO_small.json")


def pop_tally(graph, new_assignment):
    """Return the "population" updater value for one assignment of ``graph``.

    A fresh ``Partition`` is built from ``graph`` and ``new_assignment`` with a
    single ``Tally`` updater on the "TOTPOP20" field, aliased as "population";
    the value stored under that alias is returned.
    """
    population_updaters = {
        "population": Tally("TOTPOP20", alias="population"),
    }
    replayed_partition = Partition(
        graph=graph,
        assignment=new_assignment,
        updaters=population_updaters,
    )
    return replayed_partition["population"]

In [19]:
# Replay the chain and print the population tally for the first 11 samples
# (indices 0 through 10), then stop early.
for sample_index, assignment in enumerate(ben_replay("100k_CO_chain.jsonl.ben")):
    print(pop_tally(graph, assignment))
    if sample_index >= 10:
        break

Running container ben_runner
Pulling Docker image mgggdev/replicate:v0.2
{8: 721664, 5: 721714, 4: 721794, 3: 721730, 2: 721720, 6: 721681, 1: 721714, 7: 721697}
{8: 721664, 5: 721714, 4: 721794, 3: 721730, 2: 721720, 6: 721681, 1: 721714, 7: 721697}
{1: 715120, 5: 721714, 4: 721794, 3: 721730, 2: 721720, 8: 728258, 6: 721681, 7: 721697}
{1: 715120, 5: 721714, 4: 721794, 3: 721730, 2: 721720, 8: 728258, 6: 721681, 7: 721697}
{1: 715120, 5: 721714, 8: 722299, 3: 721730, 2: 721720, 4: 727753, 6: 721681, 7: 721697}
{1: 715120, 5: 721714, 8: 722299, 3: 721730, 2: 721720, 4: 727753, 6: 721681, 7: 721697}
{1: 715120, 5: 721714, 8: 722299, 3: 721730, 2: 721720, 4: 727753, 6: 721681, 7: 721697}
{1: 715120, 5: 721714, 8: 722299, 2: 737959, 3: 705491, 4: 727753, 6: 721681, 7: 721697}
{1: 715120, 5: 721714, 8: 722299, 2: 737959, 3: 705491, 4: 727753, 6: 721681, 7: 721697}
{1: 715120, 5: 721714, 8: 722299, 2: 737959, 3: 705491, 4: 727753, 6: 721681, 7: 721697}
{1: 715120, 5: 721714, 8: 722299, 2: 