# Advent of Code Parsers example: 2021 day 8

In [2]:
from pathlib import Path
from pprint import pprint

# `_dh` is IPython's directory-history list; its first entry is expected to be
# the directory the kernel was started in, so input.txt is looked up next to
# the notebook rather than relative to the current working directory.
input_path = Path(globals()["_dh"][0]) / "input.txt"
raw_data = input_path.read_text()

# Preview only the first ten lines of the puzzle input.
preview_lines = raw_data.splitlines()[:10]
for line in preview_lines:
    print(line)

ecdbfag deacfb acdgb cdg acdbf gdfb efacdg gd cagdbf beacg | cdg dcebgaf gbdf bdacg
fadecg gdbecaf agbfd fgdcb gab ebagdf feabcg deab gdefa ab | adfbg ab fcgdbae bfgecda
cgebad edfagcb fg fedg ebfca gcefb fcedgb dbagcf cgf cdbeg | cfg acfbe bcgdafe dgeafcb
bgcde cbefg gd dbeafc afbcgde bedgca gacd dbg cedba fbegda | agfcebd adgfbce dgb fgceb
gadcbe gcade debfac fdagce egdbf cfedg fbgcade gafc dcf fc | fcga eacfdg gfca fdcbea
dbgcae gdeaf cefga cfa dcbgfa cgfabe cefb cf ebfgcda acgbe | gebacdf fcgdeab bacdge cfeb
fgde efc dacgf cbdgfa fe abdcfe afdbecg gaefdc gcfae abceg | ef agdcbf bfdagec efdg
egfacd bfcdeg ac facbg acbe cfa fbgace fgecb gfdba fbdcgae | ac ca ca fgeacb
acefg dae dfbec abfcedg cfdea dgeafc ad dfag eacgbd bcagfe | dfag gfad dgfa beagfcd
gae cabfgd fbcag ecbgad gfec ge agbfe gdefbca bfeda cfbgea | geabf gecf efgc gafbc


Again, this depends on how you want your parsed input to be structured.

For a basic case, we might go for a 2-tuple of lists of strings for each line.

In [4]:
from aocp import ListParser, TupleParser

# Parsers are composed inside-out; judging by the printed result:
#   outer ListParser  -> one element per input line
#   TupleParser       -> a tuple with the parts of that line (presumably split on "|")
#   inner ListParser  -> the individual words of each part, as plain strings
parser = ListParser(
    TupleParser(
        ListParser(),
    ),
)

# Pretty-print only the first ten parsed lines to keep the output short.
first_entries = parser.parse(raw_data)[:10]
pprint(first_entries, width=120)

[(['ecdbfag', 'deacfb', 'acdgb', 'cdg', 'acdbf', 'gdfb', 'efacdg', 'gd', 'cagdbf', 'beacg'],
  ['cdg', 'dcebgaf', 'gbdf', 'bdacg']),
 (['fadecg', 'gdbecaf', 'agbfd', 'fgdcb', 'gab', 'ebagdf', 'feabcg', 'deab', 'gdefa', 'ab'],
  ['adfbg', 'ab', 'fcgdbae', 'bfgecda']),
 (['cgebad', 'edfagcb', 'fg', 'fedg', 'ebfca', 'gcefb', 'fcedgb', 'dbagcf', 'cgf', 'cdbeg'],
  ['cfg', 'acfbe', 'bcgdafe', 'dgeafcb']),
 (['bgcde', 'cbefg', 'gd', 'dbeafc', 'afbcgde', 'bedgca', 'gacd', 'dbg', 'cedba', 'fbegda'],
  ['agfcebd', 'adgfbce', 'dgb', 'fgceb']),
 (['gadcbe', 'gcade', 'debfac', 'fdagce', 'egdbf', 'cfedg', 'fbgcade', 'gafc', 'dcf', 'fc'],
  ['fcga', 'eacfdg', 'gfca', 'fdcbea']),
 (['dbgcae', 'gdeaf', 'cefga', 'cfa', 'dcbgfa', 'cgfabe', 'cefb', 'cf', 'ebfgcda', 'acgbe'],
  ['gebacdf', 'fcgdeab', 'bacdge', 'cfeb']),
 (['fgde', 'efc', 'dacgf', 'cbdgfa', 'fe', 'abdcfe', 'afdbecg', 'gaefdc', 'gcfae', 'abceg'],
  ['ef', 'agdcbf', 'bfdagec', 'efdg']),
 (['egfacd', 'bfcdeg', 'ac', 'facbg', 'acbe', 'cfa', 'f

We might also want to sort the characters within each string alphabetically (so that the same set of letters always produces the same string), which is useful for this problem.

In [5]:
from aocp import SortTransform

# List of lines -> tuple of parts -> list of words, with every word passed
# through SortTransform; in the printed result each word's letters appear in
# alphabetical order while the order of the words themselves is unchanged.
parser = ListParser(
    TupleParser(
        ListParser(
            SortTransform(),
        ),
    ),
)

# Pretty-print only the first ten parsed lines to keep the output short.
first_entries = parser.parse(raw_data)[:10]
pprint(first_entries, width=120)

[(['abcdefg', 'abcdef', 'abcdg', 'cdg', 'abcdf', 'bdfg', 'acdefg', 'dg', 'abcdfg', 'abceg'],
  ['cdg', 'abcdefg', 'bdfg', 'abcdg']),
 (['acdefg', 'abcdefg', 'abdfg', 'bcdfg', 'abg', 'abdefg', 'abcefg', 'abde', 'adefg', 'ab'],
  ['abdfg', 'ab', 'abcdefg', 'abcdefg']),
 (['abcdeg', 'abcdefg', 'fg', 'defg', 'abcef', 'bcefg', 'bcdefg', 'abcdfg', 'cfg', 'bcdeg'],
  ['cfg', 'abcef', 'abcdefg', 'abcdefg']),
 (['bcdeg', 'bcefg', 'dg', 'abcdef', 'abcdefg', 'abcdeg', 'acdg', 'bdg', 'abcde', 'abdefg'],
  ['abcdefg', 'abcdefg', 'bdg', 'bcefg']),
 (['abcdeg', 'acdeg', 'abcdef', 'acdefg', 'bdefg', 'cdefg', 'abcdefg', 'acfg', 'cdf', 'cf'],
  ['acfg', 'acdefg', 'acfg', 'abcdef']),
 (['abcdeg', 'adefg', 'acefg', 'acf', 'abcdfg', 'abcefg', 'bcef', 'cf', 'abcdefg', 'abceg'],
  ['abcdefg', 'abcdefg', 'abcdeg', 'bcef']),
 (['defg', 'cef', 'acdfg', 'abcdfg', 'ef', 'abcdef', 'abcdefg', 'acdefg', 'acefg', 'abceg'],
  ['ef', 'abcdfg', 'abcdefg', 'defg']),
 (['acdefg', 'bcdefg', 'ac', 'abcfg', 'abce', 'acf', 'a

Alternatively, we could treat the strings as sets of characters, which is also useful for the problem.

In [8]:
from aocp import SetParser

# List of lines -> tuple of parts -> list of words, with every word parsed by
# SetParser; in the printed result each word becomes a set of its characters.
parser = ListParser(
    TupleParser(
        ListParser(
            SetParser(),
        ),
    ),
)

# Sets print verbosely, so only the first two parsed lines are shown.
first_entries = parser.parse(raw_data)[:2]
pprint(first_entries, width=120)

[([{'f', 'c', 'b', 'd', 'g', 'e', 'a'},
   {'f', 'c', 'b', 'd', 'e', 'a'},
   {'c', 'b', 'd', 'g', 'a'},
   {'c', 'g', 'd'},
   {'f', 'c', 'b', 'd', 'a'},
   {'f', 'g', 'b', 'd'},
   {'f', 'c', 'd', 'g', 'e', 'a'},
   {'g', 'd'},
   {'f', 'c', 'b', 'd', 'g', 'a'},
   {'c', 'b', 'g', 'e', 'a'}],
  [{'c', 'g', 'd'}, {'f', 'c', 'b', 'd', 'g', 'e', 'a'}, {'b', 'g', 'd', 'f'}, {'c', 'b', 'd', 'g', 'a'}]),
 ([{'f', 'c', 'd', 'g', 'e', 'a'},
   {'f', 'c', 'b', 'd', 'g', 'e', 'a'},
   {'f', 'b', 'd', 'g', 'a'},
   {'f', 'c', 'b', 'd', 'g'},
   {'b', 'g', 'a'},
   {'f', 'b', 'd', 'g', 'e', 'a'},
   {'f', 'c', 'b', 'g', 'e', 'a'},
   {'b', 'e', 'd', 'a'},
   {'f', 'd', 'g', 'e', 'a'},
   {'b', 'a'}],
  [{'f', 'b', 'd', 'g', 'a'}, {'b', 'a'}, {'f', 'c', 'b', 'd', 'g', 'e', 'a'}, {'f', 'c', 'b', 'd', 'g', 'e', 'a'}])]
