# --- Day 7: No Space Left On Device ---

Problem statement: See https://adventofcode.com/2022/day/7

My approach:

Examine the terminal output line by line.
Keep track of where you are (path), all the current directories including parents (indirs), and the accumulated size of all discovered directories (sizes).

If the line begins with \$, interpret as a command. There are two recognized commands, cd and ls.
* The cd command can navigate:
  * to the top
   * set the path to /
   * reset the list of current directories so that it contains only /
  * down one level
   * add the directory name to the path and the list of current directories
   * if this is a new directory, add it to the dictionary of discovered directories
  * up one level
   * remove the current path from the list of current directories 
   * remove the current directory from the end of the path
* The ls command is followed by multiple lines of data.
  * Examine the subsequent lines until you reach the end of the input or the next line begins with \$
  * If it's a directory, ignore it (its size is accumulated from the files found inside it)
  * Otherwise assume it's a file and add the file size to all current directories, including parents. 
  
This builds a dictionary that maps every directory to its total size. Convert it to a pandas DataFrame to simplify selection and aggregation.


In [1]:
import re
import pandas as pd

# Replay the recorded terminal session and accumulate, for every directory seen,
# the total size of all files at or below it.
#
# sizes  : maps each directory path (always ending in '/') to its accumulated size
# indirs : stack of the directories we are currently inside, outermost first and
#          the current directory last, so every listed file is charged to each entry
# path   : the current directory (equal to indirs[-1] once a cd has been seen)
sizes = {}
indirs = []
path = ''

with open('data/terminaloutput.txt', 'r') as f:
    # splitlines() does not produce a trailing empty entry when the file ends
    # with a newline, unlike split('\n').
    term_out = f.read().splitlines()

listing = False  # True while the lines being read are the output of an `ls`

for line in term_out:
    fields = line.split()
    if not fields:
        continue  # blank line: nothing to interpret

    if fields[0] == '$':
        # Any new command ends the previous listing; only `ls` starts one.
        listing = len(fields) > 1 and fields[1] == 'ls'

        if len(fields) > 2 and fields[1] == 'cd':
            target = fields[2]
            if target == '..':
                # Step up one level. At the top there is no parent, so stay
                # put instead of failing.
                if len(indirs) > 1:
                    indirs.pop()
                    path = indirs[-1]
            else:
                if target == '/':
                    path = target
                    indirs = [path]
                else:
                    path += target + '/'
                    indirs.append(path)
                # Register directories the first time they are entered.
                sizes.setdefault(path, 0)

    elif listing and fields[0] != 'dir':
        # A file entry ("<size> <name>"): charge it to the current directory
        # and to every ancestor.
        file_size = int(fields[0])
        for d in indirs:
            sizes[d] += file_size


sizes

{'/': 43441553,
 '/gqcclj/': 860600,
 '/lmtpm/': 22094556,
 '/lmtpm/clffsvcw/': 311933,
 '/lmtpm/dcqnblb/': 1177726,
 '/lmtpm/dcqnblb/dcqnblb/': 209018,
 '/lmtpm/dcqnblb/dcqnblb/zpswzfvg/': 206998,
 '/lmtpm/dcqnblb/gfn/': 277530,
 '/lmtpm/dcqnblb/lpswsp/': 173180,
 '/lmtpm/dcqnblb/lvt/': 215575,
 '/lmtpm/dcqnblb/lvt/hjllwsvl/': 64695,
 '/lmtpm/dcqnblb/lvt/hjllwsvl/wqnc/': 64695,
 '/lmtpm/dcqnblb/lvt/ptbt/': 150880,
 '/lmtpm/dcqnblb/zprprf/': 302423,
 '/lmtpm/dcqnblb/zprprf/ldzslndn/': 238155,
 '/lmtpm/dcqnblb/zprprf/ldzslndn/bwqqsbhg/': 108701,
 '/lmtpm/dcqnblb/zprprf/qftt/': 64268,
 '/lmtpm/dtpwln/': 1924803,
 '/lmtpm/dtpwln/dpwg/': 1440961,
 '/lmtpm/dtpwln/dpwg/pbmf/': 210083,
 '/lmtpm/dtpwln/dpwg/rmpvj/': 466209,
 '/lmtpm/dtpwln/dpwg/scgc/': 182115,
 '/lmtpm/dtpwln/ldzslndn/': 201992,
 '/lmtpm/dtpwln/znnsqqh/': 85635,
 '/lmtpm/fvt/': 699155,
 '/lmtpm/fvt/dcqnblb/': 346098,
 '/lmtpm/fvt/dcqnblb/dcqnblb/': 269901,
 '/lmtpm/fvt/dcqnblb/lbnflwsh/': 76197,
 '/lmtpm/fvt/gnc/': 277193,
 '/

In [2]:
# Tabulate the directory sizes: orient='index' turns each dictionary key
# (a directory path) into a row label, and the totals go into a single
# 'size' column.
df = pd.DataFrame.from_dict(sizes, orient='index', columns=['size'])
df

Unnamed: 0,size
/,43441553
/gqcclj/,860600
/lmtpm/,22094556
/lmtpm/clffsvcw/,311933
/lmtpm/dcqnblb/,1177726
...,...
/qcq/jjqsmfhr/fqndtlgq/,271528
/qcq/rjbqtrq/,168617
/qcq/vwmpnbts/,523625
/vwqwlqrt/,281998


In [3]:
# Part one: keep only the directories whose total size is at most 100000 ...
max100k = df[df['size'] <= 100000]

# ... and add their sizes together. Series.sum() is the vectorized pandas
# reduction; the builtin sum() would loop over the values one by one in Python.
max100k['size'].sum()

1844187

## --- Part Two ---

In [4]:
# `free` is what remains of `capacity` after subtracting everything under the
# root directory; `update - free` is therefore the amount still missing.
capacity = 70_000_000
update = 30_000_000
free = capacity - sizes['/']

# Smallest directory whose size alone is at least the missing amount.
df['size'][df['size'].ge(update - free)].min()

4978279