# Advent of Code

In [1]:
import pandas as pd
import numpy as np

# Read in a single column of data as a Series in Pandas

* Data stored as a text file of one item per row

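* Call `.squeeze()` on the result to collapse the one-column DataFrame into a Series (older pandas versions also accepted `squeeze=True` in `read_csv`)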
* Set `header` to `None` if first row has no column name

In [2]:
!head -n 10 data/day1.txt

191
192
201
205
206
203
188
189
199
206


## 1a

In [3]:
# Load the readings (one per row, no header line) as a Series.
s = pd.read_csv('data/day1.txt', header=None).squeeze()
# Count the readings that are larger than the one before them.
(s.diff() > 0).sum()

1374

## 1b

In [4]:
# Same comparison as 1a, applied to the sums of a three-reading sliding window.
window_sums = s.rolling(3).sum()
(window_sums.diff() > 0).sum()

1418

## 2a

In [5]:
# Two space-separated fields per line; the file has no header row.
df = pd.read_csv(
    'data/day2.txt',
    sep=' ',
    header=None,
    names=['direction', 'amount'],
)
df.head()

Unnamed: 0,direction,amount
0,forward,3
1,down,7
2,forward,8
3,down,9
4,forward,3


In [6]:
s = df.groupby('direction')['amount'].sum()
s

direction
down       2205
forward    1832
up         1033
Name: amount, dtype: int64

In [7]:
s['forward'] * (s['down'] - s['up'])

2147104

## 2b

In [8]:
# numpy
amount = df['amount']
is_forward = df['direction'] == 'forward'
is_up = df['direction'] == 'up'

# "up" lowers the aim; any other non-"forward" row raises it.
aim_change = np.where(is_up, -amount, amount)
aim = np.where(is_forward, 0, aim_change).cumsum()

# Depth only changes on "forward" rows, by the current aim times the amount.
depth = np.where(is_forward, aim * amount, 0).sum()
horiz = amount[is_forward].sum()
depth * horiz

2044620088

In [9]:
# pandas
going_down = df['direction'] == 'down'
going_up = df['direction'] == 'up'
going_forward = df['direction'] == 'forward'

# +amount on "down", -amount on "up", 0 otherwise, accumulated row by row.
aim_sign = going_down.astype('int') - going_up.astype('int')
df['aim'] = (df['amount'] * aim_sign).cumsum()
# Per-row depth change: non-zero only on "forward" rows.
df['depth'] = df['aim'] * df['amount'] * going_forward
df

Unnamed: 0,direction,amount,aim,depth
0,forward,3,0,0
1,down,7,7,0
2,forward,8,7,56
3,down,9,16,0
4,forward,3,16,48
...,...,...,...,...
995,down,9,1155,0
996,down,2,1157,0
997,down,6,1163,0
998,down,9,1172,0


In [10]:
(df['amount'] * (df['direction'] == 'forward')).sum() * df['depth'].sum()

2044620088

## 3a

In [11]:
# Read each line as a string so leading zeros survive.
s = pd.read_csv('data/day3.txt', dtype='str', header=None).squeeze()
# Splitting on the empty string gives one column per character plus an empty
# column at each end; drop those two.
chars = s.str.split("", expand=True)
df = chars.iloc[:, 1:-1]
df.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
0,0,1,1,1,0,1,1,0,1,1,1,0
1,0,1,0,1,1,0,0,0,1,1,0,1
2,1,0,0,1,1,1,0,0,0,1,1,0
3,0,1,1,1,1,0,1,0,1,0,0,0
4,1,0,1,1,0,1,0,0,0,1,0,0


In [12]:
s1 = df.mode().squeeze()
s1

1     0
2     1
3     1
4     1
5     0
6     0
7     1
8     0
9     1
10    1
11    0
12    0
Name: 0, dtype: object

In [13]:
# Join the per-column bit characters and read the result as a binary number.
a = int(''.join(s1), base=2)
a

1836

In [14]:
# Flip every bit of s1, read the result as binary, and multiply by `a`.
flipped = 1 - s1.astype('int')
b = int(flipped.astype('str').sum(), base=2)
a * b

4147524

## 3b

In [15]:
# One integer column per bit position (the empty edge columns are dropped).
bits = s.str.split('', expand=True)
df = bits.iloc[:, 1:-1].astype('int')
df.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
0,0,1,1,1,0,1,1,0,1,1,1,0
1,0,1,0,1,1,0,0,0,1,1,0,1
2,1,0,0,1,1,1,0,0,0,1,1,0
3,0,1,1,1,1,0,1,0,1,0,0,0
4,1,0,1,1,0,1,0,0,0,1,0,0


In [16]:
# one solution
# Filter the rows column by column, keeping the most common bit on the first
# pass and the opposite bit on the second; multiply the two surviving rows.
final = 1
for small in (False, True):
    df1 = df
    for col in df1.columns:
        if len(df1) == 1:
            # Only one candidate left: nothing more to filter.
            break
        modes = df1[col].mode()
        # Two modes means a tie between 0 and 1, which resolves to 1.
        bit = modes.iloc[0] if len(modes) == 1 else 1
        if small:
            bit = 1 - bit
        df1 = df1[df1[col] == bit]
    final *= int(df1.squeeze().astype('str').sum(), base=2)
final

3570354

In [17]:
# another solution
# Sort each column's value counts by (count, value): ascending order picks the
# least common bit (ties -> 0), descending order the most common (ties -> 1).
final = 1
for asc in (True, False):
    df1 = df
    # Iterate over the actual columns instead of a hard-coded range(1, 13).
    for i in df.columns:
        # Name both columns explicitly: the labels produced by a bare
        # value_counts().reset_index() differ between pandas 1.x and 2.x.
        vc = (df1[i].value_counts()
                    .rename_axis('value')
                    .reset_index(name='count')
                    .sort_values(['count', 'value'], ascending=asc))
        val = vc['value'].iloc[0]
        df1 = df1[df1[i] == val]
    final *= int(df1.squeeze().astype('str').sum(), base=2)
final

3570354

## 4a

In [18]:
s = pd.read_csv('data/day4.txt', nrows=1,  header=None).squeeze()
s.head()

0    23
1    30
2    70
3    61
4    79
Name: 0, dtype: int64

In [19]:
# Skip the first two lines (the draw numbers read above and, presumably, a
# blank separator) and parse the rest as whitespace-separated columns.
# The raw string keeps `\s` a regex escape rather than an invalid string escape.
df = pd.read_csv('data/day4.txt', skiprows=[0, 1], header=None, skip_blank_lines=True, sep=r'\s+')
# Every five consecutive rows share one index label.
df = df.set_index(df.index // 5)
df

Unnamed: 0,0,1,2,3,4
0,50,98,65,14,47
0,0,22,3,83,46
0,87,93,81,84,58
0,40,35,28,74,48
0,45,99,59,37,64
...,...,...,...,...,...
99,88,62,76,78,95
99,64,65,36,58,22
99,7,21,98,93,42
99,79,99,9,89,10


In [20]:
# Draw numbers one at a time, blanking matches, until some group has a full line.
df1 = df
for num in s:
    df1 = df1.mask(df1 == num)  # cells equal to the drawn number become NaN
    is_bingo = df1.isna()
    complete_rows = is_bingo.all(axis=1)
    horiz = complete_rows[complete_rows]
    # Per index label: is any column blanked in all of that label's rows?
    complete_cols = is_bingo.groupby(level=0).all().any(axis=1)
    vert = complete_cols[complete_cols]
    # A completed row is checked before a completed column.
    hits = horiz if len(horiz) else vert
    if len(hits):
        winner = hits.index[0]
        break
# Sum of the winner's remaining cells times the last number drawn.
df1.loc[winner].stack().sum() * num

31424.0

## 4b

In [21]:
%%time
# Keep drawing numbers, discarding every index label that has a complete line,
# until no rows remain; `total` ends up holding the score taken while only
# five rows were left.
df1 = df
for num in s:
    # Cells equal to the drawn number become NaN.
    df1 = df1.mask(df1 == num)
    is_bingo = df1.isna()
    # Rows that are fully blanked.
    horiz = is_bingo.all(axis=1)[lambda x: x]
    # Index labels for which some column is blanked in all of that label's rows.
    vert = is_bingo.groupby(level=0).all().any(axis=1)[lambda x: x]
    winners = horiz.index.union(vert.index)
    # Recomputed on every draw while exactly five rows remain, so the value
    # kept is the one from the draw on which those rows are finally removed.
    # NOTE(review): `total` is never set if the last rows are removed while
    # more than five remain - confirm that cannot happen for this input.
    if len(df1) == 5:
        total = df1.stack().sum() * num  
    # Drop every row whose index label is among this draw's winners.
    df1 = df1[~df1.index.isin(winners)]
    if len(df1) == 0:
        break
total

CPU times: user 56.6 ms, sys: 1.98 ms, total: 58.6 ms
Wall time: 57.6 ms


23042.0

## 5a

In [22]:
# Split each line on runs of non-digit characters to get the four coordinates.
# The regex separator needs the python engine; the raw string keeps `\D` a
# regex escape rather than an invalid Python string escape.
df = pd.read_csv('data/day5.txt', sep=r'\D+',
                 header=None, engine='python',
                 names=['x1', 'y1', 'x2', 'y2'])
df.head()

Unnamed: 0,x1,y1,x2,y2
0,599,531,599,32
1,435,904,435,489
2,768,714,768,187
3,845,552,596,801
4,167,680,167,445


In [23]:
# Expand every axis-aligned segment into the individual points it covers.
dfs = []
# First pass (a='x', b='y') takes rows with x1 == x2; the second pass swaps the roles.
for a, b in ('xy', 'yx'):
    df_temp = (df.query(f'{a}1 == {a}2')
                 # the shared coordinate becomes the index; keep the two varying ends
                 .set_index(f'{a}1')[[f'{b}1', f'{b}2']]
                 # one inclusive range per segment, whichever end is larger
                 .apply(lambda x: range(min(x), max(x) + 1), axis=1)
                 # one row per covered point
                 .explode()
                 # back to two columns named x1 and y1 (in either order)
                 .reset_index(name=f'{b}1'))
    dfs.append(df_temp)
# Count occurrences of each (x1, y1) pair and tally the pairs seen more than once.
pd.concat(dfs).value_counts().gt(1).sum()

7644

## 5b

In [24]:
# Keep the segments whose horizontal and vertical extents are equal.
is_diagonal = (df['x1'] - df['x2']).abs() == (df['y1'] - df['y2']).abs()
df1 = df[is_diagonal]
df1.head()

Unnamed: 0,x1,y1,x2,y2
3,845,552,596,801
6,780,295,179,896
7,310,539,602,831
12,689,815,73,199
16,315,532,773,74


In [25]:
# Rebuild the axis-aligned points exactly as in 5a ...
dfs = []
# First pass (a='x', b='y') takes rows with x1 == x2; the second pass swaps the roles.
for a, b in ('xy', 'yx'):
    df_temp = (df.query(f'{a}1 == {a}2')
                 # the shared coordinate becomes the index; keep the two varying ends
                 .set_index(f'{a}1')[[f'{b}1', f'{b}2']]
                 # one inclusive range per segment, whichever end is larger
                 .apply(lambda x: range(min(x), max(x) + 1), axis=1)
                 # one row per covered point
                 .explode()
                 .reset_index(name=f'{b}1'))
    dfs.append(df_temp)

# ... then add the points of the segments held in df1. Each coordinate is
# walked from its start to its end value in |end - start| + 1 evenly spaced steps.
x1 = df1.apply(lambda x: np.linspace(x['x1'], x['x2'], abs(x['x2'] - x['x1']) + 1), axis=1).explode()
y1 = df1.apply(lambda x: np.linspace(x['y1'], x['y2'], abs(x['y2'] - x['y1']) + 1), axis=1).explode()
# linspace yields floats; cast back to integers.
# NOTE(review): pairing x1 with y1 assumes both sequences have the same length
# per row, which holds when df1 only contains |dx| == |dy| rows - confirm.
df_diag = pd.DataFrame({'x1': x1, 'y1': y1}).astype('int')
dfs.append(df_diag)

In [26]:
pd.concat(dfs).value_counts().gt(1).sum()

18627

## 6a and 6b

In [27]:
s = pd.read_csv('data/day6.txt', header=None).squeeze()
s.head()

0    1
1    1
2    3
3    5
4    1
Name: 0, dtype: int64

In [28]:
s_count = s.value_counts()
s_count

1    222
2     24
3     19
5     18
4     17
Name: 0, dtype: int64

Use a pandas Series like a dictionary: the index labels act as keys and the counts as values (read with `.get`, written with `.loc`).

In [29]:
# s_count maps a value to how many entries currently hold it.
s_count = s.value_counts()
for day in range(80):
    at_zero = s_count.get(0, 0)
    # Entries at 0 move to 7 and add the same number of new entries at 9;
    # both land on 6 and 8 after the decrement below.
    s_count.loc[7] = s_count.get(7, 0) + at_zero
    s_count.loc[9] = at_zero
    s_count = s_count.drop(index=0, errors='ignore')
    s_count.index = s_count.index - 1
s_count.sum()

396210

In [30]:
# Same update rule as the 80-step run, repeated 256 times.
s_count = s.value_counts()
for day in range(256):
    at_zero = s_count.get(0, 0)
    # Entries at 0 move to 7 and add the same number of new entries at 9;
    # both land on 6 and 8 after the decrement below.
    s_count.loc[7] = s_count.get(7, 0) + at_zero
    s_count.loc[9] = at_zero
    s_count = s_count.drop(index=0, errors='ignore')
    s_count.index = s_count.index - 1
s_count.sum()

1770823541496

In [31]:
# Variant that decrements first: entries that drop to -1 are moved to 6 and
# add the same number of new entries at 8.
s_count = s.value_counts()
for day in range(80):
    s_count.index = s_count.index - 1
    below_zero = s_count.get(-1, 0)
    s_count.loc[6] = s_count.get(6, 0) + below_zero
    s_count.loc[8] = below_zero
    s_count = s_count.drop(index=-1, errors='ignore')
s_count.sum()

396210

## 7a

In [32]:
s = pd.read_csv('data/day7.txt', header=None).squeeze()
s.head()

0    1101
1       1
2      29
3      67
4    1102
Name: 0, dtype: int64

In [33]:
s.median()

330.0

In [34]:
# Brute force: try every candidate position and keep the cheapest one.
# The bounds are inclusive so both extreme positions are candidates;
# `range(s.max())` never tried `s.max()` itself and assumed positions start at 0.
min_val = 0
fuel = np.inf
for i in range(s.min(), s.max() + 1):
    # Cost of moving everything to i: the sum of absolute distances.
    cur_fuel = (s - i).abs().sum()
    if cur_fuel < fuel:
        fuel = cur_fuel
        min_val = i
min_val, fuel

(330, 329389)

## 7b

In [35]:
# Brute force as in 7a, with a cost of 1 + 2 + ... + d = d * (d + 1) / 2 per
# distance d. The bounds are inclusive so both extreme positions are
# candidates; `range(s.max())` never tried `s.max()` itself.
min_val = 0
fuel = np.inf
for i in range(s.min(), s.max() + 1):
    cur_fuel = (s - i).abs()
    cur_fuel = (cur_fuel * (cur_fuel + 1) / 2).sum()
    if cur_fuel < fuel:
        fuel = cur_fuel
        min_val = i
min_val, fuel

(459, 86397080.0)

## 8a

In [36]:
# Fields are split on a space, optionally followed by "|" and more spaces;
# a regex separator requires the python parsing engine.
df = pd.read_csv(
    'data/day8.txt',
    sep=r' \|? *',
    engine='python',
    header=None,
)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,ecbad,fdeacg,gaecbd,gbae,gfcdbea,cadge,fcagdb,abc,cfdbe,ab,beag,bac,dacgbe,aegb
1,gad,agcfb,afegcd,afed,gacdf,gdfce,ad,cfdgbe,cfgdeba,bdcgea,gbdecf,cdgeaf,abcgde,ad
2,ebadf,ag,efgcdab,fgced,edgbcf,begcfa,adgef,gcaedf,afg,dgca,agcd,agdc,fagcbde,gfa
3,bgaefd,gfcbe,fgeda,dbf,dafbgc,dfbge,bd,bdgcaef,dfecga,ebad,dbf,db,edab,dbf
4,ebfcgd,fedbc,adc,da,gafdce,bdaec,bdcegaf,edbafc,bdfa,gbcea,afbcedg,cedbf,decfb,eacbg


In [37]:
# Count the entries in the last four columns whose length is 2, 3, 4 or 7.
output_lengths = df.iloc[:, -4:].stack().str.len()
output_lengths.isin([2, 3, 4, 7]).sum()

456

## 8b

In [38]:
# Work through the first row by hand: its first ten patterns as sets of letters.
s = df.iloc[0, :10].apply(set)
s_len = s.str.len()

# Patterns of length 2, 3, 4 and 7 are stored under 1, 7, 4 and 8 and removed
# from `s`, leaving only the length-5 and length-6 patterns behind.
unique_lengths = {2: 1, 3: 7, 4: 4, 7: 8}
d = {}
for i, val in s_len.items():
    if val in unique_lengths:
        d[unique_lengths[val]] = s.pop(i)

# Letter groups used to tell the remaining patterns apart.
middle_l = d[4] - d[1]
bottom_l = d[8] - d[4] - d[7]
capital_e = d[8] - d[1]
almost_0 = bottom_l | d[7]
almost_9 = d[4] | d[7]

for val in s:
    n_letters = len(val)
    if n_letters == 5:
        if bottom_l <= val:
            d[2] = val
        elif middle_l <= val:
            d[5] = val
        elif d[7] <= val:
            d[3] = val
    elif n_letters == 6:
        if almost_0 <= val:
            d[0] = val
        elif capital_e <= val:
            d[6] = val
        elif almost_9 <= val:
            d[9] = val

In [39]:
df.iloc[:, -4:].applymap(set)

Unnamed: 0,10,11,12,13
0,"{a, b, g, e}","{c, a, b}","{c, d, e, b, a, g}","{b, a, g, e}"
1,"{f, c, d, e, b, g}","{f, c, d, e, a, g}","{c, d, e, b, a, g}","{a, d}"
2,"{c, a, g, d}","{c, a, g, d}","{f, c, d, e, a, g, b}","{f, a, g}"
3,"{f, b, d}","{b, d}","{b, a, d, e}","{f, b, d}"
4,"{f, c, d, e, b, a, g}","{f, c, d, e, b}","{f, c, d, e, b}","{c, e, a, g, b}"
...,...,...,...,...
195,"{f, d, e, a, b, g}","{f, e}","{f, b, e}","{f, b, e}"
196,"{f, c, d, e, b, g, a}","{f, c, e, a, g, b}","{c, d, b, a, g}","{a, g, d, e}"
197,"{b, c, g, d}","{f, d, e, b, a}","{f, c, b, a, g}","{f, c, e, a, b, g}"
198,"{c, a, g, e}","{f, c, d, e, g}","{f, c, d, e, a}","{a, c, g, e}"


In [40]:
def code(s):
    """Decode one row of patterns and return its last four entries as an int.

    ``s`` is a Series of strings. Its first ten entries are used to work out
    which set of letters stands for which digit; its last four entries are
    then translated digit by digit and joined into one number.
    """
    patterns = s.apply(frozenset)
    unknown = patterns.iloc[:10].copy()

    # Patterns of length 2, 3, 4 and 7 are stored under 1, 7, 4 and 8 and
    # removed from the pool of undecided patterns.
    digit_for_length = {2: 1, 3: 7, 4: 4, 7: 8}
    digits = {}
    for label, size in unknown.map(len).items():
        if size in digit_for_length:
            digits[digit_for_length[size]] = unknown.pop(label)

    # Letter groups used to tell the length-5 and length-6 patterns apart.
    upper_l = digits[4] - digits[1]
    lower_l = digits[8] - digits[4] - digits[7]
    e_shape = digits[8] - digits[1]
    zero_core = lower_l | digits[7]
    nine_core = digits[4] | digits[7]

    for pattern in unknown:
        size = len(pattern)
        if size == 5:
            if lower_l <= pattern:
                digits[2] = pattern
            elif upper_l <= pattern:
                digits[5] = pattern
            elif digits[7] <= pattern:
                digits[3] = pattern
        elif size == 6:
            if zero_core <= pattern:
                digits[0] = pattern
            elif e_shape <= pattern:
                digits[6] = pattern
            elif nine_core <= pattern:
                digits[9] = pattern

    # Invert the mapping so each output pattern can be looked up directly.
    digit_for_pattern = {frozenset(pattern): digit for digit, pattern in digits.items()}
    return int(''.join(str(digit_for_pattern[pattern]) for pattern in patterns.iloc[-4:]))

In [41]:
df.apply(code, axis=1).sum()

1091609

## 9a

In [42]:
# Show up to 50 columns when displaying wide frames.
pd.set_option('display.max_columns', 50)
s = pd.read_csv('data/day9.txt', header=None, dtype='str').squeeze()
# One integer column per character; relabel the columns 0..n-1.
digit_chars = s.str.split('', expand=True).iloc[:, 1:-1]
df = digit_chars.astype('int')
df.columns = range(df.shape[1])
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,...,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99
0,5,4,5,6,7,8,9,3,4,9,8,8,6,4,5,6,8,9,0,1,2,3,9,8,5,...,5,7,8,9,7,7,8,9,9,9,9,8,9,6,5,2,3,4,9,8,7,9,8,9,9
1,4,3,4,9,8,9,1,2,9,8,7,6,5,3,4,8,7,8,9,3,3,9,8,7,5,...,7,8,9,3,5,6,9,9,9,8,8,7,9,9,3,1,9,9,8,7,6,5,6,6,8
2,1,2,9,8,9,1,0,9,8,9,8,7,3,2,3,4,5,9,5,4,9,8,7,6,4,...,8,9,5,4,6,7,8,9,8,7,9,6,8,8,9,9,8,9,7,6,5,4,4,5,7
3,2,9,8,7,9,3,9,8,7,5,4,3,2,1,2,3,4,8,9,9,9,9,9,5,3,...,9,9,6,5,7,8,9,8,7,6,8,5,6,7,8,9,7,8,9,5,4,3,3,4,5
4,9,8,9,6,8,9,9,9,8,4,3,2,1,0,1,2,6,6,8,8,9,9,8,6,5,...,7,8,9,6,8,9,8,7,6,5,9,4,5,6,9,7,6,9,8,7,2,2,2,5,6


In [43]:
# A cell qualifies when it is lower than each of its four neighbours. diff()
# against a missing neighbour is NaN, which is filled with -1 so grid edges
# count as "lower than".
shifts = [(1, 0), (-1, 0), (1, 1), (-1, 1)]  # (periods, axis): above, below, left, right
lower_than = [df.diff(periods, axis=axis).fillna(-1) < 0 for periods, axis in shifts]
mask = lower_than[0] & lower_than[1] & lower_than[2] & lower_than[3]
df.where(mask).stack().add(1).sum()

506.0

## 9b

In [44]:
low_points = df.where(mask).stack().astype('int')
low_points.head()

0  18    0
   38    1
   61    2
1  47    4
   72    1
dtype: int64

In [45]:
def get_next_indexes(cur_idx):
    """Grow the current basin outward from ``cur_idx``.

    Reads the module-level grid ``df`` and rebinds the module-level set
    ``known_indexes``, adding every neighbouring ``(row, col)`` position that
    is not yet recorded, lies inside the grid, and holds a value strictly
    greater than the current cell's but below 9. It then recurses into each
    position it added.

    The grid bounds are taken from ``df.shape`` instead of a fixed 100 x 100.
    """
    global known_indexes
    n_rows, n_cols = df.shape
    row, col = cur_idx[0], cur_idx[1]
    cur_val = df.iloc[row, col]
    # left, right, up, down
    new_indexes = [(row, col - 1), (row, col + 1), (row - 1, col), (row + 1, col)]
    # The bounds checks come before the df.iloc lookup so that positions
    # outside the grid are never read.
    kept_indexes = {idx for idx in new_indexes
                    if idx not in known_indexes
                    and 0 <= idx[0] < n_rows and 0 <= idx[1] < n_cols
                    and cur_val < df.iloc[idx[0], idx[1]] < 9}
    known_indexes = known_indexes | kept_indexes

    for idx in kept_indexes:
        get_next_indexes(idx)

In [46]:
# Measure the basin around every low point.
basins = []
for cur_idx in low_points.index:
    # get_next_indexes() reads and rebinds this module-level set, so it must
    # be reset to just the starting position before each call.
    known_indexes = {cur_idx}
    get_next_indexes(cur_idx)
    # Basin size = number of positions collected, the start included.
    basins.append(len(known_indexes))

In [47]:
pd.Series(basins).nlargest(3).prod()

931200

## 10 a and b

In [48]:
s = pd.read_csv('data/day10.txt', header=None).squeeze()
s.iloc[0]

'{<{<{[<[{{([{{{}()}<{}()>}<[{}]>]<<{{}<>}([]<>)>[({}<>){()[]}]>)[[([(){}][<><>])<<()>{()()}>]]}}[(<{[(<>'

In [49]:
# Openers and closers, paired by position.
left = '([{<'
right = ')]}>'
# Score of each closing character when it is the first mismatch on a line.
points = {')': 3, ']': 57, '}': 1197, '>': 25137}
def find_points(chars):
    """Score one line of brackets.

    Returns the positive ``points`` value of the first closing character that
    does not match the most recent unclosed opener; a closer that arrives
    when nothing is open counts as a mismatch too. If no mismatch is found,
    returns the completion score of the openers still unclosed, negated so
    callers can tell the two cases apart by sign.

    Raises ``ValueError`` if ``chars`` contains a character that is in
    neither ``left`` nor ``right``.
    """
    stack = []
    for char in chars:
        if char in left:
            stack.append(char)
        else:
            matched_left = left[right.index(char)]
            # Check for an empty stack first: popping it would raise IndexError.
            if not stack or stack.pop() != matched_left:
                return points[char]

    # Only reached when no mismatch was found: score the unclosed openers,
    # innermost first, worth 1-4 each according to their position in `left`.
    score = 0
    for char in reversed(stack):
        score = score * 5 + left.index(char) + 1
    return -score

In [50]:
s1 = s.apply(find_points)

In [51]:
s1[s1 > 0].sum()

319233

In [52]:
-s1[s1 < 0].median()

1118976874.0