# Advent of Code 2022 with Pandas

* https://adventofcode.com/2022/

In [1]:
import pandas as pd
import numpy as np

def display_data(day, n):
    '''
    Prints the first n lines of a day's puzzle input.

    The input is read from data/day{day}.txt, relative to the current
    working directory. Lines are echoed verbatim; when the file holds
    fewer than n lines, printing simply stops at the end of the file.

    Parameters
    ----------
    day : value interpolated into the file name (data/day{day}.txt)
    n : maximum number of lines to print

    Example
    -------
    display_data(1, 10) # displays the top 10 lines from the first day's data
    '''
    path = f'data/day{day}.txt'
    with open(path) as lines:
        # range() is listed first so zip stops as soon as n lines were shown,
        # without pulling one more line from the file.
        for _, line in zip(range(n), lines):
            # Lines keep their own newline, so suppress print's.
            print(line, end='')

# Day 1a

Find the largest group total. Groups are separated from one another by a blank line in the file.

In [2]:
# Peek at the first 20 raw lines of the day 1 input before parsing it.
display_data(1, 20)

9524
12618
6755
2121
12876
11330

4029
11446
11571
2683
3454
12411

6226
9901
6735
7602
4045
4643


In [3]:
# Read the puzzle input as one unnamed column. Blank lines are kept (they
# come through as NaN) because they mark the boundaries between groups.
# squeeze('columns') collapses only the column axis, so the result is always
# a Series -- a bare squeeze() would return a scalar for a one-line file.
s = pd.read_csv('data/day1.txt', header=None,
                skip_blank_lines=False).squeeze('columns')
s.head(20)

0      9524.0
1     12618.0
2      6755.0
3      2121.0
4     12876.0
5     11330.0
6         NaN
7      4029.0
8     11446.0
9     11571.0
10     2683.0
11     3454.0
12    12411.0
13        NaN
14     6226.0
15     9901.0
16     6735.0
17     7602.0
18     4045.0
19     4643.0
Name: 0, dtype: float64

In [7]:
# Every blank line (NaN) bumps a running count, so each row is labelled with
# the number of separators seen so far -- effectively a group id. The NaN row
# itself carries the id of the group that follows it.
groups = pd.isna(s).cumsum()
groups.head(20)


0     0
1     0
2     0
3     0
4     0
5     0
6     1
7     1
8     1
9     1
10    1
11    1
12    1
13    2
14    2
15    2
16    2
17    2
18    2
19    2
Name: 0, dtype: int64

In [8]:
# Add up the values within each group id; the NaN separator rows are
# skipped by sum(), so they do not affect the totals.
group_total = s.groupby(by=groups).sum()
group_total.head(n=5)

0
0    55224.0
1    45594.0
2    46528.0
3    44989.0
4    34883.0
Name: 0, dtype: float64

In [9]:
# Part 1 answer: the largest of the per-group totals.
group_total.max()

73211.0

# Day 1b

Sum the totals of the three largest groups.

In [11]:
# Part 2 answer: keep the three biggest group totals, then add them up.
top_three = group_total.nlargest(3)
top_three.sum()

213958.0

## Master Data Analysis with Python

[Comprehensive text on doing data analysis with Pandas][1]

* 500+ exercises
* Video lessons
* Certification exams

[1]: https://dunderdata.com/master-data-analysis-with-python