# Advent of Code Day 1

In [1]:
## Set notebook to auto reload updated modules
from __future__ import annotations

%load_ext autoreload
%autoreload 2

In [2]:
import json
from collections import Counter
from pathlib import Path

import pandas as pd

In [3]:
def display_df_without_index(df: pd.DataFrame, head: int | None = None, tail: int | None = None) -> None:
    """Display a Pandas DataFrame in a Jupyter notebook, without the DataFrame index column.

    Params:
        df (pandas.DataFrame): The Pandas DataFrame to print without the index column.

    Returns:
        None
    Raises:
        ValueError: When both `head` and `tail` values are passed.

    """
    if head and tail:
        raise ValueError("Cannot pass a value for both head and tail, you must use one or the other.")
    try:
        if head:
            display(df.head(head).style.hide(axis="index"))
        else:
            display(df.tail(tail).style.hide(axis="index"))
    except Exception as exc:
        msg: str = f"({type(exc)}) Error displaying Pandas DataFrame. Details: {exc}"
        print(f"[ERROR] {msg}")

        raise exc

In [4]:
## Path to the text file holding the puzzle inputs from the Advent of Code website.
#  The path is relative, so it is resolved against the current working directory.
#  Note: Each line of the file holds 2 integers separated by 3 space characters, ex:
#  1234567   8901234
input_file = "./inputs"

In [5]:
## Report that the input file was found, or stop with an error when it is missing
input_path = Path(input_file)
if input_path.exists():
    display(f"[SUCCESS] Found input file: {input_file}. Reading from file.")
else:
    raise FileNotFoundError(f"Could not find input file: {input_file}")

'[SUCCESS] Found input file: ./inputs. Reading from file.'

In [6]:
## Load every line of the input file (line endings included) into a list
with open(input_file, mode="r") as input_handle:
    lines = list(input_handle)

line_count = len(lines)
display(f"Read [{line_count}] lines from input file.")

'Read [1000] lines from input file.'

## Part 1

In [7]:
## Start with 2 empty, independent lists: one for each column of the input
lst1, lst2 = [], []

In [8]:
## Split each line on whitespace and build 2 lists from the left & right numbers
#  The lists are rebuilt from scratch here so that re-running this cell does not
#  append the same values a second time.
lst1 = []
lst2 = []

for line in lines:
    fields = line.split()
    #  Skip blank lines (e.g. an empty line at the end of the file)
    if not fields:
        continue

    if len(fields) != 2:
        raise ValueError(f"Expected 2 numbers per line, got: {line!r}")

    line1, line2 = fields
    lst1.append(int(line1))
    lst2.append(int(line2))

display(f"Split lines in '{input_file}' into 2 lists. List 1 has [{len(lst1)}] items and list 2 has [{len(lst2)}] items.")

"Split lines in './inputs' into 2 lists. List 1 has [1000] items and list 2 has [1000] items."

In [9]:
## Put each list into ascending numeric order
lst1, lst2 = sorted(lst1), sorted(lst2)

In [10]:
## Show the first 10 values of each list
lst1_preview = lst1[:10]
lst2_preview = lst2[:10]
display(f"List 1 preview: {lst1_preview}")
display(f"List 2 preview: {lst2_preview}")

'List 1 preview: [10219, 10238, 10262, 10295, 10379, 10508, 10567, 10653, 10686, 10807]'

'List 2 preview: [10069, 10118, 10760, 10796, 11405, 11658, 11658, 11658, 11658, 11658]'

In [11]:
## Wrap each list in a single-column dataframe; both frames share the same column name
location_columns = ["locationID"]
lst1_df = pd.DataFrame(data=lst1, columns=location_columns)
lst2_df = pd.DataFrame(data=lst2, columns=location_columns)

In [12]:
## Display a titled 10-row preview of the list 1 dataframe, then of the list 2 dataframe
for preview_title, preview_df in (("List 1 preview:", lst1_df), ("List 2 preview:", lst2_df)):
    display(preview_title)
    display_df_without_index(df=preview_df, head=10)

'List 1 preview:'

locationID
10219
10238
10262
10295
10379
10508
10567
10653
10686
10807


'List 2 preview:'

locationID
10069
10118
10760
10796
11405
11658
11658
11658
11658
11658


In [13]:
## Inspect the column dtypes of the list 1 dataframe
lst1_df.dtypes

locationID    int64
dtype: object

In [14]:
## Inspect the column dtypes of the list 2 dataframe
lst2_df.dtypes

locationID    int64
dtype: object

In [15]:
## Absolute difference between the values at the same position in each dataframe
location_gaps = lst1_df["locationID"].sub(lst2_df["locationID"]).abs()
## Order the differences ascending, then keep only the values greater than 0
distances = [gap for gap in sorted(location_gaps.tolist()) if gap > 0]

display("Distances between points preview:")
display(distances[:10])

'Distances between points preview:'

[2, 15, 19, 23, 29, 32, 38, 38, 45, 46]

In [16]:
## Add up every distance value and report the total
total_distance = sum(distances)
total_distance_message = f"The total distance between all list items is: {total_distance}"
display(total_distance_message)

'The total distance between all list items is: 1941353'

In [17]:
## Bundle the part 1 total with the distances and the two lists it was computed from
part1_solution = {
    "total_distance": total_distance,
    "distances": distances,
    "inputs": {
        "list1": lst1,
        "list2": lst2,
    },
}

## Part 2

In [47]:
## Count how often each value appears in list 2 once, up front, instead of
#  re-scanning all of list 2 for every value in list 1.
lst2_counts = Counter(lst2)

similarity_scores = []

for lst1_val in lst1:
    #  A Counter returns 0 for values it has never seen
    lst2_occurences = lst2_counts[lst1_val]
    #  Values that never appear in list 2 get no record
    if lst2_occurences == 0:
        continue

    #  The "list2_occurences" key spelling is kept as-is: these records are stored in the results
    similarity_scores.append({"list1_val": lst1_val, "list2_occurences": lst2_occurences, "similarity_score": lst1_val * lst2_occurences})


In [48]:
## Preview the first 10 similarity score records
similarity_scores[:10]

[{'list1_val': 11658, 'list2_occurences': 12, 'similarity_score': 139896},
 {'list1_val': 13064, 'list2_occurences': 15, 'similarity_score': 195960},
 {'list1_val': 18604, 'list2_occurences': 11, 'similarity_score': 204644},
 {'list1_val': 18949, 'list2_occurences': 16, 'similarity_score': 303184},
 {'list1_val': 21557, 'list2_occurences': 14, 'similarity_score': 301798},
 {'list1_val': 25877, 'list2_occurences': 13, 'similarity_score': 336401},
 {'list1_val': 30334, 'list2_occurences': 5, 'similarity_score': 151670},
 {'list1_val': 33525, 'list2_occurences': 12, 'similarity_score': 402300},
 {'list1_val': 36755, 'list2_occurences': 8, 'similarity_score': 294040},
 {'list1_val': 37822, 'list2_occurences': 6, 'similarity_score': 226932}]

In [49]:
## Tabulate the similarity score records and preview the first 10 rows
similarity_scores_df = pd.DataFrame(data=similarity_scores)
display_df_without_index(df=similarity_scores_df, head=10)

list1_val,list2_occurences,similarity_score
11658,12,139896
13064,15,195960
18604,11,204644
18949,16,303184
21557,14,301798
25877,13,336401
30334,5,151670
33525,12,402300
36755,8,294040
37822,6,226932


In [50]:
## Add up the similarity score column and store the total as a plain Python int
similarity_score_column = similarity_scores_df["similarity_score"]
total_similarity_score = int(similarity_score_column.sum())
total_similarity_score

22539317

In [51]:
## Bundle the part 2 total with the per-value similarity score records
part2_solution = {
    "total_similarity_score": total_similarity_score,
    "similarity_scores": similarity_scores,
}

# Results

In [52]:
## Inspect the Python type of the part 1 solution object
type(part1_solution)

dict

In [53]:
## Inspect the Python type of the part 2 solution object
type(part2_solution)

dict

In [54]:
## Combine both solutions into a single dict, keyed by puzzle part
results = {
    "part1": part1_solution,
    "part2": part2_solution,
}

In [55]:
## Serialize the combined results dict to an indented JSON string
try:
    json_data = json.dumps(results, indent=4)
except Exception as exc:
    msg = f"({type(exc)}) Error dumping results dict to JSON. Details: {exc}"
    display(f"[ERROR] {msg}")

    ## Re-raise so a failed dump stops the notebook here. Otherwise `json_data` would be
    #  undefined (or left over from an earlier run) when it is written to disk.
    raise

In [56]:
## Save the JSON string to solutions.json, replacing the contents of any existing file
with open("solutions.json", mode="w") as solutions_file:
    solutions_file.write(json_data)