# Imports

In [1]:
import math
import sys
import numpy as np
from collections import Counter, defaultdict
from itertools import combinations, permutations
import time
import os
from pathlib import Path
import pandas as pd


# Parse Input 

In [202]:
def parse_input(raw_input):
    """Read the guard log at ``raw_input`` and return the per-guard sleep summary.

    Each non-blank line is handed to ``parse_line``; every "falls asleep"
    record opens a nap and every "wakes up" record closes it. The resulting
    nap table (columns ``guard``, ``asleep``, ``awake``, ``date``) is passed
    to ``input_df_parsing`` and its result is returned.

    Assumptions: records sort chronologically as plain strings, and every
    nap is closed by a wake-up before the next one starts.

    Raises:
        ValueError: if the file contains no records.
    """
    with open(raw_input) as f:
        # Blank lines (e.g. a trailing newline) carry no record and would
        # break the fixed-column parsing in parse_line.
        records = [line for line in f.read().splitlines() if line.strip()]
    if not records:
        raise ValueError(f"{raw_input} contains no records")
    # Test input is sorted, actual input is not.
    records.sort()

    guards = []
    asleep = []
    awake = []
    date = []

    # Placeholder guard id until the first "begins shift" record is seen.
    curr_guard = 0
    for line in records:
        curr_guard, curr_status, curr_time, curr_date = parse_line(line, curr_guard)

        if curr_status == "asleep":
            guards.append(curr_guard)
            asleep.append(curr_time)
            date.append(curr_date)
        elif line[19] == "w":
            # parse_line reports both "wakes up" and "begins shift" as
            # "awake"; only a real wake-up closes the open nap. Checking the
            # record text avoids guessing from times or guard ids.
            awake.append(curr_time)

    naps = pd.DataFrame({"guard": guards, "asleep": asleep, "awake": awake, "date": date})
    return input_df_parsing(naps)

# Helper Functions
def parse_line(line, curr_guard):
    """Split one log record into ``(guard, status, time, date)``.

    ``date`` is characters 1-10 of the line, the hour and minute are read
    from columns 12-13 and 15-16, and the event is identified by the
    character at column 19. ``time`` is the minute when the hour is 0 and
    ``minute - 60`` otherwise, so records before the hour-0 window come out
    negative. ``status`` is "asleep" for an event starting with "f"; every
    other event is "awake". When the event starts with "G", the guard id is
    taken from the second word of the event with its first character
    dropped; otherwise ``curr_guard`` is passed through unchanged.
    """
    date = line[1:11]
    hour, minute = int(line[12:14]), int(line[15:17])
    if hour == 0:
        minute_offset = minute
    else:
        minute_offset = minute - 60

    event_code = line[19]
    if event_code == "f":
        return curr_guard, "asleep", minute_offset, date

    if event_code == "G":
        # Event looks like "Guard #<id> ..."; strip the leading "#".
        id_token = line[19:].split(' ')[1]
        curr_guard = int(id_token[1:])
    return curr_guard, "awake", minute_offset, date

In [203]:
def input_df_parsing(df):
    """Aggregate individual naps into one summary row per guard.

    Args:
        df: DataFrame with one row per nap and integer columns ``guard``,
            ``asleep`` (first minute asleep) and ``awake`` (first minute
            awake again). It is read only; no columns are added to it.

    Returns:
        A new DataFrame sorted by guard id with columns:
        ``guard``; ``mins_asleep`` (sum of ``awake - asleep``);
        ``asleep_mins`` (every minute spent asleep, nap after nap, as one
        list); ``sleepiest_min`` (the most frequent minute in that list) and
        ``num_times`` (how often that minute occurs). An empty input yields
        an empty frame with the same columns.
    """
    total_minutes = defaultdict(int)
    minutes_by_guard = defaultdict(list)
    # Single pass over the naps: extending a per-guard list is linear,
    # unlike concatenating lists through a groupby "sum".
    for guard, start, stop in zip(df["guard"], df["asleep"], df["awake"]):
        total_minutes[guard] += stop - start
        minutes_by_guard[guard].extend(range(start, stop))

    guard_ids = sorted(minutes_by_guard)
    # most_common(1)[0] is a (minute, count) pair; ties keep the minute that
    # was seen first.
    busiest = [Counter(minutes_by_guard[g]).most_common(1)[0] for g in guard_ids]

    return pd.DataFrame({
        "guard": guard_ids,
        "mins_asleep": [total_minutes[g] for g in guard_ids],
        "asleep_mins": [minutes_by_guard[g] for g in guard_ids],
        "sleepiest_min": [minute for minute, _ in busiest],
        "num_times": [count for _, count in busiest],
    })

In [206]:
def get_sleepiest_guard(df):
    """Return ``(guard, mins_asleep)`` for the row with the largest ``mins_asleep``.

    Args:
        df: DataFrame with ``guard`` and ``mins_asleep`` columns.

    Returns:
        A 2-tuple of the guard id and that guard's ``mins_asleep`` value.
    """
    # Both values must come from the same sorted row; the second one used to
    # be read from an unrelated global frame.
    top_row = df.sort_values(by="mins_asleep", ascending=False).iloc[0]
    return top_row.guard, top_row.mins_asleep

In [167]:
def get_most_common_min(df, sleepiest_guard=None):
    """Return ``(minute, count)`` for the minute a guard is asleep most often.

    Args:
        df: DataFrame with ``guard`` and ``asleep_mins`` columns, where
            ``asleep_mins`` holds a list of minutes per row. The
            ``mins_asleep`` column is also needed when ``sleepiest_guard``
            is not given.
        sleepiest_guard: guard id to inspect. When omitted, the guard of the
            row with the largest ``mins_asleep`` is used, so the function no
            longer depends on a global variable being defined.

    Returns:
        The ``(minute, count)`` pair from ``Counter.most_common``.

    Raises:
        ValueError: if the selected guard has no recorded minutes asleep.
    """
    if sleepiest_guard is None:
        sleepiest_guard = df.sort_values(by="mins_asleep", ascending=False).iloc[0].guard

    guard_rows = df[df.guard == sleepiest_guard]
    # explode() turns an empty list into NaN; drop those so they are not
    # counted as a minute.
    minute_counts = Counter(guard_rows.asleep_mins.explode().dropna())
    if not minute_counts:
        raise ValueError(f"guard {sleepiest_guard} has no recorded minutes asleep")
    return minute_counts.most_common(1)[0]

In [219]:
def part_one(df):
    """Return the sleepiest guard's id multiplied by that guard's ``sleepiest_min``.

    The guard is chosen by ``get_sleepiest_guard``; the minute is read from
    the first row of ``df`` belonging to that guard.
    """
    guard_id, _ = get_sleepiest_guard(df)
    guard_rows = df[df.guard == guard_id]
    first_row = guard_rows.iloc[0]
    return guard_id * first_row.sleepiest_min

In [226]:
def part_two(df):
    """Return ``guard * sleepiest_min`` for the row with the highest ``num_times``.

    Rows are ranked by ``num_times`` in descending order and the top row
    supplies both factors.
    """
    ranked = df.sort_values(by="num_times", ascending=False)
    top_row = ranked.iloc[0]
    return top_row.guard * top_row.sleepiest_min

# Do the thing

In [231]:
# Current working directory at the time the cell runs.
path = os.getcwd()

# Assumption: both the test input and the final input are saved under this
# file name in the working directory.
input_file = Path(path).joinpath("input.txt")

In [232]:
# Read input_file and build the per-guard summary frame that both parts consume.
df = parse_input(input_file)

In [233]:
# Part one: id of the guard with the most minutes asleep, times that guard's sleepiest minute.
part_one(df)

38813

In [234]:
# Part two: guard id times sleepiest minute for the row with the highest num_times.
part_two(df)

141071