In [1]:
with open('input.txt') as file:
    data = [x.strip('\n') for x in file.readlines()]

In [2]:
data[0]

'[1518-11-22 00:00] Guard #1231 begins shift'

In [3]:
import pandas as pd

In [4]:
timestamp = [x.split(']')[0].strip('[') for x in data]

In [5]:
import re

In [6]:
def find_guard(string):
    match = re.search(string=string, pattern='Guard #\d+')
    if match is not None:
        return match.group(0)
    return None

In [7]:
guard = [find_guard(x.split(']')[1]) for x in data]

In [8]:
def find_status(string):
    s = string.split(']')[1]
    if 'begins shift' in s:
        return 'begins shift'
    if 'falls asleep' in s:
        return 'falls asleep'
    if 'wakes up' in s:
        return 'wakes up'
    return None

In [9]:
status = [find_status(x) for x in data]

In [91]:
df = pd.DataFrame({'time': timestamp, 'guard': guard, 'status': status})

In [92]:
df = df.sort_values('time', ascending=True).reset_index(drop=True)

In [93]:
df['guard'] = df.guard.fillna(method='ffill')

In [94]:
df['hour'] = df['time'].apply(lambda x: x[-5:-3])
df['minute'] = df['time'].apply(lambda x: x[-2:])

In [95]:
df['prev_minute'] = df.minute.shift()

In [96]:
df['time_asleep'] = df.minute.astype(float) - df.prev_minute.astype(float)
df['time_asleep'][df.status != 'wakes up'] = 0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [109]:
most_asleep_guard_df = df.groupby('guard').agg({'time_asleep': sum})
most_asleep_guard = most_asleep_guard_df.sort_values('time_asleep', ascending=False).iloc[0].name
most_asleep_guard_id = int(most_asleep_guard.split('#')[1])
most_asleep_guard, most_asleep_guard_id

('Guard #3371', 3371)

In [100]:
def list_minutes_slept(row):
    if row['status'] != 'wakes up':
        return []
    start, stop = int(row['prev_minute']), int(row['minute'])
    return list(range(start, stop))

In [101]:
df['minutes_slept'] = df.apply(list_minutes_slept, axis=1)

In [123]:
from collections import Counter

In [124]:
def count_sleep_minutes(guard):
    asleep_minutes = [
        item 
        for sublist in 
        df[(df.guard == guard) & (df.status == 'wakes up')]['minutes_slept'].tolist()
        for item in sublist]
    return Counter(asleep_minutes)

In [128]:
most_asleep_minute = count_sleep_minutes(most_asleep_guard).most_common()[0]
most_asleep_minute

(39, 14)

In [130]:
print('Part One:', most_asleep_guard_id * most_asleep_minute[0])

Part One: 131469


In [151]:
count_all_guards = [
    {guard: count_sleep_minutes(guard)} 
    for guard in df.guard.unique()
]

In [160]:
count_all_guards_most_sleepy = [minute.most_common() for counters in count_all_guards for minute in counters.values()]
count_all_guards_most_sleepy

[[(48, 12),
  (49, 12),
  (51, 12),
  (20, 11),
  (21, 11),
  (22, 11),
  (23, 11),
  (50, 11),
  (16, 10),
  (17, 10),
  (18, 10),
  (19, 10),
  (24, 10),
  (25, 10),
  (26, 10),
  (27, 10),
  (43, 10),
  (44, 10),
  (45, 10),
  (46, 10),
  (47, 10),
  (52, 10),
  (14, 9),
  (15, 9),
  (40, 9),
  (42, 9),
  (53, 8),
  (28, 8),
  (29, 8),
  (36, 8),
  (37, 8),
  (41, 8),
  (12, 7),
  (13, 7),
  (30, 7),
  (31, 7),
  (35, 7),
  (38, 7),
  (39, 7),
  (7, 6),
  (8, 6),
  (9, 6),
  (10, 6),
  (11, 6),
  (54, 6),
  (32, 6),
  (33, 6),
  (34, 6),
  (5, 5),
  (6, 5),
  (55, 4),
  (56, 4),
  (3, 4),
  (4, 4),
  (57, 3),
  (2, 2),
  (58, 1),
  (0, 1),
  (1, 1)],
 [(51, 19),
  (50, 16),
  (52, 15),
  (28, 14),
  (29, 14),
  (30, 14),
  (31, 14),
  (32, 14),
  (27, 13),
  (33, 13),
  (49, 13),
  (26, 12),
  (34, 11),
  (35, 11),
  (36, 11),
  (37, 11),
  (48, 11),
  (53, 11),
  (22, 10),
  (23, 10),
  (24, 10),
  (25, 10),
  (55, 10),
  (38, 9),
  (39, 9),
  (40, 9),
  (47, 9),
  (54, 9),
  (41, 