In [1]:
import re
from collections import defaultdict
from datetime import datetime, timedelta

import pandas as pd
pd.options.display.max_rows = 999

In [2]:
data = pd.DataFrame(data={
    'id': [],
    'date': [],
    'activity': []
})

In [3]:
with open('./input.txt') as f:
    for line in f.readlines():
        m = re.match(
            r'\[\d+-(?P<month>\d+)-(?P<day>\d+)\s(?P<hour>\d+):(?P<minute>\d+)\]\s(?P<remainder>.+)',
            line)
        
        date = datetime(
            2018,
            int(m.group('month')),
            int(m.group('day')),
            hour=int(m.group('hour')),
            minute=int(m.group('minute')))
        
        guard_id = ''
        
        if '#' in line:
            m = re.match(r'.+#(?P<guard_id>\d+)', line)
            guard_id = m.group('guard_id')
            activity = 1
        elif 'asleep' in line:
            activity = 2
        else:
            activity = 3

        data = data.append(
            {
                'id': guard_id,
                'date': date,
                'activity': int(activity)
            }, 
            ignore_index=True)

In [4]:
data.head()

Unnamed: 0,id,date,activity
0,,2018-11-07 00:21:00,2.0
1,,2018-05-18 00:57:00,3.0
2,,2018-09-24 00:19:00,2.0
3,,2018-04-26 00:21:00,2.0
4,,2018-08-09 00:28:00,3.0


In [5]:
data.set_index('date', inplace=True)
data.sort_index(inplace=True)

data.head(10)

Unnamed: 0_level_0,id,activity
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-02-26 23:59:00,571.0,1.0
2018-02-27 00:17:00,,2.0
2018-02-27 00:51:00,,3.0
2018-02-27 00:56:00,,2.0
2018-02-27 00:59:00,,3.0
2018-02-28 00:00:00,263.0,1.0
2018-02-28 00:17:00,,2.0
2018-02-28 00:26:00,,3.0
2018-02-28 23:49:00,2909.0,1.0
2018-03-01 00:00:00,,2.0


In [6]:
def return_prev(series):
    new = list()
    previous_val = ''
    
    for val in series:
        if val:
            previous_val = val
            
        new.append(previous_val)
        
    return new

data = data.assign(id = lambda x: return_prev(x.id))

data.head(10)

Unnamed: 0_level_0,id,activity
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-02-26 23:59:00,571,1.0
2018-02-27 00:17:00,571,2.0
2018-02-27 00:51:00,571,3.0
2018-02-27 00:56:00,571,2.0
2018-02-27 00:59:00,571,3.0
2018-02-28 00:00:00,263,1.0
2018-02-28 00:17:00,263,2.0
2018-02-28 00:26:00,263,3.0
2018-02-28 23:49:00,2909,1.0
2018-03-01 00:00:00,2909,2.0


In [7]:
ids, minute = list(), list()

current_date = None
for idx, row in data.iterrows():
    if row.activity == 2:
        current_date = idx
        
    elif row.activity == 3:
        for i in range(int((idx - current_date).seconds / 60)):
            ids.append(row.id)
            minute.append((current_date + timedelta(minutes=i)).minute)

In [8]:
final = pd.DataFrame(data={'ids': ids, 'minute': minute})
final.head(10)

Unnamed: 0,ids,minute
0,571,17
1,571,18
2,571,19
3,571,20
4,571,21
5,571,22
6,571,23
7,571,24
8,571,25
9,571,26


In [10]:
from collections import Counter

top_id, id_count = Counter(final.ids).most_common()[0]
top_minute, minute_count = Counter(final[final.ids == top_id].minute).most_common()[0]
print(f"Pt 1: {top_id} * {top_minute}: {int(top_id) * top_minute}")

Pt 1: 1823 * 41: 74743


In [20]:
sorted(final.groupby(final.columns.tolist(), as_index=False).size().to_dict().items(), key = lambda x: x[1], reverse=True)

[(('3011', 44), 18),
 (('3011', 43), 17),
 (('3011', 45), 17),
 (('3011', 42), 16),
 (('3011', 46), 16),
 (('157', 39), 15),
 (('3011', 41), 15),
 (('157', 40), 14),
 (('1823', 41), 14),
 (('2729', 45), 14),
 (('2729', 46), 14),
 (('3011', 47), 14),
 (('157', 31), 13),
 (('157', 32), 13),
 (('157', 38), 13),
 (('1823', 21), 13),
 (('1823', 22), 13),
 (('1823', 30), 13),
 (('1823', 31), 13),
 (('1823', 32), 13),
 (('1823', 33), 13),
 (('1823', 34), 13),
 (('1823', 39), 13),
 (('1823', 40), 13),
 (('1823', 42), 13),
 (('2729', 47), 13),
 (('2909', 50), 13),
 (('2909', 51), 13),
 (('2909', 52), 13),
 (('3011', 40), 13),
 (('1279', 28), 12),
 (('1279', 29), 12),
 (('1481', 44), 12),
 (('157', 30), 12),
 (('157', 33), 12),
 (('157', 35), 12),
 (('157', 36), 12),
 (('157', 37), 12),
 (('157', 41), 12),
 (('157', 42), 12),
 (('1823', 20), 12),
 (('1823', 23), 12),
 (('1823', 24), 12),
 (('1823', 25), 12),
 (('1823', 26), 12),
 (('1823', 27), 12),
 (('1823', 28), 12),
 (('1823', 29), 12),
 (('

In [21]:
3011 * 44

132484