In [242]:
import os
import csv
import re
from collections import defaultdict
import copy 

In [237]:
f = open('litclock_annotated.csv')
reader = csv.DictReader(f, delimiter='|')
quotes = list(reader)

In [158]:
buckets = defaultdict(list)
for row in quotes:
    buckets[row['time']].append(row)
print(f"Number of time buckets: {len(buckets.keys())}")
print(f"Number of minutes in a day: {60 * 24}")
print(f"diff: {len(buckets.keys()) - (60*24)}")

Number of time buckets: 895
Number of minutes in a day: 1440
diff: -545


In [159]:
[bucket["quote"][:60] for bucket in buckets["00:05"]]

['E.M. Security, normally so scrupulous with their fucking tru']

In [216]:
def does_quote_have_day_or_night_hints(quote):
    quote_string = quote["quote"].lower()
    time_string = quote["time_string"].lower()
    return any([
        re.search("(midnight|noon)", quote_string),
        re.search("(morn|nigh)", quote_string),
        re.search("(bed|sleep|wake|woke|rise|dawn|get up|dusk|sun)", quote_string),
        re.search("(am|pm|a.m.|p.m.)", quote_string),
        re.search("(0000|^0|^1[3-9]|^2[0-3])", time_string),
        re.search("(^.* 2[0-3])", time_string),

    ])

In [219]:
assert does_quote_have_day_or_night_hints({"quote":"The clock struck midnight", "time_string":"midnight"})
assert does_quote_have_day_or_night_hints({"quote":"It was morning, 06:00", "time_string": "06:00"})
assert does_quote_have_day_or_night_hints({"quote":"Let me go to bed, it is 8:45", "time_string":"8:45"})
assert does_quote_have_day_or_night_hints({"quote":"clock says 23:59", "time_string":"23:59"})
assert does_quote_have_day_or_night_hints({"quote":"well after 2245h", "time_string":"well after 2245h"})
assert not does_quote_have_day_or_night_hints({"quote":"only 12:45", "time_string":"12:45"})

In [226]:
def is_quote_duplicated_already(quote, buckets):
    time = quote["time"]
    newtime = get_other_time(time)
    newquote = copy.copy(quote)
    newquote["time"] = newtime
    return newquote in buckets.get(newtime, [])

def get_other_time(time):
    hour = int(time.split(":")[0])
    minute = int(time.split(":")[1])
    newhour = (hour + 12) % 24
    newtime = f"{newhour:02d}:{minute:02d}"
    return newtime

In [228]:
non_dupe_quote1 = {"time":"01:42", "quote":"non_dup_quote1"}
dupe_quote_1 = {"time":"03:00", "quote":"dup_quote1"}
dupe_quote_2 = {"time":"15:00", "quote":"dup_quote1"}
test_buckets = {
    "01:42": [non_dupe_quote1],
    "03:00": [dupe_quote_1],
    "15:00": [dupe_quote_2],
}
assert is_quote_duplicated_already(non_dupe_quote1, test_buckets) is False
assert is_quote_duplicated_already(dupe_quote_1, test_buckets) is True
assert is_quote_duplicated_already(dupe_quote_2, test_buckets) is True

assert get_other_time("00:01") == "12:01"
assert get_other_time("14:42") == "02:42"

In [232]:
newquotes = copy.copy(quotes)
for quote in quotes:
    if is_quote_duplicated_already(quote, buckets):
        continue
    elif does_quote_have_day_or_night_hints(quote):
        continue
    else:
        print(f"Duplicating: {quote}")
        newquote = copy.copy(quote)
        newquote["time"] = get_other_time(newquote["time"])
        newquotes.append(newquote)
        
len(newquotes)

Duplicating: OrderedDict([('time', '00:00'), ('time_string', 'twelve'), ('quote', "'Myself!' she gasped, 'and the clock is striking twelve! It's true, then! that's dreadful!'"), ('book', 'Wuthering Heights'), ('author', '')])
Duplicating: OrderedDict([('time', '00:00'), ('time_string', 'twelve'), ('quote', 'The clock striketh twelve O it strikes, it strikes! Now body, turn to air, Or Lucifer will bear thee quick to hell.'), ('book', 'Dr Faustus'), ('author', '')])
Duplicating: OrderedDict([('time', '00:15'), ('time_string', 'twelve-fifteen'), ('quote', 'At twelve-fifteen he got out of the van. He tucked the pistol under the waistband of his trousers and crossed the silent, deserted street to the Hudston house.'), ('book', 'Watchers'), ('author', 'Dean Koontz')])
Duplicating: OrderedDict([('time', '00:17'), ('time_string', 'seventeen minutes after twelve'), ('quote', 'Kava ordered two glasses of coffee for himself and his beloved and some cake. When the pair left, exactly seventeen minu

1885

In [233]:
newbuckets = {q["time"] for q in newquotes}
print(f"new diff: {len(newbuckets) - (60*24)}")

new diff: -441


In [243]:
with open('litclock_expanded_meridiem.csv', "w") as newcsv:
    newcsv_writer = csv.DictWriter(newcsv, delimiter="|", fieldnames=reader.fieldnames)
    newcsv_writer.writeheader()
    for q in newquotes:
        newcsv_writer.writerow(q)
os.system("sort litclock_expanded_meridiem.csv > 1.csv && mv 1.csv litclock_expanded_meridiem.csv")

0

## Analysis

What is the biggest gap in time? With 1440 unique minutes, and 999 available minutes to fill those slots, there is going to be period of time where there are gaps. What is the biggest gap? How "off" could this watch be at any given time?

In [246]:
def human_time_to_minute_number(time):
    hour = int(time.split(":")[0])
    minute = int(time.split(":")[1])
    return hour * 60 + minute
assert human_time_to_minute_number("00:01") == 1
assert human_time_to_minute_number("12:42") == 762

newbuckets = {q["time"] for q in newquotes}
newminutes = sorted([human_time_to_minute_number(t) for t in newbuckets])

In [262]:
def get_longest_gap(numbers):
    prev = numbers[0]
    max_gap = 0
    longest_gap_index = 0
    for index, n in enumerate(numbers):
        gap = n - numbers[index-1]
        if gap > max_gap:
            max_gap = gap
            longest_gap_index = index
    print(f"The longest gap is between {numbers[longest_gap_index-1]} and {numbers[longest_gap_index]}")
    print(f"With a gap of {max_gap}")
    return longest_gap_index - 1

assert get_longest_gap([1,2,4,6,20,24]) == 3 # "6"

The longest gap is between 6 and 20
With a gap of 14


In [264]:
get_longest_gap(newminutes)

The longest gap is between 346 and 352
With a gap of 6


248

In [267]:
newbuckets = defaultdict(list)
for row in newquotes:
    newbuckets[row['time']].append(row)
print(f"Number of time buckets: {len(newbuckets.keys())}")
print(f"Number of minutes in a day: {60 * 24}")
print(f"diff: {len(newbuckets.keys()) - (60*24)}")

Number of time buckets: 999
Number of minutes in a day: 1440
diff: -441


In [268]:
print(newbuckets["05:46"])

[OrderedDict([('time', '05:46'), ('time_string', '5.46am'), ('quote', "He wrote a legal-sounding phrase to the effect that the sentence had been carried out at 5.46am, adding, 'without a snag'."), ('book', 'A Whispered Name'), ('author', 'William Brodrick')]), OrderedDict([('time', '05:46'), ('time_string', 'fourteen minutes to six'), ('quote', '...those of the carriage clock on the bookshelf to fourteen minutes to six. In other words, it was exactly eight; and Mrs. Hignett acknowledged the fact...'), ('book', 'Three Men and a Maid'), ('author', 'P.G. Wodehouse')])]


In [269]:
print(newbuckets["05:52"])

[OrderedDict([('time', '05:52'), ('time_string', '5.52am'), ('quote', 'At 5.52am paramedics from the St. Petersburg Fire Department and SunStar Medic One ambulance service responded to a medical emergency call...'), ('book', 'Silent Witness'), ('author', 'Mark Fuhrman')])]
