In [3]:
import pandas as pd
import numpy as np

from functools import reduce
from collections import Counter

In [4]:
# Load the whole puzzle input into memory as one string; it is split into
# individual numerals further down.
with open('../../inputs/0089_roman.txt') as roman_file:
    roman_string = roman_file.read()

This problem is relatively easy. We just have to parse the strings carefully. My method here will basically just be to convert the Roman Numeral string to a number, then convert the number back to a Minimal Roman Numeral string, and finally subtract the lengths to get the total improvement.

The key thing is to handle subtractive pairs correctly. In the Roman to Number function I peek ahead one extra character: if the current symbol is worth less than the next one, the two form a subtractive pair, so I add the value of the pair and skip past both characters. In the Number to Roman function I just go through the denominations from largest to smallest (a grand case statement, in effect), since it's generally most efficient to use as large a denomination as possible at each step.

And this approach worked quickly.

In [5]:
# Lookup table mapping a Roman symbol (or subtractive pair) to its value.
# Pairs are stored under their two-character key so a two-character slice
# of a numeral can be looked up directly.
roman_nums = {
    # single symbols
    'I': 1,
    'V': 5,
    'X': 10,
    'L': 50,
    'C': 100,
    'D': 500,
    'M': 1000,
    # subtractive pairs
    'IV': 4,
    'IX': 9,
    'XL': 40,
    'XC': 90,
    'CD': 400,
    'CM': 900,
}

# One entry per numeral. split() with no separator splits on any run of
# whitespace, so a trailing newline does not produce an empty final entry
# and stray spaces around a numeral are dropped.
romans = roman_string.split()

In [6]:
def roman_to_num(roman_str):
    """Convert a Roman numeral string to its integer value.

    Scans left to right. When a symbol is worth less than the one after it,
    the two are read together as a subtractive pair and both are consumed;
    otherwise the symbol is added on its own. Values come from the
    module-level ``roman_nums`` table.

    Parameters
    ----------
    roman_str : str
        Numeral to parse. An empty string gives 0.

    Returns
    -------
    int
        The value of the numeral.

    Raises
    ------
    KeyError
        If a symbol, or a smaller-before-larger pair, is not a key of
        ``roman_nums``.
    """
    total = 0
    pos = 0
    last = len(roman_str) - 1

    while pos <= last:
        current = roman_str[pos]

        # The final symbol has no successor to pair with: add it and stop.
        if pos == last:
            total += roman_nums[current]
            break

        following = roman_str[pos + 1]
        if roman_nums[current] < roman_nums[following]:
            # Subtractive pair: look up the pair itself and step over both.
            total += roman_nums[current + following]
            pos += 2
        else:
            total += roman_nums[current]
            pos += 1

    return total

In [7]:
def num_to_roman(num):
    """Return the minimal Roman numeral for a non-negative integer.

    Works greedily, one "case" per denomination from largest to smallest,
    with the six subtractive pairs (CM, CD, XC, XL, IX, IV) treated as
    denominations of their own. The conversion is iterative, so very large
    values (which simply produce a long run of leading 'M's) do not run
    into the interpreter's recursion limit.

    Parameters
    ----------
    num : int
        Value to convert. 0 yields the empty string.

    Returns
    -------
    str
        The numeral, using as few characters as possible.

    Raises
    ------
    ValueError
        If ``num`` is negative.
    """
    if num < 0:
        raise ValueError(f'{num}: number was negative, failed')

    # Largest denomination first; order matters for the greedy step below.
    denominations = (
        (1000, 'M'), (900, 'CM'), (500, 'D'), (400, 'CD'),
        (100, 'C'), (90, 'XC'), (50, 'L'), (40, 'XL'),
        (10, 'X'), (9, 'IX'), (5, 'V'), (4, 'IV'),
        (1, 'I'),
    )

    pieces = []
    for value, symbol in denominations:
        # Take as many of this denomination as fit, carry the rest forward.
        count, num = divmod(num, value)
        pieces.append(symbol * count)

    return ''.join(pieces)
        

In [8]:
# Concatenate every numeral as given, and every numeral rewritten in minimal
# form. str.join builds each string in one pass instead of growing it with
# += once per numeral.
old_romans = ''.join(romans)
new_romans = ''.join(num_to_roman(roman_to_num(roman)) for roman in romans)

# Characters saved across the whole input
print(len(old_romans) - len(new_romans))

743
