# Day 9: Disk Fragmenter

## Import libraries

In [2]:
import copy

## Import data

In [4]:
# *** [IMPORT DATA] ***
# NOTE: In the given puzzle input:
# - A single string line that represents a disk map.
# - The disk map uses a dense format to represent the layout of files and free space on the disk.
# - The digits alternate between indicating the *length of a file* and the *length of free space*.
# - E.g. A disk map = '12345' represents: 1-block file; 2 blocks of free space; 3-block file; 4-blocks of free space; 5-block file.
# =====================================================================================================================
# ! Open the file in read mode inside a context manager so the handle is closed as soon as the data
#   has been read (the previous bare `open()` left the file handle open for the rest of the session).
with open("../data/24_day-9_input-test.txt", "r") as file:
    # Read all the data in the file and strip surrounding whitespace (e.g. the trailing newline)
    file_data = file.read().strip()

print(file_data)
# ====================================================================================================================

2333133121414131402


## Helper functions

In [None]:
def rearrange_disk_map(s):
    """Compact a disk map block-by-block (Part 1).

    Walks two cursors towards each other: the low cursor looks for free
    space ('.') and the high cursor looks for the rightmost file block.
    Whenever both are found, the two entries are swapped.

    Args:
        s: Mutable sequence (list) of block tokens, where '.' marks a free
            block and any other value is a file ID. It is modified in place.

    Returns:
        The same sequence object `s`, after rearrangement.
    """
    lo, hi = 0, len(s) - 1

    while lo < hi:
        # Nothing to fill at this position: just advance the low cursor
        if s[lo] != '.':
            lo += 1
            continue

        # Skip trailing free space until the high cursor sits on a file block
        while hi > lo and s[hi] == '.':
            hi -= 1

        # Fill the free slot with the rightmost file block
        if hi > lo:
            s[lo], s[hi] = s[hi], s[lo]
            hi -= 1

        lo += 1

    return s

In [None]:
def num_dots_before_digit(s, start_idx):
    """Count the '.' characters between `start_idx` and the next digit that follows a dot.

    Args:
        s (str): Disk-map string made of digits and '.' characters.
        start_idx (int): Index at which the search (and the count) begins.

    Returns:
        int: Number of '.' characters in s[start_idx:digit_pos], where
        digit_pos is the first digit located after the first '.' found at or
        after `start_idx`. Returns 0 when there is no such '.' or when no
        digit follows it.
    """
    first_dot = s.find('.', start_idx)
    if first_dot < 0:
        # No free space at or after the start position
        return 0

    # Locate the first digit that appears after that dot (None if the string ends first)
    digit_pos = next(
        (pos for pos in range(first_dot + 1, len(s)) if s[pos].isdigit()),
        None,
    )
    if digit_pos is None:
        return 0

    return s.count('.', start_idx, digit_pos)

# Demo: count the free-space dots from each start index up to the next file block
s = '00...111...2...333.44.5555.6666.777.888899'

# Start indexes 0, 5 and 15 print 3, 3 and 1 (one value per line)
print(*(num_dots_before_digit(s, start_idx) for start_idx in (0, 5, 15)), sep='\n')

01234567891011121314151617181920212223242526272829303132333435363738394041
00...111...2...333.44.5555.6666.777.888899
2
3
8
3
18
1


In [None]:
def get_file_block(s, num):
    """Return the last contiguous run of `num` found in the disk-map string `s`.

    The textual form of `num` is located at its last occurrence and the match
    is then extended to the left for as long as the same token repeats
    back-to-back.

    Args:
        s (str): Disk-map string (digits and '.' characters).
        num: File ID to look for; it is converted with `str(num)`.

    Returns:
        str: The run of repeated tokens (e.g. '5555'), or '' when `num` does
        not occur in `s`.
    """
    token = str(num)
    width = len(token)

    # An empty token can never form a file block
    if width == 0:
        return ''

    last_occurrence_index = s.rfind(token)
    if last_occurrence_index == -1:
        return ''

    # Walk left one whole token at a time; comparing a full-width slice (rather than a
    # single character) keeps multi-digit IDs such as 10 intact.
    start_index = last_occurrence_index
    while start_index >= width and s[start_index - width:start_index] == token:
        start_index -= width

    # The run ends after the last matched token, not after its first character
    return s[start_index:last_occurrence_index + width]

# # Test the function
# s = '00...111...2...333.44.5555.6666.777.888899'
# print(get_file_block(s, 5))  # Output: '5555'
# print(get_file_block(s, 8))  # Output: '8888'
# print(get_file_block(s, 9))  # Output: '99'

5555
8888
99


In [None]:
def insert_string(A, B):
    """
    Inserts string B into string A, replacing the first run of dots ('.') with B.
    If B is shorter than that run, the remaining dots of the run are preserved.
    If B is longer than the run, the whole run is replaced by B (the result grows).

    Args:
        A (str): The original string containing a free-space run of dots (e.g. '1...3').
        B (str): The string to be inserted.

    Returns:
        str: The resulting string with B inserted into A. A is returned
        unchanged when it contains no dots at all.
    """
    run_start = A.find('.') # DOTS = FREE SPACE BLOCK
    if run_start == -1:
        # No free space to fill
        return A

    # Find where this particular run of dots ends, so that only its length is used for
    # padding and everything after it is kept untouched
    run_end = run_start
    while run_end < len(A) and A[run_end] == '.':
        run_end += 1

    leftover_dots = max(0, (run_end - run_start) - len(B))
    return A[:run_start] + B + '.' * leftover_dots + A[run_end:]

# Example usage: a two-dot gap is filled completely by a two-character file block
A, B = '1..3', '99'
C = insert_string(A, B)
print(C)  # Output: '1993'

1993


In [None]:
def rearrange_disk_map_p2(s):
    """Compact a disk map by moving WHOLE files (Part 2).

    Files are visited once each, from the rightmost (highest ID) to the
    leftmost. A file is moved into the leftmost span of free space that lies
    to its left and is large enough to hold it; if no such span exists the
    file stays where it is.

    Args:
        s: Mutable sequence (list) of block tokens, where '.' marks a free
            block and any other value is a file ID. Each file is expected to
            occupy one contiguous run of identical tokens. Modified in place.

    Returns:
        The same sequence object `s`, after rearrangement.
    """
    files = []  # (start, size) of every file run, in left-to-right order
    gaps = []   # [start, size] of every free-space run, in left-to-right order

    # Split the map into runs of identical tokens
    idx, total = 0, len(s)
    while idx < total:
        run_end = idx
        while run_end < total and s[run_end] == s[idx]:
            run_end += 1

        if s[idx] == '.':
            gaps.append([idx, run_end - idx])
        else:
            files.append((idx, run_end - idx))

        idx = run_end

    # Rightmost file first (= decreasing file ID in an untouched disk map)
    for file_start, size in reversed(files):
        for gap in gaps:
            gap_start, gap_size = gap

            # Only free space to the LEFT of the file is eligible
            if gap_start >= file_start:
                break

            if gap_size >= size:
                # Copy the file into the gap, then free its old location
                s[gap_start:gap_start + size] = s[file_start:file_start + size]
                s[file_start:file_start + size] = ['.'] * size

                # Shrink the gap in place. The space freed on the right is not tracked:
                # every remaining file sits further left and can never use it.
                gap[0] += size
                gap[1] -= size
                break

    # Return the rearranged disk map
    return s

## Part 1

In [None]:
# *** [PART 1] ***
# ! PROBLEM: Move file blocks *one at a time* from the END of the disk into the LEFTMOST free block, until no gaps remain between file blocks.
# - Every file gets an ID equal to its order of appearance in the disk map, starting at 0.
# - E.g. the disk map '12345' holds 3 files: 1 block (ID 0); 3 blocks (ID 1); 5 blocks (ID 2).
# - Expanded block-by-block (before any rearrangement) it reads '0..111....22222', where '.' = free space.
# - The filesystem checksum is the sum over all file blocks of (block position * file ID).
# - E.g. '0099811188827773336446555566...' => (0 * 0) + (1 * 0) + (2 * 9) + (3 * 9) + (4 * 8) + ...
# ====================================================================================================================
# Work on a copy of the puzzle input so the original stays available for re-running Part 1/2
disk_map = copy.deepcopy(file_data)
arrTransformedDiskMap = []
blockCounter = 0

# Expand the dense disk map into one list entry per block.
# NOTE: A list (not a string) is used so that multi-digit file IDs (e.g. '10') stay a single entry.
for i, digit in enumerate(disk_map):
    run_length = int(digit)
    if i % 2:
        # Odd index: a run of free space
        arrTransformedDiskMap.extend(['.'] * run_length)
    else:
        # Even index: a file, written as `run_length` copies of its ID
        arrTransformedDiskMap.extend([str(blockCounter)] * run_length)
        blockCounter += 1

# Re-arrange the expanded disk map: fill the leftmost free blocks with blocks taken from the end
rearrangedDiskMap = rearrange_disk_map(arrTransformedDiskMap)
# print(rearrangedDiskMap)

# Checksum: position * file ID for every block that is not free space
checkSum = sum(
    position * int(block)
    for position, block in enumerate(rearrangedDiskMap)
    if block != '.'
)

print("Filesystem checksum (Part 1):", checkSum)
# ====================================================================================================================

## Part 2

In [None]:
# *** [PART 2] ***
# ! PROBLEM: Moving individual blocks fragments the file system, so compact the disk by moving WHOLE files instead.
# - Attempt to move each *whole file* to the *leftmost span* of free space blocks that could fit the file.
# - Attempt to move each file *exactly once*, in order of *decreasing file ID*, starting with the highest ID.
# - If no span of free space to the left of a file is large enough to fit it, the file does not move.
#====================================================================================================================
# Work on a copy of the puzzle input so the original stays available for re-running Part 1/2
disk_map = copy.deepcopy(file_data)
arrTransformedDiskMap_p2 = []
blockCounter_p2 = 0

# Expand the dense disk map into one list entry per block.
# NOTE: A list (not a string) is used so that multi-digit file IDs (e.g. '10') stay a single entry.
for i, digit in enumerate(disk_map):
    run_length = int(digit)
    if i % 2:
        # Odd index: a run of free space
        arrTransformedDiskMap_p2.extend(['.'] * run_length)
    else:
        # Even index: a file, written as `run_length` copies of its ID
        arrTransformedDiskMap_p2.extend([str(blockCounter_p2)] * run_length)
        blockCounter_p2 += 1

# Re-arrange the expanded disk map with the Part 2 (whole-file) routine
rearrangedDiskMap_p2 = rearrange_disk_map_p2(arrTransformedDiskMap_p2)
# print(rearrangedDiskMap_p2)

# Checksum: position * file ID for every block that is not free space
checkSum_p2 = sum(
    position * int(block)
    for position, block in enumerate(rearrangedDiskMap_p2)
    if block != '.'
)

print("Filesystem checksum (Part 2):", checkSum_p2)



# # Test the function
# input_string = '00...111...2...333.44.5555.6666.777.888899'
# expected_output = '00992111777.44.333....5555.6666.....8888..'

# output = swap_dot_with_digits(input_string)
# print("Output:", output)
# print("Matches expected output:", output == expected_output)

