In [1]:
import numpy as np

# Part 1

Need to find how many of the ingredient IDs in the given input are fresh. An ingredient ID is fresh if it falls within at least one of the fresh ID ranges (range bounds are inclusive).

### Data Input and Conversion

Import raw data as 2 separate numpy arrays: one for fresh ID ranges, another for ingredient IDs.

In [2]:
# ===== FRESH ID RANGES =====

def conv_ranges(token):
    """Keep 'start-end' range tokens unchanged; blank out every other token."""
    return token if '-' in token else ''

# read the puzzle input as Python objects, routing each value through the converter
data_ranges = np.loadtxt('Input5.txt', dtype='O', converters=conv_ranges)

# drop the blanked-out entries so only the range strings remain
data_ranges = data_ranges[data_ranges != '']

In [3]:
# preview the first five raw 'start-end' range strings
data_ranges[:5]

array(['79225938624159-79772919561224', '445994870830027-449475105431654',
       '485052355932434-489723347156062',
       '252033180861215-252511696149831',
       '455027942886900-455027942886900'], dtype=object)

In [4]:
# convert fresh ID ranges to a structured ndarray with 'start' and 'end' fields

# split each 'start-end' string once and parse both bounds as 64-bit integers
parsed_bounds = [
    (np.int64(parts[0]), np.int64(parts[1]))
    for parts in (raw_range.split('-') for raw_range in data_ranges)
]

# one record per range; the named fields are what the later cells index by
id_ranges = np.array(
    parsed_bounds,
    dtype=[('start', np.int64), ('end', np.int64)]
)

In [5]:
# preview the first five parsed (start, end) records
id_ranges[:5]

array([( 79225938624159,  79772919561224),
       (445994870830027, 449475105431654),
       (485052355932434, 489723347156062),
       (252033180861215, 252511696149831),
       (455027942886900, 455027942886900)],
      dtype=[('start', '<i8'), ('end', '<i8')])

In [6]:
# ===== INGREDIENT IDS =====

def conv_ingredients(token):
    """Parse a single-ID token to int; blank out 'start-end' range tokens."""
    if '-' in token:
        return ''
    return int(token)

# second pass over the same input file, this time keeping only the ingredient IDs
data_ingredients = np.loadtxt('Input5.txt', dtype='O', converters=conv_ingredients)

# drop the blanked-out range entries, leaving the integer IDs in an object array
id_list = data_ingredients[data_ingredients != '']

In [7]:
# preview the first five ingredient IDs
id_list[:5]

array([483969530684122, 134846534475894, 324241214092481, 244191528770542,
       124507758892152], dtype=object)

### Helpers

In [8]:
def in_range(input_int: int, range_start: int, range_end: int) -> bool:
    """Report whether ``input_int`` lies between ``range_start`` and ``range_end``.

    Both bounds are inclusive, so a value equal to either end counts as inside.
    """
    at_or_after_start = range_start <= input_int
    at_or_before_end = input_int <= range_end

    # combined with `&` rather than `and`, so both comparisons are always evaluated
    return at_or_after_start & at_or_before_end

### Process ID List

In [9]:
def check_ingredients(ingredient_ids: np.array, fresh_id_ranges: np.array) -> int:
    """Count the ingredient IDs that lie inside at least one fresh ID range.

    ``fresh_id_ranges`` is indexed by its 'start' and 'end' fields; each
    ingredient contributes at most 1 to the total, however many ranges it hits.
    """
    range_starts = fresh_id_ranges['start']
    range_ends = fresh_id_ranges['end']

    # any() stops at the first range containing the ingredient, so an
    # ingredient covered by several ranges is still only counted once
    return sum(
        1
        for candidate in ingredient_ids
        if any(
            in_range(candidate, lower, upper)
            for lower, upper in zip(range_starts, range_ends)
        )
    )

In [10]:
# Part 1 answer: how many of the listed ingredient IDs fall in a fresh ID range
check_ingredients(id_list, id_ranges)

885

# Part 2

Need to count all unique fresh IDs covered by the fresh ID ranges. Ranges may overlap, so an ID that appears in several ranges must only be counted once.

In [11]:
def generate_fresh_ids(input_id_ranges: np.ndarray) -> int:
    """Count the unique fresh IDs covered by the given inclusive ID ranges.

    Overlapping ranges are merged before counting, so an ID covered by
    several ranges is only counted once. The ranges are never expanded into
    individual IDs, which keeps very wide ranges cheap to handle.

    Args:
        input_id_ranges: structured array with integer 'start' and 'end'
            fields, one record per inclusive range (start <= end is assumed).
            The array is not modified.

    Returns:
        Number of distinct IDs in the union of all ranges (0 for no ranges).
    """
    # work on plain Python ints: the running total cannot overflow int64 and
    # the caller's array is left untouched
    bounds = sorted(
        zip(
            input_id_ranges['start'].tolist(),
            input_id_ranges['end'].tolist(),
        )
    )

    # instantiate counter for unique fresh IDs
    fresh_id_counter = 0

    # bounds of the merged run currently being built (None until the first range)
    merged_start = None
    merged_end = None

    # because the ranges are sorted by start, a range can only overlap the run
    # built from the ranges before it - one pass is enough to merge everything
    for start, end in bounds:
        if merged_start is None:
            # first range opens the first run
            merged_start, merged_end = start, end
        elif start <= merged_end:
            # overlaps the current run: extend it (max() covers full containment)
            merged_end = max(merged_end, end)
        else:
            # gap before this range: the current run is complete, count and restart
            fresh_id_counter += merged_end - merged_start + 1
            merged_start, merged_end = start, end

    # count the final run, if there was any input at all
    if merged_start is not None:
        fresh_id_counter += merged_end - merged_start + 1

    return fresh_id_counter

In [12]:
# Part 2 answer: number of unique fresh IDs across all fresh ID ranges
generate_fresh_ids(id_ranges)

348115621205535