In [1]:
%load_ext autoreload
%autoreload 2
import io_backend as io

IO Aware #2
=============


### Instructions / Notes:

**_Read these carefully_**

* **Be sure to use the latest `io_backend.py` file released with this activity**
* **RECOMMENDED: ONLY USE ONE D3 DISPLAY WINDOW AT A TIME; CLEAR THE OUTPUT ("Cell >> Current Output >> Clear") of all ones not being used!**
* Note that support for the visualization tools herein is somewhat more limited; however, we still welcome your feedback on bugs, suggestions for improvement, etc.!
* **See Piazza for submission instructions**
* _Have fun!_

### Bonus 2

**_[15 points]_**

Modify the implementation of external merge (for two files) below to implement the _double buffering_ optimization, assuming that you can read in two pages for the same IO cost as one. Feel free to use the _buffer visualizer_ to see what your algorithm is doing!

In [None]:
def external_merge(b, fid1, fid2):
    """External 2-way merge (see External_Merge_Sort.ipynb for B-way).

    Streams the two input files through the buffer one page per file at a
    time, repeatedly moving the smaller of the two front values to a new
    output file. The output is in sorted order provided both inputs are
    already sorted. On ties the value from the first file is emitted first.

    Args:
        b: The buffer to work through. This function uses its `page_size`
            attribute and its `new_file`, `get_file`, `read` and `release`
            methods.
        fid1: Id of the first input file.
        fid2: Id of the second input file.

    Returns:
        The id of the newly created file holding the merged values.
    """
    # Page size: not needed by this single-buffered version, but left in
    # place for the double-buffering modification the exercise asks for.
    P = b.page_size

    # A FileWriter object will append elements to a file, creating
    # & flushing pages so as to take up one page in buffer
    fid_out = b.new_file()
    fw = io.FileWriter(b, fid_out)

    # Iterate over the page-id lists instead of pop(0)-ing them: this leaves
    # the lists returned by the buffer untouched and avoids O(n) pops.
    pid_iter1 = iter(b.get_file(fid1))
    pid_iter2 = iter(b.get_file(fid2))

    def read_next_page(fid, pid_iter):
        """Read the next unread page of `fid` into the buffer; None if none remain."""
        for pid in pid_iter:
            return b.read(fid, pid)
        return None

    # Read one page from each file on disk into the buffer. A file with no
    # pages simply starts out exhausted (None) rather than raising.
    page1 = read_next_page(fid1, pid_iter1)
    page2 = read_next_page(fid2, pid_iter2)

    # Run the merge over the pages in the two files. `None` is the only
    # marker for an exhausted input, so test for it explicitly instead of
    # relying on the truthiness of page objects.
    while page1 is not None or page2 is not None:

        # Page 1 is drained: release it & try to load the next page of file 1
        if page1 is not None and page1.is_empty():
            b.release(page1)
            page1 = read_next_page(fid1, pid_iter1)

        # Page 2 is drained: release it & try to load the next page of file 2
        elif page2 is not None and page2.is_empty():
            b.release(page2)
            page2 = read_next_page(fid2, pid_iter2)

        # File 2 is exhausted, or file 1's front value is the smaller one
        # (ties go to file 1): emit from page 1
        elif page2 is None or (page1 is not None and page1.peek() <= page2.peek()):
            fw.append(page1.pop())

        # Otherwise file 1 is exhausted or page 2 holds the smaller value
        else:
            fw.append(page2.pop())

    fw.close()
    return fid_out

## Visualization & execution

Note that the task for this activity is to modify the **_general_** algorithm above; however, feel free to test / visualize it with concrete numbers below!

In [None]:
# Demo configuration: 2 values per page, a 5-page buffer, and sequential IO
# configured to cost nothing.
PAGE_SIZE = 2
BUFFER_SIZE = 5
SEQUENTIAL_COST = 0

b = io.Buffer(
    page_size=PAGE_SIZE,
    buffer_size=BUFFER_SIZE,
    sequential_cost=SEQUENTIAL_COST,
)

# Two sorted files of random integer values, created one after the other
# with identical arguments.
fid1, fid2 = (io.new_rand_file(b, 10, 8, sorted=True) for _ in range(2))

# Animations start from this mark, so the setup above is skipped
b.display_set_mark()

# Run the two-way merge over the files just created
merged_fid = external_merge(b, fid1, fid2)

In [None]:
# Show the buffer animation, starting from the mark set with
# b.display_set_mark(). NOTE(review): the units of `speed` are not visible
# here -- confirm against io_backend.py.
b.display(speed=500)