In [1]:
from os import path
import pandas as pd
import numpy as np

In [2]:
# Root directory of the local SNIA trace checkout. Every trace path below is
# relative to it, so relocating the data set means editing this one line.
TRACE_ROOT = '/home/tiratatp/Repositories/snia_traces'

# (label, path relative to TRACE_ROOT) for each trace to process.
_TRACE_FILES = [
    ('T1' , 'T1/LiveMapsBackEnd/Combined/disk1_filtered_1hrs_sheltered.txt'),
    ('T2' , 'T2/DisplayAdsDataServer/Combined/disk0_1hrs_sheltered.txt'),
    ('T3' , 'T3/DisplayAdsPayload/Combined/disk0_1hrs_sheltered.txt'),
    ('T4' , 'T4/Exchange-Server-Traces/Combined/disk8_1hrs_sheltered.txt'),
    ('T5' , 'T5/MSNStorageCFS/Combined/disk6_filtered_1hrs_sheltered.txt'),
    ('T6' , 'T6/MSNStorageFileServer/Combined/disk5_filtered_1hrs_sheltered.txt'),
    ('T7' , 'T7/BuildServer/Combined/disk0_filtered_1hrs_sheltered.txt'),
    ('T8' , 'T8/DevelopmentToolsRelease/Combined/disk6_filtered_1hrs_sheltered.txt'),
    ('T9' , 'T9/RadiusAuthentication/Combined/disk0_filtered_1hrs_sheltered.txt'),
    ('T10', 'T10/RadiusBackEndSQLServer/Combined/disk4_filtered_1hrs_sheltered.txt'),
]

# List of (label, absolute trace file path) pairs consumed by the processing loop.
traces = [(label, path.join(TRACE_ROOT, rel_path)) for label, rel_path in _TRACE_FILES]

# Upper bound for the out-of-bounds filter: merged extents whose end is not
# strictly below this value are dropped.
# NOTE(review): presumably the last addressable block of the replay target
# device -- confirm against the device actually used.
last_block = 1953525167

In [3]:
# For every trace: load the sheltered requests, coalesce them into disjoint
# extents, drop extents beyond `last_block`, print summary statistics and
# write the surviving extents as a fio version-2 iolog next to the input file
# ("<input basename>_writeback.txt").
fio_device = "/dev/sdb1"

for trace, trace_file in traces:
    # Read only the three needed columns (0-based columns 2, 3 and 5 of the
    # space-delimited trace) into memory.
    all_sheltered_requests = pd.read_csv(
        trace_file, delimiter=' ', usecols=[2, 3, 5],
        header=None, names=['blkno', 'blkcount', 'is_shltr'],
        dtype={'blkno': np.int_, 'blkcount': np.int_, 'is_shltr': np.bool_},
        na_filter=False, engine='c')

    # Keep just the sheltered IO.
    all_sheltered_requests = all_sheltered_requests[all_sheltered_requests['is_shltr']]

    # Drop exact duplicates, then order by start block (ties by length).
    # `sort_values` replaces `DataFrame.sort`, which was removed in pandas 0.20.
    merged_requests = (all_sheltered_requests
                       .drop_duplicates(subset=['blkno', 'blkcount'])
                       .sort_values(['blkno', 'blkcount']))
    # `tail` is the exclusive end of each request.
    merged_requests = merged_requests.assign(
        tail=merged_requests['blkno'] + merged_requests['blkcount'])

    # Coalesce overlapping or back-to-back requests in a single pass.
    # Comparing each start against the running maximum of all previous tails
    # (not only the immediately preceding tail) also handles the nested case
    #   first IO  : ===========
    #   second IO :    ===
    #   third IO  :          ==
    # where the third request overlaps the first but not the second.
    furthest_tail_so_far = merged_requests['tail'].cummax().shift(1)
    # The first row compares against NaN, which is False, so it always opens
    # a new extent.
    is_ovrlp = furthest_tail_so_far >= merged_requests['blkno']
    merged_requests = (merged_requests
                       .assign(io_num=(~is_ovrlp).astype(int).cumsum())
                       .groupby('io_num', sort=False)
                       .agg({'blkno': 'min', 'tail': 'max'})
                       # dict-based agg does not guarantee column order; pin it.
                       [['blkno', 'tail']])

    # Filter out-of-bounds extents.
    # NOTE(review): `tail` is exclusive, so `<` also drops an extent ending
    # exactly at `last_block`; confirm whether `<=` was intended.
    filtered_requests = merged_requests[merged_requests['tail'] < last_block]

    # Single-argument parenthesised print works on both Python 2 and 3.
    print("COUNT All: %d After merging: %d After filter out OOB: %d"
          % (len(all_sheltered_requests), len(merged_requests), len(filtered_requests)))
    print("BYTE All: %d After merging: %d After filter out OOB: %d"
          % (all_sheltered_requests['blkcount'].sum(),
             (merged_requests['tail'] - merged_requests['blkno']).sum(),
             (filtered_requests['tail'] - filtered_requests['blkno']).sum()))

    # Output goes next to the input: text before the first '.' of the file
    # name, plus "_writeback.txt".
    trace_path_comp = path.split(trace_file)
    trace_file_comp = trace_path_comp[1].split('.')
    out_file = path.join(trace_path_comp[0], "%s_writeback.txt" % trace_file_comp[0])

    with open(out_file, "w") as f:
        f.write("fio version 2 iolog\n")
        f.write("%s add\n" % fio_device)
        f.write("%s open\n" % fio_device)
        # Each iolog line is "<file> <action> <offset> <length>". Read the
        # columns by name: itertuples() yields the index (the io_num group
        # label) first, so positional access wrote the label as the offset.
        # NOTE(review): values are emitted in the trace's own units; fio
        # documents offset/length in bytes -- confirm whether blkno/blkcount
        # need scaling by the sector size.
        for offset, end in zip(filtered_requests['blkno'], filtered_requests['tail']):
            f.write("%s %s %d %d\n" % (fio_device, "write", offset, end - offset))
        f.write("%s close\n" % fio_device)


COUNT All: 277 After merging: 100 After filter out OOB: 100
BYTE All: 3722 After merging: 3722 After filter out OOB: 3722
COUNT All: 25089 After merging: 77 After filter out OOB: 77
BYTE All: 432104 After merging: 405136 After filter out OOB: 405136
COUNT All: 29448 After merging: 203 After filter out OOB: 203
BYTE All: 458568 After merging: 409779 After filter out OOB: 409779
COUNT All: 270203 After merging: 2295 After filter out OOB: 2295
BYTE All: 4542384 After merging: 3873774 After filter out OOB: 3873774
COUNT All: 13413 After merging: 2098 After filter out OOB: 2098
BYTE All: 105897 After merging: 105897 After filter out OOB: 105897
COUNT All: 649678 After merging: 1175 After filter out OOB: 1175
BYTE All: 10741584 After merging: 9836184 After filter out OOB: 9836184
COUNT All: 1142644 After merging: 2386 After filter out OOB: 2184
BYTE All: 49586023 After merging: 19197579 After filter out OOB: 18594264
COUNT All: 322232 After merging: 892 After filter out OOB: 892
BYTE All: 33