In [0]:
import numpy as np
import pandas as pd
from collections import OrderedDict
import datetime
import os

### Easy way to do the 5-minute aggregation using pandas:

In [0]:
def my_pd_5min_agg(fname, interval=5):
    """Print the raw rows of an OHLCV file and their ``interval``-minute bars.

    The file is read as header-less CSV with the columns
    Date, Time, Open, High, Low, Close, Volume. Date and Time are combined
    into a single datetime index named ``Date_Time``; the rows are then
    resampled into ``interval``-minute bins (first Open, max High, min Low,
    last Close, summed Volume). Bins without any trade are dropped from the
    printed aggregate.

    Parameters
    ----------
    fname : str
        Path of the CSV file to read.
    interval : int, optional
        Width of each aggregation bin in minutes (default 5).

    Returns
    -------
    None
        The raw frame, a blank line and the aggregated frame are printed.
    """
    columns = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']
    # Keep Date/Time as text so they can be joined and parsed in one step.
    raw = pd.read_csv(fname,
                      header=None,
                      names=columns,
                      dtype={'Date': str, 'Time': str})
    stamps = pd.to_datetime(raw['Date'] + ' ' + raw['Time'])

    data = raw.drop(['Date', 'Time'], axis=1)
    data.index = pd.DatetimeIndex(stamps.values, name='Date_Time')

    # A list of pairs keeps the column order explicit on every interpreter;
    # a plain dict literal does not guarantee it on older Pythons.
    how = OrderedDict([('Open', 'first'),
                       ('High', 'max'),
                       ('Low', 'min'),
                       ('Close', 'last'),
                       ('Volume', 'sum')])
    # 'min' is the minute alias accepted by both old and current pandas.
    data_agg = data.resample(str(interval) + 'min').agg(how)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(data)
    print('')
    print(data_agg.dropna())

In [0]:
# Run the pandas-based aggregation on one sample file; the function prints
# the raw frame followed by the aggregated bars.
fname = './generic_output/AC.txt'
my_pd_5min_agg(fname)

                       Open   High     Low  Close   Volume
Date_Time                                                 
2012-02-21 14:25:00  21.800  21.80  21.800  21.80    882.0
2012-02-22 10:18:00  21.540  21.54  21.500  21.50    682.0
2012-02-28 11:08:00  21.560  21.56  21.560  21.56   1002.0
2012-03-05 13:08:00  21.820  21.84  21.820  21.84   4602.0
2012-03-06 12:38:00  21.900  21.90  21.900  21.90    602.0
2012-03-07 10:15:00  21.700  21.70  21.700  21.70    582.0
2012-03-07 11:16:00  21.900  21.90  21.900  21.90   2002.0
2012-03-13 12:17:00  21.780  21.78  21.780  21.78    202.0
2012-03-13 12:26:00  21.880  21.88  21.880  21.88    202.0
2012-03-21 12:09:00  21.840  21.84  21.840  21.84    602.0
2012-03-22 09:30:00  21.980  21.98  21.980  21.98    302.0
2012-03-26 10:12:00  21.820  21.82  21.820  21.82    282.0
2012-04-02 10:16:00  21.840  21.84  21.840  21.84   5202.0
2012-04-02 15:31:00  21.840  21.84  21.840  21.84   4002.0
2012-04-11 13:35:00  21.840  21.84  21.800  21.80   4602

### Hard way of doing the aggregation using numpy
Below I try to solve the same problem in as basic a Pythonic way as possible, using as few libraries as I can.

In [0]:
# Defining my own aggregate function
def my_agg(fname, interval=5, exclude_off_market=False):
    """Aggregate a comma-separated OHLCV file into ``interval``-minute bars.

    Every input line is read as ``date,HH:MM,open,high,low,close,volume``.
    Rows are expected in chronological order: consecutive rows that fall on
    the same date, in the same hour and in the same ``interval``-minute
    bucket are merged into one bar (first open, highest high, lowest low,
    last close, summed volume). Each bar is labelled with the start of its
    bucket.

    The bars are written, one per line, to a file next to the input whose
    name has ``_agg`` inserted before the extension
    (``AA.txt`` -> ``AA_agg.txt``).

    Parameters
    ----------
    fname : str
        Path of the input file.
    interval : int, optional
        Bucket width in minutes (default 5).
    exclude_off_market : bool, optional
        When true, only bars whose hour lies strictly between 9 and 16 are
        written.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If a data line has too few fields or a non-numeric value.
    """
    bars = _read_bars(fname, interval)

    out_lines = []
    for date, hour, minute, open_, high, low, close, volume in bars:
        # NOTE(review): this keeps hours 10-15 only, so bars between 09:30
        # and 09:59 are dropped -- confirm that is the intended window.
        if exclude_off_market and not (9 < hour < 16):
            continue
        # Zero-pad the label so that e.g. five past two reads '14:05'.
        out_lines.append('%s,%02d:%02d,%s,%s,%s,%s,%s'
                         % (date, hour, minute, open_, high, low, close, volume))

    # splitext copes with extensions of any length (or none at all).
    root, ext = os.path.splitext(fname)
    with open(root + '_agg' + ext, 'w') as out:
        for line in out_lines:
            out.write(line + '\n')


def _read_bars(fname, interval):
    """Return the bars of ``fname`` as [date, hh, mm, open, high, low, close, volume] lists."""
    bars = []
    current_key = None
    with open(fname) as src:
        for raw_line in src:
            line = raw_line.strip()
            # Blank lines and '#' comment lines carry no data.
            if not line or line.startswith('#'):
                continue

            fields = [field.strip() for field in line.split(',')]
            date = fields[0]
            clock = fields[1].split(':')
            hour, minute = int(clock[0]), int(clock[1])
            open_, high, low, close, volume = [float(v) for v in fields[2:7]]

            # Compare the whole (date, hour, bucket) key so that a bar also
            # closes when the hour or the day changes, not only the minute.
            key = (date, hour, int(interval * (minute // interval)))
            if key != current_key:
                # The row that opens a bucket belongs to that bucket only.
                bars.append([date, hour, key[2], open_, high, low, close, volume])
                current_key = key
            else:
                bar = bars[-1]
                bar[4] = max(bar[4], high)
                bar[5] = min(bar[5], low)
                bar[6] = close
                bar[7] += volume
    return bars

In [0]:
# Aggregate a single file with the hand-written routine; the result is saved
# next to the input with '_agg' inserted before the file extension.
fname = './generic_output/AA.txt'
my_agg(fname)

In [0]:
# Aggregate every raw data file found in the output directory.
data_dir = './generic_output/'
# Sort so the files are processed in a reproducible order.
files = sorted(os.listdir(data_dir))
for file_name in files:
    # Hidden entries such as '.DS_Store' are not data files.
    if file_name.startswith('.'):
        continue
    # my_agg writes '<name>_agg<ext>' into this same directory; skipping those
    # keeps a re-run from aggregating earlier results a second time.
    if os.path.splitext(file_name)[0].endswith('_agg'):
        continue
    fname = data_dir + file_name
    # Sub-directories cannot be read as data files.
    if not os.path.isfile(fname):
        continue
    my_agg(fname)