### Read `.bin` file position data from new sessions and convert to `.pos` file

In [7]:
import os

import numpy as np

In [8]:
def get_header_bstring(file):
    """
    Collect the header of a file, i.e. everything up to and including
    the first 'data_start' marker, as raw bytes.

    Parameters
    ----------
    file (str or path): file to be loaded

    Returns
    -------
    bytes: every line that precedes the first line containing
        'data_start', followed by the marker b'data_start' itself.
        If the marker never occurs, the whole file content is returned.
    """

    marker = b'data_start'
    chunks = []
    with open(file, 'rb') as handle:
        for raw_line in handle:
            if marker in raw_line:
                # Only the marker is kept from this line, not what follows it
                chunks.append(marker)
                break
            chunks.append(raw_line)
    return b''.join(chunks)

In [9]:
import os
import numpy as np

from BinConverter.core.readBin import get_bin_data, get_raw_pos
from BinConverter.core.CreatePos import create_pos


def get_header_bstring(file):
    """
    Collect the header of a file, i.e. everything up to and including
    the first 'data_start' marker, as raw bytes.

    Parameters
    ----------
    file (str or path): file to be loaded

    Returns
    -------
    bytes: every line that precedes the first line containing
        'data_start', followed by the marker b'data_start' itself.
        If the marker never occurs, the whole file content is returned.
    """

    marker = b'data_start'
    chunks = []
    with open(file, 'rb') as handle:
        for raw_line in handle:
            if marker in raw_line:
                # Only the marker is kept from this line, not what follows it
                chunks.append(marker)
                break
            chunks.append(raw_line)
    return b''.join(chunks)


# Full filename to .bin file
bin_filename = '/mnt/d/freelance-work/catalyst-neuro/hussaini-lab-to-nwb/new_session_data/06172021-HPC-B6-RAW/06172021-HPC-B6-RAW.bin'


# Read bin file position data with BinConverter and show first 4 rows
raw_position = get_raw_pos(bin_filename)
print('position data read from `.bin` file with BinConverter tool:\n\n', raw_position[0:4, :].astype(int))


# Save to .pos file.
# Only the extension is swapped: str.replace('.bin', '.pos') would also
# rewrite any other '.bin' that happens to occur elsewhere in the path.
pos_file_from_bin = os.path.splitext(bin_filename)[0] + '.pos'
# NOTE(review): the recorded run of this cell ended in a FileNotFoundError for
# the .pos path, so create_pos apparently did not write the file for this
# session -- confirm what create_pos requires before relying on the cells below.
create_pos(pos_file_from_bin, raw_position)


# Read .pos file data and show first 4 rows
bytes_packet = 20  # bytes per position record; equals pos_dt.itemsize (4 + 8 * 2)
footer_size = len('\r\ndata_end\r\n')
# The header is scanned once; its length is reused below as the memmap offset
header_size = len(get_header_bstring(pos_file_from_bin))
num_bytes = os.path.getsize(pos_file_from_bin) - header_size - footer_size
num_packets = num_bytes // bytes_packet

# Set dtypes (big-endian: one 4-byte field 't' followed by eight 2-byte fields)
pos_dt = np.dtype([('t', ">i4"), ('X', ">i2"), ('Y', ">i2"), ('x', ">i2"), ('y', ">i2"), 
                   ('PX', ">i2"), ('px', ">i2"), ('tot_px', ">i2"), ('unused', ">i2")])

# Read position data from .pos file after conversion from .bin
np_pos = np.memmap(
    filename=pos_file_from_bin,
    dtype=pos_dt,
    mode='r',
    offset=header_size,
    shape=(num_packets, ),
)
print('\n\n\nposition data read from `.pos` file with numpy (after converting from .bin with BinConverter):\n\n', np_pos[0:4,])


position data read from `.bin` file with BinConverter tool:

 [[150121    137     42   1023   1023      0     17      0     17]
 [150122    137     44   1023   1023      0     10      0     10]
 [150123    137     42   1023   1023      0     13      0     13]
 [150124    136     44   1023   1023      0     15      0     15]]


FileNotFoundError: [Errno 2] No such file or directory: '/mnt/d/freelance-work/catalyst-neuro/hussaini-lab-to-nwb/new_session_data/06172021-HPC-B6-RAW/06172021-HPC-B6-RAW.pos'

In [None]:
from BinConverter.core.readBin import get_bin_data, get_raw_pos
from BinConverter.core.CreatePos import create_pos

# Full filename to .bin file
bin_filename = '/mnt/d/freelance-work/catalyst-neuro/hussaini-lab-to-nwb/new_session_data/06172021-HPC-B6-RAW/06172021-HPC-B6-RAW.bin'


# Read bin file position data with BinConverter and show first 4 rows
raw_position = get_raw_pos(bin_filename)
print('position data read from `.bin` file with BinConverter tool:\n\n', raw_position[0:4, :].astype(int))


# Save to .pos file.
# Only the extension is swapped: str.replace('.bin', '.pos') would also
# rewrite any other '.bin' that happens to occur elsewhere in the path.
pos_file_from_bin = os.path.splitext(bin_filename)[0] + '.pos'
create_pos(pos_file_from_bin, raw_position)


# Read .pos file data and show first 4 rows
bytes_packet = 20  # bytes per position record; equals pos_dt.itemsize (4 + 8 * 2)
footer_size = len('\r\ndata_end\r\n')
# The header is scanned once; its length is reused below as the memmap offset
header_size = len(get_header_bstring(pos_file_from_bin))
num_bytes = os.path.getsize(pos_file_from_bin) - header_size - footer_size
num_packets = num_bytes // bytes_packet

# Set dtypes (big-endian: one 4-byte field 't' followed by eight 2-byte fields)
pos_dt = np.dtype([('t', ">i4"), ('X', ">i2"), ('Y', ">i2"), ('x', ">i2"), ('y', ">i2"), 
                   ('PX', ">i2"), ('px', ">i2"), ('tot_px', ">i2"), ('unused', ">i2")])

# Read position data from .pos file after conversion from .bin
np_pos = np.memmap(
    filename=pos_file_from_bin,
    dtype=pos_dt,
    mode='r',
    offset=header_size,
    shape=(num_packets, ),
)
print('\n\n\nposition data read from `.pos` file with numpy (after converting from .bin with BinConverter):\n\n', np_pos[0:4,])


In [44]:
# Read .bin data with custom code

In [45]:
# Little-endian record of one position sample: a 4-byte 't' field followed by
# eight 2-byte fields, 20 bytes in total.
pos_dt_se = np.dtype(
    [('t', "<i4")]
    + [(field, "<i2") for field in ('X', 'Y', 'x', 'y', 'PX', 'px', 'tot_px', 'unused')]
)

# One packet of the .bin file as laid out here: 12 bytes of leading fields,
# the 20-byte position record, 384 bytes of 'ephys' and 16 bytes of 'trailer',
# i.e. 432 bytes per packet.
bin_dt = np.dtype([
    ('id', "S4"),
    ('packet', "<i4"),
    ('di', "<i2"),
    ('si', "<i2"),
    ('pos', pos_dt_se),
    ('ephys', np.byte, 384),
    ('trailer', np.byte, 16),
])

In [47]:
# Read-only memory map over the .bin file, starting at byte 0 and interpreted
# as a sequence of bin_dt records. No shape is passed, so the number of
# records is left to numpy to derive from the file size.
np_bin = np.memmap(bin_filename, dtype=bin_dt, mode='r', offset=0)

In [48]:
# Boolean mask, one entry per packet, selecting packets whose 'id' field is b'ADU2'.
# The mask is kept as a bare 1-D array: wrapping it in a list turns it into a
# (1, N) index on current NumPy, which cannot index the 1-D packet array.
pos_mask = np_bin['id'] == b'ADU2'

In [None]:
# Select the position records of the masked packets. np.ravel flattens the
# mask to 1-D so it is a valid boolean index whether pos_mask is a bare
# boolean array or a boolean array wrapped in a list.
pos_data = np_bin['pos'][np.ravel(pos_mask)]

# pos_data is a 1-D array of structured records, so only the row axis can be
# sliced (a second index such as `[0:4, :]` raises IndexError).
pos_data[0:4]

In [34]:
import struct

def getpos(pos_fpath, arena, method='', flip_y=True):
    r"""
    Read the raw position records stored in a .pos file.

    The header (everything up to and including the 'data_start' marker) is
    parsed for 'num_pos_samples' and 'pos_format'; the binary records that
    follow the marker are then unpacked as big-endian 'i8h' records (one
    4-byte signed integer followed by eight 2-byte signed integers).

    Parameters
    ----------
    pos_fpath (str or path): full path of the .pos file
        (e.g. C:\example\session.pos)
    arena: not used by this implementation; kept in the signature
    method (str): not used by this implementation; kept in the signature
    flip_y (bool): not used by this implementation; kept in the signature

    Returns
    -------
    numpy.ndarray: float array of shape (num_pos_samples, 9) holding the
        nine values of every record in file order. According to the
        recognised format string 't,x1,y1,x2,y2,numpix1,numpix2' the first
        column is the timestamp; no scaling or unit conversion is applied.

    Raises
    ------
    ValueError: if the 'data_start' marker or the 'num_pos_samples' entry is
        missing, or the position format is missing/unrecognized
    struct.error: if the amount of binary data does not match
        'num_pos_samples' records
    """
    marker = b'data_start'
    footer = b'\r\ndata_end\r\n'
    record_format = 'i8h'  # 4-byte int + eight 2-byte ints = 20 bytes, 9 values

    # Plain read-only access: nothing is written, so read-only files work too
    with open(pos_fpath, 'rb') as f:
        raw = f.read()

    marker_at = raw.find(marker)
    if marker_at < 0:
        raise ValueError("no 'data_start' marker found in %s" % pos_fpath)
    # The offset is measured in bytes, so non-ASCII header text cannot shift it
    header_len = marker_at + len(marker)

    num_pos_samples = None
    two_spot = None
    header_text = raw[:marker_at].decode('UTF-8', errors='replace')
    for line in header_text.split('\n'):
        key, _, value = line.strip().partition(' ')
        if key == 'num_pos_samples':
            num_pos_samples = int(value)
        elif key == 'pos_format':
            # Two spot mode; one spot mode uses the same format string
            two_spot = 't,x1,y1,x2,y2,numpix1,numpix2' in value

    if not two_spot:
        raise ValueError('The position format is unrecognized!')
    if num_pos_samples is None:
        raise ValueError("no 'num_pos_samples' entry found in %s" % pos_fpath)

    payload = raw[header_len:]
    # Drop the footer only when it is really there, instead of blindly
    # cutting the last 12 bytes off the data
    if payload.endswith(footer):
        payload = payload[:-len(footer)]

    values = struct.unpack('>' + record_format * num_pos_samples, payload)
    pos_data = np.asarray(values).astype(float).reshape((num_pos_samples, 9))
    return pos_data

In [38]:
# First four records of the .pos file, once via the structured memmap and
# once via getpos, for a side-by-side comparison.
print(
    '\n\n\nposition data read from `.pos` file with numpy (after converting from .bin with BinConverter):\n\n',
    np_pos[0:4],
)

getpos(pos_file_from_bin, arena='BehaviorRoom')[0:4, :].astype(int)




position data read from `.pos` file with numpy (after converting from .bin with BinConverter):

 [(2357, 55, 74, 1023, 1023, 0, 17, 0, 17)
 (2358, 56, 76, 1023, 1023, 0, 13, 0, 13)
 (2359, 57, 74, 1023, 1023, 0, 13, 0, 13)
 (2360, 54, 78, 1023, 1023, 0, 12, 0, 12)]


array([[2357,   55,   74, 1023, 1023,    0,   17,    0,   17],
       [2358,   56,   76, 1023, 1023,    0,   13,    0,   13],
       [2359,   57,   74, 1023, 1023,    0,   13,    0,   13],
       [2360,   54,   78, 1023, 1023,    0,   12,    0,   12]])

In [31]:
# NOTE(review): this import rebinds the name `getpos`. This notebook also
# defines its own `getpos`, so after running this cell any cell that expects
# the notebook-defined version (e.g. one calling `.astype` on the result)
# will get this imported function instead.
from BinConverter.core.Tint_Matlab import getpos

# The recorded output below is a 4-tuple (three column arrays and the value
# 50.0) rather than a single (n, 9) array.
getpos(pos_file_from_bin, arena='BehaviorRoom')

(array([[-52.41932066],
        [-52.21751384],
        [-52.01570703],
        ...,
        [-19.72661634],
        [-19.72661634],
        [-19.72661634]]),
 array([[63.87185751],
        [63.46824388],
        [63.87185751],
        ...,
        [60.037528  ],
        [60.037528  ],
        [60.037528  ]]),
 array([[0.0000e+00],
        [2.0000e-02],
        [4.0000e-02],
        ...,
        [6.0094e+02],
        [6.0096e+02],
        [6.0098e+02]]),
 50.0)

In [25]:
def establish_mmap_to_pos_file(pos_file):
    """
    Generates a read-only memory map (mmap) object over an Axona .pos file.

    The map starts at byte 0 and spans the whole file (length=0), so it
    includes whatever precedes the position data as well.

    Parameters:
    -------
    pos_file (Path or Str):
        Full filename of the file to map.

    Returns:
    -------
    mmpos (mmap.mmap):
        Read-only memory map of the complete file. The caller is
        responsible for closing it.
    """
    # The file object is only needed while the mapping is being created; the
    # mmap holds its own handle, so the file is closed here instead of leaking.
    with open(pos_file, "rb") as fpos:
        mmpos = mmap.mmap(
            fpos.fileno(),
            length=0,
            access=mmap.ACCESS_READ,
            offset=0
        )
    return mmpos

In [28]:
import mmap

# Map the converted .pos file into memory
pos_file = pos_file_from_bin
mmpos = establish_mmap_to_pos_file(pos_file)

# Size bookkeeping: file size minus header and footer, in 20-byte records
bytes_packet = 20
print(pos_file)
footer_size = len('\r\ndata_end\r\n')
print(footer_size)
header_size = len(get_header_bstring(pos_file))
num_bytes = os.path.getsize(pos_file) - header_size - footer_size
num_packets = num_bytes // bytes_packet
print(num_packets)

# View the mapped bytes as eight big-endian int16 values per record. The
# view starts 4 bytes after the header and advances by one full record per
# row, so the first 4 bytes of every record are skipped.
pos = np.ndarray(
    shape=(num_packets,),
    dtype=('>i2', 8),
    buffer=mmpos,
    offset=header_size + 4,
    strides=bytes_packet,
)
pos

/mnt/d/freelance-work/catalyst-neuro/hussaini-lab-to-nwb/new_session_data/06172021-HPC-B6-RAW/06172021-HPC-B6-RAW.pos
12
30050


array([[  55,   74, 1023, ...,   17,    0,   17],
       [  56,   76, 1023, ...,   13,    0,   13],
       [  57,   74, 1023, ...,   13,    0,   13],
       ...,
       [ 217,   93, 1023, ...,   11,    0,   11],
       [ 217,   93, 1023, ...,   11,    0,   11],
       [ 217,   93, 1023, ...,   11,    0,   11]], dtype=int16)

In [29]:
# Display the structured memmap `np_pos` so its records can be compared with
# the int16 array `pos` shown by the previous cell.
np_pos

memmap([( 2357,  55, 74, 1023, 1023, 0, 17, 0, 17),
        ( 2358,  56, 76, 1023, 1023, 0, 13, 0, 13),
        ( 2359,  57, 74, 1023, 1023, 0, 13, 0, 13), ...,
        (32357, 217, 93, 1023, 1023, 0, 11, 0, 11),
        (32357, 217, 93, 1023, 1023, 0, 11, 0, 11),
        (32357, 217, 93, 1023, 1023, 0, 11, 0, 11)],
       dtype=[('t', '>i4'), ('X', '>i2'), ('Y', '>i2'), ('x', '>i2'), ('y', '>i2'), ('PX', '>i2'), ('px', '>i2'), ('tot_px', '>i2'), ('unused', '>i2')])

Strangely, we get different data values after converting from `.bin` to `.pos` using the BinConverter tool. I do not think this was the case with the old data, so let's try converting that instead:

In [43]:
from BinConverter.core.readBin import get_bin_data, get_raw_pos
from BinConverter.core.CreatePos import create_pos

# Full filename to .bin file
bin_filename = '/mnt/d/freelance-work/catalyst-neuro/hussaini-lab-to-nwb/example_data_raw/20201004_Raw.bin'

# Read bin file position data with BinConverter and show first 4 rows
raw_position = get_raw_pos(bin_filename)
print('position data read from `.bin` file with BinConverter tool:\n\n', raw_position[0:4, :].astype(int))


# Save to .pos file.
# Only the extension is swapped: str.replace('.bin', '.pos') would also
# rewrite any other '.bin' that happens to occur elsewhere in the path.
pos_file_from_bin = os.path.splitext(bin_filename)[0] + '.pos'
create_pos(pos_file_from_bin, raw_position)


# Read .pos file data and show first 4 rows
bytes_packet = 20  # bytes per position record; equals pos_dt.itemsize (4 + 8 * 2)
footer_size = len('\r\ndata_end\r\n')
# The header is scanned once; its length is reused below as the memmap offset
header_size = len(get_header_bstring(pos_file_from_bin))
num_bytes = os.path.getsize(pos_file_from_bin) - header_size - footer_size
num_packets = num_bytes // bytes_packet

# Set dtypes (big-endian: one 4-byte field 't' followed by eight 2-byte fields)
pos_dt = np.dtype([('t', ">i4"), ('X', ">i2"), ('Y', ">i2"), ('x', ">i2"), ('y', ">i2"), 
                   ('PX', ">i2"), ('px', ">i2"), ('tot_px', ">i2"), ('unused', ">i2")])

# Read position data from .pos file after conversion from .bin
np_pos = np.memmap(
    filename=pos_file_from_bin,
    dtype=pos_dt,
    mode='r',
    offset=header_size,
    shape=(num_packets, ),
)
print('\n\n\nposition data read from `.pos` file with numpy (after converting from .bin with BinConverter):\n\n', np_pos[0:4,])


position data read from `.bin` file with BinConverter tool:

 [[2357   55   74 1023 1023    0   17    0   17]
 [2358   56   76 1023 1023    0   13    0   13]
 [2359   57   74 1023 1023    0   13    0   13]
 [2360   54   78 1023 1023    0   12    0   12]]



position data read from `.pos` file with numpy (after converting from .bin with BinConverter):

 [(2357, 55, 74, 1023, 1023, 0, 17, 0, 17)
 (2358, 56, 76, 1023, 1023, 0, 13, 0, 13)
 (2359, 57, 74, 1023, 1023, 0, 13, 0, 13)
 (2360, 54, 78, 1023, 1023, 0, 12, 0, 12)]
