In [26]:
import numpy as np
from itertools import product
import zipfile
import os

# Bit mask tested against the ROI header's "options" field to detect
# coordinates stored with sub-pixel (float) resolution
SUB_PIXEL_RES = 128

# Numeric ROI type codes as stored in the ROI header, keyed by name
ROI_TYPE = {
    'polygon': 0,
    'rect': 1,
    'oval': 2,
    'line': 3,
    'freeline': 4,
    'polyline': 5,
    'noRoi': 6,
    'freehand': 7,
    'traced': 8,
    'angle': 9,
    'point': 10,
}


def read_roi_zip(file_name):
    """
    Reads an ImageJ ROI zip set and parses each ROI individually
    Every archive member is handed to read_roi; the parsed dictionary gets
    an extra 'label' key holding the member name without its '.roi'
    extension.
    :param str file_name: Full path to ImageJ ROI .zip file
    :return list roi_list: List of the parsed ImageJ ROIs
    https://gist.github.com/luispedro/3437255
    https://github.com/hadim/read-roi/blob/master/read_roi/_read_roi.py
    """
    suffix = '.roi'
    roi_list = []
    with zipfile.ZipFile(file_name) as zf:
        for name in zf.namelist():
            # Close each archive member as soon as it has been parsed
            with zf.open(name) as roi_file:
                roi = read_roi(roi_file)
            if roi is not None:
                label = str(name)
                # Drop only the literal extension. str.rstrip('.roi') would
                # strip any trailing run of the characters '.', 'r', 'o', 'i'
                # and so corrupt names such as '1-schizo.roi'.
                if label.endswith(suffix):
                    label = label[:-len(suffix)]
                roi['label'] = label
                roi_list.append(roi)
    return roi_list


def read_roi(roi_obj):
    """
    Parses one ImageJ ROI file (.roi)
    The header is read field by field, in file order. _getX calls with no
    assignment skip header fields that are not used here; the in-line
    comments label what they are.
    This is based on:
    http://rsbweb.nih.gov/ij/developer/source/ij/io/RoiDecoder.java.html
    http://rsbweb.nih.gov/ij/developer/source/ij/io/RoiEncoder.java.html
    :param file roi_obj: Binary file object containing a single ImageJ ROI
    :return dict ROI: Parsed ROI object. 'format' is 'coordinates' (or
        'mask' for ovals), 'roi' holds the coordinates/mask, and 'type'
        names the ROI type when it is one of the explicitly handled types
    :raises IOError: If there is an error reading the roi file object
    :raises ValueError: If unable to parse ROI
    """

    def get_magic():
        """Consume the 4-byte signature and check that it is b'Iout'"""
        magic = roi_obj.read(4)
        if magic != b'Iout':
            raise IOError('Magic number not found')

    def get_byte():
        """Read 1 byte from the roi file object"""
        s = roi_obj.read(1)
        if not s:
            raise IOError('Unexpected EOF')
        return ord(s)

    def get_2bytes():
        """Read 2 bytes from the roi file object (big-endian, unsigned)"""
        b0 = get_byte()
        b1 = get_byte()
        return (b0 << 8) | b1

    def get_4bytes():
        """Read 4 bytes from the roi file object (big-endian, unsigned)"""
        s0 = get_2bytes()
        s1 = get_2bytes()
        return (s0 << 16) | s1

    def get_float():
        """Read a 32-bit float from the roi file object"""
        # get_4bytes() is unsigned, so the bit pattern must be reinterpreted
        # through uint32. Going through int32 overflows for every value with
        # the sign bit set (i.e. every negative float).
        return np.uint32(get_4bytes()).view(np.float32)

    def get_coords(pos=0):
        """
        Get the vertex coordinates of an roi polygon
        :param int pos: Plane index stored in the third column
        :return array points: (n_coordinates, 3) float array of x, y, pos
        """
        # Accumulate in float64 so that adding the bounding-box offset can
        # never overflow a 16-bit integer array.
        points = np.empty((n_coordinates, 3), dtype=float)
        if sub_pixel:
            # Per ImageJ's RoiDecoder the integer vertices (n x-shorts then
            # n y-shorts) are always stored first; the float vertices follow
            # them and are already absolute image coordinates.
            for _ in range(2 * n_coordinates):
                get_2bytes()
            points[:, 0] = [get_float() for _ in range(n_coordinates)]
            points[:, 1] = [get_float() for _ in range(n_coordinates)]
        else:
            # Integer vertices are relative to the bounding box origin
            points[:, 0] = [get_2bytes() for _ in range(n_coordinates)]
            points[:, 1] = [get_2bytes() for _ in range(n_coordinates)]
            points[:, 0] += left
            points[:, 1] += top
        points[:, 2] = pos
        return points

    # Check magic number
    get_magic()
    # Get version
    version = get_2bytes()
    # Get ROI type
    roi_type = get_byte()
    if not (0 <= roi_type < 11):
        raise ValueError('ROI type {} not supported'.format(roi_type))
    # Discard extra second byte
    get_byte()

    top = get_2bytes()
    left = get_2bytes()
    bottom = get_2bytes()
    right = get_2bytes()
    n_coordinates = get_2bytes()

    x1 = get_float()  # x1
    y1 = get_float()  # y1
    x2 = get_float()  # x2
    y2 = get_float()  # y2
    get_2bytes()  # stroke width
    get_4bytes()  # shape roi size
    get_4bytes()  # stroke color
    get_4bytes()  # fill color
    subtype = get_2bytes()
    if subtype != 0:
        raise ValueError('ROI subtype {} not supported (!= 0)'.format(subtype))
    options = get_2bytes()
    # RoiDecoder only honours the sub-pixel flag for format version >= 222
    sub_pixel = bool(options & SUB_PIXEL_RES) and version >= 222
    get_byte()  # arrow style
    get_byte()  # arrow head size
    get_2bytes()  # rectangle arc size
    z = get_4bytes()  # position
    if z > 0:
        z -= 1  # Multi-plane images start indexing at 1 instead of 0
    get_4bytes()  # header 2 offset

    # Instantiate ROI dict
    roi_dict = {'format': 'coordinates'}

    if roi_type == ROI_TYPE['polygon']:
        roi_dict['type'] = 'polygon'
        coords = get_coords(z)

    elif roi_type == ROI_TYPE['rect']:
        roi_dict['type'] = 'rectangle'
        # Corners in the order top-left, top-right, bottom-right, bottom-left
        coords = [[left, top, z],
                  [right, top, z],
                  [right, bottom, z],
                  [left, bottom, z]]
        coords = np.array(coords).astype('float')

    elif roi_type == ROI_TYPE['oval']:
        roi_dict['type'] = 'oval'
        roi_dict['format'] = 'mask'
        width = right - left
        height = bottom - top

        # 0.5 moves the mid point to the center of the pixel
        x_mid = (right + left) / 2.0 - 0.5
        y_mid = (top + bottom) / 2.0 - 0.5
        # Boolean mask indexed [plane, y, x]; True inside the ellipse
        coords = np.zeros((z + 1, bottom, right), dtype=bool)
        for y, x in product(np.arange(top, bottom), np.arange(left, right)):
            coords[z, y, x] = ((x - x_mid) ** 2 / (width / 2.0) ** 2 +
                               (y - y_mid) ** 2 / (height / 2.0) ** 2 <= 1)

    elif roi_type == ROI_TYPE['line']:
        roi_dict['type'] = 'line'
        coords = [[x1, y1, z], [x2, y2, z]]
        coords = np.array(coords).astype('float')

    elif roi_type == ROI_TYPE['freehand']:
        roi_dict['type'] = 'freehand'
        coords = get_coords(z)

    elif roi_type == ROI_TYPE['traced']:
        roi_dict['type'] = 'traced'
        coords = get_coords(z)

    elif roi_type == ROI_TYPE['point']:
        roi_dict['type'] = 'point'
        # R = L+1, B = T+1
        coords = [left, top, z]
        coords = [float(c) for c in coords]
    else:
        # Unrecognized format, but we'll just try to get coordinates.
        # No 'type' key is set for these ROIs.
        try:
            coords = get_coords(z)
        except Exception as err:
            # Catch Exception (not a bare except) so that KeyboardInterrupt
            # and SystemExit are not turned into a ValueError
            raise ValueError(
                'ROI type {} not supported'.format(roi_type)) from err

    roi_dict['roi'] = coords
    return roi_dict

# ROIs for the ring, troph and schiz annotations (the set without the "ignore" annotation)
rois_labeled = read_roi_zip("\\Users\\kathy\\Downloads\\RoiSet_no_ignore.zip")
# ROIs for the squares drawn around each cell in the stack
rois_all = read_roi_zip("\\Users\\kathy\\Downloads\\RoiSet_grid_all.zip")

# Each call returns a list of dictionaries, one per ROI, holding the coordinates ('roi') and the label ('label').
# For rois_labeled the label is the annotation label (troph, schiz, ring).
# For rois_all the label is the ID given by the ROI Manager in ImageJ.


In [18]:
def get_list(roi_set):
    """
    Flatten rectangular ROIs into one dictionary per ROI.

    Each input ROI must provide at least four corner rows in the order
    top-left, top-right, bottom-right, bottom-left, each row being (x, y, z).

    :param list roi_set: ROI dictionaries with the keys 'roi' (corner
        coordinates) and 'label'
    :return list all_list: One dictionary per ROI with the label, the original
        coordinate array, the x and y of every corner, the top-right corner
        as (x1, y1), the bottom-left corner as (x2, y2), the centroid (x, y)
        and the stack number z taken from the top-left corner
    """
    all_list = []

    for each in roi_set:
        coord = each['roi']

        top_left = coord[0]
        top_right = coord[1]
        bottom_right = coord[2]
        bottom_left = coord[3]

        top_left_x, top_left_y = top_left[0], top_left[1]
        top_right_x, top_right_y = top_right[0], top_right[1]
        bottom_right_x, bottom_right_y = bottom_right[0], bottom_right[1]
        bottom_left_x, bottom_left_y = bottom_left[0], bottom_left[1]

        # The centroid is the midpoint of the top-right / bottom-left
        # diagonal. Averaging the two right-hand corners would give the right
        # edge instead of the centre.
        cent_x = (top_right_x + bottom_left_x) / 2
        cent_y = (top_right_y + bottom_left_y) / 2

        all_list.append({
            'label': each['label'],
            # coordinates for the corners and centroid
            'coordinates': coord,
            'top_left_x': top_left_x,
            'top_left_y': top_left_y,
            'top_right_x': top_right_x,
            'top_right_y': top_right_y,
            'bottom_right_x': bottom_right_x,
            'bottom_right_y': bottom_right_y,
            'bottom_left_x': bottom_left_x,
            'bottom_left_y': bottom_left_y,
            'x1': top_right_x,
            'y1': top_right_y,
            'x2': bottom_left_x,
            'y2': bottom_left_y,
            'x': cent_x,
            'y': cent_y,
            'z': top_left[2],  # slice/stack number
        })

    return all_list

# Returns a list of dictionaries in this format:
# [{'label': '0001-0060-0060',
#   'coordinates': array([[  0.,   0.,   0.],
#                         [120.,   0.,   0.],
#                         [120., 120.,   0.],
#                         [  0., 120.,   0.]]),
#   'top_left_x': 0.0, 'top_left_y': 0.0, 'top_right_x': 120.0, 'top_right_y': 0.0,
#   'bottom_right_x': 120.0, 'bottom_right_y': 120.0, 'bottom_left_x': 0.0, 'bottom_left_y': 120.0,
#   'x1': 120.0, 'y1': 0.0, 'x2': 0.0, 'y2': 120.0, 'x': 60.0, 'y': 60.0, 'z': 0.0}]
# i.e. it includes the top-left, top-right, etc. corner for each coordinate set.

# list of dictionaries with coordinates for rois_all (all cells)
all_extracoords = get_list(rois_all)
# list of dictionaries with coordinates for only the labeled parasites
label_extracoords = get_list(rois_labeled)


In [27]:
def new_labels(extra_coords_all, extra_labeled_coords):
    """
    Relabel every cell with the label of the parasite found inside it.

    The first loop builds a dictionary {cell_ID: parasite label} holding only
    the infected cells: a parasite belongs to a cell when both are on the same
    stack (z) and the parasite centre (x, y) lies strictly inside the cell's
    box, whose corners are (x2, y1) and (x1, y2). If several parasites fall
    in the same cell, the last one in extra_labeled_coords wins.
    The second loop stores that parasite label under 'new_label' for infected
    cells and then overwrites 'label' with it, or with 'normal' for all other
    cells.

    :param list extra_coords_all: Cell dictionaries with the keys 'label',
        'x1', 'y1', 'x2', 'y2' and 'z'; modified in place
    :param list extra_labeled_coords: Parasite dictionaries with the keys
        'label', 'x', 'y' and 'z'
    :return list extra_coords_all: The same list object that was passed in.
        Every dictionary keeps its original keys; 'label' now holds the
        parasite label or 'normal', and infected cells also carry 'new_label'
    """
    new_dict = {}  # cells that have a parasite in them: {cell_ID: parasite}
    # Iterate the arguments, not notebook globals, so the function works on
    # whatever lists the caller passes in
    for b in extra_labeled_coords:  # labeled ROIs list
        for a in extra_coords_all:  # all RBCs list
            if a['z'] != b['z']:  # different stack
                continue
            # is the labeled parasite within the cell?
            if a['x2'] < b['x'] < a['x1'] and a['y1'] < b['y'] < a['y2']:
                new_dict[a['label']] = b['label']

    # will label the cellID as parasite life stage or normal
    for each1 in extra_coords_all:
        if each1['label'] in new_dict:
            each1['new_label'] = new_dict[each1['label']]
        # A 'new_label' left over from an earlier call is still honoured
        each1['label'] = each1.get('new_label', 'normal')

    return extra_coords_all
# Output format: the input cell dictionaries, e.g. {'label': 'normal', 'coordinates': array([...]), ..., 'z': 3.0}

# Relabel every cell as a parasite stage or 'normal'; new_labels modifies all_extracoords in place and returns it
labeled_and_all_list = new_labels(all_extracoords, label_extracoords)



In [28]:
def all_correct_name(semi_final_list):
    """
    Strip the leading number from the parasite labels.

    The parasite labels all have a number before them ex. 1-ring, 4-schizont.
    This function removes everything up to and including the first dash so
    that the labels are simply ring, troph, or schizont.
    Labels equal to 'normal' and labels without a dash are left unchanged, so
    running the function again on labels that are already plain names does
    not fail.

    :param list semi_final_list: Dictionaries with a 'label' key; modified in
        place
    :return list semi_final_list: The same list object that was passed in
    """
    for each1 in semi_final_list:
        if each1['label'] == 'normal':
            continue
        # partition never raises: when there is no dash, "dash" is empty and
        # the label is kept as it is
        _number, dash, stage = each1['label'].partition("-")
        if dash:
            each1['label'] = stage
    return semi_final_list
    

#final_list = all_correct_name(labeled_and_all_list)



In [24]:
def create_csv(final_list, file_path="\\Users\\kathy\\Downloads\\AllLabeled.csv"):
    """
    Write the label and the coordinates of every cell to a csv file.

    One header line is written, followed by one line per entry. The stack
    column is the entry's 'z' plus one.
    If you want to add more parameters, add them to both the header line and
    the "columns" tuple below.

    :param list final_list: Dictionaries with the keys 'label', 'z', 'x', 'y'
        and the x/y of the four corners ('top_left_x', 'top_left_y', ...)
    :param str file_path: Where to store the csv file; defaults to the
        original hard-coded location, pass your own path to change it
    """
    # Keys written after the label and the stack number, in column order
    columns = ('x', 'y',
               'top_left_x', 'top_left_y',
               'top_right_x', 'top_right_y',
               'bottom_right_x', 'bottom_right_y',
               'bottom_left_x', 'bottom_left_y')

    # The with-block guarantees the file is flushed and closed, even if a row
    # is missing a key and raises part way through
    with open(file_path, "w") as open_file:
        open_file.write('new_label,stack,x-center,y-center,x-top-left,y-top-left,x-top-right,y-top-right,x-bottom-right,y-bottom-right,x-bottom-left,y-bottom-left\n')

        for each1 in final_list:
            fields = [str(each1['label']), str(each1['z']+1)]
            fields.extend(str(each1[key]) for key in columns)
            open_file.write(",".join(fields) + "\n")

# Running this creates the csv file; you should now see the file on your computer
create_csv(labeled_and_all_list)