In [None]:
import os, sys, json, glob
import numpy as np
import matplotlib.pyplot as plt
import girder_client
import histomicstk.utils as htk_utils
from cStringIO import StringIO
from IPython.display import Image as IPImage  
import histomicstk.utils as htk_utils
import io, random
from PIL import Image, ImageDraw
import logging

# Only show WARNING-and-above records from the "requests" logger so that
# per-request log lines do not flood the cell output
# (presumably emitted by the HTTP calls girder_client makes — TODO confirm).
logging.getLogger("requests").setLevel(logging.WARNING)

%matplotlib inline

In [None]:
# Shared Girder REST client. The helper functions in the cells below read it
# as the module-level global `gc`, so this cell must run before any of them.
gc = girder_client.GirderClient(apiUrl="http://candygram.neurology.emory.edu:8080/api/v1")

In [None]:
class LinePrinter():
    """
    Single-line status display on stdout.

    Creating an instance is the whole operation: the constructor rewrites the
    current terminal line with the string form of ``data`` and flushes, so
    repeated instantiation updates one line in place instead of scrolling.
    """
    def __init__(self, data):
        # "\r" moves the cursor back to column 0; ESC[K erases to end of line.
        message = "\r\x1b[K" + data.__str__()
        stream = sys.stdout
        stream.write(message)
        stream.flush()

In [None]:
# Fetches only the cohort slides whose name contains '-DX'
def getDXSlidesInCohort(Cohortname, nSlides):
    """Return the slides of a TCGA cohort whose name contains ``'-DX'``.

    Parameters
    ----------
    Cohortname : str
        Cohort name as listed by the ``/tcga/cohort`` endpoint (e.g. ``'gbm'``).
    nSlides : int
        Value sent as the ``limit`` query parameter of the image listing.
        The '-DX' filter is applied after the fetch, so fewer slides than
        this may be returned.

    Returns
    -------
    list of dict
        The matching slide records from the ``'data'`` list of the response.

    Raises
    ------
    KeyError
        If ``Cohortname`` is not among the cohorts the server reports.
    """
    tcgaCohorts = gc.get('/tcga/cohort')
    cohortInfo = dict((x['name'], x['_id']) for x in tcgaCohorts['data'])
    if Cohortname not in cohortInfo:
        raise KeyError("Unknown cohort %r; available cohorts: %s"
                       % (Cohortname, ", ".join(sorted(cohortInfo))))

    slidesInCohort = gc.get('/tcga/cohort/%s/images?limit=%d'
                            % (cohortInfo[Cohortname], nSlides))
    dxSlides = [x for x in slidesInCohort['data'] if '-DX' in x['name']]

    # Report the cohort that was actually requested. The previous code read a
    # module-level `cohort` variable that may be undefined or stale.
    print("%d DX slides for the %s Cohort" % (len(dxSlides), Cohortname))
    return dxSlides

In [None]:
# Runs a low-resolution foreground segmentation so tiles can later be sampled from masked pixels only
def grabTiles(sID, sampleSlide):
    """Fetch a 1.25x rendering of a slide and list its foreground pixel coordinates.

    Parameters
    ----------
    sID :
        Ignored. The id is always taken from ``sampleSlide['_id']``; the
        parameter is kept only so existing call sites keep working.
    sampleSlide : dict
        Slide record; only its ``'_id'`` key is read.

    Returns
    -------
    tuple
        ``(lowResImg, maskCoords)``. ``lowResImg`` is the raw response object
        returned by ``gc.get(..., jsonResp=False)`` (not a decoded image).
        ``maskCoords`` is a list of ``(row, col)`` pairs, one per non-zero
        pixel of the mask computed by ``htk_utils.simple_mask``.
    """
    slideId = sampleSlide['_id']
    lowResMag = 1.25  # tileSave assumes the same value when scaling coordinates
    lowResImg = gc.get('/item/%s/tiles/region?magnification=%s' % (slideId, lowResMag),
                       jsonResp=False)
    lowResPILimage = Image.open(io.BytesIO(lowResImg.content))
    im_fgnd_mask_lres = htk_utils.simple_mask(np.asarray(lowResPILimage))
    (YmaskPts, XmaskPts) = np.nonzero(im_fgnd_mask_lres)

    # Materialise the pairs: the result is shuffled and measured downstream,
    # which needs a real list even where zip() returns a one-shot iterator.
    maskCoords = list(zip(YmaskPts, XmaskPts))
    return lowResImg, maskCoords

In [None]:
# Tiles save function - writes randomly sampled tiles of one slide to disk as PNG files
def tileSave(imageData,nTiles,lowResImg, slideBaseName, out_path, maskCoords):
    """Fetch random 256x256 tiles at 20x from a slide and save the accepted ones.

    Coordinates are visited in random order. Each one is scaled from the
    1.25x mask to 20x, the corresponding region is requested from the server,
    and the tile is written as PNG when its mean pixel value lies strictly
    between 150 and 210 (presumably to reject near-blank and very dark
    tiles — TODO confirm the thresholds).

    Parameters
    ----------
    imageData : dict
        Slide record; only ``imageData['_id']`` is read.
    nTiles : int
        Number of tiles to save before stopping. Nothing is fetched when
        this is zero or negative.
    lowResImg :
        Unused; kept so existing call sites keep working.
    slideBaseName : str
        Prefix of every tile file name.
    out_path : str
        Output directory. It is created when missing.
    maskCoords : iterable of (row, col)
        Candidate pixel coordinates at 1.25x magnification. The caller's
        sequence is copied, not shuffled in place.

    Returns
    -------
    int
        Number of tiles written. This is less than ``nTiles`` when the
        coordinates run out first.
    """
    lowResMag = 1.25                      # magnification the mask coordinates refer to
    outputRes = 20                        # magnification of the saved tiles
    regionWidth = regionHeight = 256
    scaleFactor = outputRes / lowResMag   # mask pixel -> output-resolution pixel

    if nTiles <= 0:
        return 0

    # Shuffle a private copy so the caller's list keeps its order; this also
    # accepts any iterable (e.g. a zip iterator).
    coords = list(maskCoords)
    random.shuffle(coords)

    if out_path and not os.path.isdir(out_path):
        os.makedirs(out_path)

    tilecount = 0
    for row, col in coords:
        top = row * scaleFactor   # scaled to the output resolution
        left = col * scaleFactor

        curTile = gc.get('/item/%s/tiles/region?magnification=%s&top=%d&left=%d&regionWidth=%d&regionHeight=%d'
                         % (imageData['_id'], outputRes, top, left, regionWidth, regionHeight),
                         jsonResp=False)
        img = Image.open(io.BytesIO(curTile.content))
        avg = np.average(img)
        if 150 < avg < 210:
            tilename = slideBaseName + '_%dx_%d_%d_%dx%d.png' % (outputRes, top, left, regionWidth, regionHeight)
            # os.path.join works whether or not out_path ends with a separator.
            img.save(os.path.join(out_path, tilename))
            tilecount += 1
            if tilecount == nTiles:
                break

    return tilecount

In [None]:
# Build the training and test tile folders for one cohort.
# NOTE(review): `train` is not read anywhere in this cell; the 16-out-of-20
# rule in the loop below encodes the same 80/20 split by hand.
train  = 0.8
cohort = 'gbm'
dxSlides = getDXSlidesInCohort(cohort,2000)
totalSlides = len(dxSlides)

## Output Testing & Training Images for Cohort
for idx,sl in enumerate(dxSlides):
    # In every run of 20 consecutive slides the first 16 go to train, the last 4 to test.
    if( (idx % 20)  <  16 ):
        opd = "/data/train/%s/" % cohort
    else:
        opd = "/data/test/%s/" % cohort

    if not os.path.isdir(opd):
        os.makedirs(opd)

    # Slide name up to the first "."; used as the tile file-name prefix.
    slideBaseName = sl['name'].split(".")[0]
    # Count tiles already on disk for this slide so a re-run can skip finished slides.
    tilesFound = glob.glob(opd+"/%s*png" %  (slideBaseName))
    tilesWanted = 10
    tilesToGenerate = tilesWanted - len(tilesFound)
    
    if (tilesToGenerate) > 0:
        sID = sl['_id']
        lowResImg, maskCoords = grabTiles(sID, sl)
        # NOTE(review): tilesWanted (not tilesToGenerate) is passed, so a slide that
        # already has some tiles gets up to 10 more rather than being topped up to 10.
        # An earlier version passed tilesToGenerate — confirm which is intended.
        tileSave(sl, tilesWanted, lowResImg, slideBaseName, opd, maskCoords) 
    else:
        # Progress is only reported for slides that were skipped.
        stats = "Processed %d images" % idx
        LinePrinter(stats)

In [None]:
# Function to fetch tiles from randomly chosen slides in a given cohort set - marked for deletion by the author
def randomTileSave(dxSlideCohort,cancerClass, train_or_testpath, tileCount, nSamples=100):
    """Save tiles from randomly chosen slides of a cohort listing.

    Slides are drawn with replacement, so the same slide can be picked more
    than once.

    Parameters
    ----------
    dxSlideCohort : dict
        Cohort listing whose ``'data'`` entry is the list of slide records
        (each with ``'_id'`` and ``'name'``).
    cancerClass : str
        Class label; used as the last directory component of the output path.
    train_or_testpath : str
        Sub-directory of ``/data/`` to write into (e.g. ``'train'``).
    tileCount : int
        Number of tiles requested from ``tileSave`` for each drawn slide.
    nSamples : int, optional
        Number of random draws. Defaults to 100, the previously hard-coded value.
    """
    slides = dxSlideCohort['data']
    if not slides:
        # Nothing to sample from; avoid failing on an empty listing.
        return

    out_path = '/data/' + train_or_testpath + '/' + cancerClass + '/'
    for _ in range(nSamples):
        # random.choice covers every valid index, including 0, and never
        # overruns the list the way randint(1, 101) did.
        sampleSlide = random.choice(slides)
        sID = sampleSlide['_id']
        lowResImg, maskCoords = grabTiles(sID, sampleSlide)
        slideBaseName = sampleSlide['name'].split(".")[0]
        # tileSave indexes ['_id'] on its first argument, so it needs the
        # slide record itself, not the bare id string.
        tileSave(sampleSlide, tileCount, lowResImg, slideBaseName, out_path, maskCoords)

In [None]:
# Fetches all slides of a cohort (no name filtering)
def getSlidesInCohort(Cohortname, nSlides):
    """Return the raw image listing of a TCGA cohort.

    Parameters
    ----------
    Cohortname : str
        Cohort name as listed by the ``/tcga/cohort`` endpoint (e.g. ``'gbm'``).
    nSlides : int
        Value sent as the ``limit`` query parameter of the image listing.

    Returns
    -------
    dict
        The response of ``/tcga/cohort/<id>/images`` as returned by ``gc.get``.
        Callers read the slide records from its ``'data'`` entry.

    Raises
    ------
    KeyError
        If ``Cohortname`` is not among the cohorts the server reports.
    """
    tcgaCohorts = gc.get('/tcga/cohort')
    cohortInfo = dict((x['name'], x['_id']) for x in tcgaCohorts['data'])
    if Cohortname not in cohortInfo:
        raise KeyError("Unknown cohort %r; available cohorts: %s"
                       % (Cohortname, ", ".join(sorted(cohortInfo))))

    # Look up by the function argument. The previous code referenced an
    # undefined name `Cohort` and raised NameError.
    slidesInCohort = gc.get('/tcga/cohort/%s/images?limit=%d'
                            % (cohortInfo[Cohortname], nSlides))
    return slidesInCohort

In [None]:
# gbm / train: list up to 100 gbm slides, then save 5 tiles per randomly drawn
# slide under /data/train/gbm/ (randomTileSave decides how many slides are drawn).
cancerClass="gbm"
storPath="train"
dxSlideCohort = getSlidesInCohort('gbm',100)
randomTileSave(dxSlideCohort,cancerClass,storPath,5) 

In [None]:
# gbm / test: list up to 100 gbm slides, then save 5 tiles per randomly drawn
# slide under /data/test/gbm/.
# NOTE(review): slides are drawn from the same listing as the training cell, so
# train and test tiles can come from the same slide — confirm this is acceptable.
cancerClass="gbm"
storPath="test"
dxSlideCohort = getSlidesInCohort('gbm',100)
randomTileSave(dxSlideCohort,cancerClass,storPath,5) # gbm class tiles for test

In [None]:
# lgg / train: list up to 100 lgg slides, then save 5 tiles per randomly drawn
# slide under /data/train/lgg/.
cancerClass="lgg"
storPath="train"
dxSlideCohort = getSlidesInCohort('lgg',100)
randomTileSave(dxSlideCohort,cancerClass,storPath,5) # lgg class tiles for training

In [None]:
# lgg / test: list up to 100 lgg slides, then save 5 tiles per randomly drawn
# slide under /data/test/lgg/.
# NOTE(review): slides are drawn from the same listing as the training cell, so
# train and test tiles can come from the same slide — confirm this is acceptable.
cancerClass="lgg"
storPath="test"
dxSlideCohort = getSlidesInCohort('lgg',100)
randomTileSave(dxSlideCohort,cancerClass,storPath,5) # lgg class tiles for test