In [8]:
import dropbox
import base64
import scipy
import numpy as np
import boto
from boto.s3.key import Key
import uuid
from scipy import stats
from scipy import ndimage

In [9]:
#s3_client = boto3.client('s3')

def retrieve(event):
    client = dropbox.Dropbox(event["auth_token"])
    image = event["from_url"]
    f, metadata = client.files_download(image)
    str_array = image.split('/')
    image_name = str_array[-1]
    path = '/'.join(str_array[:-1])
    f_shape, metadata_shape = client.files_download(path + "/shape/" + image_name.strip(".txt") +"_shape.txt")
    data = metadata.content

    metadata_shape = metadata_shape.content
    args = metadata_shape.split("\n")
    
    decoded_array = base64.decodestring(data)

    array = np.frombuffer(decoded_array, dtype=eval("np." + args[-1]))
    list_args = []
    for i in range(len(args)-1):
        list_args.append(int(args[i]))
    result_array = np.reshape(array, tuple(list_args))
    return result_array

In [10]:
def preprocess(event, context):
    #bucket = event['Records'][0]['s3']['bucket']['name']
    #key = event['Records'][0]['s3']['object']['key']
    number = event['number']
    conn = boto.connect_s3()
    b = conn.get_bucket('training-array')
    k = b.new_key('matrix' + str(number) + '.npy')

    np_array = retrieve(event)
    values = analyze(np_array, event)

    #download_path = '/tmp/{}{}'.format(uuid.uuid4(), key)
    upload_path = '/tmp/resized-{}'.format(k)

    #s3_client.download_file(bucket, key, download_path)
    np.save(upload_path, values)
    k.set_contents_from_filename(upload_path)
    #s3_client.upload_file(upload_path, '{}resized'.format(bucket), key)
    return 0

In [11]:
def analyze(arr_arg, event):
    result = None
    arr = np.array(arr_arg)
    h = scipy.histogram(arr, 256)
    dim = len(arr.shape)

    filter_size = int(event['filter_size'])

    mean = scipy.ndimage.generic_filter(arr, scipy.mean, size = filter_size, mode = 'constant')
    
    median = scipy.ndimage.generic_filter(arr, scipy.median, size = filter_size, mode = 'constant')
    
    skew = scipy.ndimage.generic_filter(arr, scipy.stats.skew, size = filter_size, mode = 'constant')
    
    kurtosis = scipy.ndimage.generic_filter(arr, scipy.stats.kurtosis, size = filter_size, mode = 'constant')
    
    uniformity = lambda arr : scipy.sum(np.square((scipy.histogram(arr, 256)[0])))
    uniform = scipy.ndimage.generic_filter(arr, uniformity, size = filter_size, mode = 'constant')

    def entropy(arr):
        log_ret = np.log2(scipy.histogram(arr, 256)[0])
        log_ret[log_ret==-np.inf]=0
        return np.dot(scipy.histogram(arr, 256)[0], log_ret)  
    entropy_val = scipy.ndimage.generic_filter(arr, entropy, size = filter_size, mode = 'constant')
    
    maximum = scipy.ndimage.generic_filter(arr, np.amax, size = filter_size, mode = 'constant')
    
    minimum = scipy.ndimage.generic_filter(arr, np.amin, size = filter_size, mode = 'constant')
    

    def energy(arr):
        return np.sum(np.square(arr))
    energy_val = scipy.ndimage.generic_filter(arr, energy, size = filter_size, mode = 'constant')

    def rms(arr):
        return math.sqrt(energy(arr) / arr.size)
    rms_val = scipy.ndimage.generic_filter(arr, rms, size = filter_size, mode = 'constant')

    def std(arr):
        return np.std(arr, ddof=1)
    std_val = scipy.ndimage.generic_filter(arr, std, size = filter_size, mode = 'constant')

    if dim == 3:
        total = []
        for i in range(arr.shape[0] - filter_size + 1):
            for j in range(arr.shape[1] - filter_size + 1):
                for k in range(arr.shape[2] - filter_size + 1):
                    row = arr[i:i+filter_size, j:j+filter_size, k:k+filter_size].flatten()
                    row = np.append(row, mean[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                    row = np.append(row, median[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                    row = np.append(row, skew[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                    row = np.append(row, kurtosis[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                    row = np.append(row, uniform[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                    row = np.append(row, entropy_val[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                    row = np.append(row, maximum[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                    row = np.append(row, minimum[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                    row = np.append(row, energy_val[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                    row = np.append(row, rms_val[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                    row = np.append(row, std_val[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                total.append(row)
        result = np.array(total)
    elif dim == 2:
        total = []
        for i in range(arr.shape[0] - filter_size + 1):
            for j in range(arr.shape[1] - filter_size + 1):
                row = arr[i:i+filter_size, j:j+filter_size, k:k+filter_size].flatten()
                row = np.append(row, mean[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                row = np.append(row, median[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                row = np.append(row, skew[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                row = np.append(row, kurtosis[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                row = np.append(row, uniform[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                row = np.append(row, entropy_val[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                row = np.append(row, maximum[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                row = np.append(row, minimum[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                row = np.append(row, energy_val[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                row = np.append(row, rms_val[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
                row = np.append(row, std_val[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
            total.append(row)
        result = np.array(total)
    return result

In [12]:
event = {
  "auth_token": "U_0fQNo3MSAAAAAAAAAC_wLTPcM6V7FcPQ5Fo_vl1X4Oa7lJKk01uKRklVWTrzCx",
  "from_url": "/Radiomics/image1.txt",
  "number": "1",
  "filter_size": "3"
}

In [6]:
number = event['number']
conn = boto.connect_s3()
b = conn.get_bucket('training-array')
k = b.new_key('matrix' + str(number) + '.npy')

np_array = retrieve(event)

KeyboardInterrupt: 

array([[[22, 14, 12],
        [ 6,  1,  0],
        [ 4,  0,  0],
        ..., 
        [ 0,  0,  0],
        [ 1,  1,  1],
        [ 3,  3,  3]],

       [[22, 14, 12],
        [ 6,  1,  0],
        [ 4,  0,  0],
        ..., 
        [ 0,  0,  0],
        [ 1,  1,  1],
        [ 3,  3,  3]],

       [[22, 14, 12],
        [ 6,  1,  0],
        [ 4,  0,  0],
        ..., 
        [ 0,  0,  0],
        [ 1,  1,  1],
        [ 3,  3,  3]],

       ..., 
       [[33, 25, 23],
        [19, 11,  9],
        [16,  8,  6],
        ..., 
        [17,  9,  7],
        [24, 13, 11],
        [40, 29, 27]],

       [[33, 25, 23],
        [19, 11,  9],
        [16,  8,  6],
        ..., 
        [16,  8,  6],
        [23, 12, 10],
        [39, 28, 26]],

       [[33, 25, 23],
        [19, 11,  9],
        [16,  8,  6],
        ..., 
        [14,  6,  4],
        [20,  9,  7],
        [36, 25, 23]]], dtype=uint8)

In [13]:
result = None
arr = np.array(np_array)
h = scipy.histogram(arr, 256)
dim = len(arr.shape)

filter_size = int(event['filter_size'])

mean = scipy.ndimage.generic_filter(arr, scipy.mean, size = filter_size, mode = 'constant')

NameError: name 'np_array' is not defined

In [12]:
import math

In [11]:
median = scipy.ndimage.generic_filter(arr, scipy.median, size = filter_size, mode = 'constant')
    
skew = scipy.ndimage.generic_filter(arr, scipy.stats.skew, size = filter_size, mode = 'constant')

kurtosis = scipy.ndimage.generic_filter(arr, scipy.stats.kurtosis, size = filter_size, mode = 'constant')

uniformity = lambda arr : scipy.sum(np.square((scipy.histogram(arr, 256)[0])))
uniform = scipy.ndimage.generic_filter(arr, uniformity, size = filter_size, mode = 'constant')

def entropy(arr):
    log_ret = np.log2(scipy.histogram(arr, 256)[0])
    log_ret[log_ret==-np.inf]=0
    return np.dot(scipy.histogram(arr, 256)[0], log_ret)  
entropy_val = scipy.ndimage.generic_filter(arr, entropy, size = filter_size, mode = 'constant')

maximum = scipy.ndimage.generic_filter(arr, np.amax, size = filter_size, mode = 'constant')

minimum = scipy.ndimage.generic_filter(arr, np.amin, size = filter_size, mode = 'constant')


def energy(arr):
    return np.sum(np.square(arr))
energy_val = scipy.ndimage.generic_filter(arr, energy, size = filter_size, mode = 'constant')

def rms(arr):
    return math.sqrt(energy(arr) / arr.size)
rms_val = scipy.ndimage.generic_filter(arr, rms, size = filter_size, mode = 'constant')

def std(arr):
    return np.std(arr, ddof=1)
std_val = scipy.ndimage.generic_filter(arr, std, size = filter_size, mode = 'constant')



NameError: global name 'math' is not defined

In [5]:
# flattens regular images and squishes

import dropbox
import base64
import scipy
import numpy as np
import boto3
import boto
from boto.s3.key import Key
import uuid
from scipy import stats
from scipy import ndimage
import pandas
from PIL import Image
from StringIO import StringIO

#s3_client = boto3.client('s3')

# this function assumes that we already have a folder with the images on dropbox
# then it will translate all jpeg images into numpy arrays and flatten them
# then it squishes them together, and uploads onto s3 for training

def retrieve(event):
    result_list = []
    dclient = dropbox.client.DropboxClient(event["auth_token"])
    client = dropbox.Dropbox(event["auth_token"])
    metadata = dclient.metadata(event["folder_name"])
    paths = []
    shape_dir = None
    for content in metadata['contents']:
        if content['is_dir'] == False:
            paths.append(content['path'])

    reader = pandas.read_csv("trainLabels.csv")
    # TODO: find way not to hardcode this
    images = list(reader.image)
    levels = list(reader.level)

    label_dict = dict(zip(images, levels))

    for image in paths:
        f, metadata = client.files_download(image)
        image_name = image.split('/')[-1]
        data = metadata.content

        actual_name, extension = image_name.split(".")

        if extension == "dcm":
            f2 = open("/tmp/response_content.dcm", "wb")
            f2.write(data)
            f2.close()

            f2 = open("/tmp/response_content.dcm", "rb")
            ds = dicom.read_file(f2)
            img = ds.pixel_array
            f2.close()
        else:
            img = scipy.array(Image.open(StringIO(data)))
        
        # flatten
        flattened = img.flatten()
        label = label_dict[actual_name]
        final_arr = np.concatenate((flattened, [label]), axis=0)

        #squish
        result_list.append(final_arr) 

    return np.array(result_list)

def upload(event, context):
    number = event['number']
    conn = boto.connect_s3()
    b = conn.get_bucket('training-array')
    k = b.new_key('matrix' + str(number) + '.npy')

    training_matrix = retrieve(event)

    #download_path = '/tmp/{}{}'.format(uuid.uuid4(), key)
    upload_path = 'resized.npy'

    #s3_client.download_file(bucket, key, download_path)
    np.save(upload_path, training_matrix)
    k.set_contents_from_filename(upload_path)

    return 0

In [6]:
event = {
  "auth_token": "U_0fQNo3MSAAAAAAAAAC_wLTPcM6V7FcPQ5Fo_vl1X4Oa7lJKk01uKRklVWTrzCx",
  "folder_name": "/Radiomics",
  "path_to_labels": "trainLabels.csv",
  "number": "1"
}

In [7]:
upload(event, None)



0

In [9]:
arr = np.load("resized.npy")

In [10]:
arr

array([array([0, 0, 0, ..., 0, 0, 0]), array([0, 0, 0, ..., 0, 0, 0]),
       array([0, 0, 0, ..., 0, 0, 0]), array([0, 0, 0, ..., 0, 0, 0]),
       array([0, 0, 0, ..., 0, 0, 1]), array([0, 0, 0, ..., 0, 0, 2]),
       array([0, 0, 0, ..., 0, 0, 4]), array([0, 0, 0, ..., 0, 0, 4]),
       array([0, 0, 0, ..., 0, 0, 0]), array([0, 0, 0, ..., 0, 0, 1])], dtype=object)

In [18]:
arr[1].size

45163009

In [15]:
result_list = []
arr1 = np.array([0, 1, 2, 3])
result_list.append(arr1)
arr2 = np.array([0, 1, 2, 3])
result_list.append(arr2)
arr3 = np.array([0, 1, 2, 3])
result_list.append(arr3)
arr4 = np.array([0, 1, 2, 3])
result_list.append(arr4)

In [16]:
np.array(result_list)

array([[0, 1, 2, 3],
       [0, 1, 2, 3],
       [0, 1, 2, 3],
       [0, 1, 2, 3]])

In [33]:
import random
arr = np.zeros((256, 256, 3))
for i in range(arr.shape[0]):
    for j in range(arr.shape[1]):
        for k in range(arr.shape[2]):
            arr[i][j][k] = random.random()

In [44]:
arr.shape

(256, 256, 3)

In [38]:
%time
mean = scipy.ndimage.generic_filter(arr, scipy.mean, size = filter_size, mode = 'constant')

CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 8.11 µs
CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 5.01 µs


In [48]:
def energy(arr):
    return np.sum(np.square(arr))
energy_val = scipy.ndimage.generic_filter(arr, energy, size = filter_size, mode = 'constant')

In [41]:
maximum = scipy.ndimage.generic_filter(arr, np.amax, size = filter_size, mode = 'constant')

In [45]:
minimum = scipy.ndimage.generic_filter(arr, np.amin, size = filter_size, mode = 'constant')

In [47]:
def std(arr):
    return np.std(arr, ddof=1)
std_val = scipy.ndimage.generic_filter(arr, std, size = filter_size, mode = 'constant')

In [35]:
import math
#arr = np.empty((256, 256, 3))
filter_size = 3
mean = scipy.ndimage.generic_filter(arr, scipy.mean, size = filter_size, mode = 'constant')
median = scipy.ndimage.generic_filter(arr, scipy.median, size = filter_size, mode = 'constant')
    
skew = scipy.ndimage.generic_filter(arr, scipy.stats.skew, size = filter_size, mode = 'constant')

kurtosis = scipy.ndimage.generic_filter(arr, scipy.stats.kurtosis, size = filter_size, mode = 'constant')

uniformity = lambda arr : scipy.sum(np.square((scipy.histogram(arr, 256)[0])))
uniform = scipy.ndimage.generic_filter(arr, uniformity, size = filter_size, mode = 'constant')

def entropy(arr):
    log_ret = np.log2(scipy.histogram(arr, 256)[0])
    log_ret[log_ret==-np.inf]=0
    return np.dot(scipy.histogram(arr, 256)[0], log_ret)  
entropy_val = scipy.ndimage.generic_filter(arr, entropy, size = filter_size, mode = 'constant')

maximum = scipy.ndimage.generic_filter(arr, np.amax, size = filter_size, mode = 'constant')

minimum = scipy.ndimage.generic_filter(arr, np.amin, size = filter_size, mode = 'constant')


def energy(arr):
    return np.sum(np.square(arr))
energy_val = scipy.ndimage.generic_filter(arr, energy, size = filter_size, mode = 'constant')

def rms(arr):
    return math.sqrt(energy(arr) / arr.size)
rms_val = scipy.ndimage.generic_filter(arr, rms, size = filter_size, mode = 'constant')

def std(arr):
    return np.std(arr, ddof=1)
std_val = scipy.ndimage.generic_filter(arr, std, size = filter_size, mode = 'constant')

KeyboardInterrupt: 

In [51]:
total = []
for i in range(arr.shape[0] - filter_size + 1):
    for j in range(arr.shape[1] - filter_size + 1):
        for k in range(arr.shape[2] - filter_size + 1):
            row = arr[i:i+filter_size, j:j+filter_size, k:k+filter_size].flatten()
            row = np.append(row, mean[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
            row = np.append(row, maximum[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
            row = np.append(row, minimum[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
            row = np.append(row, energy_val[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
            row = np.append(row, std_val[i:i+filter_size,j:j+filter_size,k:k+filter_size].flatten())
        total.append(row)
result = np.array(total)

In [64]:
result.shape

(64516, 162)

In [63]:
256*256*49*3

9633792