In [1]:
import sys
import subprocess
import re
import os
import time
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import cmath
import operator
import random
from operator import itemgetter, attrgetter
import matplotlib.cm as cm
import vtk

## import MI measures
from mi_functions import *

In [2]:
# # ## load vti data
# file1 = '../Data/Isabel_vti/isabel_p_25.vti'
# file2 = '../Data/Isabel_vti/isabel_vel_25.vti'
# arrayName1 = 'ImageFile'
# arrayName2 = 'Velocity'
# outfile = '../output/pmi_sampled_isabel_p.vtp'
# pmivol_file = '../output/pmi_vol_p_vel.vti'

## Nyx: high res
file1 = '/Users/sdutta/Data/Nyx/Density_small.vti'
file2 = '/Users/sdutta/Data/Nyx/Temperature_small.vti'
arrayName1 = 'Density'
arrayName2 = 'Temperature'
outfile = '../output/pmi_sampled_nyx.vtp'
pmivol_file = '../output/pmi_vol_dens_temp_nyx.vti'

R1 = vtk.vtkXMLImageDataReader()
R1.SetFileName(file1)
R1.Update()
dataArray1 = R1.GetOutput().GetPointData().GetArray(arrayName1)

R2 = vtk.vtkXMLImageDataReader()
R2.SetFileName(file2)
R2.Update()
dataArray2 = R2.GetOutput().GetPointData().GetArray(arrayName2)

var1=np.zeros(dataArray1.GetNumberOfTuples()) 
var2=np.zeros(dataArray2.GetNumberOfTuples()) 

for i in range(dataArray1.GetNumberOfTuples()):
    var1[i] = dataArray1.GetTuple1(i)
    var2[i] = dataArray2.GetTuple1(i)

dims = R1.GetOutput().GetDimensions()

min_var1 = np.min(var1)
max_var1 = np.max(var1)

min_var2 = np.min(var2)
max_var2 = np.max(var2)

In [3]:
totPercentage = 5
mixinig_factor_PMI = 1
percentageToStroe = totPercentage*mixinig_factor_PMI
percentage_rand = totPercentage*(1-mixinig_factor_PMI)
pointsToretain = int((len(var1)*percentageToStroe)/100.00)
print 'Percentage needed: ' + str(totPercentage)
print 'Points to be taken PMI based: ' + str(pointsToretain)
print 'Points to be taken randomly: ' + str(int((len(var1)*percentage_rand)/100.00))

Percentage needed: 5
Points to be taken PMI based: 838860
Points to be taken randomly: 0


In [4]:
## compute histogram and PMI
numBins=128
numSamples = np.shape(var1)[0]

hist1 = np.histogram(var1,bins=numBins)
hist2 = np.histogram(var2,bins=numBins)
hist2D = np.histogram2d(var1,var2,bins=numBins)

Array1 = hist1[0]
Array2 = hist2[0]
ArrayComb = hist2D[0]

## compute PMI
PMI = compute_pointwise_mutual_information(Array1,Array2,ArrayComb,numSamples,numBins)

max_pmi = np.max(PMI)
min_pmi = np.min(PMI)
print 'min pmi: ' + str(min_pmi)
print 'max pmi: ' + str(max_pmi)

# normalize PMI
PMI = (PMI-min_pmi)/(max_pmi-min_pmi)
max_pmi = np.max(PMI)
min_pmi = np.min(PMI)

## write PMI vol out
def write_PMI_vol(PMI,data1,data2,arrayName1,arrayName2,pmivol_file):
    
    dims = data1.GetDimensions()
    
    numBins = len(PMI)
    
    min_var1 = np.min(data1.GetPointData().GetArray(arrayName1))
    max_var1 = np.max(data1.GetPointData().GetArray(arrayName1))

    min_var2 = np.min(data2.GetPointData().GetArray(arrayName2))
    max_var2 = np.max(data2.GetPointData().GetArray(arrayName2))
    
    out_data = vtk.vtkImageData()
    out_data.SetDimensions(dims)
    out_data.SetSpacing(data1.GetSpacing())
    
    pmiArr = vtk.vtkDoubleArray()
    pmiArr.SetName('pmi_field')
    pmiArr.SetNumberOfTuples(dims[0]*dims[1]*dims[2])
    
    index=0
    for i in range(dims[2]):
        for j in range(dims[1]):
            for k in range(dims[0]):
                
                val1 = data1.GetPointData().GetArray(arrayName1).GetTuple1(index)
                val2 = data2.GetPointData().GetArray(arrayName2).GetTuple1(index)
                
                bin_id1 = int(((val1 - min_var1)/(max_var1-min_var1))*(numBins-1))
                bin_id2 = int(((val2 - min_var2)/(max_var2-min_var2))*(numBins-1))
                
                pmi_val = PMI[bin_id1][bin_id2]
                pmiArr.SetTuple1(index, pmi_val)
                index=index+1
    
    out_data.GetPointData().AddArray(pmiArr)
    
    writer = vtk.vtkXMLImageDataWriter()
    writer.SetFileName(pmivol_file)
    writer.SetInputData(out_data)
    writer.Write()
    
###################################################################################    
#write_PMI_vol(PMI,R1.GetOutput(),R2.GetOutput(),arrayName1,arrayName2,pmivol_file)    

min pmi: -15.831666593547466
max pmi: 17.426489684101192


In [5]:
class Histogram_1D:
    def __init__(self, freq, value, importance, bin_id):
        self.freq = freq
        self.value = value
        self.importance = importance
        self.bin_id = bin_id
    def __repr__(self):
        return repr((self.freq, self.value, self.importance, self.bin_id))
    
class PMI_2D:
    def __init__(self, freq, importance, bin_id1):
        self.freq = freq
        self.importance = importance
        self.bin_id1 = bin_id1
    def __repr__(self):
        return repr((self.freq, self.importance, self.bin_id1))    

    
var1_hist=[]    
for i in range(numBins):
    var1_hist.append(Histogram_1D(hist1[0][i],hist1[1][i],0,i))
    
var1_hist = sorted(var1_hist, key=attrgetter('freq'))

In [6]:
## Compute probabiliy based importance function
pointsToretain_temp = pointsToretain
pointsPerBin = int(pointsToretain_temp/float(numBins))

currentBinFreq=0
binCounter=0
BinsLeft=numBins

#Compute the sampling/importance function
while True:
    var1_hist[binCounter].importance = 1.0
    BinsLeft = BinsLeft-1
    pointsToretain_temp = pointsToretain_temp - var1_hist[binCounter].freq
    binCounter=binCounter+1
    pointsPerBin = int(pointsToretain_temp/BinsLeft)
    currentBinFreq = var1_hist[binCounter].freq
    if (currentBinFreq > pointsPerBin):
        break
        
#Traverse the rest of the bins and assign sampling percentage
for i in range(binCounter,numBins):
    var1_hist[i].importance = pointsPerBin/float(var1_hist[i].freq)

expected_samples = 0
for i in range(numBins):
    expected_samples = expected_samples + var1_hist[i].importance*Array1[var1_hist[i].bin_id]
print 'Prob based sampling will pick on expectation: ' + str(int(expected_samples)) + str(' points') 

Prob based sampling will pick on expectation: 838829 points


In [7]:
def compute_3d_to_1d_map (pts, dims):
    return pts[0] + dims[0]*(pts[1]+dims[1]*pts[2])


###############################################################
def compute_pmi_based_importance(var1_hist,PMI,hist2D):
    final_acc_func = np.zeros((numBins,numBins), dtype=np.float)
    
    importance_vals = np.zeros(numBins)
    for i in range(numBins):
        importance_vals[var1_hist[i].bin_id] = var1_hist[i].importance

    ## for each bin in 1D hist    
    for i in range(numBins):

        pts_to_take = int(importance_vals[i]*Array1[i])
        pointsToretain_temp = pts_to_take

        ## distribute points according PMI when importance is less than 1
        if importance_vals[i] < 1.0:

            ## sort based on PMI
            var1_temp=[]    
            for j in range(numBins):
                var1_temp.append(PMI_2D(hist2D[0][i][j], PMI[i][j], j))

            ## sort PMI-based high to low
            var1_temp = sorted( var1_temp, key=attrgetter('importance'), reverse=True )

            for j in range(numBins):
                if var1_temp[j].freq <= pointsToretain_temp: 
                    final_acc_func[i][var1_temp[j].bin_id1] = 1.0
                    pointsToretain_temp = pointsToretain_temp - var1_temp[j].freq
                else:
                    final_acc_func[i][var1_temp[j].bin_id1] = pointsToretain_temp/float(var1_temp[j].freq)
                    break

        ##Take all points when 1D prob-based importance is 1.0. No need to pick based on PMI
        else: 
            final_acc_func[:][i] = 1.0
        
    print 'Number of points in expectation will be picked: ' + str(int(np.sum(final_acc_func*ArrayComb)))
    
    return final_acc_func

######################################################################
def sample_pmi_based(final_acc_func,data1,data2,arrayName1,arrayName2):
    
    numBins = len(final_acc_func[0])
    
    numPts = data1.GetPointData().GetArray(arrayName1).GetNumberOfTuples()
    dataArr1 = data1.GetPointData().GetArray(arrayName1)
    dataArr2 = data2.GetPointData().GetArray(arrayName2)
    
    range_vals1 = dataArr1.GetRange() 
    range_vals2 = dataArr2.GetRange()
    
    pdata = vtk.vtkPolyData()
    pts_arr = vtk.vtkPoints()
    sampled_arr = vtk.vtkDoubleArray()
    sampled_arr.SetName(arrayName1)
    
    for i in range(numPts):
        randval = random.uniform(0, 1)
        
        dataval1 = dataArr1.GetTuple1(i)
        bin_id1 = int(((dataval1 - range_vals1[0])/(range_vals1[1]-range_vals1[0]))*(numBins-1))
        
        dataval2 = dataArr2.GetTuple1(i)
        bin_id2 = int(((dataval2 - range_vals2[0])/(range_vals2[1]-range_vals2[0]))*(numBins-1))
        
        if randval <= final_acc_func[bin_id1][bin_id2]:
            pts_arr.InsertNextPoint(data1.GetPoint(i))
            sampled_arr.InsertNextTuple1(dataval1)
    
    pdata.SetPoints(pts_arr)
    pdata.GetPointData().AddArray(sampled_arr)
    
    return pdata


######################################################################
def sample_random(data,arrayName1,percentage,sampled_data):
    
    totPts = data.GetNumberOfPoints()
    varArr = data.GetPointData().GetArray(arrayName1)
    numOfPtsNeeded = int((totPts*percentage)/100.0)
    
    print 'Number of randomly selected points: ' + str(numOfPtsNeeded)
    
    rand_ids = np.random.randint(totPts, size=numOfPtsNeeded)
    
    for i in range(len(rand_ids)):
        pts = data.GetPoint(rand_ids[i])
        var_val = varArr.GetTuple1(rand_ids[i])
        
        sampled_data.GetPoints().InsertNextPoint(pts)
        sampled_data.GetPointData().GetArray(arrayName1).InsertNextTuple1(var_val)
        
    return sampled_data

####################################################################
def insert_corner_pts(data,arrayName1,sampled_data):
    
    dims = data.GetDimensions()
    varArr = data.GetPointData().GetArray(arrayName1)
    
    pts = [0,0,0]
    idx = compute_3d_to_1d_map(pts,dims)
    pts = data.GetPoint(idx)
    var_val = varArr.GetTuple1(idx)
    sampled_data.GetPoints().InsertNextPoint(pts)
    sampled_data.GetPointData().GetArray(arrayName1).InsertNextTuple1(var_val)
    
    pts = [0,dims[1]-1,0]
    idx = compute_3d_to_1d_map(pts,dims)
    pts = data.GetPoint(idx)
    var_val = varArr.GetTuple1(idx)
    sampled_data.GetPoints().InsertNextPoint(pts)
    sampled_data.GetPointData().GetArray(arrayName1).InsertNextTuple1(var_val)
    
    pts = [0,0,dims[2]-1]
    idx = compute_3d_to_1d_map(pts,dims)
    pts = data.GetPoint(idx)
    var_val = varArr.GetTuple1(idx)
    sampled_data.GetPoints().InsertNextPoint(pts)
    sampled_data.GetPointData().GetArray(arrayName1).InsertNextTuple1(var_val)
    
    pts = [0,dims[1]-1,dims[2]-1]
    idx = compute_3d_to_1d_map(pts,dims)
    pts = data.GetPoint(idx)
    var_val = varArr.GetTuple1(idx)
    sampled_data.GetPoints().InsertNextPoint(pts)
    sampled_data.GetPointData().GetArray(arrayName1).InsertNextTuple1(var_val)
    
    pts = [dims[0]-1,0,0]
    idx = compute_3d_to_1d_map(pts,dims)
    pts = data.GetPoint(idx)
    var_val = varArr.GetTuple1(idx)
    sampled_data.GetPoints().InsertNextPoint(pts)
    sampled_data.GetPointData().GetArray(arrayName1).InsertNextTuple1(var_val)
    
    pts = [dims[0]-1,dims[1]-1,0]
    idx = compute_3d_to_1d_map(pts,dims)
    pts = data.GetPoint(idx)
    var_val = varArr.GetTuple1(idx)
    sampled_data.GetPoints().InsertNextPoint(pts)
    sampled_data.GetPointData().GetArray(arrayName1).InsertNextTuple1(var_val)
    
    pts = [dims[0]-1,0,dims[2]-1]
    idx = compute_3d_to_1d_map(pts,dims)
    pts = data.GetPoint(idx)
    var_val = varArr.GetTuple1(idx)
    sampled_data.GetPoints().InsertNextPoint(pts)
    sampled_data.GetPointData().GetArray(arrayName1).InsertNextTuple1(var_val)
    
    pts = [dims[0]-1,dims[1]-1,dims[2]-1]
    idx = compute_3d_to_1d_map(pts,dims)
    pts = data.GetPoint(idx)
    var_val = varArr.GetTuple1(idx)
    sampled_data.GetPoints().InsertNextPoint(pts)
    sampled_data.GetPointData().GetArray(arrayName1).InsertNextTuple1(var_val)
    
    return sampled_data
    

######################################################################
def write_vtp(filename,pdata):
    pdatawriter = vtk.vtkXMLPolyDataWriter()
    pdatawriter.SetFileName(filename)
    pdatawriter.SetInputData(pdata)
    pdatawriter.Write()

In [8]:
## compute 2D importance function
final_acc_func = compute_pmi_based_importance(var1_hist,PMI,hist2D)  

## Sample based on importance function
sampled_data = sample_pmi_based(final_acc_func,R1.GetOutput(),R2.GetOutput(),arrayName1,arrayName2) 

## add random sampled points
sampled_data_mixed = sample_random(R1.GetOutput(),arrayName1,percentage_rand,sampled_data)

## add the 8 corner points for linear interpolation to work
sampled_data_final = insert_corner_pts(R1.GetOutput(),arrayName1,sampled_data_mixed)

Number of points in expectation will be picked: 838824
Number of randomly selected points: 0


In [9]:
print 'Final number of sampled points picked is: ' + str(sampled_data_final.GetNumberOfPoints()) + ' and percentage: ' + \
str(100*(sampled_data_final.GetNumberOfPoints()/float(len(var1))))

## write the data out
write_vtp(outfile,sampled_data_final)

Final number of sampled points picked is: 818387 and percentage: 4.87796664238
