In [1]:
import sys
import subprocess
import re
import os
import time
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import cmath
import operator
import random
from operator import itemgetter, attrgetter
import matplotlib.cm as cm
import vtk

## If includeing files from a different source path
# module_path = os.path.abspath(os.path.join('../scripts/'))
# if module_path not in sys.path:
#     sys.path.append(module_path)

## import MI measures
from mi_functions import *

In [2]:
# ## load vti data
file1 = '../Data/Isabel_vti/isabel_p_25.vti'
file2 = '../Data/Isabel_vti/isabel_vel_25.vti'
arrayName1 = 'ImageFile'
arrayName2 = 'Velocity'

R1 = vtk.vtkXMLImageDataReader()
R1.SetFileName(file1)
R1.Update()
dataArray1 = R1.GetOutput().GetPointData().GetArray(arrayName1)

R2 = vtk.vtkXMLImageDataReader()
R2.SetFileName(file2)
R2.Update()
dataArray2 = R2.GetOutput().GetPointData().GetArray(arrayName2)

var1=np.zeros(dataArray1.GetNumberOfTuples()) 
var2=np.zeros(dataArray2.GetNumberOfTuples()) 

for i in range(dataArray1.GetNumberOfTuples()):
    var1[i] = dataArray1.GetTuple1(i)
    var2[i] = dataArray2.GetTuple1(i)

dims = R1.GetOutput().GetDimensions()

min_var1 = np.min(var1)
max_var1 = np.max(var1)

min_var2 = np.min(var2)
max_var2 = np.max(var2)

In [3]:
percentageToStroe = 3
pointsToretain = int((len(var1)*percentageToStroe)/100.00)
print 'Percentage needed: ' + str(percentageToStroe)
print 'Points to be taken: ' + str(pointsToretain)

Percentage needed: 3
Points to be taken: 93750


In [4]:
## compute histogram and PMI
numBins=128
numSamples = np.shape(var1)[0]

hist1 = np.histogram(var1,bins=numBins)
hist2 = np.histogram(var2,bins=numBins)
hist2D = np.histogram2d(var1,var2,bins=numBins)

Array1 = hist1[0]
Array2 = hist2[0]
ArrayComb = hist2D[0]

## compute PMI
PMI = compute_pointwise_mutual_information(Array1,Array2,ArrayComb,numSamples,numBins)

max_pmi = np.max(PMI)
min_pmi = np.min(PMI)
print 'min pmi: ' + str(min_pmi)
print 'max pmi: ' + str(max_pmi)

# normalize PMI
PMI = (PMI-min_pmi)/(max_pmi-min_pmi)
max_pmi = np.max(PMI)
min_pmi = np.min(PMI)

min pmi: -7.985661391117528
max pmi: 11.96071491498369


In [5]:
class Histogram_1D:
    def __init__(self, freq, value, importance, bin_id):
        self.freq = freq
        self.value = value
        self.importance = importance
        self.bin_id = bin_id
    def __repr__(self):
        return repr((self.freq, self.value, self.importance, self.bin_id))
    
class PMI_2D:
    def __init__(self, freq, importance, bin_id1):
        self.freq = freq
        self.importance = importance
        self.bin_id1 = bin_id1
    def __repr__(self):
        return repr((self.freq, self.importance, self.bin_id1))    

var1_hist=[]    
for i in range(numBins):
    var1_hist.append(Histogram_1D(hist1[0][i],hist1[1][i],0,i))
    
var1_hist = sorted(var1_hist, key=attrgetter('freq'))

In [6]:
## Compute probabiliy based importance function
pointsToretain_temp = pointsToretain
pointsPerBin = int(pointsToretain_temp/float(numBins))

currentBinFreq=0
binCounter=0
BinsLeft=numBins

#Compute the sampling/importance function
while True:
    var1_hist[binCounter].importance = 1.0
    BinsLeft = BinsLeft-1
    pointsToretain_temp = pointsToretain_temp - var1_hist[binCounter].freq
    binCounter=binCounter+1
    pointsPerBin = int(pointsToretain_temp/BinsLeft)
    currentBinFreq = var1_hist[binCounter].freq
    if (currentBinFreq > pointsPerBin):
        break
        
#Traverse the rest of the bins and assign sampling percentage
for i in range(binCounter,numBins):
    var1_hist[i].importance = pointsPerBin/float(var1_hist[i].freq)

expected_samples = 0
for i in range(numBins):
    expected_samples = expected_samples + var1_hist[i].importance*Array1[var1_hist[i].bin_id]
print 'Prob based sampling will pick on expectation: ' + str(int(expected_samples)) + str(' points') 

Prob based sampling will pick on expectation: 93713 points


In [7]:
###############################################################
def compute_pmi_based_importance(var1_hist,PMI,hist2D):
    final_acc_func = np.zeros((numBins,numBins), dtype=np.float)
    
    importance_vals = np.zeros(numBins)
    for i in range(numBins):
        importance_vals[var1_hist[i].bin_id] = var1_hist[i].importance

    ## for each bin in 1D hist    
    for i in range(numBins):

        pts_to_take = int(importance_vals[i]*Array1[i])
        pointsToretain_temp = pts_to_take

        ## distribute points according PMI when importance is less than 1
        if importance_vals[i] < 1.0:

            ## sort based on PMI
            var1_temp=[]    
            for j in range(numBins):
                var1_temp.append(PMI_2D(hist2D[0][i][j], PMI[i][j], j))

            ## sort PMI-based high to low
            var1_temp = sorted( var1_temp, key=attrgetter('importance'), reverse=True )

            for j in range(numBins):
                if var1_temp[j].freq <= pointsToretain_temp: 
                    final_acc_func[i][var1_temp[j].bin_id1] = 1.0
                    pointsToretain_temp = pointsToretain_temp - var1_temp[j].freq
                else:
                    final_acc_func[i][var1_temp[j].bin_id1] = pointsToretain_temp/float(var1_temp[j].freq)
                    break

        ##Take all points when 1D prob-based importance is 1.0. No need to pick based on PMI
        else: 
            final_acc_func[:][i] = 1.0
        
    print 'Number of points in expectation will be picked: ' + str(int(np.sum(final_acc_func*ArrayComb)))
    
    return final_acc_func

###############################################################
def sample_pmi_based(final_acc_func,data1,data2,arrayName1,arrayName2):
    
    numBins = len(final_acc_func[0])
    
    numPts = data1.GetPointData().GetArray(arrayName1).GetNumberOfTuples()
    dataArr1 = data1.GetPointData().GetArray(arrayName1)
    dataArr2 = data2.GetPointData().GetArray(arrayName2)
    
    range_vals1 = dataArr1.GetRange() 
    range_vals2 = dataArr2.GetRange()
    
    pdata = vtk.vtkPolyData()
    pts_arr = vtk.vtkPoints()
    sampled_arr = vtk.vtkDoubleArray()
    sampled_arr.SetName(arrayName1)
    
    for i in range(numPts):
        randval = random.uniform(0, 1)
        
        dataval1 = dataArr1.GetTuple1(i)
        bin_id1 = int(((dataval1 - range_vals1[0])/(range_vals1[1]-range_vals1[0]))*(numBins-1))
        
        dataval2 = dataArr2.GetTuple1(i)
        bin_id2 = int(((dataval2 - range_vals2[0])/(range_vals2[1]-range_vals2[0]))*(numBins-1))
        
        if randval <= final_acc_func[bin_id1][bin_id2]:
            pts_arr.InsertNextPoint(data1.GetPoint(i))
            sampled_arr.InsertNextTuple1(dataval1)
    
    pdata.SetPoints(pts_arr)
    pdata.GetPointData().AddArray(sampled_arr)
    
    return pdata

def write_vtp(filename,pdata):
    pdatawriter = vtk.vtkXMLPolyDataWriter()
    pdatawriter.SetFileName(filename)
    pdatawriter.SetInputData(pdata)
    pdatawriter.Write()

In [8]:
final_acc_func = compute_pmi_based_importance(var1_hist,PMI,hist2D)  
sampled_data = sample_pmi_based(final_acc_func,R1.GetOutput(),R2.GetOutput(),arrayName1,arrayName2) 
print 'Final number of sampled points picked is: ' + str(sampled_data.GetNumberOfPoints()) + ' and percentage: ' + \
str(100*(sampled_data.GetNumberOfPoints()/float(len(var1))))

outfile = "../output/pmi_sampled_prob.vtp"
write_vtp(outfile,sampled_data)

Number of points in expectation will be picked: 93713
Final number of sampled points picked is: 80705 and percentage: 2.58256
