In [1]:
import sys
import subprocess
import re
import os
import time
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import cmath
import operator
import random
from operator import itemgetter, attrgetter
import matplotlib.cm as cm
import vtk

## If includeing files from a different source path
# module_path = os.path.abspath(os.path.join('../scripts/'))
# if module_path not in sys.path:
#     sys.path.append(module_path)

## import MI measures
from mi_functions import *

In [2]:
# ## load vti data
file1 = '../Data/Isabel_vti/isabel_p_25.vti'
file2 = '../Data/Isabel_vti/isabel_vel_25.vti'
arrayName1 = 'ImageFile'
arrayName2 = 'Velocity'

R1 = vtk.vtkXMLImageDataReader()
R1.SetFileName(file1)
R1.Update()
dataArray1 = R1.GetOutput().GetPointData().GetArray(arrayName1)

R2 = vtk.vtkXMLImageDataReader()
R2.SetFileName(file2)
R2.Update()
dataArray2 = R2.GetOutput().GetPointData().GetArray(arrayName2)

var1=np.zeros(dataArray1.GetNumberOfTuples()) 
var2=np.zeros(dataArray2.GetNumberOfTuples()) 

for i in range(dataArray1.GetNumberOfTuples()):
    var1[i] = dataArray1.GetTuple1(i)
    var2[i] = dataArray2.GetTuple1(i)

dims = R1.GetOutput().GetDimensions()

min_var1 = np.min(var1)
max_var1 = np.max(var1)

min_var2 = np.min(var2)
max_var2 = np.max(var2)

In [3]:
percentageToStroe = 5
pointsToretain = int((len(var1)*percentageToStroe)/100.00)
print 'Percentage needed: ' + str(percentageToStroe)
print 'Points to be taken: ' + str(pointsToretain)

Percentage needed: 5
Points to be taken: 156250


In [4]:
## compute histogram and PMI
numBins=128
numSamples = np.shape(var1)[0]

hist1 = np.histogram(var1,bins=numBins)
hist2 = np.histogram(var2,bins=numBins)

Array1 = hist1[0]
Array2 = hist2[0]
ArrayComb = np.histogram2d(var1,var2,bins=numBins)[0]

In [5]:
class Histogram_1D:
    def __init__(self, freq, value, importance, bin_id):
        self.freq = freq
        self.value = value
        self.importance = importance
        self.bin_id = bin_id
    def __repr__(self):
        return repr((self.freq, self.value, self.importance, self.bin_id))
   
var1_hist=[]    
var2_hist=[] 
for i in range(numBins):
    var1_hist.append(Histogram_1D(hist1[0][i],hist1[1][i],0,i))
    var2_hist.append(Histogram_1D(hist2[0][i],hist2[1][i],0,i)) 
    
var1_hist = sorted(var1_hist, key=attrgetter('freq'))   
var2_hist = sorted(var2_hist, key=attrgetter('freq'))

In [6]:
## Compute probabiliy based importance function for var1
pointsToretain_temp = pointsToretain
pointsPerBin = int(pointsToretain_temp/float(numBins))
print 'Initial points per bin to be taken: ' + str(pointsPerBin)

currentBinFreq=0
binCounter=0
BinsLeft=numBins

#Compute the sampling/importance function
while True:
    var1_hist[binCounter].importance = 1.0
    BinsLeft = BinsLeft-1
    pointsToretain_temp = pointsToretain_temp - var1_hist[binCounter].freq
    binCounter=binCounter+1
    pointsPerBin = int(pointsToretain_temp/BinsLeft)
    currentBinFreq = var1_hist[binCounter].freq
    if (currentBinFreq > pointsPerBin):
        break
        
#Traverse the rest of the bins and assign sampling percentage
for i in range(binCounter,numBins):
    var1_hist[i].importance = pointsPerBin/float(var1_hist[i].freq)

expected_samples = 0
for i in range(numBins):
    expected_samples = expected_samples + var1_hist[i].importance*Array1[var1_hist[i].bin_id]
    
print 'Prob based sampling will pick on expectation: ' + str(int(expected_samples)) + str(' points') 

Initial points per bin to be taken: 1220
Prob based sampling will pick on expectation: 156214 points


In [7]:
def sample_prob_based(var1_hist,data,arrayName1):
    
    numPts = data.GetPointData().GetArray(arrayName1).GetNumberOfTuples()
    dataArr = data.GetPointData().GetArray(arrayName1)
    
    range_vals = dataArr.GetRange()    
    numBins = len(var1_hist)
    
    importance_vals = np.zeros(numBins)
    for i in range(numBins):
        importance_vals[var1_hist[i].bin_id] = var1_hist[i].importance
    
    pdata = vtk.vtkPolyData()
    pts_arr = vtk.vtkPoints()
    sampled_arr = vtk.vtkDoubleArray()
    sampled_arr.SetName(arrayName1)
    
    for i in range(numPts):
        randval = random.uniform(0, 1)
        
        dataval = dataArr.GetTuple1(i)
        bin_id = int(((dataval - range_vals[0])/(range_vals[1]-range_vals[0]))*(numBins-1))
        
        if randval <= importance_vals[bin_id]:
            pts_arr.InsertNextPoint(data.GetPoint(i))
            sampled_arr.InsertNextTuple1(dataval)
    
    pdata.SetPoints(pts_arr)
    pdata.GetPointData().AddArray(sampled_arr)
    
    return pdata

def write_vtp(filename,pdata):
    pdatawriter = vtk.vtkXMLPolyDataWriter()
    pdatawriter.SetFileName(filename)
    pdatawriter.SetInputData(pdata)
    pdatawriter.Write()

In [8]:
sampled_data = sample_prob_based(var1_hist,R1.GetOutput(),arrayName1) 
print 'Final sampled points: ' + str(sampled_data.GetNumberOfPoints())

outfile = "../output/sampled_prob.vtp"
write_vtp(outfile,sampled_data)

Final sampled points: 158043
