In [172]:
from segpy.reader import create_reader
import numpy as np
import os
import pywt
from scipy.fftpack import dct
from scipy.fftpack import idct

In [173]:
dataset = []

directory = r'C:\Users\user\Desktop\2D\Correlated_Shot_Gathers'

for file_name in os.listdir(directory):
    
    if file_name != ".segpy":
        
        with open(directory + "\\" + file_name, 'rb') as segy_in_file:

            seg_y_dataset = create_reader(segy_in_file, endian='<')

            for i in seg_y_dataset.trace_indexes():
                dataset.append(seg_y_dataset.trace_samples(i))
            
dataset = np.array(dataset)

In [174]:
max_dataset = np.max(dataset, axis = 1)
min_dataset = np.min(dataset, axis = 1)

In [175]:
dataset = (dataset - np.repeat(np.expand_dims(min_dataset,axis = 1), 4001, axis = 1))/(np.repeat(np.expand_dims(max_dataset,axis = 1), 4001, axis = 1) - np.repeat(np.expand_dims(min_dataset,axis = 1), 4001, axis = 1))

In [176]:
def compute_dct(dataset, block_size = 32):
    
    dataset = dataset[:,:len(dataset[0]) - len(dataset[0])%block_size]
    dataset = dataset.reshape((len(dataset), len(dataset[0])//block_size, block_size))
    dataset = dct(dataset, norm = 'ortho', axis = 2)
    
    return dataset.reshape((len(dataset), dataset.shape[1]*dataset.shape[2]))

In [177]:
from sklearn.cluster import KMeans

def quantize(signal, levels):
    
    kmeans = KMeans(n_clusters = levels).fit(signal.reshape(-1,1))
    
    return kmeans.labels_, kmeans.cluster_centers_.flatten()

In [178]:
def symbol_seqsymbols, levels):

    return {i:np.mean(symbols == i) for i in np.arange(levels)}

SyntaxError: invalid syntax (<ipython-input-178-c1369a467859>, line 1)

In [None]:
def get_huffman_codes(symbol_probabilities):
    
    huffman_code = {str(key):'' for key in symbol_probabilities.keys()}
    symbol_probabilities = {str(key):symbol_probabilities[key] for key in symbol_probabilities.keys()}
    
    while(len(symbol_probabilities) != 1):
        
        key1 = min(symbol_probabilities, key=symbol_probabilities.get)
        value1 = symbol_probabilities[key1]
        symbol_probabilities.pop(key1)
        
        key2 = min(symbol_probabilities, key=symbol_probabilities.get)
        value2 = symbol_probabilities[key2]
        symbol_probabilities.pop(key2)
        
        for symbol in key1.split('|'):
            huffman_code[symbol] += '1'
            
        for symbol in key2.split('|'):
            huffman_code[symbol] += '0'
            
        symbol_probabilities[key1 + '|' + key2] = value1 + value2
            
    return {int(key):huffman_code[key][::-1] for key in huffman_code.keys()}

In [None]:
def compute_idct(dataset, block_size = 32):
    
    dataset = dataset.reshape((len(dataset), len(dataset[0])//block_size, block_size))
    dataset = idct(dataset, norm = 'ortho', axis = 2)
    
    return dataset.reshape((len(dataset), dataset.shape[1]*dataset.shape[2]))

In [None]:
def get_metrics(x,x_pred):
    nmse = np.sum(np.square(x - x_pred))/np.sum(np.square(x))
    nrmse = np.sqrt(np.mean(np.square(x - x_pred)))/(np.amax(x) - np.amin(x))
    snr = -10*np.log10(nmse)
    return nmse, nrmse, snr

In [179]:
def get_cr_and_metrics(dataset, block_size = 32, level = 8):
    
    nmse, nrmse, snr = np.zeros(len(dataset)), np.zeros(len(dataset)), np.zeros(len(dataset))
    
    dct_dataset = compute_dct(dataset, block_size)
    
    quantized_dct_dataset = np.zeros(np.shape(dct_dataset))
    
    total_length = 0
    
    for i, sample in enumerate(dct_dataset):
        sym, val = quantize(sample, level)
        quantized_dct_dataset[i] = np.array([val[j] for j in sym])
        
        for key, value in get_huffman_codes(get_probability_dictionary(sym, level)).items():
            total_length += np.sum(sym == key)*len(value)
        
    reconstructed_dataset = compute_idct(quantized_dct_dataset, block_size)
    
    for j in range(len(dataset)):
        nmse[j], nrmse[j], snr[j] = get_metrics(dataset[j,:len(reconstructed_dataset[0])], reconstructed_dataset[j])
        
    return 4001*64*len(dataset)/total_length, np.mean(nmse), np.mean(nrmse), np.mean(snr)

In [181]:
get_cr_and_metrics(dataset[900:1000], block_size = 32, level = 2)

(64.016, 0.02468293508842079, 0.07496052558016722, 17.226486039779356)

In [182]:
get_cr_and_metrics(dataset[900:1000], block_size = 32, level = 4)

(60.81644673715811,
 0.012699658914758423,
 0.05353315646203852,
 20.21562790362923)

In [183]:
get_cr_and_metrics(dataset[900:1000], block_size = 32, level = 8)

(56.31394213429278,
 0.0038784370393522734,
 0.02963951549634352,
 25.379319127371154)

In [184]:
get_cr_and_metrics(dataset[900:1000], block_size = 32, level = 16)

(50.19534044909681,
 0.0009332954307914011,
 0.014462819529840434,
 31.73401582702686)

In [185]:
get_cr_and_metrics(dataset[900:1000], block_size = 32, level = 32)

(41.03333285259415,
 0.00018965165385133182,
 0.006436731075366694,
 38.944619141611795)

In [186]:
get_cr_and_metrics(dataset[900:1000], block_size = 32, level = 64)

(30.175943057143193,
 3.0992785521815316e-05,
 0.0025659117798418683,
 47.181498937837894)

In [188]:
get_cr_and_metrics(dataset[900:1000], block_size = 32, level = 128)

(20.3858815287182,
 3.686440832473809e-06,
 0.0008750103959432993,
 56.796527051268484)

In [189]:
get_cr_and_metrics(dataset[900:1000], block_size = 32, level = 1)

  return 4001*64*len(dataset)/total_length, np.mean(nmse), np.mean(nrmse), np.mean(snr)


(inf, 0.9695774326130164, 0.5057045108468173, 0.1341764919036728)