In [1]:
import numpy as np
np.random.seed(0) 

from scipy.stats import multivariate_normal
def get_Data(filename):
    with open(filename) as f:
        return f.readlines()[0]

def get_params(filename):
    with open(filename) as f:
        all_lines = f.readlines()
        h = all_lines[0].strip().split()
        h = np.array(h).astype(np.float)
        variance = float(all_lines[-1].strip())
        sigma = np.sqrt(variance) 
        return h, sigma

train_data = get_Data('train.txt')

test_data = get_Data('test.txt')

h, sigma = get_params('parameters.txt')

In [2]:
n = 2
l = 2
cluster_count = 8   

bits_map_mean = [[] for _ in range(cluster_count)]

bits_map_covar = [[] for _ in range(cluster_count)]

bits_map_prior =  np.zeros(cluster_count)

bits_map_zero_trans =  np.zeros(cluster_count)

bits_map_one_trans =  np.zeros(cluster_count)

cluster_bits = {
    0: '000', 1: '001', 2: '010', 3: '011', 4: '100', 5: '101', 6: '110', 7: '111'
}

cluster_mapper = {
    '000': 0, '001': 1, '010': 2, '011': 3, '100': 4, '101': 5, '110': 6, '111': 7
}


In [3]:
def distortion_val(I_k):
    return (int(I_k[0]) * h[0] + int(I_k[1]) * h[1] + np.random.normal(0, sigma, 1))[0]

In [4]:
def Distorted_Signal(I):
    bits_map_members = [[] for _ in range(cluster_count)]
    signal_len = len(I)
    x = np.zeros([signal_len])
    x[0] = int(I[0]) * h[1] + np.random.normal(0, sigma, 1) 
    
    for i in range(n, signal_len+1):
        I_vec = I[i-n: i]
        x[i-1] = distortion_val(I_vec)
        if (i-3) > 0:
            cur_val = I[i-3: i]
            cur_cluster = cluster_mapper[cur_val]
            bits_map_members[cur_cluster] += [x[i-l: i]]
            if i < signal_len:
                #ToDo:cluster_to_cluster_probability
                if (I[i] == '0'):
                    bits_map_zero_trans[cur_cluster] += 1
                elif (I[i] == '1'):
                    bits_map_one_trans[cur_cluster] += 1
                    
        
    for cur_cluster in range(cluster_count):
        #cur_cluster = bits_map_to_w[I_temp]
        bits_map_members[cur_cluster] = np.array(bits_map_members[cur_cluster])
        #print(bits_map_members[cur_cluster].shape)
        bits_map_mean[cur_cluster] = bits_map_members[cur_cluster].mean(0)
        bits_map_covar[cur_cluster] = np.cov(bits_map_members[cur_cluster].T)
        #set_prior_probablity
        bits_map_prior[cur_cluster] = bits_map_members[cur_cluster].shape[0] / (signal_len - n + 1)
        
        bits_map_one_trans[cur_cluster] = bits_map_one_trans[cur_cluster]/(bits_map_zero_trans[cur_cluster] + bits_map_one_trans[cur_cluster])
        bits_map_zero_trans[cur_cluster] = 1. - bits_map_one_trans[cur_cluster]
        
#         print(w_to_bits_map[cur_cluster], ':', bits_map_members[cur_cluster],
#              bits_map_mean[cur_cluster],bits_map_covar[cur_cluster],bits_map_prior[cur_cluster],
#              bits_map_zero_trans[cur_cluster], bits_map_one_trans[cur_cluster])
        
    return x

In [5]:
x = Distorted_Signal(I=train_data)

In [6]:
def Method_1(w_i, w_i_prev, x_bar_k):
    # eq 5
    p_w = 0.
    if w_i_prev is None:  
        p_w = bits_map_prior[w_i] 
    # eq 4    
    elif cluster_bits[w_i][1:] == cluster_bits[w_i_prev][:-1]: 
        if cluster_bits[w_i][0] == '0':
            p_w = bits_map_zero_trans[w_i]
        else:
            p_w = bits_map_one_trans[w_i] 
            
    # conditional probability
    p_x_w = multivariate_normal.pdf(x_bar_k, mean=bits_map_mean[w_i], cov=bits_map_covar[w_i])     
    
    # to dismiss dbz
    return np.log((p_w * p_x_w)+np.finfo(float).eps) 

def Method_2(w_i, x_bar_k):
    # eq 6
    return np.linalg.norm(x_bar_k - bits_map_mean[w_i]) 

In [7]:
def file_save(filename, input_val):
    f = open(filename, "w")
    f.write(input_val)
    f.close()

In [8]:
def Method_1_run(I, train_data): 
    
    signal_len = len(I) 
    x = np.zeros([signal_len])
    x[0] = int(I[0]) * h[1] + np.random.normal(0, sigma, 1) 
    
    cost_matrix = np.zeros([signal_len, cluster_count])
    parents = np.zeros([signal_len, cluster_count]).astype(int)
    
    
    for i in range(1, signal_len): 
        I_vec = I[i-n+1 : i+1]
        x[i] = distortion_val(I_vec)
        x_bar_k = x[i-1 : i+1]
        
        for j in range(cluster_count):
            temp = []
            for k in range(cluster_count):
                w_i_prev = None
                if i != 1:
                    w_i_prev = k
                temp.append(cost_matrix[i-1, k] + Method_1(w_i=j, w_i_prev=w_i_prev, x_bar_k=x_bar_k))
            
            best_succesor = np.argmax(temp)
            cost_matrix[i, j] = temp[best_succesor]
            parents[i, j] = best_succesor
    
    decoded_class = []
    last_bit = cost_matrix[-1].argmax()
    decoded_class.insert(0,last_bit)
    parent = parents[-1, last_bit]

    for i in range(cost_matrix.shape[0]-1, 0, -1):
        decoded_class.insert(0,parent)
        parent = parents[i, decoded_class[0]]
        
    correct = 0
    inferred_bits = ''
    for i in range(cost_matrix.shape[0]):
        inferred_bits+=(cluster_bits[decoded_class[i]][-1])
        if cluster_bits[decoded_class[i]][-1] == train_data[i]:
            correct += 1
    
    print("accuracy:",(100*correct)/cost_matrix.shape[0])
    file_save("out1.txt",inferred_bits)

In [9]:
Method_1_run(test_data, train_data)

accuracy: 54.0


In [10]:
def Method_2_run(I, train_data): 
    signal_len = len(I) 
    x = np.zeros([signal_len])
    x[0] = int(I[0]) * h[1] + np.random.normal(0, sigma, 1) # as there is no k-1 in this case
    
    decoded_class = ['0',]
    for i in range(n, signal_len+1):
        I_vec = I[i-n: i]
        x[i-1] = distortion_val(I_vec)
        x_bar_k = x[i-l: i]
        
        d_val = []
        for j in range(cluster_count):
            d_w_k = Method_2(j, x_bar_k)
            d_val.append(d_w_k)
            
        best_cluster = np.argmin(d_val)        
        decoded_class.append(cluster_bits[best_cluster][-1])
        
    correct = 0
    for i in range(len(decoded_class)):
        if decoded_class[i] == train_data[i]:
            correct += 1
    
    print("accuracy:",(100*correct)/len(decoded_class))
    file_save("out2.txt",''.join(decoded_class))

In [11]:
Method_2_run(test_data, train_data)

accuracy: 99.0
