### This IPython notebook defines several variations of convolutional neural networks for channel estimation. The training inputs are the preamble + preamble passed through channel; the predicted output is the channel taps that correspond to the input. We explore several ideas here:
#### (A) multi-scale convolution (learned) filters applied separately to the [preamble input] and to the [preamble thorugh channel] input
#### (B) multi-scale convolution (learned) filters applied to both (e.g., 2D convolution filters)

<pre>
model 1:
   preamble -> conv1_1 -> conv2_1 -> conv3_1 ->\
                                                concat -> conv1 -> fc2 -> channel
   received -> conv1_2 -> conv2_2 -> conv3_2 ->/
   
model 2:
   preamble -> conv1 -> conv2 -> conv3 ->\
                                          concat -> conv1 -> fc2 -> channel
   received -> conv1 -> conv2 -> conv3 ->/
   
model 3:
     [   |        |    ]
     [preamble received] -> conv1 -> conv2 -> conv3 -> conv4 -> fc2 -> channel
     [   |        |    ]
</pre>

We make several assumptions about the channel model here as well:
* Channel length is <= 20
* Channel energy (am I saying this correctly?) is 1 (also, does normalizing channel taps by l2 norm ensure this?)
* Channel is sparse (most entries near 0, except for a few spikes)
  * Potential simplifying assumption (maybe include initially?) first entry of channel is 'large'
  
  
Questions: 
1. for my preamble, I am using +/- 1; Nikhil used 1/0 .. which is correct? (It should not matter really for training/testing since it is a simple affine transform between the two, but I want to do the "correct" thing)
2. do my assumptions make sense? for a real model I mean
3. am I adding noise correctly for the SNR I am setting
4. More of a "TODO" but...I am only training and testing on preamble inputs, not additional data -- the reasoning is that for additional data, we really want something that handles sequences (e.g., and RNN) in my opinion and this is more of an exploratoration of convolutional layers here

## ALSO NOTE: I am making a lot of things very modular on purpose..I want to discuss with everyone the problem statement again (I still feel like a lot of things are unclear/ambiguous) and then we can move a lot of this modular code to a rigid "util.py" file that everyone should import from so that we can more easily guarantee correctness and consistency and speed up development time.

In [1]:
# standard imports
import numpy as np
import scipy.signal as sig
import matplotlib.pyplot as plt
import tensorflow as tf
%matplotlib inline

  from ._conv import register_converters as _register_converters


In [2]:
# utility functions...we really should standardize this in a Python file [TODO!!!!]
"""Generates random sequence [1 1 1 -1 1 -1 -1 ...] of length LENGTH."""
def gen_preamble(length=100):
    return np.random.randint(2, size=(1,length))*2 - 1

"""Generates N channels of length LENGTH, each with NUM_TAPS taps. This
   means that NUM_TAPS of the entries will be non-zero, and the rest will
   be 'close' to 0 (e.g., noise). 
   Example below.
   
   >>> np.around(gen_channel(),2)
   >>> array([[-0.08,  0.  , -0.06,  0.02,  0.  ,  0.02, -0.85,  0.05, -0.03,
        -0.07,  0.5 , -0.02, -0.  , -0.05, -0.  ,  0.03, -0.07, -0.04,
        -0.01,  0.08]])"""
def gen_channel(N=1,num_taps=2,length=20):
    ret = np.zeros((N, length))
    tap_idxs = np.random.randint(length, size=(N, num_taps))
    tap_vals = ((np.random.randint(10, size=(N, num_taps))+1)*\
                (np.random.randint(2, size=(N, num_taps))*2 - 1))\
                / 10.
    for i in range(N):
        np.put(ret[i], tap_idxs[i], tap_vals[i])
    ret += 5e-2*np.random.randn(N,length)
    return ret / np.linalg.norm(ret,axis=1,keepdims=True)

"""Simulates passing data through a noisy channel.
   If SNR == -1, then no noise. Otherwise, uses AWGN model.
   
   Returned value has shape (1, len(channel.T) + len(data.T) - 1).
   With default settings, this means it is (1, 119)."""
def apply_channel(channel, data, snr=-1):
    ret = sig.convolve(data, channel, mode='full')
    if snr > 0:
        ret += (1./np.sqrt(snr)) * np.random.randn(len(ret))
    return ret

In [172]:
# functions for networks..should also put this in util.py!
"""Run before building a new network. Rests randomization for repeatability."""
def reset():
    tf.reset_default_graph()
    np.random.seed(0)
    tf.set_random_seed(0)
    
"""Defines the loss function."""
def define_loss(placeholders, loss_type):
    output, correct_output = placeholders
    return tf.reduce_mean(tf.reduce_sum((output-correct_output)**2, axis=1))
    
"""Defines the optimizer."""
def define_optimizer(loss, trainable_weights, optimizer, lr):
    opt = tf.train.AdamOptimizer(lr)
    gradients = opt.compute_gradients(loss, trainable_weights)
    train_step = opt.apply_gradients(gradients)
    return train_step

"""Defines a trainable variable with truncated normal initialization."""
def define_variable(name, shape, stddev):
    var = tf.get_variable(name, shape, initializer=
                    tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32),
                    dtype=tf.float32)
    return var
"""* 'preamble_len' : length of preamble; [default = 100]
   * 'channel_len' : length of channel; [default = 20]
   * 'use_max_pool': True to use max pooling in first part of net; [default = False]
   * 'loss' : loss function to use
   * 'optimizer' : optimizer to use
   * 'lr' : base learning rate
   
   """
def load_params(params, default_params):
    if params == None:
        params = {}
    for k in default_params.keys():
        if k not in params:
            params[k] = default_params[k]
    return params

## These networks assume 1 fixed preamble.

In [182]:
# define the networks
"""Builds the network [model 1] -- a basic convolution network; use as a base
   for the next network models.
   
   preamble -> conv1_1 -> conv2_1 -> conv3_1 ->\
                                                concat -> conv1 -> fc2 -> channel_est
   received -> conv1_2 -> conv2_2 -> conv3_2 ->/
"""    
def build_network1(params=None):
    default_params = {'use_max_pool':False, 'loss':"", 'optimizer':"", 'lr':1e-4,
                   'filters':[30,30,10,10], 'filter_sizes':[3,3,3,3]}
    params = load_params(params,default_params)
            
    preamble = tf.placeholder(tf.float32, [1, params['preamble_len'], 1], name="preamble_input")
    # use same length as preamble as per discussion on April 12
    received = tf.placeholder(tf.float32, [None, params['preamble_len'], 1], name="received_preamble")
    channel_true = tf.placeholder(tf.float32, [None, params['channel_len']])
    batch_size = tf.shape(received)[0]
    
    inputs=[preamble,received,channel_true]
    outputs=[]
    weights=[]
    
    nets=[preamble,received]
    
    # Process PREAMBLE and RECEIVED separately through convolutions
    num_filters = [1] + params['filters']
    for i in [1,2]:
        net = nets[i-1]
        for j in range(1, len(num_filters)-1):
            num_filter = num_filters[j]
            prev = num_filters[j-1]
            with tf.variable_scope("conv%d_%d" % (j+1, i)) as scope:
                # use same weight initializer for all, and always use 3x_ convolutions
                kernel = define_variable('conv_weights', [params['filter_sizes'][j-1],
                                                          prev, num_filter], 5e-2)
                biases = define_variable('conv_biases', [num_filter], 5e-3)
                weights.extend([kernel, biases])
                # apply network
                net = tf.nn.conv1d(net, kernel, stride=1, padding='SAME')
                net = tf.nn.bias_add(net, biases)
                net = tf.nn.relu(net)
                if params['use_max_pool']:
                    net = tf.nn.max_pool(net, [1, 3, 1], [1, 2, 1], padding='SAME')
        nets[i-1] = net
        
    # Concatenate
    nets[0] = tf.tile(nets[0], [batch_size, 1, 1])
    output = tf.concat(nets, axis=1)
    num_concat_layers = len(params['filter_sizes'])
    with tf.variable_scope("conv1_concat") as scope:
        kernel = define_variable('conv_weights', [params['filter_sizes'][-1], 
                                                  num_filters[-2], num_filters[-1]], 5e-2)
        biases = define_variable('conv_biases', [num_filters[-1]], 5e-3)
        weights.extend([kernel, biases])
        # apply network
        net = tf.nn.conv1d(net, kernel, stride=1, padding='SAME')
        net = tf.nn.bias_add(net, biases)
        net = tf.nn.relu(net)
    with tf.variable_scope("fc2_concat") as scope:
        dim = output.get_shape()[1].value*output.get_shape()[2].value
        batch_size = tf.shape(output)[0]
        
        kernel = define_variable('conv_weights', [dim, params['channel_len']], 5e-2)
        biases = define_variable('conv_biases', [params['channel_len']], 5e-3)
        weights.extend([kernel, biases])
        # apply network
        output = tf.reshape(output, [batch_size, -1])
        output = tf.matmul(output, kernel) + biases
    
    outputs=[output]
    
    loss = define_loss([output, channel_true], params['loss'])
    train = define_optimizer(loss, weights, params['optimizer'], params['lr'])

    return inputs, outputs, weights, loss, train

"""Builds the network [model 2] -- a basic convolution network; use as a base
   for the next network models. [difference from model 1: share initial convolutions]
   
   preamble -> conv1 -> conv2 -> conv3 ->\
                                          concat -> conv1 -> fc2 -> channel_est
   received -> conv1 -> conv2 -> conv3 ->/
   
   
   """
def build_network2(params=None):
    default_params = {'use_max_pool':False, 'loss':"", 'optimizer':"", 'lr':1e-4,
                   'filters':[30,30,10,10], 'filter_sizes':[3,3,3,3]}
    params = load_params(params,default_params)
            
    preamble = tf.placeholder(tf.float32, [1, params['preamble_len'], 1], name="preamble_input")
    # use same length as preamble as per discussion on April 12
    received = tf.placeholder(tf.float32, [None, params['preamble_len'], 1], name="received_preamble")
    channel_true = tf.placeholder(tf.float32, [None, params['channel_len']])
    batch_size = tf.shape(received)[0]
    
    inputs=[preamble,received,channel_true]
    outputs=[]
    weights=[]
    
    nets=[preamble,received]
    
    # Process PREAMBLE and RECEIVED separately through convolutions
    num_filters = [1] + params['filters']
    
    for j in range(1, len(num_filters)-1):
        num_filter = num_filters[j]
        prev = num_filters[j-1]
        with tf.variable_scope("conv%d" % j) as scope:
            # use same weight initializer for all, and always use 3x_ convolutions
            kernel = define_variable('conv_weights', [params['filter_sizes'][j-1],
                                                      prev, num_filter], 5e-2)
            biases = define_variable('conv_biases', [num_filter], 5e-3)
            weights.extend([kernel, biases])
            # apply network
            for i in range(2):
                net = nets[i]
                net = tf.nn.conv1d(net, kernel, stride=1, padding='SAME')
                net = tf.nn.bias_add(net, biases)
                net = tf.nn.relu(net)
                if params['use_max_pool']:
                    net = tf.nn.max_pool(net, [1, 3, 1], [1, 2, 1], padding='SAME')
                nets[i] = net

    # Concatenate
    nets[0] = tf.tile(nets[0], [batch_size, 1, 1])
    output = tf.concat(nets, axis=1)
    with tf.variable_scope("conv1_concat") as scope:
        kernel = define_variable('conv_weights', [params['filter_sizes'][-1],
                                                  num_filters[-2], num_filters[-1]], 5e-2)
        biases = define_variable('conv_biases', [num_filters[-1]], 5e-3)
        weights.extend([kernel, biases])
        # apply network
        net = tf.nn.conv1d(net, kernel, stride=1, padding='SAME')
        net = tf.nn.bias_add(net, biases)
        net = tf.nn.relu(net)
    with tf.variable_scope("fc2_concat") as scope:
        dim = output.get_shape()[1].value*output.get_shape()[2].value
        batch_size = tf.shape(output)[0]
        
        kernel = define_variable('conv_weights', [dim, params['channel_len']], 5e-2)
        biases = define_variable('conv_biases', [params['channel_len']], 5e-3)
        weights.extend([kernel, biases])
        # apply network
        output = tf.reshape(output, [batch_size, -1])
        output = tf.matmul(output, kernel) + biases
    
    outputs=[output]
    
    loss = define_loss([output, channel_true], params['loss'])
    train = define_optimizer(loss, weights, params['optimizer'], params['lr'])

    return inputs, outputs, weights, loss, train

"""Builds the network [model 3] -- a basic convolution network; use as a base
   for the next network models. [difference from model 1: wide convolutions]
   
     [   |        |    ]
     [preamble received] -> conv1 -> conv2 -> conv3 -> conv4 -> fc2 -> channel_est
     [   |        |    ]
   
   
   """
def build_network3(params=None):
    default_params = {'use_max_pool':False, 'loss':"", 'optimizer':"", 'lr':1e-4,
                   'filters':[30,30,10,10], 'filter_sizes':[3,3,3,3]}
    params = load_params(params,default_params)
            
    preamble = tf.placeholder(tf.float32, [1, params['preamble_len'], 1], name="preamble_input")
    # use same length as preamble as per discussion on April 12
    received = tf.placeholder(tf.float32, [None, params['preamble_len'], 1], name="received_preamble")
    channel_true = tf.placeholder(tf.float32, [None, params['channel_len']])
    batch_size = tf.shape(received)[0]
    
    temp = tf.tile(preamble, [batch_size, 1, 1])
    network_input = tf.concat([temp, received], axis=2)
    
    inputs=[preamble,received,channel_true]
    outputs=[]
    weights=[]
    
    output = network_input
    
    # Process PREAMBLE and RECEIVED separately through convolutions
    num_filters = [2] + params['filters']
    
    for j in range(1, len(num_filters)):
        num_filter = num_filters[j]
        prev = num_filters[j-1]
        with tf.variable_scope("conv%d" % j) as scope:
            # use same weight initializer for all, and always use 3x_ convolutions
            kernel = define_variable('conv_weights', [params['filter_sizes'][j-1],
                                                      prev, num_filter], 5e-2)
            biases = define_variable('conv_biases', [num_filter], 5e-3)
            weights.extend([kernel, biases])
            # apply network
            output = tf.nn.conv1d(output, kernel, stride=1, padding='SAME')
            output = tf.nn.bias_add(output, biases)
            output = tf.nn.relu(output)
            if params['use_max_pool']:
                output = tf.nn.max_pool(output, [1, 3, 1], [1, 2, 1], padding='SAME')
                
    with tf.variable_scope("fc5") as scope:
        dim = output.get_shape()[1].value*output.get_shape()[2].value
        batch_size = tf.shape(output)[0]
        
        kernel = define_variable('conv_weights', [dim, params['channel_len']], 5e-2)
        biases = define_variable('conv_biases', [params['channel_len']], 5e-3)
        weights.extend([kernel, biases])
        # apply network
        output = tf.reshape(output, [batch_size, -1])
        output = tf.matmul(output, kernel) + biases
    
    outputs=[output]
    
    loss = define_loss([output, channel_true], params['loss'])
    train = define_optimizer(loss, weights, params['optimizer'], params['lr'])

    return inputs, outputs, weights, loss, train

In [191]:
# training a network
def train_net(params=None):
    default_params = {'network_option':build_network1, 'num_iter':1000, 'batch_size':10,
                      'snr':-1, 'num_preambles':1, 'fixed_preamble':True, 'num_taps':2, 
                      'preamble_len':100, 'channel_len':20}
    params = load_params(params, default_params)
    
    reset()
    inputs, outputs, weights, loss, train = params['network_option'](params)
    num_iter=params['num_iter']
    batch_size=params['batch_size']
    # use a single fixed preamble
    if params['fixed_preamble']:
        preambles=[gen_preamble(length=params['preamble_len']) for _ in range(params['num_preambles'])]
    
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    for i in range(0,num_iter):
        if params['fixed_preamble']:
            preamble = preambles[np.random.randint(len(preambles))]
        else:
            preamble = gen_preamble(length=params['preamble_len'])
        # generate data
        channels = gen_channel(N=params['batch_size'])
        received = apply_channel(channels, preamble, snr=params['snr'])
        #channels = channels.reshape((params['batch_size'],-1,1))
        received = received.reshape((params['batch_size'],-1,1))[:,:params['preamble_len'],:]

        # train
        sess.run(train, feed_dict={inputs[0]:preamble.reshape((1, params['preamble_len'], 1)),
                         inputs[1]:received,
                         inputs[2]:channels})
        if i % 100 == 0:
            l = sess.run(loss, feed_dict={inputs[0]:preamble.reshape((1, params['preamble_len'], 1)),
                         inputs[1]:received,
                         inputs[2]:channels})
            print(i,l)
    return inputs, outputs, weights, loss, train, preamble, sess

In [192]:
inputs, outputs, weights, loss, train, preamble, sess = train_net({'network_option':build_network1})

0 1.001707
100 0.9566439
200 0.5922473
300 0.10038408
400 0.037210785
500 0.020201355
600 0.011595882
700 0.008806557
800 0.0075699305
900 0.0067978897


In [193]:
inputs, outputs, weights, loss, train, preamble, sess = train_net({'network_option':build_network2})

0 1.0104367
100 0.9550344
200 0.6552671
300 0.19192454
400 0.07181239
500 0.023382552
600 0.015320713
700 0.012510222
800 0.007581125
900 0.006582043


In [194]:
inputs, outputs, weights, loss, train, preamble, sess = train_net({'network_option':build_network3})

0 0.99624884
100 0.9908497
200 0.8497345
300 0.26192468
400 0.11839296
500 0.021823434
600 0.007299325
700 0.007116267
800 0.0037790383
900 0.0041162595


In [123]:
preamble=gen_preamble()
channels = gen_channel(N=30)
received = apply_channel(channels, preamble, snr=-1)
print(preamble.shape, channels.shape, received.shape)

(1, 100) (30, 20) (30, 119)


In [150]:
np.around(channel,5)

array([[-4.0160e-02,  2.0000e-04, -1.0650e-02, -1.7190e-02,  1.9470e-02,
         1.4550e-02,  8.2606e-01, -5.0860e-02, -6.0600e-03, -1.0610e-02,
         5.8630e-02, -3.9360e-02,  7.4200e-03,  2.6820e-02, -2.9150e-02,
         2.2470e-02, -5.4265e-01,  2.5170e-02,  1.6300e-02, -9.8970e-02]])

In [151]:
sess.run(outputs[0], feed_dict={inputs[0]:preamble.reshape((1, 100, 1)),
                         inputs[1]:received})

array([[-0.02169779, -0.01238012,  0.07508423, -0.03988812,  0.06601575,
         0.07892787,  0.65117973, -0.00265261, -0.01810948, -0.04221896,
         0.00603446,  0.02658754, -0.01491979,  0.00070975, -0.00225196,
        -0.0036241 , -0.02522373, -0.01658292,  0.05722206,  0.03272058]],
      dtype=float32)

## Now we consider networks for channel estimation and preambles chosen from a fixed batch

In [None]:
"""Builds the network [model 3] -- a basic convolution network; use as a base
   for the next network models. [difference from model 1: wide convolutions]
   
     [   |        |    ]
     [preamble received] -> conv1 -> conv2 -> conv3 -> conv4 -> fc2 -> channel_est
     [   |        |    ]
   
   
   """
def build_network3(params=None):
    default_params = {'preamble_len':100, 'channel_len':20, 'num_taps':2,
                   'use_max_pool':False, 'loss':"", 'optimizer':"", 'lr':1e-4,
                   'filters':[30,30,10,10], 'filter_sizes':[3,3,3,3]}
    params = load_params(params,default_params1)
            
    preamble = tf.placeholder(tf.float32, [1, params['preamble_len'], 1], name="preamble_input")
    # use same length as preamble as per discussion on April 12
    received = tf.placeholder(tf.float32, [None, params['preamble_len'], 1], name="received_preamble")
    channel_true = tf.placeholder(tf.float32, [None, params['channel_len']])
    batch_size = tf.shape(received)[0]
    
    temp = tf.tile(preamble, [batch_size, 1, 1])
    network_input = tf.concat([temp, received], axis=2)
    
    inputs=[preamble,received,channel_true]
    outputs=[]
    weights=[]
    
    output = network_input
    
    # Process PREAMBLE and RECEIVED separately through convolutions
    num_filters = [2] + params['filters']
    
    for j in range(1, len(num_filters)):
        num_filter = num_filters[j]
        prev = num_filters[j-1]
        with tf.variable_scope("conv%d" % j) as scope:
            # use same weight initializer for all, and always use 3x_ convolutions
            kernel = define_variable('conv_weights', [params['filter_sizes'][j-1],
                                                      prev, num_filter], 5e-2)
            biases = define_variable('conv_biases', [num_filter], 5e-3)
            weights.extend([kernel, biases])
            # apply network
            output = tf.nn.conv1d(output, kernel, stride=1, padding='SAME')
            output = tf.nn.bias_add(output, biases)
            output = tf.nn.relu(output)
            if params['use_max_pool']:
                output = tf.nn.max_pool(output, [1, 3, 1], [1, 2, 1], padding='SAME')
                
    with tf.variable_scope("fc5") as scope:
        dim = output.get_shape()[1].value*output.get_shape()[2].value
        batch_size = tf.shape(output)[0]
        
        kernel = define_variable('conv_weights', [dim, params['channel_len']], 5e-2)
        biases = define_variable('conv_biases', [params['channel_len']], 5e-3)
        weights.extend([kernel, biases])
        # apply network
        output = tf.reshape(output, [batch_size, -1])
        output = tf.matmul(output, kernel) + biases
    
    outputs=[output]
    
    loss = define_loss([output, channel_true], params['loss'])
    train = define_optimizer(loss, weights, params['optimizer'], params['lr'])

    return inputs, outputs, weights, loss, train