### This IPython notebook defines several variations of convolutional neural networks for channel estimation. The training inputs are the preamble + preamble passed through channel; the predicted output is the channel taps that correspond to the input. We explore several ideas here:
#### (A) multi-scale convolution (learned) filters applied separately to the [preamble input] and to the [preamble through channel] input
#### (B) multi-scale convolution (learned) filters applied to both (e.g., 2D convolution filters)

We make several assumptions about the channel model here as well:
* Channel length is <= 20
* Channel energy (am I saying this correctly?) is 1 (also, does normalizing channel taps by l2 norm ensure this?)
* Channel is sparse (most entries near 0, except for a few spikes)
  * Potential simplifying assumption (maybe include initially?) first entry of channel is 'large'
  
  
Questions: 
1. for my preamble, I am using +/- 1; Nikhil used 1/0 .. which is correct? (It should not matter really for training/testing since it is a simple affine transform between the two, but I want to do the "correct" thing)
2. do my assumptions make sense? for a real model I mean
3. am I adding noise correctly for the SNR I am setting
4. More of a "TODO" but...I am only training and testing on preamble inputs, not additional data -- the reasoning is that for additional data, we really want something that handles sequences (e.g., an RNN) in my opinion and this is more of an exploration of convolutional layers here

## ALSO NOTE: I am making a lot of things very modular on purpose..I want to discuss with everyone the problem statement again (I still feel like a lot of things are unclear/ambiguous) and then we can move a lot of this modular code to a rigid "util.py" file that everyone should import from so that we can more easily guarantee correctness and consistency and speed up development time.

In [1]:
# standard imports
import numpy as np
import scipy.signal as sig
import matplotlib.pyplot as plt
import tensorflow as tf
%matplotlib inline

  from ._conv import register_converters as _register_converters


In [47]:
# utility functions...we really should standardize this in a Python file [TODO!!!!]
"""Generates random sequence [1 1 1 -1 1 -1 -1 ...] of length LENGTH."""
def gen_preamble(length=100):
    """Return a random +/-1 preamble as an integer array of shape (1, length).

    Each entry is drawn independently from {0, 1} with NumPy's global
    random state and then mapped to {-1, +1}.
    """
    bits = np.random.randint(2, size=(1, length))
    # Affine map 0 -> -1, 1 -> +1.
    return 2 * bits - 1

"""Generates N channels of length LENGTH, each with NUM_TAPS taps. This
   means that NUM_TAPS of the entries will be non-zero, and the rest will
   be 'close' to 0 (e.g., noise). 
   Example below.
   
   >>> np.around(gen_channel(),2)
   >>> array([[-0.08,  0.  , -0.06,  0.02,  0.  ,  0.02, -0.85,  0.05, -0.03,
        -0.07,  0.5 , -0.02, -0.  , -0.05, -0.  ,  0.03, -0.07, -0.04,
        -0.01,  0.08]])"""
def gen_channel(N=1, num_taps=2, length=20):
    """Generate N sparse random channels, one per row.

    Each row gets exactly ``num_taps`` taps at *distinct* positions. Tap
    values are drawn from +/-{0.1, 0.2, ..., 1.0}; zero-mean Gaussian noise
    with standard deviation 0.05 is then added to every entry, and each row
    is scaled to unit l2 norm.

    Args:
        N: number of channels (rows) to generate.
        num_taps: number of tap positions per channel.
        length: number of entries per channel.

    Returns:
        Float array of shape (N, length) whose rows have unit l2 norm.

    Raises:
        ValueError: if ``num_taps`` exceeds ``length`` (distinct tap
            positions are then impossible).
    """
    if num_taps > length:
        raise ValueError(
            "num_taps (%d) cannot exceed channel length (%d)" % (num_taps, length))
    ret = np.zeros((N, length))
    # Sorting random keys gives an independent random permutation per row;
    # its first num_taps entries are distinct positions. Sampling positions
    # with replacement could collide and silently produce fewer taps.
    tap_idxs = np.argsort(np.random.rand(N, length), axis=1)[:, :num_taps]
    magnitudes = np.random.randint(10, size=(N, num_taps)) + 1
    signs = np.random.randint(2, size=(N, num_taps)) * 2 - 1
    tap_vals = (magnitudes * signs) / 10.
    # Scatter the tap values into their row-wise positions.
    ret[np.arange(N)[:, None], tap_idxs] = tap_vals
    # Small background noise so the non-tap entries are near, not exactly, zero.
    ret += 5e-2 * np.random.randn(N, length)
    return ret / np.linalg.norm(ret, axis=1, keepdims=True)

"""Simulates passing data through a noisy channel.
   If SNR == -1, then no noise. Otherwise, uses AWGN model.
   
   Returned value has shape (1, len(channel.T) + len(data.T) - 1).
   With default settings, this means it is (1, 119)."""
def apply_channel(channel, data, snr=-1):
    """Convolve DATA with CHANNEL and optionally add white Gaussian noise.

    Args:
        channel: channel taps, passed as the second operand of a 'full'
            convolution with ``data``.
        data: the transmitted samples.
        snr: noise is added only when ``snr > 0``; any other value (the
            default is -1) means no noise. The noise standard deviation is
            ``1 / sqrt(snr)``, i.e. SNR is treated as a linear ratio
            (assumes unit average signal power -- TODO confirm).

    Returns:
        The 'full' convolution of ``data`` and ``channel``. For a (1, 100)
        preamble and a (1, 20) channel this has shape (1, 119).
    """
    ret = sig.convolve(data, channel, mode='full')
    if snr > 0:
        # Draw an independent noise sample for EVERY output element.
        # Sizing the draw by len(ret) would only give one value per row.
        noise = np.random.randn(*ret.shape) / np.sqrt(snr)
        # Out-of-place add so an integer-typed convolution result is
        # promoted to float instead of raising a casting error.
        ret = ret + noise
    return ret

In [94]:
# functions for networks..should also put this in util.py!
"""Run before building a new network. Rests randomization for repeatability."""
def reset():
    """Reset randomization state; run before building a new network.

    Clears the default TensorFlow graph and seeds both NumPy's and
    TensorFlow's random number generators with 0 for repeatability.
    """
    seed = 0
    np.random.seed(seed)
    # The TensorFlow seed is set after the graph reset so that it applies
    # to the fresh default graph.
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    
"""Defines the loss function."""
def define_loss(placeholders, loss_type):
    """Return the batch-mean squared-error loss.

    Args:
        placeholders: pair ``(output, correct_output)`` of tensors.
        loss_type: currently unused; the loss is always the squared error
            summed over axis 1 and averaged over the remaining axis.
    """
    predicted, target = placeholders
    squared_error = (predicted - target) ** 2
    per_example = tf.reduce_sum(squared_error, axis=1)
    return tf.reduce_mean(per_example)
    
"""Defines the optimizer."""
def define_optimizer(loss, trainable_weights, optimizer, lr):
    """Return a training op that applies one Adam update to the given weights.

    Args:
        loss: scalar tensor to minimize.
        trainable_weights: variables to compute and apply gradients for.
        optimizer: currently unused; Adam is always used.
        lr: learning rate passed to the Adam optimizer.
    """
    adam = tf.train.AdamOptimizer(lr)
    grads_and_vars = adam.compute_gradients(loss, trainable_weights)
    return adam.apply_gradients(grads_and_vars)

"""Defines a trainable variable with truncated normal initialization."""
def define_variable(name, shape, stddev):
    """Get or create a float32 variable with truncated-normal initialization.

    Args:
        name: variable name (resolved within the current variable scope).
        shape: shape of the variable.
        stddev: standard deviation of the truncated normal initializer.
    """
    init = tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32)
    return tf.get_variable(name, shape, initializer=init, dtype=tf.float32)

In [95]:
# define the networks
"""Builds the network [model 1] -- a basic convolution network; use as a base
   for the next network models.
   Elements in PARAMS:
   
   * 'preamble_len' : length of preamble; [default = 100]
   * 'channel_len' : length of channel; [default = 20]
   * 'use_max_pool': True to use max pooling in first part of net; [default = False]
   * 'loss' : loss function to use
   * 'optimizer' : optimizer to use
   * 'lr' : base learning rate
   
   """
def build_network1(params=None):
    """Build network model 1: a basic two-branch 1-D convolutional network.

    The preamble and the received preamble are each passed through their
    own stack of three width-3 convolutions (30, 30, 10 filters). The two
    branch outputs are concatenated along the length axis, passed through
    one more width-3 convolution, then flattened into a fully connected
    layer that outputs ``channel_len`` values.

    Args:
        params: optional dict of settings. Any key that is missing falls
            back to its default:
            'preamble_len' (100), 'channel_len' (20), 'use_max_pool' (False),
            'loss' (""), 'optimizer' (""), 'lr' (4e-5).

    Returns:
        Tuple ``(inputs, outputs, weights, loss, train)`` where ``inputs`` is
        ``[preamble, received, channel_true]`` (placeholders), ``outputs`` is
        ``[output]``, ``weights`` lists every kernel/bias variable created,
        ``loss`` is the scalar loss tensor and ``train`` is the training op.
    """
    settings = {'preamble_len': 100, 'channel_len': 20,
                'use_max_pool': False, 'loss': "", 'optimizer': "", 'lr': 4e-5}
    if params is not None:
        # Caller-supplied values override the defaults; missing keys keep them.
        settings.update(params)
    params = settings

    preamble = tf.placeholder(tf.float32, [None, params['preamble_len'], 1], name="preamble_input")
    # use same length as preamble as per discussion on April 12
    received = tf.placeholder(tf.float32, [None, params['preamble_len'], 1], name="received_preamble")
    channel_true = tf.placeholder(tf.float32, [None, params['channel_len']])

    inputs = [preamble, received, channel_true]
    weights = []

    # Process PREAMBLE and RECEIVED separately through convolutions
    num_filters = [1, 30, 30, 10]
    branches = []
    for i, net in enumerate([preamble, received], start=1):
        for j in range(1, len(num_filters)):
            num_filter = num_filters[j]
            prev = num_filters[j-1]
            with tf.variable_scope("conv%d_%d" % (j+1, i)):
                # use same weight initializer for all, and always use 3x_ convolutions
                kernel = define_variable('conv_weights', [3, prev, num_filter], 5e-2)
                biases = define_variable('conv_biases', [num_filter], 5e-3)
                weights.extend([kernel, biases])
                # apply network
                net = tf.nn.conv1d(net, kernel, stride=1, padding='SAME')
                net = tf.nn.bias_add(net, biases)
                net = tf.nn.relu(net)
                if params['use_max_pool']:
                    # The activations are rank 3 (batch, length, channels), so
                    # use the 1-D pooling layer; tf.nn.max_pool is a 4-D op.
                    net = tf.layers.max_pooling1d(net, pool_size=3, strides=2, padding='same')
        branches.append(net)

    # Concatenate the two branches along the length axis
    output = tf.concat(branches, axis=1)
    with tf.variable_scope("conv1_concat"):
        kernel = define_variable('conv_weights', [3, num_filters[-1], 10], 5e-2)
        biases = define_variable('conv_biases', [10], 5e-3)
        weights.extend([kernel, biases])
        # Apply this layer to the CONCATENATED tensor and keep the result, so
        # the layer is actually part of the graph feeding the loss.
        output = tf.nn.conv1d(output, kernel, stride=1, padding='SAME')
        output = tf.nn.bias_add(output, biases)
        output = tf.nn.relu(output)
    with tf.variable_scope("fc2_concat"):
        # Static (length * channels) size of the flattened feature map.
        dim = output.get_shape()[1].value*output.get_shape()[2].value
        batch_size = tf.shape(output)[0]

        kernel = define_variable('conv_weights', [dim, params['channel_len']], 5e-2)
        biases = define_variable('conv_biases', [params['channel_len']], 5e-3)
        weights.extend([kernel, biases])
        # apply network
        output = tf.reshape(output, [batch_size, -1])
        # NOTE(review): sigmoid limits predictions to (0, 1), but the channels
        # produced by gen_channel contain negative taps -- confirm whether a
        # tanh or linear output is intended here.
        output = tf.nn.sigmoid(tf.matmul(output, kernel) + biases)

    outputs = [output]

    loss = define_loss([output, channel_true], params['loss'])
    train = define_optimizer(loss, weights, params['optimizer'], params['lr'])

    return inputs, outputs, weights, loss, train

In [96]:
# train network
reset()
inputs, outputs, weights, loss, train = build_network1()
num_iter=10000
batch_size=100
# use a single fixed preamble
preamble=gen_preamble()
for i in range(0,num_iter):
    channels = gen_channel(N=batch_size)