[View in Colaboratory](https://colab.research.google.com/github/plushvoxel/Project-Lernende-Agenten-colab/blob/master/%22Working%22Version1.3.ipynb)

#### Copyright 2017 Google LLC.

In [0]:
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#Creating a NN

## Setup

First, let's load and prepare the data.

In [0]:
from __future__ import print_function

import math
from urllib import request
from IPython import display
from matplotlib import cm
from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
import tensorflow as tf
from tensorflow.python.data import Dataset
from google.colab import files
from tarfile import open as taropen
from struct import unpack


In [0]:
training_set_size = 10 #@param {type:"slider", min:1, max:3000, step:1}
validating_set_size = 2 #@param {type:"slider", min:1, max:1000, step:1}
test_set_size = 2 #@param {type:"slider", min:1, max:1000, step:1}
learning_rate = 0.03 #@param ["3", "1", "0.3", "0.1", "0.03", "0.01", "0.003", "0.001", "0.0003", "0.0001"] {type:"raw"}
activation_function = "RELU" #@param ["RELU", "Sigmoid", "Tanh"]
regression = "None" #@param ["None", "L1", "L2"]
regression_rate = 3 #@param ["3", "1", "0.3", "0.1", "0.03", "0.01", "0.003", "0.001"] {type:"raw"}
steps = 2 #@param {type:"slider", min:1, max:100, step:1}
batch_size = 5 #@param {type:"slider", min:1, max:20, step:1}
model = [2048, 500]

In [140]:
tf.logging.set_verbosity(tf.logging.ERROR)
pd.options.display.max_rows = 10
pd.options.display.float_format = '{:.1f}'.format

request.urlretrieve("https://github.com/plushvoxel/Project-Lernende-Agenten-Data-Generator/blob/master/iq.tar?raw=true", "iq.tar")
tar = taropen("iq.tar")

data = dict()
MODKEY = "mod"

for member in tar.getmembers():
  
  modulation = member.name[3:5]
  if modulation == "am":
    modulation = 0
  else:
    modulation = 1
  if not MODKEY in data:
    data[MODKEY] = [modulation]
  else:
    data[MODKEY].append(modulation)
  with tar.extractfile(member) as f:
    buffer = f.read()
    num_floats = len(buffer)//4
    floats = unpack("f"*num_floats, buffer)
    i = floats[0::2]
    q = floats[1::2]
    for j in range(len(i)):
      ikey = "i{:05d}".format(j)
      qkey = "q{:05d}".format(j)
      if not ikey in data:
        data[ikey] = [i[j]]
      else:
        data[ikey].append(i[j])
      if not qkey in data:
        data[qkey] = [q[j]]
      else:
        data[qkey].append(q[j])        
signal_dataframe = pd.DataFrame(data=data)
signal_dataframeReal = signal_dataframe.copy()
signal_dataframe = signal_dataframe.reindex(
    np.random.permutation(signal_dataframe.index))
signal_dataframe

Unnamed: 0,i00000,i00001,i00002,i00003,i00004,i00005,i00006,i00007,i00008,i00009,...,q02038,q02039,q02040,q02041,q02042,q02043,q02044,q02045,q02046,q02047
28,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,-0.0,0.0,-0.0,0.0,-0.0,0.0,0.0,-0.0
58,1.0,-1.0,-0.8,0.1,-0.1,1.0,0.9,-0.3,-0.8,-0.8,...,1.0,1.0,0.7,-0.4,0.9,-0.9,-1.0,-0.7,1.0,0.1
52,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,0.0,-0.0,-0.0
83,-0.4,-1.0,-0.7,0.1,0.9,0.9,0.2,-0.7,-1.0,-0.5,...,-0.9,-0.9,-0.2,0.7,1.0,0.5,-0.4,-1.0,-0.7,0.1
81,1.0,-0.9,0.5,0.7,-0.7,0.1,-0.7,-0.9,0.7,0.9,...,0.4,-0.9,0.4,0.9,0.9,0.5,-0.1,1.0,-0.7,-0.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,0.0,0.0,0.0,0.0
66,0.9,0.5,-0.8,-1.0,-0.6,-0.3,-0.2,0.4,-0.4,0.7,...,0.8,0.8,0.8,0.8,0.8,0.8,0.8,0.8,0.8,0.8
41,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0
108,0.2,-0.2,-0.8,1.0,0.9,0.1,-0.1,-1.0,-0.9,0.2,...,0.9,1.0,0.9,-0.7,0.7,-1.0,-1.0,-0.9,1.0,-0.2


In [0]:
def parse_labels_and_features(dataset):
  """Extracts labels and features.
  
  This is a good place to scale or transform the features if needed.
  
  Args:
    dataset: A Pandas `Dataframe`, containing the label on the first column and
      monochrome pixel values on the remaining columns, in row major order.
  Returns:
    A `tuple` `(labels, features)`:
      labels: A Pandas `Series`.
      features: A Pandas `DataFrame`.
  """
  labels = dataset[MODKEY]

  # DataFrame.loc index ranges are inclusive at both ends.
  features = dataset.iloc[:,1:4097]
  print(labels)
  return labels, features

In [142]:
training_targets, training_examples = parse_labels_and_features(signal_dataframe[0:training_set_size])
training_examples.describe()

28     0
58     1
52     0
83     1
81     1
1      0
48     0
63     1
109    1
25     0
Name: mod, dtype: int64


Unnamed: 0,i00001,i00002,i00003,i00004,i00005,i00006,i00007,i00008,i00009,i00010,...,q02038,q02039,q02040,q02041,q02042,q02043,q02044,q02045,q02046,q02047
count,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,...,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
mean,0.1,0.2,0.6,0.6,0.8,0.3,0.3,0.1,0.2,0.3,...,0.0,-0.1,0.1,0.1,0.1,-0.1,-0.3,-0.2,-0.1,-0.1
std,0.9,0.8,0.4,0.6,0.4,0.8,0.7,0.9,0.9,0.8,...,0.5,0.6,0.4,0.5,0.6,0.5,0.4,0.6,0.5,0.4
min,-1.0,-0.8,-0.0,-0.7,0.0,-1.0,-0.9,-1.0,-1.0,-1.0,...,-0.9,-0.9,-0.5,-0.7,-1.0,-1.0,-1.0,-1.0,-0.7,-0.8
25%,-0.9,-0.6,0.2,0.2,0.8,-0.3,-0.2,-0.9,-0.7,-0.2,...,-0.0,-0.5,-0.0,-0.0,-0.0,-0.5,-0.5,-0.6,-0.4,-0.0
50%,0.1,0.3,0.8,0.9,1.0,0.6,0.5,0.4,0.4,0.5,...,0.0,0.0,0.0,-0.0,0.0,0.0,-0.1,-0.0,-0.0,0.0
75%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.1,0.0,0.3,0.2,0.7,0.0,-0.0,0.0,0.0,0.1
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,0.7,0.9,1.0,0.5,0.0,1.0,1.0,0.6


In [143]:
validation_targets, validation_examples = parse_labels_and_features(signal_dataframe[training_set_size:validating_set_size+training_set_size])
validation_examples.describe()

98    1
42    0
Name: mod, dtype: int64


Unnamed: 0,i00001,i00002,i00003,i00004,i00005,i00006,i00007,i00008,i00009,i00010,...,q02038,q02039,q02040,q02041,q02042,q02043,q02044,q02045,q02046,q02047
count,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
mean,1.0,0.1,0.2,0.3,0.6,0.3,0.1,0.6,0.7,0.4,...,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5
std,0.0,1.3,1.1,1.0,0.6,1.0,1.3,0.6,0.5,0.8,...,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7
min,1.0,-0.9,-0.6,-0.4,0.1,-0.5,-0.9,0.2,0.3,-0.2,...,-1.0,-0.9,-0.9,-1.0,-1.0,-0.9,-1.0,-0.9,-0.9,-1.0
25%,1.0,-0.4,-0.2,-0.1,0.3,-0.1,-0.4,0.4,0.5,0.1,...,-0.7,-0.7,-0.7,-0.7,-0.7,-0.7,-0.7,-0.7,-0.7,-0.7
50%,1.0,0.1,0.2,0.3,0.6,0.3,0.1,0.6,0.7,0.4,...,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5
75%,1.0,0.5,0.6,0.6,0.8,0.6,0.5,0.8,0.8,0.7,...,-0.2,-0.2,-0.2,-0.2,-0.2,-0.2,-0.2,-0.2,-0.2,-0.2
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,-0.0,0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0


## Building a Neural Network

The NN is defined by the [DNNRegressor](https://www.tensorflow.org/api_docs/python/tf/estimator/DNNRegressor) class.

Use **`hidden_units`** to define the structure of the NN.  The `hidden_units` argument provides a list of ints, where each int corresponds to a hidden layer and indicates the number of nodes in it.  For example, consider the following assignment:

`hidden_units=[3,10]`

The preceding assignment specifies a neural net with two hidden layers:

* The first hidden layer contains 3 nodes.
* The second hidden layer contains 10 nodes.

If we wanted to add more layers, we'd add more ints to the list. For example, `hidden_units=[10,20,30,40]` would create four layers with ten, twenty, thirty, and forty units, respectively.

By default, all hidden layers will use ReLu activation and will be fully connected.

In [0]:
def construct_feature_columns():
  """Construct the TensorFlow Feature Columns.

  Returns:
    A set of feature columns
  """ 
  
  # There are 784 pixels in each image.
  return set([tf.feature_column.numeric_column('features', shape=4096)])

In [0]:
def create_training_input_fn(features, labels, batch_size, num_epochs=None, shuffle=True):
  """A custom input_fn for sending MNIST data to the estimator for training.

  Args:
    features: The training features.
    labels: The training labels.
    batch_size: Batch size to use during training.

  Returns:
    A function that returns batches of training features and labels during
    training.
  """
  def _input_fn(num_epochs=None, shuffle=True):
    # Input pipelines are reset with each call to .train(). To ensure model
    # gets a good sampling of data, even when number of steps is small, we 
    # shuffle all the data before creating the Dataset object
    idx = np.random.permutation(features.index)
    raw_features = {"features":features.reindex(idx)}
    raw_targets = np.array(labels[idx])
   
    ds = Dataset.from_tensor_slices((raw_features,raw_targets)) # warning: 2GB limit
    ds = ds.batch(batch_size).repeat(num_epochs)
    
    if shuffle:
      ds = ds.shuffle(10000)
    
    # Return the next batch of data.
    feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
    return feature_batch, label_batch

  return _input_fn

In [0]:
def create_predict_input_fn(features, labels, batch_size):
  """A custom input_fn for sending mnist data to the estimator for predictions.

  Args:
    features: The features to base predictions on.
    labels: The labels of the prediction examples.

  Returns:
    A function that returns features and labels for predictions.
  """
  def _input_fn():
    raw_features = {"features": features.values}
    raw_targets = np.array(labels)
    
    ds = Dataset.from_tensor_slices((raw_features, raw_targets)) # warning: 2GB limit
    ds = ds.batch(batch_size)
    
        
    # Return the next batch of data.
    feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
    return feature_batch, label_batch

  return _input_fn

In [147]:
test_targets, test_examples = parse_labels_and_features(signal_dataframe)
test_examples.describe()

28     0
58     1
52     0
83     1
81     1
      ..
54     0
66     1
41     0
108    1
73     1
Name: mod, Length: 110, dtype: int64


Unnamed: 0,i00001,i00002,i00003,i00004,i00005,i00006,i00007,i00008,i00009,i00010,...,q02038,q02039,q02040,q02041,q02042,q02043,q02044,q02045,q02046,q02047
count,110.0,110.0,110.0,110.0,110.0,110.0,110.0,110.0,110.0,110.0,...,110.0,110.0,110.0,110.0,110.0,110.0,110.0,110.0,110.0,110.0
mean,0.4,0.5,0.5,0.5,0.5,0.5,0.4,0.4,0.4,0.5,...,0.0,-0.0,-0.0,-0.0,-0.1,0.0,0.0,-0.0,0.0,0.0
std,0.8,0.7,0.7,0.6,0.7,0.7,0.7,0.7,0.7,0.7,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
min,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
25%,-0.4,0.0,0.1,0.1,0.1,-0.1,0.0,-0.2,-0.0,0.0,...,-0.1,-0.0,-0.1,-0.0,-0.1,-0.0,-0.0,-0.1,-0.0,-0.0
50%,0.9,1.0,1.0,1.0,0.9,1.0,1.0,0.9,0.9,1.0,...,0.0,0.0,-0.0,0.0,-0.0,-0.0,0.0,-0.0,-0.0,0.0
75%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.1,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [0]:
def train_nn_classification_model(
    learning_rate,
    steps,
    batch_size,
    hidden_units,
    training_examples,
    training_targets,
    validation_examples,
    validation_targets):
  """Trains a neural network classification model for the MNIST digits dataset.
  
  In addition to training, this function also prints training progress information,
  a plot of the training and validation loss over time, as well as a confusion
  matrix.
  
  Args:
    learning_rate: An `int`, the learning rate to use.
    steps: A non-zero `int`, the total number of training steps. A training step
      consists of a forward and backward pass using a single batch.
    batch_size: A non-zero `int`, the batch size.
    hidden_units: A `list` of int values, specifying the number of neurons in each layer.
    training_examples: A `DataFrame` containing the training features.
    training_targets: A `DataFrame` containing the training labels.
    validation_examples: A `DataFrame` containing the validation features.
    validation_targets: A `DataFrame` containing the validation labels.
      
  Returns:
    The trained `DNNClassifier` object.
  """

  periods = 10
  # Caution: input pipelines are reset with each call to train. 
  # If the number of steps is small, your model may never see most of the data.  
  # So with multiple `.train` calls like this you may want to control the length 
  # of training with num_epochs passed to the input_fn. Or, you can do a really-big shuffle, 
  # or since it's in-memory data, shuffle all the data in the `input_fn`.
  steps_per_period = steps / periods  
  # Create the input functions.
  predict_training_input_fn = create_predict_input_fn(
    training_examples, training_targets, batch_size)
  predict_validation_input_fn = create_predict_input_fn(
    validation_examples, validation_targets, batch_size)
  training_input_fn = create_training_input_fn(
    training_examples, training_targets, batch_size)
  
  # Create the input functions.
  predict_training_input_fn = create_predict_input_fn(
    training_examples, training_targets, batch_size)
  predict_validation_input_fn = create_predict_input_fn(
    validation_examples, validation_targets, batch_size)
  training_input_fn = create_training_input_fn(
    training_examples, training_targets, batch_size)
  
  # Create feature columns.
  feature_columns = [tf.feature_column.numeric_column('features', shape=4096)]

  # Create a DNNClassifier object.
  my_optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate)
  my_optimizer = tf.contrib.estimator.clip_gradients_by_norm(my_optimizer, 5.0)
  classifier = tf.estimator.DNNClassifier(
      feature_columns=feature_columns,
      n_classes=2,
      hidden_units=hidden_units,
      optimizer=my_optimizer,
      config=tf.contrib.learn.RunConfig(keep_checkpoint_max=1)
  )

  # Train the model, but do so inside a loop so that we can periodically assess
  # loss metrics.
  print("Training model...")
  print("LogLoss error (on validation data):")
  training_errors = []
  validation_errors = []
  for period in range (0, periods):
    # Train the model, starting from the prior state.
    classifier.train(
        input_fn=training_input_fn,
        steps=steps_per_period
    )
  
    # Take a break and compute probabilities.
    training_predictions = list(classifier.predict(input_fn=predict_training_input_fn))
    training_probabilities = np.array([item['probabilities'] for item in training_predictions])
    training_pred_class_id = np.array([item['class_ids'][0] for item in training_predictions])
    training_pred_one_hot = tf.keras.utils.to_categorical(training_pred_class_id,10)
        
    validation_predictions = list(classifier.predict(input_fn=predict_validation_input_fn))
    validation_probabilities = np.array([item['probabilities'] for item in validation_predictions])    
    validation_pred_class_id = np.array([item['class_ids'][0] for item in validation_predictions])
    validation_pred_one_hot = tf.keras.utils.to_categorical(validation_pred_class_id,10)    
    
    # Compute training and validation errors.
    training_log_loss = metrics.log_loss(training_targets, training_pred_one_hot)
    validation_log_loss = metrics.log_loss(validation_targets, validation_pred_one_hot)
    # Occasionally print the current loss.
    print("  period %02d : %0.2f" % (period, validation_log_loss))
    # Add the loss metrics from this period to our list.
    training_errors.append(training_log_loss)
    validation_errors.append(validation_log_loss)
  print("Model training finished.")
  # Remove event files to save disk space.
  _ = map(os.remove, glob.glob(os.path.join(classifier.model_dir, 'events.out.tfevents*')))
  
  # Calculate final predictions (not probabilities, as above).
  final_predictions = classifier.predict(input_fn=predict_validation_input_fn)
  final_predictions = np.array([item['class_ids'][0] for item in final_predictions])
  
  
  accuracy = metrics.accuracy_score(validation_targets, final_predictions)
  print("Final accuracy (on validation data): %0.2f" % accuracy)

  # Output a graph of loss metrics over periods.
  plt.ylabel("LogLoss")
  plt.xlabel("Periods")
  plt.title("LogLoss vs. Periods")
  plt.plot(training_errors, label="training")
  plt.plot(validation_errors, label="validation")
  plt.legend()
  plt.show()
  
  # Output a plot of the confusion matrix.
  cm = metrics.confusion_matrix(validation_targets, final_predictions)
  # Normalize the confusion matrix by row (i.e by the number of samples
  # in each class).
  cm_normalized = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
  ax = sns.heatmap(cm_normalized, cmap="bone_r")
  ax.set_aspect(1)
  plt.title("Confusion matrix")
  plt.ylabel("True label")
  plt.xlabel("Predicted label")
  plt.show()

  return classifier

In [149]:
nn_classification = train_nn_classification_model(
    learning_rate=learning_rate,
    steps=steps,
    batch_size=batch_size,
    hidden_units=model,
    training_examples=training_examples,
    training_targets=training_targets,
    validation_examples=validation_examples,
    validation_targets=validation_targets)

Training model...
LogLoss error (on validation data):


ValueError: ignored