Working on classifier for FM vs. noise signal, to later extend it to other labels.

Data logged using:
uhd_ -f -b ....


In [0]:
%tensorflow_version 2.x
from __future__ import absolute_import, division, print_function, unicode_literals
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Model, Sequential
from tensorflow.keras.layers import TimeDistributed, LSTM, Dropout, Conv1D, Dense, Activation, MaxPooling1D, Input
from tensorflow.keras.utils import Sequence as Sequence
from tensorflow.keras.optimizers import SGD, Adam
import numpy as np
from google.colab import drive
from datetime import datetime
import ast

sample_rate = 100000
signals_count = 2
epochs = 20
batch_size = 128
capture_size = 2048
train_size = 800
test_size = 200

dir_base_model = "/content/gdrive/My Drive/DeepLearning/sigid/test-"
model_name = ""
checkpoint_path = "/content/gdrive/My Drive/DeepLearning/sigid/" + model_name + "-checkpoints/cp-{epoch:04d}.ckpt"

print(tf.__version__)

2.2.0-rc3


In [0]:
drive.mount('/content/gdrive')

# Create a dtype with the binary data format and the desired column names
dt = np.dtype([('i', 'f4'), ('q', 'f4')])
# Load noisy data
data = np.fromfile(dir_base_model + "nothing.iq", dtype=dt)
dfa = pd.DataFrame(data)
dfa["label"] = "BLANK"
npArray = np.array(range(len(dfa)), dtype='f4', copy=True, order='K', subok=False, ndmin=0)
dfa["time"] = npArray / sample_rate
# Load FM data
data = np.fromfile(dir_base_model + "999.iq", dtype=dt)
dfb = pd.DataFrame(data)
dfb["label"] = "FM"
npArray = np.array(range(len(dfb)), dtype='f4', copy=True, order='K', subok=False, ndmin=0)
dfb["time"] = npArray / sample_rate

frames = [dfa, dfb]
result = pd.concat(frames)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
train = []
test = []

# Build datasets for train 
for i in (0, train_size):
  ii = i * capture_size
  train.append(dfa[ii:ii + capture_size].values)
for i in (0, train_size):
  ii = i * capture_size
  train.append(dfb[ii:ii + capture_size].values)

# Build datasets for test 
for i in (0, test_size):
  #ii = i * capture_size
  ii = (train_size + 1 + i) * capture_size
  test.append(dfa[ii:ii + capture_size].values)
for i in (0, test_size):
  #ii = i * capture_size
  ii = (train_size + 1 + i) * capture_size
  test.append(dfb[ii:ii + capture_size].values)

In [0]:
print('Capture size dfa BLANK: ' + str(len(dfa.values)))
print('Capture size dfb FM: ' + str(len(dfb.values)))

Capture size dfa BLANK: 3059712
Capture size dfb FM: 2646528


In [0]:
# Plot some sample data to check iq files TRAIN
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly
fig = make_subplots(rows=1, cols=2)
font_options = dict(family="Courier New, monospace", size=18, color="#7f7f7f")
fig.add_trace(go.Scatter(x=dfa[1:capture_size]['time'], y=dfa[1:capture_size]['i'], mode='lines', name='i'), row=1, col=1)
fig.add_trace(go.Scatter(x=dfa[1:capture_size]['time'], y=dfa[1:capture_size]['q'], mode='lines', name='q'), row=1, col=1)
fig.update_layout(height=400, width=800, title="NOISE", xaxis_title="time", yaxis_title="value", font=font_options)
fig.add_trace(go.Scatter(x=dfb[1:capture_size]['time'], y=dfb[1:capture_size]['i'], mode='lines', name='i'), row=1, col=2)
fig.add_trace(go.Scatter(x=dfb[1:capture_size]['time'], y=dfb[1:capture_size]['q'], mode='lines', name='q'), row=1, col=2)
fig.update_layout(height=400, width=800, title="FM", xaxis_title="time", yaxis_title="value", font=font_options)
fig.show()

In [0]:
# Plot some sample data to check iq files TEST
interleave = 2
fig = make_subplots(rows=1, cols=2)
font_options = dict(family="Courier New, monospace", size=18, color="#7f7f7f")
fig.add_trace(go.Scatter(x=dfa[(1+train_size) * interleave:(1+train_size) * interleave + capture_size]['time'], y=dfa[(1+train_size) * interleave:(1+train_size) * interleave + capture_size]['i'], mode='lines', name='i'), row=1, col=1)
fig.add_trace(go.Scatter(x=dfa[(1+train_size) * interleave:(1+train_size) * interleave + capture_size]['time'], y=dfa[(1+train_size) * interleave:(1+train_size) * interleave + capture_size]['q'], mode='lines', name='q'), row=1, col=1)
fig.update_layout(height=400, width=800, title="NOISE", xaxis_title="time", yaxis_title="value", font=font_options)
fig.add_trace(go.Scatter(x=dfb[(1+train_size) * interleave:(1+train_size) * interleave + capture_size]['time'], y=dfb[(1+train_size) * interleave:(1+train_size) * interleave + capture_size]['i'], mode='lines', name='i'), row=1, col=2)
fig.add_trace(go.Scatter(x=dfb[(1+train_size) * interleave:(1+train_size) * interleave + capture_size]['time'], y=dfb[(1+train_size) * interleave:(1+train_size) * interleave + capture_size]['q'], mode='lines', name='q'), row=1, col=2)
fig.update_layout(height=400, width=800, title="FM", xaxis_title="time", yaxis_title="value", font=font_options)
fig.show()

In [0]:
def get_signal_dataframes(df):
  interleave = 2
  dx = []
  dy = []
  tx = []
  ty = []

  # Generate train dataset 
  dfax = df[['i', 'q']] 
  dfay = df[['label']] 
  dfax = dfax * 10000.0
  for i in range(0, train_size):
    newx = dfax[interleave * i:interleave * i + capture_size]
    dx.append(newx)    
    newy = dfay[interleave * i:interleave * i + capture_size]
    dy.append(newy)

  # Offset to separate train from test datain df
  offset = train_size * interleave + capture_size

  # Generate test dataset 
  for i in range(0, test_size):
    index = interleave * i + offset
    newx = dfax[index:index + capture_size]
    tx.append(newx)
    newy = dfay[index:index + capture_size]
    ty.append(newy)

  # Convert lists to dataframes
  train_x = pd.DataFrame(dx) 
  train_y = pd.DataFrame(dy) 
  test_x = pd.DataFrame(tx) 
  test_y = pd.DataFrame(ty) 
  return train_x, train_y, test_x, test_y

def add_dataframe_for_signal(df, train_x, train_y, test_x, test_y):
  x, y, tx, ty = get_signal_dataframes(df)
  train_x.append(x)
  train_y.append(y)
  test_x.append(tx)
  test_y.append(ty)

train_x = []
train_y = []
test_x = []
test_y = []
add_dataframe_for_signal(dfa, train_x, train_y, test_x, test_y)
add_dataframe_for_signal(dfb, train_x, train_y, test_x, test_y)

# Build dataframes 
train_x = pd.concat(train_x)
train_y = pd.concat(train_y)
test_x = pd.concat(test_x)
test_y = pd.concat(test_y)

# Shuffle data
permutations = np.random.permutation(len(train_y))
train_x = train_x.iloc[permutations]
train_y = train_y.iloc[permutations]
permutations = np.random.permutation(len(test_x))
test_x = test_x.iloc[permutations]
test_y = test_y.iloc[permutations]

In [0]:
def create_np_array_x(d, size):
  new = np.ndarray(shape=(signals_count * size, 2, capture_size), dtype=float)
  # 100 i
  for i in range(len(d.values)):
    # 2048 j
    for j in range(len((d.values)[i][0])):
    #for j in range(len((d.values)[i])):
        new[i][0][j] = ((d.values)[i])[0].values[j][0]
        new[i][1][j] = ((d.values)[i])[0].values[j][1]
  return new

def create_np_array_y(d, size):
  new = np.ndarray(shape=(signals_count * size), dtype=np.int16)
  # 100 i
  for i in range(len(d.values)):
    result = ((d.values)[i])[0].values[0][0]
    if (result == 'FM'):
      new[i] = 1
    else:
      new[i] = 0
  return new

train_x = create_np_array_x(train_x, train_size)
train_y = create_np_array_y(train_y, train_size)
test_x = create_np_array_x(test_x, test_size)
test_y = create_np_array_y(test_y, test_size)

**Aca deberia poder poner el input como yo quiero sin tener que tocar el vector, pero se transformo en el paso anterior.**

In [0]:
# Real model to test
def model_working():
  model = Sequential()
  model.add(Conv1D(filters=10, kernel_size=16, strides=1, padding='same', activation='relu', input_shape=(2, capture_size)))
  model.add(MaxPooling1D(pool_size=10, strides=2, padding='same'))
  model.add(Conv1D(filters=10, kernel_size=12, strides=1, padding='same', activation='relu', input_shape=(6, int(capture_size / 2))))
  model.add(MaxPooling1D(pool_size=10, strides=2, padding='same'))
  model.add(Conv1D(filters=10, kernel_size=6, strides=1, padding='same', activation='relu', input_shape=(6, int(capture_size / 4))))
  model.add(MaxPooling1D(pool_size=10, strides=2, padding='same'))
  model.add(Conv1D(filters=10, kernel_size=3, strides=1, padding='same', activation='relu', input_shape=(6, int(capture_size / 8))))
  model.add(MaxPooling1D(pool_size=10, strides=2, padding='same'))
  model.add(Dense(2, activation='softmax'))
  model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
  return model

In [0]:
# Tensorboard test
from datetime import datetime
logdir = "logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

In [0]:
# Debug if data has too much zeros and the order
np.set_printoptions(threshold=1024000000)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)

print(len(train_x))
print(len(test_x))
print(len(train_x[0]))
#print(len(test_x[0]))
print(len(train_x[0][0]))
#print(len(test_x[0][0]))
print((train_x[0][0][0]))
print((test_x[0][0][0]))
index_check = 201
print(test_y)
print("Train zeros index: " + str(index_check) + " zeros: " + str(len(train_x[index_check][0]) - np.count_nonzero(train_x[index_check][0])))
#print(train_x[index_check])
print("Test zeros index: " + str(index_check) + "  zeros: " + str(len(test_x[index_check][0]) - np.count_nonzero(test_x[index_check][0])))
#print(test_x[index_check][0])

1600
400
2
2048
-1.22074008686468
-2.136295079253614
[1 1 0 1 1 0 0 1 0 1 1 0 0 1 0 1 1 0 0 1 1 0 0 0 1 0 1 1 1 1 0 0 0 1 0 0 1
 1 1 0 0 0 1 1 1 1 0 1 1 1 1 0 1 0 1 1 0 1 0 0 1 1 0 1 1 0 1 1 1 0 1 1 1 1
 0 0 1 0 0 0 0 0 0 1 0 0 1 1 0 1 1 1 0 1 0 0 1 0 0 0 0 1 0 0 0 1 1 0 1 0 1
 0 1 0 1 0 0 0 0 1 1 0 0 1 1 1 0 1 1 1 0 1 1 1 1 1 0 0 1 0 1 1 0 1 0 1 1 0
 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 0 1 0 0 0 1 0 1 0 1 0 0 0 1 1 1 0 1 0 0 0
 1 0 1 1 0 1 0 0 0 1 0 1 1 0 1 0 0 0 0 0 1 0 0 0 1 1 1 0 0 1 1 1 1 0 1 1 0
 0 1 1 0 0 0 0 0 1 1 0 0 1 1 1 0 0 1 1 0 1 0 0 0 1 1 1 0 1 0 1 1 1 0 0 1 1
 1 0 0 0 1 1 1 1 0 1 1 1 0 1 1 0 1 0 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 0 0
 1 0 0 0 1 0 1 0 1 0 1 0 0 0 1 0 1 1 1 0 1 0 1 0 1 0 0 0 1 0 0 0 0 0 1 1 0
 1 1 0 1 0 0 0 1 0 1 1 0 0 1 0 1 1 1 1 1 1 0 0 0 1 0 1 0 0 1 1 0 1 0 0 1 1
 0 0 0 1 1 0 0 1 1 0 1 1 1 1 1 0 0 1 0 0 0 0 0 0 1 1 1 0 1 0]
Train zeros index: 201 zeros: 126
Test zeros index: 201  zeros: 556



Passing a negative integer is deprecated in version 1.0 and will not be supported in future version. Instead, use None to not limit the column width.



In [0]:
# #TODO: Contar los distintos labels de los df
model = model_working()
model.summary()

model.save_weights(checkpoint_path.format(epoch=0))
# Create a callback that saves the model's weights every 1 epochs
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, verbose=1, save_weights_only=True, period=1)

# Train
model.fit(x = train_x, y = train_y, batch_size=batch_size, verbose=1, validation_data=(test_x, test_y), epochs=8, callbacks=[tensorboard_callback, cp_callback])

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_20 (Conv1D)           (None, 2, 10)             327690    
_________________________________________________________________
max_pooling1d_20 (MaxPooling (None, 1, 10)             0         
_________________________________________________________________
conv1d_21 (Conv1D)           (None, 1, 10)             1210      
_________________________________________________________________
max_pooling1d_21 (MaxPooling (None, 1, 10)             0         
_________________________________________________________________
conv1d_22 (Conv1D)           (None, 1, 10)             610       
_________________________________________________________________
max_pooling1d_22 (MaxPooling (None, 1, 10)             0         
_________________________________________________________________
conv1d_23 (Conv1D)           (None, 1, 10)            

<tensorflow.python.keras.callbacks.History at 0x7fa438290eb8>

In [0]:
def read_saved_info():
  try:
      file = open(dir_base_model + model_name + '.txt', 'rt')
      value = file.read()
      print(value)
      file.close()
      old_evaluation_result = ast.literal_eval(value)
      return old_evaluation_result
  except FileNotFoundError:
      return None

In [0]:
# Evaluate model
evaluation_result = model.evaluate(test_x, test_y, verbose=1)
print(evaluation_result)
print("Current loss is: " + str(evaluation_result))

old_evaluation_result = read_saved_info()
exists = False
print("Old loss is: " + str(old_evaluation_result))
if (old_evaluation_result != None):
  exists = True

if (exists == False or (evaluation_result[1] > old_evaluation_result[1])):
  print("Writting new object...")
  file = open(dir_base_model + model_name + '.txt', 'wt')
  file.write(str(evaluation_result))
  file.close()
  model.save(dir_base_model + model_name + '-090.model')
  model.save_weights(dir_base_model + model_name + '-090.cpk')

[1.9849718809127808, 0.512499988079071]
Current loss is: [1.9849718809127808, 0.512499988079071]
[0.5114555954933167, 0.6524999737739563]
Old loss is: [0.5114555954933167, 0.6524999737739563]


In [0]:
# Prepare one line of data to predict
element = 0
samples_to_show = 5

td = np.ndarray(shape=(samples_to_show, 2, capture_size), dtype=float)
ld = np.ndarray(shape=(samples_to_show), dtype=int)

for j in range(samples_to_show):
  for i in range(capture_size):
    td[j][0][i] = test_x[element+j][0][i]
    td[j][1][i] = test_x[element+j][1][i]
  ld[j] = test_y[element+j]

def label_from_column(max_column):
  if (max_column==1):
    return "FM"
  if (max_column==0):
    return "Noise"
  return "Error"

In [0]:
predictions = model.predict(td)

# Print predicted values
print(predictions)

# Build predicted values
listx = []
for i in range(len(predictions)):
  result = np.argmax(predictions[i], axis=None) 
  print("predicted " + str(result))
  listx.append(label_from_column(result))
print(listx)

# Print original values
print(ld)

[[[0.9382972  0.06170275]]

 [[0.90618783 0.0938122 ]]

 [[0.9894583  0.01054164]]

 [[0.9975979  0.00240208]]

 [[0.95088094 0.049119  ]]]
predicted 0
predicted 0
predicted 0
predicted 0
predicted 0
['Noise', 'Noise', 'Noise', 'Noise', 'Noise']
[1 1 0 1 1]


**TODAS PRUEBAS DE GRABADO**

In [0]:
model.weights

[<tf.Variable 'conv1d_12/kernel:0' shape=(16, 2048, 10) dtype=float32, numpy=
 array([[[-1.19584231e-02, -1.83040649e-03, -1.18301343e-03,
           2.02828366e-03, -1.53315719e-03, -6.66321022e-03,
           1.12070069e-02, -3.82799376e-03,  9.21153650e-03,
           1.14066787e-02],
         [-5.00512403e-03, -3.36266588e-03, -8.41117557e-03,
          -3.49643920e-03, -1.02982121e-02, -4.84282244e-03,
           5.77862188e-03, -1.14123728e-02,  9.84932669e-03,
           7.53317401e-03],
         [-1.22700045e-02,  8.21937434e-03, -2.77540646e-04,
          -2.25239433e-04,  6.55603595e-03, -8.16816464e-05,
          -2.47898791e-03, -5.16852923e-03, -9.19503719e-03,
           5.13233803e-03],
         [ 9.26014036e-03, -6.31634938e-03,  4.06862609e-03,
          -1.04004880e-02,  1.08928345e-02, -6.60424121e-04,
           1.96447596e-04,  1.82699878e-03,  8.00982118e-04,
          -3.13458778e-03],
         [-5.65933622e-03,  7.66629167e-03, -1.22057823e-02,
          -8.9534

In [0]:
# Grabado de modelo prueba 1, con manager
manager = tf.train.Checkpoint(optimizer=model.optimizer, model=model)
status = manager.save(dir_base_model + model_name + 'alternative-090')

#ckpt = tf.train.Checkpoint(optimizer=model.optimizer, model=model)
#manager = tf.train.CheckpointManager(ckpt, dir_base_model + model_name + 'alternative-090', max_to_keep=3)
#status = manager.save()

print(status)

/content/gdrive/My Drive/DeepLearning/sigid/test-best-noise-vs-fm-fixed-checkpointsalternative-090-1


In [0]:
# Grabado de modelo prueba 2, en json
#json_config = model.to_json()
#with open('/content/gdrive/My Drive/DeepLearning/sigid/model-90.json', 'w') as json_file:
#  json_file.write(json_config)
# Guardar pesos en el disco
#model.save_weights('/content/gdrive/My Drive/DeepLearning/sigid/model-90-weights.h5')
#model.save_weights('/content/gdrive/My Drive/DeepLearning/sigid/model-90-weightstf', save_format='tf')

In [0]:
# Grabado de modelo prueba 3, otra forma mas?!
#tf.saved_model.save(model, dir_base_model + model_name + 'alternative-090xxx')


In [0]:
# Draw confusion matrix
# https://plot.ly/~francoisp/50/confusion-matrix/#code
# Get this figure: fig = py.get_figure("https://plotly.com/~francoisp/50/")
# Get this figure's data: data = py.get_figure("https://plotly.com/~francoisp/50/").get_data()
# Add data to this figure: py.plot(Data([Scatter(x=[1, 2], y=[2, 3])]), filename ="plot from API (25)", fileopt="extend")
# Get y data of first trace: y1 = py.get_figure("https://plotly.com/~francoisp/50/").get_data()[0]["y"]

# Get figure documentation: https://plotly.com/python/get-requests/
# Add data documentation: https://plotly.com/python/file-options/

# If you're using unicode in your file, you may need to specify the encoding.
# You can reproduce this figure in Python with the following code!

# Learn about API authentication here: https://plotly.com/python/getting-started
# Find your api_key here: https://plotly.com/settings/api

import plotly.plotly as py
from plotly.graph_objs import *
py.sign_in('username', 'api_key')
trace1 = {
  "type": "heatmap", 
  "x": ["economie-industrie-fiscalite-et-finances-publiques", "europe-et-international", "culture-medias-science-et-numerique", "sante-et-bioethique", "logement-transports-ville-et-amenagement-du-territoire", "institutions-vie-politique-et-moralisation", "environnement-et-energie", "education-recherche-jeunesse-et-sports", "agriculture-et-ruralite", "societe-famille-laicite-et-immigration", "police-justice-et-armee", "social-travail-pouvoir-dachat-et-emploi"], 
  "y": ["economie-industrie-fiscalite-et-finances-publiques", "europe-et-international", "culture-medias-science-et-numerique", "sante-et-bioethique", "logement-transports-ville-et-amenagement-du-territoire", "institutions-vie-politique-et-moralisation", "environnement-et-energie", "education-recherche-jeunesse-et-sports", "agriculture-et-ruralite", "societe-famille-laicite-et-immigration", "police-justice-et-armee", "social-travail-pouvoir-dachat-et-emploi"], 
  "z": [
    [17, 2, 3, 0, 0, 1, 1, 1, 0, 0, 0, 2], [1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0], [1, 1, 5, 0, 0, 3, 0, 0, 0, 0, 0, 0], [0, 0, 0, 5, 2, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 2, 0, 1, 1, 0, 0, 0, 0], [0, 0, 1, 0, 0, 7, 0, 1, 0, 1, 1, 0], [0, 0, 1, 0, 0, 1, 0, 1, 2, 0, 0, 0], [2, 0, 0, 1, 2, 1, 1, 5, 0, 0, 0, 1], [1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0], [1, 0, 0, 1, 0, 1, 0, 0, 0, 3, 0, 0], [0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 4, 1], [3, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, ]
}
data = Data([trace1])
layout = {
  "title": "Confusion Matrix", 
  "xaxis": {
    "title": "Predicted value", 
    "titlefont": {
      "size": 18, 
      "color": "#7f7f7f", 
      "family": "Courier New, monospace"
    }
  }, 
  "yaxis": {
    "title": "True Value", 
    "titlefont": {
      "size": 18, 
      "color": "#7f7f7f", 
      "family": "Courier New, monospace"
    }
  }, 
  "barmode": "overlay"
}
fig = Figure(data=data, layout=layout)
plot_url = py.plot(fig)