In [11]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Load and Process the Data

In [2]:
# Read the comma-separated measurement file into a NumPy array.
data = np.loadtxt(fname='data/data.csv', delimiter=',')

The last 13 rows of the data represent my output

In [3]:
# Everything except the final 13 rows is input, the final 13 rows are output.
# Both slices are transposed so that the original columns become rows.
X = data[:-13, :].T
Y = data[-13:, :].T

Every channel has a background of around ~150, which we don't want to factor in. Output should always be positive, so both the inputs and the outputs are min-max scaled to the range $\left[0,1\right]$.

In [4]:
# Sanity check: (rows, columns) of the transposed input matrix.
np.shape(X)

(140, 11725)

In [5]:
# Independent min-max scalers: one for the inputs, one for the outputs.
inscale, outscale = MinMaxScaler(), MinMaxScaler()

In [6]:
# Learn each scaler's per-column min/max and apply it in one pass.
# NOTE(review): both scalers are fitted on every row of X / Y, including the
# rows that are later held out for test and validation, so the held-out
# statistics leak into the scaling. Consider fitting on the training rows only.
xs = inscale.fit(X).transform(X)
ys = outscale.fit(Y).transform(Y)

Split the data into **train**, **test**, and **val**idation segments.

The data are in blocks of 20 measurements for each sample. Since the samples should be independent of each other, we can randomly select from each of the 7 samples. I have elected for a 70/15/15 train/test/validate split. 

In [7]:
# Small 3x4 toy array (values 0..11) used to illustrate np.delete below.
a = np.arange(12).reshape(3, 4)
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [8]:
# axis=0: drop the row at index 1 (returns a new array; `a` is unchanged).
np.delete(a, 1, axis=0)

array([[ 0,  1,  2,  3],
       [ 8,  9, 10, 11]])

In [9]:
# axis=1: drop the column at index 1 (returns a new array; `a` is unchanged).
np.delete(a, 1, axis=1)

array([[ 0,  2,  3],
       [ 4,  6,  7],
       [ 8, 10, 11]])

In [10]:
# Select 3 numbers from 0-19
# Select 3 other numbers from 0-19
# For each index, step through the data with step 20
    # Add each row to test or validate as appropriate
# Delete those rows; the result is the training set

*Should my output layer be linear (to try and estimate the amount of material in a spectrum) or LogSig/Softmax (material present or not)?*

*Can't use hardlim with backpropagation because it is not differentiable*

*Softmax--multiclassification problem*

In [119]:
# Build `c`, the row indices held out of the training set.
#
# The data are laid out in consecutive blocks of 20 measurements per sample.
# Six within-block positions are drawn once and reused for every block; the
# next cell alternates through `c`, so each block contributes 3 test rows and
# 3 validation rows.
BLOCK_SIZE = 20          # measurements per sample
HELD_OUT_PER_BLOCK = 6   # 30% of a block -> 15% test + 15% validation
SPLIT_SEED = 0           # fixed so Restart & Run All reproduces the same split

# A dedicated, seeded generator keeps the split reproducible without touching
# NumPy's global random state.
rng = np.random.RandomState(SPLIT_SEED)
within_block = rng.choice(BLOCK_SIZE, HELD_OUT_PER_BLOCK, replace=False)

# Only complete blocks are sampled. A trailing partial block (row count not a
# multiple of BLOCK_SIZE) stays entirely in the training set instead of
# generating indices past the end of the data.
n_blocks = xs.shape[0] // BLOCK_SIZE
block_starts = np.arange(n_blocks) * BLOCK_SIZE

# Row-major flattening yields block 0's picks, then block 1's, and so on --
# the same layout as repeatedly appending the previous six indices + 20.
c = (block_starts[:, None] + within_block[None, :]).ravel()

In [125]:
# Alternate through the held-out indices: even positions of `c` form the test
# set, odd positions form the validation set.
test_rows = c[::2]
val_rows = c[1::2]

# Integer-array indexing copies the selected rows into new arrays.
xs_test, ys_test = xs[test_rows], ys[test_rows]
xs_val, ys_val = xs[val_rows], ys[val_rows]

# Whatever was not held out is used for training.
xs_train = np.delete(xs, c, axis=0)
ys_train = np.delete(ys, c, axis=0)

# Report the shape of each partition, one per line.
for part in (xs_test, ys_test, xs_val, ys_val, xs_train, ys_train):
    print(part.shape)

(21, 11725)
(21, 13)
(21, 11725)
(21, 13)
(98, 11725)
(98, 13)
