In [1]:
import keras
from sklearn.model_selection import train_test_split
from keras.datasets import mnist
import numpy as np
from keras import layers
import os
from keras import regularizers
import math
import matplotlib.pyplot as plt
import tensorflow as tf

2024-07-25 00:01:23.152968: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-25 00:01:23.178481: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# Participant IDs (PIDs) are read from a plain-text list and kept in ascending order.
# NOTE(review): the earlier comment said "51 PIDs" and named Spontaneous_PIDs.txt, but the
# path below is the _v0 file -- confirm which list is intended.
pids = np.sort(np.loadtxt('../Spontaneous_PIDs_v0.txt'))

# Input directory: per-PID CSVs of linearly interpolated data fed to the autoencoder.
exp_dir = '/home/chinmai/src/Oura/Data/Spontaneous/Lin_IP_GA245_y/'
# Output directory: per-PID CSVs of encoder outputs.
out_dir = '/home/chinmai/src/laborprediction/Autoencoder/ConvAE_GA245_Encodings_y/'

In [3]:
def define_Autoencoder(enc_dim, in_size):
    """Build a 1-D convolutional autoencoder and the encoder that shares its layers.

    Parameters
    ----------
    enc_dim : int
        Length of the encoded (bottleneck) vector.
    in_size : int
        Number of samples in one input sequence; the model input has shape
        (in_size, 1).

    Returns
    -------
    (autoencoder, encoder) : tuple of keras.Model
        `autoencoder` maps the input to a reconstruction of length `in_size`;
        `encoder` maps the same input to the `enc_dim`-long encoding.
    """
    # Single-channel input sequence.
    input_img = keras.Input(shape=(in_size, 1))

    # ---- ENCODER ----
    # Block 1: 64 filters of length 5, then halve the sequence length.
    conv1 = layers.Conv1D(64, 5, activation='LeakyReLU', padding='same', kernel_initializer='glorot_uniform')(input_img)
    pool1 = layers.MaxPooling1D(2, padding='same')(conv1)
    # Block 2: 32 filters of length 3, then halve again.
    conv2 = layers.Conv1D(32, 3, activation='LeakyReLU', padding='same', kernel_initializer='glorot_uniform')(pool1)
    pool2 = layers.MaxPooling1D(2, padding='same')(conv2)
    # Block 3: 16 filters of length 3, then halve again.
    conv3 = layers.Conv1D(16, 3, activation='LeakyReLU', padding='same', kernel_initializer='glorot_uniform')(pool2)
    pool3 = layers.MaxPooling1D(2, padding='same')(conv3)
    # Flatten all filter outputs and project them to the bottleneck.
    flat1 = layers.Flatten()(pool3)
    # Encoded representation of the input sequence.
    encoded = layers.Dense(enc_dim, activation='linear', kernel_initializer='glorot_uniform')(flat1)

    # ---- DECODER ----
    # Treat the encoding as a length-enc_dim, single-channel sequence so the
    # transposed convolutions can upsample it. A Reshape layer is used rather
    # than a raw tf.reshape so the step is an ordinary Keras layer in the graph.
    enc = layers.Reshape((enc_dim, 1))(encoded)
    # Three stride-2 transposed convolutions mirror the encoder's filter counts.
    convT1 = layers.Conv1DTranspose(16, 3, strides=2, padding='same', activation='LeakyReLU', kernel_initializer='glorot_uniform')(enc)
    convT2 = layers.Conv1DTranspose(32, 3, strides=2, padding='same', activation='LeakyReLU', kernel_initializer='glorot_uniform')(convT1)
    convT3 = layers.Conv1DTranspose(64, 5, strides=2, padding='same', activation='LeakyReLU', kernel_initializer='glorot_uniform')(convT2)
    flat2 = layers.Flatten()(convT3)
    # The reconstruction must have as many samples as the input. This was
    # previously fixed at 96, which only matched in_size == 96.
    decoded = layers.Dense(in_size, activation='linear')(flat2)

    # Both models are defined by their input and output tensors and share weights.
    autoencoder = keras.Model(input_img, decoded)
    encoder = keras.Model(input_img, encoded)

    return autoencoder, encoder
    
def create_matrix_from_pids(plist, data_dir=None, n_cols=96):
    """Stack the leading columns of every listed PID's CSV into one matrix.

    For each PID the file ``<data_dir>/<int(pid)>_5temp_linIP_y.csv`` (5-minute
    average temperature data, comma separated) is read and its first `n_cols`
    columns are kept; the per-PID row blocks are stacked in `plist` order.

    Parameters
    ----------
    plist : iterable
        Participant IDs; each is converted with ``int(pid)`` to build the file name.
    data_dir : str, optional
        Directory holding the CSV files. Defaults to the module-level `exp_dir`.
    n_cols : int, optional
        Number of leading columns to keep from each file (default 96).

    Returns
    -------
    numpy.ndarray
        Array of shape (total_rows, n_cols); shape (0, n_cols) when `plist` is empty.
    """
    if data_dir is None:
        data_dir = exp_dir

    # NOTE(review): main() skips column 0 of these files (data[:, 1:289]); here
    # the slice starts at column 0 -- confirm that is intended for this format.
    blocks = []
    for pid in plist:
        fname = os.path.join(data_dir, str(int(pid)) + '_5temp_linIP_y.csv')
        # ndmin=2 keeps a one-row file two-dimensional so column slicing works.
        data = np.loadtxt(fname, delimiter=',', ndmin=2)
        blocks.append(data[:, 0:n_cols])

    if not blocks:
        return np.empty((0, n_cols))
    # Concatenate once instead of re-copying the growing matrix on every PID.
    return np.concatenate(blocks, axis=0)

In [18]:
def main():
    """Encode every PID's data with the saved encoder and write one CSV per PID.

    Reads the module-level `pids`, `exp_dir` and `out_dir`. For each PID the
    input file ``<exp_dir>/<pid>_5temp_linIP_y.csv`` is loaded, columns 1..288
    are passed through the encoder, and the result is saved to
    ``<out_dir>/<pid>_5temp_encoding.csv`` as
    [first input column | encoding | last input column].
    """
    # Only the trained encoder is needed here, so it is loaded from disk;
    # building a fresh, untrained model first would be discarded immediately.
    encoder = keras.models.load_model('./Conv_encoder_all_transpose.keras')

    # Make sure the destination exists before the first savetxt call.
    os.makedirs(out_dir, exist_ok=True)

    for pid in pids:
        pid_tag = str(int(pid))
        fname = os.path.join(exp_dir, pid_tag + '_5temp_linIP_y.csv')
        # ndmin=2 keeps a one-row file two-dimensional so column slicing works.
        data = np.loadtxt(fname, delimiter=',', ndmin=2)
        ga = data[:, 0:1]    # first column (presumably gestational age -- confirm)
        y1 = data[:, -1:]    # last column, carried through unchanged
        d1 = data[:, 1:289]  # the 288 samples fed to the encoder

        res = encoder.predict(d1)
        print (res.shape)
        fin = np.concatenate((ga, res, y1), axis=1)
        np.savetxt(os.path.join(out_dir, pid_tag + '_5temp_encoding.csv'), fin, delimiter=',')

main()

(29, 64)
(20, 64)
(32, 64)
(42, 64)
(30, 64)
(28, 64)
(45, 64)
(33, 64)
(35, 64)
(29, 64)
(25, 64)
(23, 64)
(23, 64)
(41, 64)
(40, 64)
(28, 64)
(21, 64)
(29, 64)
(26, 64)
(38, 64)
(19, 64)
(20, 64)
(26, 64)
(36, 64)
(38, 64)
(30, 64)
(28, 64)
(45, 64)
(33, 64)
(25, 64)
(41, 64)
(29, 64)
(41, 64)
(24, 64)
(47, 64)
(16, 64)
(29, 64)
(46, 64)
(29, 64)
(34, 64)
(41, 64)
(28, 64)
(31, 64)
(25, 64)
(28, 64)
(43, 64)
(35, 64)
(46, 64)
(26, 64)
(37, 64)


In [3]:
# Seed the random number generators through Keras so runs are repeatable.
keras.utils.set_random_seed(912)

# Requires `pids` from the PID-loading cell; that cell must be run first.
print(len(pids))

# Switch the input directory to the linear-interpolation data set.
exp_dir = '/home/chinmai/src/Oura/Data/Linear_Interpolation_y/'

# First split: hold out 10% of the PIDs as the test set.
train_pids, test_pids = train_test_split(pids, test_size=0.1, random_state=42)
for split in (train_pids, test_pids):
    split.sort()
print(train_pids, test_pids)
print(len(train_pids), len(test_pids))

# Second split: carve 10% of the remaining training PIDs off as validation.
# Only train_pids is re-sorted; val_pids stays in the order the split returned.
train_pids, val_pids = train_test_split(train_pids, test_size=0.1, random_state=42)
train_pids.sort()
print(train_pids, val_pids)
print(len(train_pids), len(val_pids))

NameError: name 'pids' is not defined

In [5]:
# In this block we want to read from training and test PIDs
x_train = []
count = 0
for pid in train_pids:
    fname = os.path.join(exp_dir,str(pid)+'_5temp_linIP_y.csv')
    #print('Processing pid: ',pid)
    data = np.loadtxt(fname,delimiter=',')
    d1 = data[:,0:288]
    if count == 0:
        x_train = d1
    else:
        x_train = np.concatenate((x_train,d1),axis=0)
    #print(x_train.shape)
    count += 1
x_val = []
count = 0
for pid in val_pids:
    fname = os.path.join(exp_dir,str(pid)+'_5temp_linIP_y.csv')
    #print('Processing pid: ',pid)
    data = np.loadtxt(fname,delimiter=',')
    d1 = data[:,0:288]
    if count == 0:
        x_val = d1
    else:
        x_val = np.concatenate((x_val,d1),axis=0)
    #print(x_train.shape)
    count += 1

x_test = []
count = 0
for pid in test_pids:
    fname = os.path.join(exp_dir,str(pid)+'_5temp_linIP_y.csv')
    #print('Processing pid: ',pid)
    data = np.loadtxt(fname,delimiter=',')
    d1 = data[:,0:288]
    if count == 0:
        x_test = d1
    else:
        x_test = np.concatenate((x_test,d1),axis=0)
    #print(x_test.shape)
    count += 1
print(x_train.shape, x_val.shape, x_test.shape)

x_all = []
count = 0
for pid in pids:
    fname = os.path.join(exp_dir,str(pid)+'_5temp_linIP_y.csv')
    #print('Processing pid: ',pid)
    data = np.loadtxt(fname,delimiter=',')
    d1 = data[:,0:288]
    if count == 0:
        x_all = d1
    else:
        x_all = np.concatenate((x_all,d1),axis=0)
    #print(x_test.shape)
    count += 1
print (x_all.shape)

(2267, 288) (250, 288) (295, 288)
(2812, 288)


In [6]:
# Load the previously saved encoder model from disk.
# NOTE(review): main() loads './Conv_encoder_all_transpose.keras' instead -- confirm which file is current.
encoder = keras.models.load_model('./Conv_encoder.keras')

2023-11-06 12:35:15.861152: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-11-06 12:35:15.939123: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-11-06 12:35:15.939340: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [7]:
# Attach an Adam optimizer and a mean-squared-error loss to the loaded encoder,
# then print its layer-by-layer summary.
# NOTE(review): the cells that follow only call encoder.predict -- confirm whether compiling is still needed.
encoder.compile(optimizer=keras.optimizers.Adam(), loss='mean_squared_error')
encoder.summary()

#keras.utils.plot_model(autoencoder)

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 288, 1)]          0         
                                                                 
 conv1d_6 (Conv1D)           (None, 288, 16)           64        
                                                                 
 max_pooling1d_3 (MaxPoolin  (None, 144, 16)           0         
 g1D)                                                            
                                                                 
 conv1d_7 (Conv1D)           (None, 144, 8)            392       
                                                                 
 max_pooling1d_4 (MaxPoolin  (None, 72, 8)             0         
 g1D)                                                            
                                                                 
 conv1d_8 (Conv1D)           (None, 72, 8)             200 

In [8]:
# Encode every row of x_all with the loaded encoder in a single predict call.
res = encoder.predict(x_all)

2023-11-06 12:35:34.670046: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8904




2023-11-06 12:35:34.913303: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


In [11]:
# Sanity check: show the container type that predict returned.
print(type(res))

<class 'numpy.ndarray'>


In [15]:
# Encode each PID's rows separately and save them next to their labels.
clean_pid = []  # kept for compatibility; nothing in this cell fills it

encodings_dir = './ConvAE_LinIP_Encodings_y/'
# Make sure the destination exists before the first savetxt call.
os.makedirs(encodings_dir, exist_ok=True)

for pid in pids:
    # int(pid) keeps file names free of a trailing ".0" when the PIDs are
    # floats (as np.loadtxt returns them), matching main().
    pid_tag = str(int(pid))
    fname = os.path.join(exp_dir, pid_tag + '_5temp_linIP_y.csv')
    # ndmin=2 keeps a one-row file two-dimensional so column slicing works.
    data = np.loadtxt(fname, delimiter=',', ndmin=2)
    y1 = data[:, -1:]    # last column, carried through unchanged
    d1 = data[:, 0:288]  # the 288 samples fed to the encoder

    res = encoder.predict(d1)
    print (res.shape)
    # Output row layout: [encoding | last input column].
    fin = np.concatenate((res, y1), axis=1)
    np.savetxt(os.path.join(encodings_dir, pid_tag + '_5temp_encoding.csv'), fin, delimiter=',')

(46, 64)
(73, 64)
(32, 64)
(71, 64)
(66, 64)
(28, 64)
(67, 64)
(72, 64)
(44, 64)
(45, 64)
(87, 64)
(52, 64)
(23, 64)
(55, 64)
(50, 64)
(70, 64)
(55, 64)
(29, 64)
(37, 64)
(26, 64)
(58, 64)
(29, 64)
(47, 64)
(46, 64)
(63, 64)
(46, 64)
(30, 64)
(65, 64)
(51, 64)
(68, 64)
(49, 64)
(58, 64)
(83, 64)
(29, 64)
(43, 64)
(45, 64)
(55, 64)
(65, 64)
(39, 64)
(29, 64)
(53, 64)
(75, 64)
(38, 64)
(72, 64)
(56, 64)
(40, 64)
(71, 64)
(25, 64)
(53, 64)
(45, 64)
(42, 64)
(54, 64)
(87, 64)
(26, 64)
(49, 64)
