In [1]:
import glob
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.decomposition import PCA
import numpy as np

2023-11-29 12:20:18.544790: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-29 12:20:19.956348: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-29 12:20:23.445017: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-29 12:20:23.595835: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def load_shape_info(dir):
    """Read the pickled shape description stored inside a dataset folder.

    Parameters
    ----------
    dir : str
        Folder containing a file named ``pickleshapes``.

    Returns
    -------
    object
        Whatever object was pickled into ``<dir>/pickleshapes``.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while deserialising, so this
    should only be pointed at files you produced yourself.
    """
    import pickle

    shapes_path = dir + '/pickleshapes'
    with open(shapes_path, 'rb') as handle:
        return pickle.load(handle)
def _lab_labels(all_labels):
    """One-hot encode the four categorical label slots of a parsed example.

    Helper called from the record-parsing function inside ``load_tf_data``.

    Parameters
    ----------
    all_labels : tensor-like
        Indexable container whose entries 0..3 hold class indices; each
        entry is cast to ``tf.int32`` before being encoded.

    Returns
    -------
    dict
        ``'comp_labels'`` (depth 3), ``'amp_labels'`` (depth 10),
        ``'torque_labels'`` (depth 5) and ``'joint_labels'`` (depth 3),
        each holding the ``tf.one_hot`` encoding of the matching entry.
    """
    # (output key, one-hot depth), listed in the order the slots are read from all_labels.
    label_layout = (
        ('comp_labels', 3),
        ('amp_labels', 10),
        ('torque_labels', 5),
        ('joint_labels', 3),
    )
    return {
        key: tf.one_hot(tf.cast(all_labels[slot], tf.int32), depth)
        for slot, (key, depth) in enumerate(label_layout)
    }

In [3]:
def load_tf_data(folder, shape_info, batch_size=500, label_key='amp_labels'):
    """Build a batched ``tf.data`` pipeline from the ``*.tf`` TFRecord files in a folder.

    Parameters
    ----------
    folder : str
        Directory searched (non-recursively) for files matching ``*.tf``.
    shape_info : sequence
        ``shape_info[0]``, ``shape_info[1]`` and ``shape_info[2]`` are the fixed
        shapes of the ``'ts'``, ``'labels'`` and ``'pos_labels'`` features.
    batch_size : int, optional
        Number of examples per batch (default 500). The last batch is kept
        even when it is smaller.
    label_key : str, optional
        Which target is paired with each time series: one of the keys produced
        by ``_lab_labels`` or ``'pos_labels'`` (default ``'amp_labels'``).

    Returns
    -------
    tf.data.Dataset
        Dataset yielding ``(ts, label)`` batches.

    Raises
    ------
    FileNotFoundError
        If ``folder`` contains no ``*.tf`` files.
    """
    # glob returns matches in arbitrary order; sorting makes the record order
    # (and therefore the batch contents) reproducible between runs.
    tf_records = sorted(glob.glob(os.path.join(folder, '*.tf')))
    if not tf_records:
        raise FileNotFoundError(f"No '*.tf' TFRecord files found in {folder!r}")
    dataset = tf.data.TFRecordDataset(tf_records)

    feature_description = {
        'ts': tf.io.FixedLenFeature(shape_info[0], tf.float32),
        'labels': tf.io.FixedLenFeature(shape_info[1], tf.float32),
        'pos_labels': tf.io.FixedLenFeature(shape_info[2], tf.float32)
    }

    def _parse_function(example_proto):
        example = tf.io.parse_example(example_proto, feature_description)
        # Every feature is already parsed as float32, so no extra cast is needed.
        labels = _lab_labels(example['labels'])
        # 'pos_labels' is a required feature above, so it is always present here.
        labels['pos_labels'] = example['pos_labels']
        return example['ts'], labels[label_key]

    dataset = dataset.map(_parse_function)
    return dataset.batch(batch_size=batch_size, drop_remainder=False)

In [4]:
# Alternative dataset location, kept for reference:
#dir_base = '/scratch/user/swati/Capstone/'
#dir_data = 'RoughCut_Datasets'
dir_base = '/scratch/group/statconsult/'
dir_data = 'Test data'
# NOTE: `dir` shadows the Python builtin of the same name from this cell onwards.
dir = f'{dir_base}{dir_data}/'

# Folder -> variable mapping used below:
#   'train'    -> train_*
#   'predict'  -> test_*
#   'validate' -> predict_*  (consumed later as the validation split)
train_shape_info = load_shape_info(f'{dir}train')
test_shape_info = load_shape_info(f'{dir}predict')
predict_shape_info = load_shape_info(f'{dir}validate')

train_data = load_tf_data(f'{dir}train', train_shape_info)
test_data = load_tf_data(f'{dir}predict', test_shape_info)
predict_data = load_tf_data(f'{dir}validate', predict_shape_info)

In [5]:
def combine_ts_batches(dataset):
    """Pull every batch of a ``(ts, labels)`` dataset into host memory.

    Parameters
    ----------
    dataset : iterable
        Yields ``(ts, labels)`` pairs whose members expose ``.numpy()``.

    Returns
    -------
    tuple
        ``(combined_ts, label_batches)``: the time-series batches joined
        along axis 0 into a single array, and the label batches as a list
        holding one array per batch (deliberately left un-concatenated).
    """
    batches = [(ts.numpy(), labels.numpy()) for ts, labels in dataset]
    ts_arrays = [ts_array for ts_array, _ in batches]
    label_arrays = [label_array for _, label_array in batches]
    return np.concatenate(ts_arrays, axis=0), label_arrays


In [6]:
import matplotlib.pyplot as plt

def visualize_top_features(pca_model, feature_names=None, top_n=5):
    """Show one bar chart per principal component with its heaviest-weighted features.

    Parameters
    ----------
    pca_model : object
        Fitted model exposing a 2-D ``components_`` array
        (one row per component, one column per input feature).
    feature_names : sequence of str, optional
        Label for each input feature. Defaults to ``"Feature 1"``,
        ``"Feature 2"``, ... based on the number of columns.
    top_n : int, optional
        How many features, ranked by absolute weight, to plot per component.
    """
    loadings = pca_model.components_

    # Fall back to generated names when the caller supplies none.
    if feature_names is None:
        feature_names = [f"Feature {i+1}" for i in range(loadings.shape[1])]

    for i in range(loadings.shape[0]):
        # Rank (name, weight) pairs by magnitude, largest first, and keep the top slice.
        ranked = sorted(
            zip(feature_names, loadings[i, :]),
            key=lambda pair: abs(pair[1]),
            reverse=True,
        )
        features, weights = zip(*ranked[:top_n])

        plt.figure(figsize=(10, 5))
        plt.bar(features, weights)
        plt.title(f'Top {top_n} Features for Principal Component {i+1}')
        plt.xlabel('Feature')
        plt.ylabel('Weight')
        plt.xticks(rotation=45, ha='right')
        plt.show()

In [7]:
def apply_pca(dataset, n_components=50, batch_size=500, pca=None):
    """Project every time step onto PCA components and regroup the result into batches.

    Parameters
    ----------
    dataset : array-like
        3-D array laid out as ``(samples, sequence_length, feature_dim)``.
    n_components : int, optional
        Number of components kept when a new PCA is fitted (default 50).
        Ignored when ``pca`` is supplied.
    batch_size : int, optional
        Number of samples per output batch (default 500). When the input has
        fewer samples than this, all samples form a single batch.
    pca : sklearn.decomposition.PCA, optional
        Already-fitted PCA. When given, the data is only transformed with it
        (e.g. to apply the training-set projection to another split) instead
        of fitting a new model on ``dataset``.

    Returns
    -------
    tf.Tensor
        Tensor shaped ``(samples // batch_size, batch_size, sequence_length, k)``,
        where ``k`` is the number of projected components.

    Raises
    ------
    ValueError
        If ``dataset`` has no samples or its sample count is not a multiple
        of the effective batch size.
    """
    n_samples, sequence_length, feature_dim = dataset.shape
    n_samples = int(n_samples)
    print(dataset.shape)

    if n_samples == 0:
        raise ValueError("apply_pca received a dataset with no samples")
    if n_samples < batch_size:
        batch_size = n_samples
    # Check before the expensive fit: the final reshape needs whole batches.
    if n_samples % batch_size != 0:
        raise ValueError(
            f"Number of samples ({n_samples}) must be a multiple of "
            f"batch_size ({batch_size})"
        )

    # PCA works on 2-D input, so every time step becomes its own row.
    reshaped_data = tf.reshape(dataset, (n_samples * sequence_length, feature_dim))
    if pca is None:
        pca = PCA(n_components=n_components)
        pca_data = pca.fit_transform(reshaped_data)
    else:
        pca_data = pca.transform(reshaped_data)
    #visualize_top_features(pca)

    return tf.reshape(
        pca_data,
        (n_samples // batch_size, batch_size, sequence_length, pca_data.shape[1]),
    )

In [8]:
def generate_reduced_dataset(ts_batches,label_batches):
    """Wrap pre-batched time series and labels in a ``tf.data.Dataset``.

    Both inputs are converted to ``tf.float64`` tensors and then sliced
    together along their first axis, so each dataset element pairs one
    entry of ``ts_batches`` with the matching entry of ``label_batches``.

    Parameters
    ----------
    ts_batches : array-like
        Time-series data; its first axis indexes the dataset elements.
    label_batches : array-like
        Labels whose first axis lines up with ``ts_batches``.

    Returns
    -------
    tf.data.Dataset
        Dataset of ``(ts, labels)`` pairs.
    """
    features, targets = (
        tf.convert_to_tensor(batches, dtype=tf.float64)
        for batches in (ts_batches, label_batches)
    )
    return tf.data.Dataset.from_tensor_slices((features, targets))

In [9]:
# Materialise each split in memory. For every split the time series come back as one
# array concatenated along axis 0, while the labels stay a list of per-batch arrays.
combined_train_ts,train_labels = combine_ts_batches(train_data)
combined_test_ts,test_labels = combine_ts_batches(test_data)
# predict_data was read from the 'validate' folder; it is used as the validation split from here on.
combined_val_ts,val_labels = combine_ts_batches(predict_data)

In [10]:
# Reduce the training time series with PCA (apply_pca fits the PCA on the array it is
# given), then pair the regrouped result with the per-batch training labels.
pca_train_ts = apply_pca(combined_train_ts)
train_reduced_dataset = generate_reduced_dataset(pca_train_ts,train_labels)

(45000, 1000, 105)


In [11]:
# Same reduction for the test split.
# NOTE(review): this call fits a fresh PCA on the test array itself, so the projection
# is not the one learned from the training data — confirm this is intended.
pca_test_ts = apply_pca(combined_test_ts)
test_reduced_dataset = generate_reduced_dataset(pca_test_ts,test_labels)

(2500, 1000, 105)


In [12]:
# Same reduction for the validation split.
# NOTE(review): this call fits a fresh PCA on the validation array itself, so the projection
# is not the one learned from the training data — confirm this is intended.
pca_val_ts = apply_pca(combined_val_ts)
val_reduced_dataset = generate_reduced_dataset(pca_val_ts,val_labels)

(2500, 1000, 105)


In [14]:
import tensorflow as tf
from tensorflow import keras

# # Define the model
# model = keras.Sequential()

# # Flatten the input data
# model.add(keras.layers.Flatten(input_shape=(1000, 50)))

# # Add a couple of dense layers
# model.add(keras.layers.Dense(128, activation='relu'))
# model.add(keras.layers.Dense(64, activation='relu'))

# # Output layer with 3 units (assuming you have 3 classes for classification)
# model.add(keras.layers.Dense(3, activation='softmax'))

# # Compile the model
# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# # Print the model summary
# model.summary()


# model = keras.Sequential()
# model.add(keras.layers.Flatten(input_shape=(1000, 50)))
# model.add(keras.layers.Dense(128, activation='relu'))
# model.add(keras.layers.Dropout(0.5))  # Adding dropout for regularization
# model.add(keras.layers.Dense(64, activation='relu'))
# model.add(keras.layers.Dropout(0.5))
# model.add(keras.layers.Dense(3, activation='softmax'))

# # Compile the model with a lower learning rate
# model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])
# early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

#model.summary()
# Fully connected classifier over the flattened (1000, 50) PCA sequences.
# The 10-way softmax matches the depth-10 one-hot 'amp_labels' targets.
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(1000, 50)),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.5),  # Adding dropout for regularization
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(10, activation='softmax'),
])

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

# model = keras.Sequential([
#     layers.Flatten(input_shape=(1000, 50)),
#     layers.BatchNormalization(),
    
#     layers.Dense(256, activation='relu'),
#     layers.Dropout(0.5),

#     layers.Dense(128, activation='relu'),
#     layers.Dropout(0.5),

#     layers.Dense(64, activation='relu'),
#     layers.Dropout(0.5),

#     layers.Dense(3, activation='softmax')
# ])

# # Use a different optimizer (SGD) and add learning rate scheduling
# opt = keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
# model.compile(optimizer=opt,
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])

# model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 50000)             0         
                                                                 
 dense (Dense)               (None, 128)               6400128   
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 10)                650       
                                                                 
Total params: 6409034 (24.45 MB)
Trainable params: 64090

In [15]:
# Set the number of epochs
num_epochs = 50

# Plateau-based learning-rate schedule: scales the learning rate by 0.2 after 2 epochs
# without val_loss improvement, with a floor of 1e-5.
# NOTE(review): lr_scheduler is built here but is NOT passed to model.fit() below,
# so it has no effect on this run — confirm whether that is intentional.
lr_scheduler = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-5)

# Earlier variant that used the callback; it refers to `model1`, which is not defined in the cells shown here.
#history = model1.fit(train_reduced_dataset, callbacks=[lr_scheduler],epochs=num_epochs, validation_data=val_reduced_dataset)
# Train the model. Both datasets already yield whole batches, so no batch_size is passed.
history = model.fit(train_reduced_dataset,epochs=num_epochs, validation_data=val_reduced_dataset)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [16]:
# Evaluate the trained model on the held-out test split. The targets here are the
# one-hot 'amp_labels' that load_tf_data pairs with each time series, not 'comp_labels'.
test_loss, test_accuracy = model.evaluate(test_reduced_dataset)
print("Test Accuracy: {:.2f}%".format(test_accuracy * 100))

Test Accuracy: 100.00%


In [None]:
pip install sklearn

In [61]:
pip install scikit-learn

You should consider upgrading via the '/scratch/user/swati/pip_envs/my_notebook-Python/3.8.2/bin/python -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [80]:
# Show the installed packages whose names contain 'tensorflow' or 'numpy', with their versions.
!pip list | grep 'tensorflow\|numpy'

numpy                         1.24.3    
tensorflow                    2.13.1    
tensorflow-estimator          2.13.0    
tensorflow-io-gcs-filesystem  0.34.0    
You should consider upgrading via the '/scratch/user/swati/pip_envs/my_notebook-Python/3.8.2/bin/python -m pip install --upgrade pip' command.[0m
