In [160]:
import os
# Set the environment variable to use only the GPU with ID 1 (GTX 1080 Ti)
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import seaborn as sn
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
import PIL
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, Model
import pandas as pd
import numpy as np
import pathlib

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, TimeDistributed, Conv2D, MaxPooling2D, Flatten, Reshape
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dropout, GlobalAveragePooling2D
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.callbacks import EarlyStopping
import datetime
from tensorflow.keras.callbacks import Callback
from sklearn.metrics import confusion_matrix, f1_score

In [161]:
# Set the print options to display the entire array
np.set_printoptions(threshold=np.inf)

In [162]:

class Manage_data():
    def __init__(self):
        data_dir='/mnt/storage/home/rag-tt/tactile_images/'
        self.data_dir= pathlib.Path(data_dir)
        
    def find_no_of_images(self, obj_id):
        image_dir = os.path.join(self.data_dir, str(obj_id))
        image_dir= pathlib.Path(image_dir)
        no_of_images= len(list(image_dir.glob('*.jpg')))
        return no_of_images
    
    def set_threshold_values(self, label, image_paths, csv_path):
        # Ensure label and image_paths are numpy arrays
        label = np.array(label)
        image_paths = np.array(image_paths)
        
        # Count the total number of zeroes in label
        total_zeroes = np.sum(label == 0)
        
        # Determine the number of zeroes to remove
        zeroes_to_remove = max(0, total_zeroes - tune.no_of_nonslip_data)
        
        # Indices of zero elements
        zero_indices = np.where(label == 0)[0]
        
        # Indices to keep (last self.no_of_nonslip_data zeroes and all ones)
        indices_to_keep = np.concatenate((zero_indices[-tune.no_of_nonslip_data:], np.where(label != 0)[0]))
        indices_to_keep = np.unique(indices_to_keep)
        indices_to_keep = np.sort(indices_to_keep)
        
        # Create the resulting label array
        label_with_few_zeroes = label[indices_to_keep]
        
        # Remove the same number of elements from the start of image_paths
        paths_with_few_zeroes = image_paths[zeroes_to_remove:]
        
        trimmed_labels = []
        trimmed_paths = []
        slip_values = np.genfromtxt(csv_path, delimiter=',', skip_header=1, usecols=2, dtype=None, encoding=None)
        i = 0
        for slip_value in slip_values:
            if slip_value < tune.max_labels:
                trimmed_labels.append(label_with_few_zeroes[i])
                trimmed_paths.append(paths_with_few_zeroes[i])
            i += 1
        trimmed_labels = np.array(trimmed_labels)
        trimmed_paths = np.array(trimmed_paths)
        # print('label_with_few_zeroes =', label_with_few_zeroes.shape)
        # print('paths_with_few_zeroes=', paths_with_few_zeroes.shape)
        # print('trimmed_labels=', trimmed_labels.shape)
        # print('trimed_paths=', trimmed_paths.shape)
        return trimmed_labels, trimmed_paths
    
    def check_pattern(self,label):
        # Ensure arr is a numpy array
        label = np.array(label)
        
        # Find the first occurrence of 1
        first_one_index = np.argmax(label == 1)
        
        if np.all(label == 0):  # If there's no 1 in the array, ensure all are 0
            return
        
        # Check if there's no 1 in the array
        if np.max(label) == 0:
            assert np.all(label == 0), "Array does not follow the pattern: continuous zeroes followed by continuous ones"
            return
        
        # Assert all elements before first_one_index are 0
        assert np.all(label[:first_one_index] == 0), "Array does not follow the pattern: continuous zeroes followed by continuous ones"
        
        # Assert all elements from first_one_index to the end are 1
        assert np.all(label[first_one_index:] == 1), "Array does not follow the pattern: continuous zeroes followed by continuous ones"
    
    def create_slip_instant_labels(self, csv_path):
        label = []
        slip_values = np.genfromtxt(csv_path, delimiter=',', skip_header=1, usecols=2, dtype=None, encoding=None)
        for slip_value in slip_values:
            if slip_value < tune.slip_instant_labels:
                label.append(0)
            else:
                label.append(1)
        return label
 
    def duplicate_n_balance_data(self, labels, image_paths):
        # Convert labels to numpy array for easier manipulation
        labels = np.array(labels)
        image_paths = np.array(image_paths)

        # Get indices of each class
        class_0_indices = np.where(labels == 0)[0]
        class_1_indices = np.where(labels == 1)[0]
            # Check if either class is empty
        if len(class_0_indices) == 0 or len(class_1_indices) == 0:
            # print(f"Skipping balancing for {labels} as one of the classes is missing")
            return labels, image_paths
    
        # Calculate the difference in the number of samples
        diff = len(class_0_indices) - len(class_1_indices)

        if diff > 0:  # More 0s than 1s
            # Randomly duplicate class 1 samples to balance the dataset
            additional_indices = np.random.choice(class_1_indices, size=diff, replace=True)
            labels = np.concatenate([labels, labels[additional_indices]])
            image_paths = np.concatenate([image_paths, image_paths[additional_indices]])
        elif diff < 0:  # More 1s than 0s
            # Randomly duplicate class 0 samples to balance the dataset
            additional_indices = np.random.choice(class_0_indices, size=-diff, replace=True)
            labels = np.concatenate([labels, labels[additional_indices]])
            image_paths = np.concatenate([image_paths, image_paths[additional_indices]])
        # # Shuffle the dataset to mix the duplicated samples
        # shuffle_indices = np.arange(len(labels))
        # np.random.shuffle(shuffle_indices)
        # labels = labels[shuffle_indices]
        # image_paths = image_paths[shuffle_indices]

        return labels, image_paths           
            
            
    def load_data(self, no_of_samples = 30):
        file_paths = []
        image_paths = []
        sequential_image_paths = []
        y = []
        window_size = tune.img_sequence_window_size
        for obj_id in range(no_of_samples):
            no_of_images = self.find_no_of_images(obj_id)
            csv_path = os.path.join(self.data_dir, str(obj_id),'slip_log.csv')
            if no_of_images < 40 or not os.path.exists(csv_path):
                continue
            label = self.create_slip_instant_labels(csv_path)
            label2 = np.genfromtxt(csv_path, delimiter=',', skip_header=1, usecols=1, dtype=None, encoding=None)
            
            for img_id in range(no_of_images):
                image_path = os.path.join(self.data_dir, str(obj_id), str(img_id)+ '.jpg')
                image_paths.append(image_path)
            self.check_pattern(label)
            
            label, image_paths = self.set_threshold_values(label,image_paths,csv_path)    
            # label, image_paths = self.duplicate_n_balance_data(label, image_paths)
            
            '''            
            example values of label and image_paths pair             
            label size = 83, imagepaths_size = 83 
            zeroes = 41, ones = 42
            img40 == 0, img41 == 1
            '''
            # print(image_paths[41])
            # print(label[41])
            
            # club images together as per the window
            clubbed_image_paths = []
            for i in range(0, len(image_paths) - (tune.img_sequence_window_size-1)):  # Ensuring sequences of 5 images
                row = image_paths[i:i+tune.img_sequence_window_size]
                clubbed_image_paths.append(row)
            image_paths = []
            label = label[(tune.img_sequence_window_size-1):]

            '''   
            old version- label = label[:-(tune.img_sequence_window_size - 1)]         
            example values of label and clubbed_image_paths  
            here img_sequence_window_size = 3   
            clubbed_image_paths = [0,1,2] ... [80,81,82]       
            label size = 81, imagepaths_size = 81
            zeroes = 41, ones = 40
            [40,41,42] == 0, [41,42,43] == 1
            '''
            '''   
            new version - label = label[(tune.img_sequence_window_size-1):]       
            example values of label and clubbed_image_paths  
            here img_sequence_window_size = 3   
            clubbed_image_paths = [0,1,2] ... [80,81,82]       
            label size = 81, imagepaths_size = 81
            zeroes = 39, ones = 42
            [38,39,40] == 0 ,[39,40,41] == 1, [40,41,42] == 1, [41,42,43] == 1
            '''
            
            # print(clubbed_image_paths[38])
            # print(label[38])
            y.append(label)
            print(len(y))
            file_paths.append(clubbed_image_paths)
        
        #concatenate = merge multipe arrays into one
        y = np.concatenate(y)
        self.labels = np.array(y)
        # print(self.labels.shape) = 2025
        self.file_paths = np.concatenate(file_paths)
        # print(self.file_paths.shape) = (2025,3)
        
    def load_data2(self, no_of_samples=600, stride=1):
        file_paths = []
        image_paths = []
        sequential_image_paths = []
        y = []
        window_size = tune.img_sequence_window_size
        for obj_id in range(no_of_samples):
            no_of_images = self.find_no_of_images(obj_id)
            csv_path = os.path.join(self.data_dir, str(obj_id), 'slip_log.csv')
            
            if no_of_images < 40 or not os.path.exists(csv_path):
                continue
            
            label = self.create_slip_instant_labels(csv_path)
            label2 = np.genfromtxt(csv_path, delimiter=',', skip_header=1, usecols=1, dtype=None, encoding=None)
            
            for img_id in range(no_of_images):
                image_path = os.path.join(self.data_dir, str(obj_id), str(img_id) + '.jpg')
                image_paths.append(image_path)
            
            self.check_pattern(label)
            label, image_paths = self.set_threshold_values(label, image_paths, csv_path)    
            label, image_paths = self.duplicate_n_balance_data(label, image_paths)
            y.append(label[:-(tune.img_sequence_window_size - 1)])
            
            for i in range(0, len(image_paths) - (tune.img_sequence_window_size - 1), stride):
                row = image_paths[i:i + tune.img_sequence_window_size]
                sequential_image_paths.append(row)
            
            image_paths = []

        y = np.concatenate(y)
        self.labels = np.array(y)
        print(labels.shape)
        self.file_paths = np.array(sequential_image_paths)
            
    def shuffle_file_paths(self):
        # Shuffle the dataset
        indices = np.arange(len(self.file_paths))
        np.random.shuffle(indices)
        self.file_paths = self.file_paths[indices]
        self.labels = self.labels[indices]
        
    def shuffle_train_file_paths(self):
        # Shuffle the dataset
        indices = np.arange(len(self.train_filepaths))
        np.random.shuffle(indices)
        self.train_filepaths = self.train_filepaths[indices]
        self.train_labels = self.train_labels[indices]
        
    def create_split_filepaths(self,train=0.7,val=0.2):
        dataset_size = len(self.file_paths)
        train_size = int(train * dataset_size)

        val_size = int(val * dataset_size)
        
        test_size = dataset_size - train_size - val_size
        print('dataset_size=', dataset_size)
        print('train size=', train_size)
        print('test size=', test_size)
        print('val size=', val_size)
        self.train_filepaths = self.file_paths[ : train_size]
        self.val_filepaths = self.file_paths[train_size : train_size+val_size]
        self.test_filepaths = self.file_paths[train_size+val_size : ]
        
        self.train_labels = self.labels[ : train_size]
        self.val_labels = self.labels[train_size : train_size+val_size]
        self.test_labels = self.labels[train_size+val_size : ]
        
        # Check the sizes of the splits
        assert len(self.train_filepaths) == train_size, "Training set size mismatch"
        assert len(self.val_filepaths) == val_size, "Validation set size mismatch"
        assert len(self.test_filepaths) == test_size, "Test set size mismatch"
        assert len(self.train_labels) == train_size, "Training set size mismatch"
        assert len(self.val_labels) == val_size, "Validation set size mismatch"
        assert len(self.test_labels) == test_size, "Test set size mismatch"
        
    def parse_function_vgg(self, filenames, label):
        images = []
        for filename in filenames:
            image_string = tf.io.read_file(filename)
            image_decoded = tf.image.decode_jpeg(image_string, channels=3)
            image_resized = tf.image.resize(image_decoded, [224, 224])  # Adjust size as needed
            # Convert image to a float32 tensor and preprocess it for VGG16
            image = tf.cast(image_resized, tf.float32)
            image = preprocess_input(image)
            # Ensure images are float32 and normalized between 0 and 1
            images.append(image)
        images = tf.stack(images)
        return images, label
        
    def create_dataset(self,file_paths, labels):
                # Create a TensorFlow dataset from the file paths and labels
        dataset = tf.data.Dataset.from_tensor_slices((file_paths, labels))
        

        def wrapped_parse_function(filenames, label):
            images, label = tf.py_function(func=self.parse_function_vgg, inp=[filenames, label], Tout=[tf.float32, tf.float64])
            images.set_shape((tune.img_sequence_window_size, 224, 224, 3))  # Explicitly set the shape
            label.set_shape([])  # Explicitly set the shape for the label
            return images, label
 
        
        dataset = dataset.map(wrapped_parse_function, num_parallel_calls=tf.data.AUTOTUNE)

        dataset = dataset.batch(tune.batch_size)  # Adjust batch size as needed
        dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
        return dataset
        
    def create_split_datasets(self):
        self.train_dataset = self.create_dataset(self.train_filepaths, self.train_labels)
        self.val_dataset = self.create_dataset(self.val_filepaths, self.val_labels)
        self.test_dataset = self.create_dataset(self.test_filepaths, self.test_labels)
        

In [163]:
class tuning():
    def __init__(self):
        self.img_sequence_window_size_array = [3,8,9,10]
        self.learning_rate_array = [0.00005,0.00003, 0.00001]
        self.reshuffle_array=[False, True]
        self.regularization_constant_array = [0.01, 0.05, 0.1, 0.2, 0.3]
        self.dense_neurons2_array = [8, 16, 32]
        self.vgg_layers_array= [7,11,15,19]
        self.slip_instant_labels_array = [0.0001,0.0005, 0.001, 0.003, 0.005]
        
        self.img_sequence_window_size =  self.img_sequence_window_size_array[0]
        self.stride = 3 
        self.learning_rate = self.learning_rate_array[1]
        self.reshuffle =  self.reshuffle_array[0]
        self.dropout1 = 0.5
        self.dropout2 = 0.5
        self.dropout3 = 0.5
        self.dropout4 = 0.5
        self.regularization_constant = 0.001
        self.batch_size = 4
        self.dense_neurons1 = 64
        self.dense_neurons2 = 8
        self.csv_id = 0
        self.no_of_samples = 4559
        self.epochs = 40
        self.vgg_layers = 19
        self.other_param='additional cnn + global average'
        self.no_of_nonslip_data = 200
        self.slip_instant_labels = 0.0001
        self.max_labels = 0.005
              
    def start_training(self):
        try:
            manage_data.load_data(no_of_samples=self.no_of_samples)
            # manage_data.shuffle_file_paths()
            manage_data.create_split_filepaths()
            manage_data.shuffle_train_file_paths()
            manage_data.create_split_datasets()
            network.vgg_lstm()
            
            #print the tuning parametrs before training
            accuracy_history.on_epoch_end(0)
            df = accuracy_history.create_accuracy_dataframe()
            # Transpose the DataFrame
            df_transposed = df.transpose()
            print(df_transposed)
            network.train(manage_data.train_dataset, manage_data.val_dataset)
        
        # Ensure accuracy data is saved even if training is interrupted 
        finally:        
            # Create a DataFrame from the accuracy history lists
            accuracy_df = accuracy_history.create_accuracy_dataframe()

            # Save the DataFrame to a CSV file
            accuracy_history.save_to_csv(accuracy_df)
            accuracy_history.reset_dict()                    
    def Tune(self):
        # for value in self.vgg_layers_array:
        #     self.vgg_layers = value         
        #     self.start_training()
        # self.vgg_layers= 19
        
        for value in self.learning_rate_array:
            self.learning_rate = value           
            self.start_training()
        self.learning_rate = 0.0003

In [164]:
manage_data = Manage_data()
tune = tuning()
manage_data.load_data()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
(2025,)
(2025, 3)


In [165]:
for i in range(140):
    print(manage_data.file_paths[i])

['/mnt/storage/home/rag-tt/tactile_images/2/0.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/1.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/2.jpg']
['/mnt/storage/home/rag-tt/tactile_images/2/1.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/2.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/3.jpg']
['/mnt/storage/home/rag-tt/tactile_images/2/2.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/3.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/4.jpg']
['/mnt/storage/home/rag-tt/tactile_images/2/3.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/4.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/5.jpg']
['/mnt/storage/home/rag-tt/tactile_images/2/4.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/5.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/6.jpg']
['/mnt/storage/home/rag-tt/tactile_images/2/5.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/6.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/7.jpg']
['/mnt/storage/home/rag-tt/tactile_images/2/6.jpg'
 '/mnt/storage/home/rag-t

In [166]:
manage_data.labels[:140]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 0, 0, 0, 0])

In [167]:
manage_data.load_data2(stride = 2)

NameError: name 'labels' is not defined

In [None]:
for i in range(200):
    print(manage_data.file_paths[i])

['/mnt/storage/home/rag-tt/tactile_images/2/0.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/1.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/2.jpg']
['/mnt/storage/home/rag-tt/tactile_images/2/2.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/3.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/4.jpg']
['/mnt/storage/home/rag-tt/tactile_images/2/4.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/5.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/6.jpg']
['/mnt/storage/home/rag-tt/tactile_images/2/6.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/7.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/8.jpg']
['/mnt/storage/home/rag-tt/tactile_images/2/8.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/9.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/10.jpg']
['/mnt/storage/home/rag-tt/tactile_images/2/10.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/11.jpg'
 '/mnt/storage/home/rag-tt/tactile_images/2/12.jpg']
['/mnt/storage/home/rag-tt/tactile_images/2/12.jpg'
 '/mnt/storage/home/

In [None]:
manage_data.labels[:70]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1])