In [323]:
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
import PIL
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential 
import pandas as pd
import numpy as np
import pathlib

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, TimeDistributed, Conv2D, MaxPooling2D, Flatten, Reshape
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dropout
from tensorflow.keras.losses import BinaryCrossentropy

This is demo code to check how to preprocess the images for the VGG16 network.

In [324]:
    # NOTE: this import is kept in this cell because later code calls the bare
    # name `preprocess_input`.
    from tensorflow.keras.applications.vgg16 import preprocess_input
    # Build the sample path directly instead of instantiating Manage_data:
    # that class is defined in a later cell, so using it here fails with a
    # NameError when the notebook is run top to bottom on a fresh kernel.
    demo_data_dir = pathlib.Path('/home/rag-tt/workspace/tactile_images/')
    image_path = os.path.join(demo_data_dir, '6/6.jpg')
    # Read the raw JPEG bytes from disk
    image = tf.io.read_file(image_path)
    # Decode the bytes into a 3-channel image tensor
    image = tf.image.decode_jpeg(image, channels=3)
    # Resize to the 224x224 input size used for VGG16
    image = tf.image.resize(image, [224, 224])
    # Make sure the tensor is float32, then apply the VGG16 preprocessing
    image = tf.cast(image, tf.float32)
    image = preprocess_input(image)


In [325]:
# Display the shape of the preprocessed demo image (resized to 224x224, 3 channels).
image.shape

TensorShape([224, 224, 3])

In [326]:


class Manage_data():
    """Index per-object image folders and serve them as a batched tf.data pipeline.

    Each object lives in ``<data_dir>/<obj_id>/`` and is expected to contain
    images named ``0.jpg, 1.jpg, ...`` plus a ``slip_log.csv`` whose second
    column holds one label per image.

    Args:
        data_dir: Root folder containing one sub-folder per object id.
        sequence_length: Number of consecutive frames in one sample.
        batch_size: Number of sequences per batch in ``self.dataset``.
        min_images: Objects with fewer images than this are skipped.
    """

    def __init__(self, data_dir='/home/rag-tt/workspace/tactile_images/',
                 sequence_length=5, batch_size=32, min_images=40):
        self.data_dir = pathlib.Path(data_dir)
        self.sequence_length = sequence_length
        self.batch_size = batch_size
        self.min_images = min_images

    def find_no_of_images(self, obj_id):
        """Return how many ``*.jpg`` files exist in the folder of ``obj_id``."""
        image_dir = pathlib.Path(os.path.join(self.data_dir, str(obj_id)))
        return len(list(image_dir.glob('*.jpg')))

    def _load_frames(self, filenames, target_size, preprocess=None):
        """Read, decode and resize every file in ``filenames`` and stack the frames.

        ``preprocess`` (optional) is applied to each float32 frame after resizing.
        """
        frames = []
        for filename in filenames:
            raw_bytes = tf.io.read_file(filename)
            frame = tf.image.decode_jpeg(raw_bytes, channels=3)
            frame = tf.image.resize(frame, target_size)
            if preprocess is not None:
                frame = preprocess(tf.cast(frame, tf.float32))
            frames.append(frame)
        return tf.stack(frames)

    def parse_function_sequential(self, filenames, label):
        """Load one sequence as 480x640 frames; pixel values are NOT rescaled.

        Returns:
            (images, label) where images stacks one frame per filename.
        """
        images = self._load_frames(filenames, [480, 640])  # Adjust size as needed
        return images, label

    def parse_function_vgg(self, filenames, label):
        """Load one sequence as 224x224 frames with VGG16 preprocessing applied.

        The preprocessing function is referenced through ``tf.keras`` so this
        method does not depend on an import made in another notebook cell.

        Returns:
            (images, label) where images stacks one frame per filename.
        """
        images = self._load_frames(
            filenames,
            [224, 224],
            preprocess=tf.keras.applications.vgg16.preprocess_input,
        )
        return images, label

    def load_sequential_data(self, no_of_samples = 600):
        """Build sliding windows of image paths and their labels.

        Object ids ``0 .. no_of_samples-1`` are scanned. For every object with
        at least ``min_images`` images, each run of ``sequence_length``
        consecutive image paths becomes one sample; sample ``i`` is paired with
        row ``i`` of the label column (labels past the last window are dropped).

        Returns:
            (file_paths, y): array of shape (num_sequences, sequence_length)
            holding path strings, and the matching 1-D label array. Both are
            empty when no object qualifies.

        Raises:
            ValueError: if an object's CSV has fewer labels than windows.
        """
        window = self.sequence_length
        sequential_image_paths = []
        y = []
        for obj_id in range(no_of_samples):
            no_of_images = self.find_no_of_images(obj_id)
            if no_of_images < self.min_images:
                continue

            n_sequences = no_of_images - window + 1
            if n_sequences <= 0:
                # Not enough frames to form even a single window.
                continue

            obj_dir = os.path.join(self.data_dir, str(obj_id))
            csv_path = os.path.join(obj_dir, 'slip_log.csv')
            label = np.atleast_1d(
                np.genfromtxt(csv_path, delimiter=',', skip_header=1, usecols=1, dtype=None, encoding=None)
            )
            if len(label) < n_sequences:
                raise ValueError(
                    'Object %s has %d labels but %d image sequences'
                    % (obj_id, len(label), n_sequences)
                )
            # Slice by count rather than label[:-(window - 1)], which would
            # return an empty array for window == 1.
            y.append(label[:n_sequences])

            image_paths = [
                os.path.join(obj_dir, str(img_id) + '.jpg')
                for img_id in range(no_of_images)
            ]
            sequential_image_paths.extend(
                image_paths[start:start + window] for start in range(n_sequences)
            )

        if not y:
            # np.concatenate([]) raises; return well-shaped empty arrays instead.
            return np.empty((0, window), dtype=str), np.empty((0,), dtype=np.int64)

        return np.array(sequential_image_paths), np.concatenate(y)

    def create_sequential_dataset(self,file_paths, labels):
        """Create ``self.dataset``: batched, prefetched (images, label) pairs.

        Images are read from disk inside the dataset's map step via
        ``parse_function_vgg``; only the path strings are held in memory.
        """
        dataset = tf.data.Dataset.from_tensor_slices((file_paths, labels))

        def wrapped_parse_function(filenames, label):
            images, label = tf.py_function(func=self.parse_function_vgg, inp=[filenames, label], Tout=[tf.float32, tf.int64])
            # tf.py_function drops static shape information, so restore it.
            images.set_shape((self.sequence_length, 224, 224, 3))
            label.set_shape([])
            return images, label

        dataset = dataset.map(wrapped_parse_function, num_parallel_calls=tf.data.AUTOTUNE)
        dataset = dataset.batch(self.batch_size)
        self.dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)

    def split_dataset(self, file_paths):
        """Split ``self.dataset`` 80/20 into ``train_dataset`` and ``val_dataset``.

        ``self.dataset`` is already batched, so ``take``/``skip`` count batches.
        The sample count from ``file_paths`` is therefore converted to a batch
        count first; using the sample count directly would put every batch in
        the training split and leave the validation split empty.
        """
        dataset_size = len(file_paths)
        num_batches = -(-dataset_size // self.batch_size)  # ceiling division
        train_batches = int(0.8 * num_batches)
        self.train_dataset = self.dataset.take(train_batches)
        self.val_dataset = self.dataset.skip(train_batches)



In [327]:
# Load the VGG16 model with ImageNet pre-trained weights so its layers can be inspected.
# Alternative kept for reference: passing include_top=False instead
# vgg_model = tf.keras.applications.VGG16(weights='imagenet', include_top=False)
vgg_model = tf.keras.applications.VGG16(weights='imagenet')
# Print the layer-by-layer summary of the loaded VGG16 model
vgg_model.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_66 (InputLayer)       [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [328]:
# Display the input shape the loaded VGG16 model expects.
vgg_model.input_shape

(None, 224, 224, 3)

In [329]:

class create_network():
    """Build and train CNN+LSTM binary classifiers over sequences of 5 images.

    Call ``cnn_lstm1()`` or ``vgg_lstm()`` to populate ``self.model``, then
    ``train()`` to compile and fit it.
    """

    def __init__(self):
        self.x =0  # unused; retained so existing attribute access keeps working
        self.model = None  # set by cnn_lstm1() / vgg_lstm()

    def cnn_lstm1(self):
        """Build a small CNN applied per frame, followed by an LSTM classifier.

        Expects input of shape (5, 480, 640, 3) per sample and stores the
        combined model in ``self.model``.
        """
        # Per-frame feature extractor: three conv/pool stages, then flatten.
        cnn_model = Sequential([
            Conv2D(32, (3, 3), activation='relu', input_shape=(480, 640, 3)),
            MaxPooling2D((2, 2)),
            Conv2D(32, (3, 3), activation='relu'),
            MaxPooling2D((2, 2)),
            Conv2D(32, (3, 3), activation='relu'),
            MaxPooling2D((2, 2)),
            Flatten()  # Flatten the spatial dimensions
        ])

        # Sequence classifier; 144768 = 58 * 78 * 32 is the flattened CNN
        # output for a 480x640 input after the three conv/pool stages above.
        lstm_model = Sequential([
            LSTM(64,input_shape=(5, 144768) ),
            Dense(8, activation='relu'),
            Dense(1, activation='sigmoid'),
        ])

        # Combine CNN and LSTM models
        self.model = Sequential([
            TimeDistributed(cnn_model, input_shape=(5, 480, 640, 3)),  # Apply CNN to each frame in the sequence
            (Reshape((5,144768))),
            lstm_model,
        ])
        self.model.summary()

    def train(self, train_dataset, val_dataset, epochs=50, learning_rate=0.00001,
              checkpoint_path='model_vgg/'):
        """Compile ``self.model`` for binary classification and fit it.

        Args:
            train_dataset: Dataset passed to ``fit`` as training data.
            val_dataset: Dataset passed to ``fit`` as ``validation_data``.
            epochs: Number of training epochs.
            learning_rate: Learning rate for the Adam optimizer.
            checkpoint_path: Where ModelCheckpoint saves the model with the
                best ``val_accuracy``.

        Raises:
            RuntimeError: if no model has been built yet.
        """
        # Fail early with a clear message instead of an AttributeError later.
        if getattr(self, 'model', None) is None:
            raise RuntimeError(
                'No model has been built yet; call cnn_lstm1() or vgg_lstm() before train().'
            )
        cp = ModelCheckpoint(checkpoint_path,monitor='val_accuracy',save_best_only=True)
        self.model.compile(loss=BinaryCrossentropy(), optimizer=Adam(learning_rate=learning_rate),metrics=['accuracy'])
        # Keep the fit result for later inspection.
        self.history = self.model.fit(train_dataset,validation_data=val_dataset, epochs=epochs, callbacks=[cp])

    def vgg_lstm(self):
        """Build a frozen VGG16 feature extractor per frame, followed by an LSTM.

        Expects input of shape (5, 224, 224, 3) per sample and stores the
        combined model in ``self.model``.
        """
        # VGG16 with pre-trained weights; include_top=False removes the
        # final classification layers.
        vgg_model = tf.keras.applications.VGG16(weights='imagenet',include_top=False, input_shape=(224, 224, 3))
        # Freeze the VGG16 layers so they are not trained
        for layer in vgg_model.layers:
            layer.trainable = False

        # Per-frame feature extractor: VGG16 -> Flatten -> Dense(64).
        # The Flatten output has 25088 features; the Dense layer reduces that
        # to the 64 features per frame that the LSTM consumes.
        vgg_model_flatten = Sequential([
            vgg_model,
            Flatten(),  # Flatten the spatial dimensions
            Dense(64, activation='relu')
        ])

        # Sequence classifier over the 5 x 64 per-frame features
        lstm_model = Sequential([
            LSTM(64, input_shape=(5, 64)),
            Dropout(0.5),  # Dropout layer to prevent overfitting
            Dense(8, activation='relu'),
            Dropout(0.5),
            Dense(1, activation='sigmoid'),
        ])

        # Combine CNN and LSTM models
        self.model = Sequential([
            TimeDistributed(vgg_model_flatten, input_shape=(5, 224, 224, 3)),  # Apply CNN to each frame in the sequence
            (Reshape((5,64))),
            lstm_model,
        ])
        vgg_model.summary()
        vgg_model_flatten.summary()
        self.model.summary()
        


In [330]:
# Create the data-pipeline helper and the network builder used by the cell below.
manage_data = Manage_data()
network = create_network()

In [331]:
# Build a numpy array of image file paths grouped into sequences of 5 for the LSTM,
# together with one label per sequence.
# filepaths shape: (num_sequences, 5)
filepaths, label = manage_data.load_sequential_data()


# Create manage_data.dataset from the paths and labels.
# The images are not loaded into a separate array up front; they are read from
# disk by the dataset's map step, which saves time and memory.
# Batched element shape: (None, 5, 224, 224, 3)
print(filepaths.shape)
manage_data.create_sequential_dataset(filepaths,label)
for batch in manage_data.dataset.take(1):  # Take one batch to print its shape
    images_batch, labels_batch = batch
    print("Images batch shape:", images_batch.shape)
    print("Labels batch shape:", labels_batch.shape)
manage_data.split_dataset(filepaths)
print('filepaths size=',filepaths.shape)
# Show the training split, then build the combined VGG16 + LSTM network and train it
print('train set=',manage_data.train_dataset)
network.vgg_lstm()
network.train(manage_data.train_dataset, manage_data.val_dataset)

# Save the entire model to an HDF5 file
network.model.save('model_vgg.h5')

(66780, 5)
Images batch shape: (32, 5, 224, 224, 3)
Labels batch shape: (32,)
filepaths size= (66780, 5)
train set= <_TakeDataset element_spec=(TensorSpec(shape=(None, 5, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>
Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_67 (InputLayer)       [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     