In [69]:
import os
import shutil

In [70]:
def count_subdirectories(directory):
    try:
        # List all entries in the directory
        entries = os.listdir(directory)
        
        # Filter out the subdirectories
        subdirectories = [entry for entry in entries if os.path.isdir(os.path.join(directory, entry))]
        
        # Count the subdirectories
        num_subdirectories = len(subdirectories)
        
        print(f"The directory '{directory}' contains {num_subdirectories} subdirectories.")
        
    except FileNotFoundError:
        print(f"The directory '{directory}' does not exist.")
    except PermissionError:
        print(f"Permission denied: Unable to access '{directory}'.")
    except Exception as e:
        print(f"Error: {e}")

In [71]:
def create_directory(directory_name):
    try:
        os.mkdir(directory_name)
        print(f"Directory '{directory_name}' created successfully.")
    except FileExistsError:
        print(f"Directory '{directory_name}' already exists.")
    except Exception as e:
        print(f"An error occurred: {e}")

In [72]:
def move_folder(source_dir,destination_dir):
    try:
        # Move the directory and its contents
        shutil.move(source_dir, destination_dir)
    except FileNotFoundError:
        print(f"Error: Directory '{source_dir}' not found.")
    except shutil.Error as e:
        print(f"Error: {e}")

In [73]:
def fill_gaps_in_sequence(directory):
    try:
        # List all entries in the directory
        entries = os.listdir(directory)
        
        # Filter out the subdirectories and sort them numerically
        subdirectories = sorted([entry for entry in entries if os.path.isdir(os.path.join(directory, entry)) and entry.isdigit()], key=int)
        
        expected_number = 0
        
        for subdir in subdirectories:
            subdir_path = os.path.join(directory, subdir)
            expected_subdir_path = os.path.join(directory, str(expected_number))
            
            if subdir_path != expected_subdir_path:
                os.rename(subdir_path, expected_subdir_path)
            
            expected_number += 1
        
        print("Renaming completed successfully.")
        
    except FileNotFoundError:
        print(f"The directory '{directory}' does not exist.")
    except PermissionError:
        print(f"Permission denied: Unable to access '{directory}'.")
    except Exception as e:
        print(f"Error: {e}")



Create the required directories

In [74]:
data_dir = '/home/rag-tt/'
train_data_dir = os.path.join(data_dir,'train_data')
test_data_dir = os.path.join(data_dir,'test_data')
source_data_dir = os.path.join(data_dir,'tactile5')
tactile_images_dir =  os.path.join(source_data_dir,'tactile_images')
temp_train_data_dir = os.path.join(source_data_dir,'train_data')
temp_test_data_dir = os.path.join(source_data_dir,'test_data')

check number of data already present in the train and test dataset to append the new data afterwards

In [75]:
train_data_qty = count_subdirectories(train_data_dir)
print('train_data_qty=', train_data_qty)
test_data_qty = count_subdirectories(test_data_dir)
print('test_data_qty=', test_data_qty)

The directory '/home/rag-tt/train_data' contains 0 subdirectories.
train_data_qty= None
The directory '/home/rag-tt/test_data' contains 0 subdirectories.
test_data_qty= None


split the data as per the ratio
Note that the split is being done according to obj_id instead of actual amout of data
this done so that the same objects from different data set ends up in test_set
The above method ensures that the test set is never seen by the model

In [76]:
data_size = 100
split_ratio = 0.9
train_size = round(data_size*(split_ratio))
train_data_list = []
test_data_list = []
for i in range(train_size):
    train_data_list.append(i)
for i in range(train_size,data_size):
    test_data_list.append(i)
    
print('train_data',train_data_list)
print('test_data',test_data_list)

train_data [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89]
test_data [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]


Move the data to temporary train and test folders

In [77]:
create_directory(temp_train_data_dir)
create_directory(temp_test_data_dir)

Directory '/home/rag-tt/tactile5/train_data' already exists.
Directory '/home/rag-tt/tactile5/test_data' already exists.


In [78]:
for obj_id in train_data_list:
    obj_dir = os.path.join(tactile_images_dir, str(obj_id))
    destination_dir = os.path.join(temp_train_data_dir, str(obj_id))
    if not os.path.exists(obj_dir):
        continue
    move_folder(obj_dir,destination_dir)
    
for obj_id in test_data_list:
    obj_dir = os.path.join(tactile_images_dir, str(obj_id))
    destination_dir = os.path.join(temp_test_data_dir, str(obj_id))
    if not os.path.exists(obj_dir):
        continue
    move_folder(obj_dir,destination_dir)

Unnecessary data was deleted in last step of data processing
We fill the missing data by renaming data in this step

In [79]:
fill_gaps_in_sequence(temp_train_data_dir)
fill_gaps_in_sequence(temp_test_data_dir)

Renamed '/home/rag-tt/tactile5/train_data/2' to '/home/rag-tt/tactile5/train_data/0'
Renamed '/home/rag-tt/tactile5/train_data/3' to '/home/rag-tt/tactile5/train_data/1'
Renamed '/home/rag-tt/tactile5/train_data/4' to '/home/rag-tt/tactile5/train_data/2'
Renamed '/home/rag-tt/tactile5/train_data/5' to '/home/rag-tt/tactile5/train_data/3'
Renamed '/home/rag-tt/tactile5/train_data/6' to '/home/rag-tt/tactile5/train_data/4'
Renamed '/home/rag-tt/tactile5/train_data/7' to '/home/rag-tt/tactile5/train_data/5'
Renamed '/home/rag-tt/tactile5/train_data/8' to '/home/rag-tt/tactile5/train_data/6'
Renamed '/home/rag-tt/tactile5/train_data/9' to '/home/rag-tt/tactile5/train_data/7'
Renamed '/home/rag-tt/tactile5/train_data/10' to '/home/rag-tt/tactile5/train_data/8'
Renamed '/home/rag-tt/tactile5/train_data/11' to '/home/rag-tt/tactile5/train_data/9'
Renamed '/home/rag-tt/tactile5/train_data/12' to '/home/rag-tt/tactile5/train_data/10'
Renamed '/home/rag-tt/tactile5/train_data/13' to '/home/rag-t