In [3]:
import os
import random
import shutil

# Root folder of the dataset; it holds the "fraction" and "non-fraction" class folders
dataset_path = "data/"

# Share of each class assigned to the train, test, and valid splits
train_ratio, test_ratio, valid_ratio = 0.7, 0.15, 0.15

# Folders for the three splits, located inside the dataset folder
train_path, test_path, valid_path = (
    os.path.join(dataset_path, split_name) for split_name in ("train", "test", "valid")
)

# Make sure every split folder exists (already existing folders are left alone)
for split_dir in (train_path, test_path, valid_path):
    os.makedirs(split_dir, exist_ok=True)

# Function to move files from the source to the destination folder
def move_files(file_list, src_folder, dest_folder):
    """Move the named files from ``src_folder`` into ``dest_folder``.

    Args:
        file_list: Iterable of file names, each relative to ``src_folder``.
        src_folder: Folder that currently contains the files.
        dest_folder: Folder the files are moved into. It is created
            (including missing parents) when it does not exist yet.

    Raises:
        Any error raised by ``os.makedirs`` or ``shutil.move``, e.g. when a
        listed file is not present in ``src_folder``.
    """
    # Without this, moving into a missing folder fails on the very first file
    os.makedirs(dest_folder, exist_ok=True)

    for file_name in file_list:
        src = os.path.join(src_folder, file_name)
        dest = os.path.join(dest_folder, file_name)
        shutil.move(src, dest)

# Fixed seed so the same files land in the same split after a kernel restart.
# A dedicated Random instance is used so the global `random` state is left untouched.
split_seed = 42
split_rng = random.Random(split_seed)

# Source folder of each class
fraction_dir = os.path.join(dataset_path, "fraction")
non_fraction_dir = os.path.join(dataset_path, "non-fraction")

# Get the list of files in each class folder.
# os.listdir returns entries in arbitrary order, so sort first to give the
# seeded shuffle a stable starting point.
fraction_files = sorted(os.listdir(fraction_dir))
non_fraction_files = sorted(os.listdir(non_fraction_dir))

# Shuffle the file lists (reproducibly) so the split is random but repeatable
split_rng.shuffle(fraction_files)
split_rng.shuffle(non_fraction_files)

# Calculate the number of files for each split.
# train and test counts are truncated with int(); valid receives whatever is
# left, so no file is dropped by rounding (valid_ratio itself is not read here).
num_fraction = len(fraction_files)
num_non_fraction = len(non_fraction_files)

num_train_fraction = int(train_ratio * num_fraction)
num_test_fraction = int(test_ratio * num_fraction)
num_valid_fraction = num_fraction - num_train_fraction - num_test_fraction

num_train_non_fraction = int(train_ratio * num_non_fraction)
num_test_non_fraction = int(test_ratio * num_non_fraction)
num_valid_non_fraction = num_non_fraction - num_train_non_fraction - num_test_non_fraction

# Split the files for each class into train, test, and valid
train_fraction_files = fraction_files[:num_train_fraction]
test_fraction_files = fraction_files[num_train_fraction:num_train_fraction + num_test_fraction]
valid_fraction_files = fraction_files[num_train_fraction + num_test_fraction:]

train_non_fraction_files = non_fraction_files[:num_train_non_fraction]
test_non_fraction_files = non_fraction_files[num_train_non_fraction:num_train_non_fraction + num_test_non_fraction]
valid_non_fraction_files = non_fraction_files[num_train_non_fraction + num_test_non_fraction:]

# Every file must end up in exactly one split
assert len(train_fraction_files) + len(test_fraction_files) + len(valid_fraction_files) == num_fraction
assert len(train_non_fraction_files) + len(test_non_fraction_files) + len(valid_non_fraction_files) == num_non_fraction

# Move files to the appropriate folders.
# NOTE(review): both classes are moved into the same train/test/valid folders,
# so the class is no longer visible from the folder layout and equally named
# files from the two classes would collide — confirm labels are tracked elsewhere.
move_files(train_fraction_files, fraction_dir, train_path)
move_files(test_fraction_files, fraction_dir, test_path)
move_files(valid_fraction_files, fraction_dir, valid_path)

move_files(train_non_fraction_files, non_fraction_dir, train_path)
move_files(test_non_fraction_files, non_fraction_dir, test_path)
move_files(valid_non_fraction_files, non_fraction_dir, valid_path)


In [13]:
def set_second_column_below_first(file_path):
    """Rewrite a whitespace-separated text file with one value per line.

    For every line, the first column is written on one line and the second
    column on the line directly below it. The file at ``file_path`` is
    overwritten in place; the result has no trailing newline.

    Handling of irregular lines:
        * columns after the second one are discarded;
        * a line with a single column is kept unchanged, so running the
          function again on an already converted file does not erase it;
        * blank lines are dropped.

    Args:
        file_path: Path of the text file to rewrite.
    """
    # Read the content of the file; split() also discards surrounding whitespace
    with open(file_path, 'r') as file:
        rows = [line.split() for line in file]

    # Flatten to one value per output line, keeping at most two columns per row
    values = []
    for columns in rows:
        values.extend(columns[:2])

    # Write the updated list back to the txt file
    with open(file_path, 'w') as file:
        file.write('\n'.join(values))

# Path of the txt file to convert (currently 'data/test.txt'); change it to process another file.
# Note: file_path is a notebook-level name that other cells assign as well.
file_path = 'data/test.txt'

# Call the function to set the second column below the first.
# The file is overwritten in place, so keep a copy if the two-column layout is still needed.
set_second_column_below_first(file_path)


In [18]:
def add_new_name_before_each_name(file_path, new_name):
    """Prepend ``new_name`` to every non-blank line of a text file.

    Each line is stripped of surrounding whitespace first. Blank lines are
    kept as empty lines instead of being turned into an entry that consists
    only of ``new_name``. The file at ``file_path`` is overwritten in place;
    the result has no trailing newline.

    Args:
        file_path: Path of the text file to rewrite.
        new_name: Text inserted directly in front of each name (no separator
            is added, so include a trailing "/" when it is a folder prefix).
    """
    # Read the content of the file, stripping newline characters and whitespace
    with open(file_path, 'r') as file:
        names = [line.strip() for line in file]

    # Add the new name before each existing name; a blank line is not a name
    updated_names = [f"{new_name}{name}" if name else name for name in names]

    # Write the updated list back to the txt file
    with open(file_path, 'w') as file:
        file.write('\n'.join(updated_names))

# Path of the txt file whose names get the prefix (currently 'data/test.txt').
# Note: file_path is a notebook-level name that other cells assign as well.
file_path = 'data/test.txt'

# Text placed directly in front of each name in the file.
# NOTE(review): this is a machine-specific absolute path — confirm it matches
# the machine on which the resulting list will be used.
new_name = '/home/rail/pegah/data/test/'

# Call the function to add the new name before the names in the list.
# The file is overwritten in place.
add_new_name_before_each_name(file_path, new_name)


In [17]:
def remove_prefix_from_names(file_path, prefix_to_remove):
    """Strip a leading ``prefix_to_remove`` from every line of a text file.

    Each line is stripped of surrounding whitespace first. Only a match at
    the very start of the line is removed, and only once; the same text
    occurring later in a line is left alone. Lines that do not start with
    the prefix are written back unchanged. The file at ``file_path`` is
    overwritten in place; the result has no trailing newline.

    Args:
        file_path: Path of the text file to rewrite.
        prefix_to_remove: Text to remove from the beginning of each name.
    """
    # Read the content of the file, stripping newline characters and whitespace
    with open(file_path, 'r') as file:
        names = [line.strip() for line in file]

    # Remove the prefix from each name. str.replace would also delete matches
    # in the middle of a name, so slice off a leading match instead.
    prefix_length = len(prefix_to_remove)
    updated_names = [
        name[prefix_length:] if name.startswith(prefix_to_remove) else name
        for name in names
    ]

    # Write the updated list back to the txt file
    with open(file_path, 'w') as file:
        file.write('\n'.join(updated_names))

# Path of the txt file to clean up (currently 'data/test.txt').
# Note: file_path is a notebook-level name that other cells assign as well.
file_path = 'data/test.txt'

# Text to strip from the names in the file (currently 'data/test/')
prefix_to_remove = 'data/test/'

# Call the function to remove the prefix from each name in the list.
# The file is overwritten in place.
remove_prefix_from_names(file_path, prefix_to_remove)


In [11]:
import re

def remove_column_numbers_from_names(file_path):
    """Drop a leading number from every line of a text file.

    Each line is stripped of surrounding whitespace; a run of digits at the
    start of the line is then removed together with the whitespace that
    follows it. A line without such a leading "number + whitespace" part
    (including a line that holds nothing but a number) is written back as is.
    The file at ``file_path`` is overwritten in place; the result has no
    trailing newline.

    Args:
        file_path: Path of the text file to rewrite.
    """
    leading_number = re.compile(r'^\s*\d+\s+')

    # Load every line up front, without surrounding whitespace
    with open(file_path, 'r') as source:
        entries = [raw.strip() for raw in source]

    # Cut the numeric column off the front of each entry
    cleaned = []
    for entry in entries:
        cleaned.append(leading_number.sub('', entry))

    # Replace the file content with the cleaned entries
    with open(file_path, 'w') as sink:
        sink.write('\n'.join(cleaned))

# Path of the txt file to clean up (currently 'data/train.txt'); change it to process another file.
# Note: file_path is a notebook-level name that other cells assign as well.
file_path = 'data/train.txt'

# Call the function to remove the column numbers from before each name in the list.
# The file is overwritten in place.
remove_column_numbers_from_names(file_path)
