In [1]:
import os
import shutil
import random

In [2]:
# Directory paths used by the train/test split below.
# Source: directory that holds the augmented videos.
augmented_videos_path = "augmented-videos"
# Destination: directory where the split data (train and test folders) is saved.
split_videos_path = "split-videos"


In [3]:


def _copy_videos(videos, src_dir, dest_dir, other_split_dir):
    """Copy each named file from src_dir to dest_dir and drop stale copies from the other split."""
    for video in videos:
        shutil.copy(os.path.join(src_dir, video), os.path.join(dest_dir, video))

        # A previous run may have placed this file in the opposite split; leaving
        # it there would put the same video in both train and test.
        stale_copy = os.path.join(other_split_dir, video)
        if os.path.isfile(stale_copy):
            os.remove(stale_copy)


# Function to split data into train and test sets
def split_data(augmented_videos_path, split_videos_path, train_ratio=0.8, seed=None):
    """Copy every class folder's files into per-class train and test folders.

    For each sub-directory of ``augmented_videos_path`` (one per class), the
    regular files inside it are shuffled and the first
    ``int(train_ratio * n)`` are copied to ``<split_videos_path>/train/<class>``
    while the rest are copied to ``<split_videos_path>/test/<class>``.
    A one-line summary per class is printed.

    Args:
        augmented_videos_path: Directory containing one sub-directory per class.
        split_videos_path: Directory that receives the ``train`` and ``test`` trees.
        train_ratio: Fraction of each class assigned to the train set (0 to 1).
        seed: Optional seed. When given, the split is reproducible across runs;
            when ``None`` the global ``random`` module state is used, as before.

    Raises:
        ValueError: If ``train_ratio`` is outside the range [0, 1].

    Notes:
        Re-running into an existing output tree is safe: a file assigned to one
        split is removed from the other split of the same class, so no video
        ends up in both sets.

        NOTE(review): if a class folder holds several augmented variants of one
        original clip, a per-file random split can still place variants of the
        same clip in both sets - confirm whether grouping by clip is required.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio!r}")

    # A dedicated generator keeps a seeded split independent of any other use of
    # the global random state; without a seed, behave exactly like random.shuffle.
    rng = random.Random(seed) if seed is not None else random

    train_dir = os.path.join(split_videos_path, 'train')
    test_dir = os.path.join(split_videos_path, 'test')

    # Ensure train and test directories exist
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # Iterate through each class folder (sorted: os.listdir order is arbitrary,
    # and a seeded generator must be consumed in a stable order)
    for class_dir in sorted(os.listdir(augmented_videos_path)):
        class_path = os.path.join(augmented_videos_path, class_dir)

        if not os.path.isdir(class_path):
            continue

        # Create corresponding directories in train and test sets
        train_class_dir = os.path.join(train_dir, class_dir)
        test_class_dir = os.path.join(test_dir, class_dir)
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(test_class_dir, exist_ok=True)

        # Keep regular files only: shutil.copy cannot copy a nested directory.
        videos = sorted(
            entry for entry in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, entry))
        )

        # Shuffle videos to randomize the split
        rng.shuffle(videos)

        # Determine the split index
        split_index = int(train_ratio * len(videos))

        # Assign videos to train and test sets
        train_videos = videos[:split_index]
        test_videos = videos[split_index:]

        _copy_videos(train_videos, class_path, train_class_dir, test_class_dir)
        _copy_videos(test_videos, class_path, test_class_dir, train_class_dir)

        print(f"Class '{class_dir}': {len(train_videos)} training videos, {len(test_videos)} testing videos.")



In [4]:
# Run the split: 80% of each class goes to train, the remainder to test.
split_data(
    augmented_videos_path=augmented_videos_path,
    split_videos_path=split_videos_path,
    train_ratio=0.8,
)


Class 'Amilo': 112 training videos, 28 testing videos.
Class 'Baisakh': 112 training videos, 28 testing videos.
Class 'Falful': 112 training videos, 28 testing videos.
Class 'Ghar': 112 training videos, 28 testing videos.
Class 'Gundruk': 112 training videos, 28 testing videos.
Class 'Guy': 112 training videos, 28 testing videos.
Class 'Hariyo': 112 training videos, 28 testing videos.
Class 'Hathi': 112 training videos, 28 testing videos.
Class 'Jestha': 112 training videos, 28 testing videos.
Class 'Kera': 112 training videos, 28 testing videos.
Class 'Khairo': 112 training videos, 28 testing videos.
Class 'Kharayo': 112 training videos, 28 testing videos.
Class 'Mahina': 112 training videos, 28 testing videos.
Class 'Naadi': 112 training videos, 28 testing videos.
Class 'Naang': 106 training videos, 27 testing videos.
Class 'Nariwal': 112 training videos, 28 testing videos.
Class 'Poush': 112 training videos, 28 testing videos.
Class 'Saag': 112 training videos, 28 testing videos.
Cl

In [1]:
import pandas as pd

In [2]:

# Locations of the train and test CSV files to be shuffled.
train_csv_path, test_csv_path = "train_data.csv", "test_data.csv"

# Function to shuffle CSV data and save it
def shuffle_csv(input_csv_path, output_csv_path, random_state=None):
    """Write a row-shuffled copy of a CSV file and print a confirmation.

    Args:
        input_csv_path: Path of the CSV file to read.
        output_csv_path: Path the shuffled CSV is written to. May be the same
            as ``input_csv_path``, in which case the file is overwritten.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            row order is reproducible. ``None`` keeps the shuffle unseeded.
    """
    # Read every cell as text and disable NA detection so the round trip only
    # reorders rows: with type inference, values such as "007", "0.10" or the
    # literal "NA" would be rewritten as 7, 0.1 and an empty field.
    df = pd.read_csv(input_csv_path, dtype=str, na_filter=False)

    # frac=1 samples every row exactly once, i.e. a full permutation.
    shuffled_df = df.sample(frac=1, random_state=random_state).reset_index(drop=True)

    # Save the shuffled DataFrame without the index column
    shuffled_df.to_csv(output_csv_path, index=False)
    print(f"Shuffled CSV saved to {output_csv_path}")




In [3]:
# Shuffle the train CSV, then the test CSV; each file is overwritten in place
# because the same path is passed as both input and output.
for csv_path in (train_csv_path, test_csv_path):
    shuffle_csv(csv_path, csv_path)

Shuffled CSV saved to train_data.csv
Shuffled CSV saved to test_data.csv
