# Code

In [2]:
import numpy as np
import pandas as pd

In [3]:
class DataSplitter:

    def split(self, data, train_ratio):
        """
        Splits the data into training and testing sets, supporting lists, numpy arrays, and pandas DataFrames.

        Parameters:
            data (list, np.ndarray, pd.DataFrame): The input data to be split.
            train_ratio (float): The proportion of data for training.

        Returns:
            tuple: (train_data, test_data) - the same type as input.
        """
        if not (0 <= train_ratio <= 1):
            raise ValueError("train_ratio must be between 0 and 1.")
        
        if isinstance(data, list):
            return self.__split_list(data, train_ratio)
        elif isinstance(data, np.ndarray):
            return self.__split_numpy(data, train_ratio)
        elif isinstance(data, pd.DataFrame):
            return self.__split_dataframe(data, train_ratio)
        else:
            raise TypeError(f"Unsupported data type: {type(data)}")

    def _shuffle_and_split(self, data_size, train_ratio):
        shuffled_indices = np.random.permutation(data_size)
        split_index = int(data_size * train_ratio)
        return shuffled_indices[:split_index], shuffled_indices[split_index:]

    def __split_list(self, data, train_ratio):
        train_indices, test_indices = self._shuffle_and_split(len(data), train_ratio)
        return [data[i] for i in train_indices], [data[i] for i in test_indices]

    def __split_numpy(self, data, train_ratio):
        train_indices, test_indices = self._shuffle_and_split(len(data), train_ratio)
        return data[train_indices], data[test_indices]

    def __split_dataframe(self, data, train_ratio):
        train_indices, test_indices = self._shuffle_and_split(len(data), train_ratio)
        return data.iloc[train_indices], data.iloc[test_indices]

# Testing

In [5]:
#list
data = np.array( ["A","B","C","D","E","F","G","H","I","J","K"] )
train_ratio = 0.8

splitter = DataSplitter()
train_set , test_set = splitter.split(data,train_ratio)

print(f"Test set: {train_set}")
print(f"Train set: {test_set}")

Test set: ['D' 'K' 'C' 'I' 'B' 'F' 'G' 'A']
Train set: ['H' 'J' 'E']


In [6]:
#numpy
data = np.array( ["A","B","C","D","E","F","G","H","I","J","K"] )
train_ratio = 0.8

splitter = DataSplitter()
train_set , test_set = splitter.split(data,train_ratio)

print(f"Test set: {train_set}")
print(f"Train set: {test_set}")

Test set: ['H' 'G' 'D' 'F' 'B' 'K' 'C' 'I']
Train set: ['J' 'A' 'E']


In [7]:
#dataset
data = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"]
df = pd.DataFrame(data, columns=["Values"])
train_ratio = 0.8

splitter = DataSplitter()
train_set , test_set = splitter.split(df,train_ratio)

print(f"Test set: {train_set}")
print(f"Train set: {test_set}")

Test set:    Values
4       E
2       C
3       D
1       B
7       H
10      K
9       J
0       A
Train set:   Values
8      I
5      F
6      G
