In [1]:
import numpy as np
from sklearn.model_selection import ShuffleSplit

In [2]:
# Fix the seed of NumPy's legacy global RNG so the dummy data generated
# in the following cells is identical on every Restart & Run All.
np.random.seed(seed=42)

In [3]:
# Two predictor columns: 20 uniform draws from [0, 1) each.
# The draws come from the global RNG in this order, so swapping the two
# lines would change which values land in which column.
predictor1 = np.random.random_sample(20)
predictor2 = np.random.random_sample(20)

In [4]:
# Binary target column: 20 integer labels drawn from {0, 1}
# (the upper bound of randint is exclusive).
target = np.random.randint(0, 2, size=20)

In [5]:
# Assemble the feature matrix: each 1-D predictor becomes one column,
# giving one row per sample and one column per predictor.
X = np.stack((predictor1, predictor2), axis=1)

In [6]:
# Display the assembled feature matrix (one row per sample, one column per predictor)
X

array([[0.37454012, 0.61185289],
       [0.95071431, 0.13949386],
       [0.73199394, 0.29214465],
       [0.59865848, 0.36636184],
       [0.15601864, 0.45606998],
       [0.15599452, 0.78517596],
       [0.05808361, 0.19967378],
       [0.86617615, 0.51423444],
       [0.60111501, 0.59241457],
       [0.70807258, 0.04645041],
       [0.02058449, 0.60754485],
       [0.96990985, 0.17052412],
       [0.83244264, 0.06505159],
       [0.21233911, 0.94888554],
       [0.18182497, 0.96563203],
       [0.18340451, 0.80839735],
       [0.30424224, 0.30461377],
       [0.52475643, 0.09767211],
       [0.43194502, 0.68423303],
       [0.29122914, 0.44015249]])

In [7]:
# Number of shuffle-and-split iterations to generate;
# passed to ShuffleSplit as its n_splits argument.
n_splits = 3

In [8]:
# Fraction of the samples held out as the test set in every split;
# passed to ShuffleSplit as its test_size argument.
test_size = 0.2

In [9]:
# Configure the Shuffle Split cross-validator. This only builds the splitter;
# the actual train/test index arrays are produced later by calling ss.split().
# random_state is fixed so the same splits are generated on every run.
ss = ShuffleSplit(
    n_splits=n_splits,
    test_size=test_size,
    random_state=42,
)

In [10]:
# Running count of splits processed so far; starts at zero and is
# updated by the split loop in the next cell.
split_count = 0

In [11]:
# Print train-test splits for each iteration.
# enumerate() numbers the splits starting at 1, so re-running this cell always
# begins at "Split 1" instead of continuing from a counter value left over
# from a previous execution of the cell.
for split_number, (train_index, test_index) in enumerate(ss.split(X), start=1):
    # Index features and labels with the same index arrays so rows stay aligned
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = target[train_index], target[test_index]

    print(f"Split {split_number} - Train set: {len(X_train)} samples, Test set: {len(X_test)} samples")
    print(f"X_train: {X_train}, X_test: {X_test}")
    print(f"y_train: {y_train}, y_test: {y_test}")
    print("-" * 30)

    # Keep the notebook-level counter in sync for any later cell that reads it
    split_count = split_number

Split 1 - Train set: 16 samples, Test set: 4 samples
X_train: [[0.60111501 0.59241457]
 [0.15599452 0.78517596]
 [0.96990985 0.17052412]
 [0.59865848 0.36636184]
 [0.43194502 0.68423303]
 [0.30424224 0.30461377]
 [0.21233911 0.94888554]
 [0.73199394 0.29214465]
 [0.70807258 0.04645041]
 [0.29122914 0.44015249]
 [0.15601864 0.45606998]
 [0.83244264 0.06505159]
 [0.86617615 0.51423444]
 [0.02058449 0.60754485]
 [0.18182497 0.96563203]
 [0.05808361 0.19967378]], X_test: [[0.37454012 0.61185289]
 [0.52475643 0.09767211]
 [0.18340451 0.80839735]
 [0.95071431 0.13949386]]
y_train: [1 0 1 1 1 1 1 0 1 0 0 1 0 1 1 1], y_test: [0 1 1 1]
------------------------------
Split 2 - Train set: 16 samples, Test set: 4 samples
X_train: [[0.15601864 0.45606998]
 [0.83244264 0.06505159]
 [0.18182497 0.96563203]
 [0.86617615 0.51423444]
 [0.59865848 0.36636184]
 [0.05808361 0.19967378]
 [0.73199394 0.29214465]
 [0.70807258 0.04645041]
 [0.21233911 0.94888554]
 [0.02058449 0.60754485]
 [0.43194502 0.6842330