Write a Python function to generate random subsets of a given dataset. The function should take a 2D numpy array X, a 1D numpy array y, an integer n_subsets, and a boolean replacements. It should return a list of n_subsets random subsets of the dataset, where each subset is a tuple (X_subset, y_subset) built from the same randomly chosen rows of X and y. If replacements is True, the rows of each subset should be sampled with replacement; otherwise, they should be sampled without replacement.

Example:
Input:
X = np.array([[1, 2],
                  [3, 4],
                  [5, 6],
                  [7, 8],
                  [9, 10]])
    y = np.array([1, 2, 3, 4, 5])
    n_subsets = 3
    replacements = False
    get_random_subsets(X, y, n_subsets, replacements)
Output:
[array([[7, 8],
            [1, 2]]), 
     array([4, 1])]
     
    [array([[9, 10],
            [5, 6]]), 
     array([5, 3])]
     
    [array([[3, 4],
            [5, 6]]), 
     array([2, 3])]
Reasoning:
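The function generates three random subsets of the dataset without replacement. Because replacements=False, each subset contains half of the samples, rounded down (5 // 2 = 2 rows), and no sample appears more than once within a subset. The same sample may still appear in more than one subset, because each subset is drawn independently.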



In [1]:
import numpy as np

def get_random_subsets(X: np.ndarray, y: np.ndarray, n_subsets: int, replacements: bool) -> list:
    """
    Generate random subsets of the dataset.

    Every subset holds X.shape[0] // 2 rows, whatever the value of
    replacements. The same row indices are applied to X and y, so each
    selected feature row stays paired with its label.

    Args:
    X (np.ndarray): 2D array representing feature matrix.
    y (np.ndarray): 1D array representing labels, one per row of X.
    n_subsets (int): Number of subsets to generate.
    replacements (bool): Whether to sample with replacement.

    Returns:
    list: A list containing n_subsets tuples, each containing (X_subset, y_subset).

    Raises:
    ValueError: If X and y do not have the same number of samples.
    """
    n_samples = X.shape[0]
    # Indices are drawn from range(n_samples) only. Without this check a
    # longer y would silently never have its trailing labels sampled, and a
    # shorter y would fail with an IndexError on some random draws only.
    if y.shape[0] != n_samples:
        raise ValueError(
            f"X and y must have the same number of samples, "
            f"got {n_samples} and {y.shape[0]}"
        )

    subset_size = n_samples // 2  # Each subset contains 50% of the samples

    subsets = []
    for _ in range(n_subsets):
        indices = np.random.choice(n_samples, subset_size, replace=replacements)
        subsets.append((X[indices], y[indices]))

    return subsets

# Example usage: draw three half-size subsets without replacement and print
# each (X_subset, y_subset) pair on its own line.
X = np.arange(1, 11).reshape(5, 2)  # rows [1, 2], [3, 4], ..., [9, 10]
y = np.arange(1, 6)                 # labels 1..5, one per row of X
n_subsets, replacements = 3, False

random_subsets = get_random_subsets(X, y, n_subsets, replacements)
for subset in random_subsets:
    print(subset)

(array([[5, 6],
       [7, 8]]), array([3, 4]))
(array([[7, 8],
       [5, 6]]), array([4, 3]))
(array([[1, 2],
       [5, 6]]), array([1, 3]))


In [None]:
import random

def get_random_subsets(X: list, y: list, n_subsets: int, replacements: bool) -> list:
    """
    Build random (features, labels) subsets from plain Python lists.

    Each subset holds len(X) // 2 entries. The positions picked for a subset
    are used to index both X and y, so rows and labels stay aligned.

    Args:
    X (list): 2D list representing feature matrix.
    y (list): 1D list representing labels.
    n_subsets (int): Number of subsets to generate.
    replacements (bool): Whether to sample with replacement.

    Returns:
    list: A list containing n_subsets tuples, each containing (X_subset, y_subset).
    """
    total = len(X)
    half = total // 2  # half of the samples, rounded down
    population = range(total)
    # random.choices may repeat a position; random.sample never does.
    draw = random.choices if replacements else random.sample

    def pick_one():
        chosen = draw(population, k=half)
        return [X[pos] for pos in chosen], [y[pos] for pos in chosen]

    return [pick_one() for _ in range(n_subsets)]

# Example usage: the same five-sample dataset as plain lists; draw three
# half-size subsets without replacement and print each pair.
X = [[first, first + 1] for first in range(1, 10, 2)]  # [[1, 2], ..., [9, 10]]
y = list(range(1, 6))                                  # [1, 2, 3, 4, 5]
n_subsets, replacements = 3, False

random_subsets = get_random_subsets(X, y, n_subsets, replacements)
for subset in random_subsets:
    print(subset)

In [None]:
import numpy as np

def get_random_subsets(X, y, n_subsets, replacements=True, seed=42):
    """
    Generate reproducible random subsets of the dataset as nested lists.

    With replacement, each subset has as many rows as X; without
    replacement, each subset has half as many rows, rounded down. The same
    row indices are applied to X and y.

    Args:
    X (np.ndarray): 2D array representing feature matrix.
    y (np.ndarray): 1D array representing labels.
    n_subsets (int): Number of subsets to generate.
    replacements (bool): Whether to sample with replacement.
    seed: Seed for the random draws; the same seed gives the same subsets.

    Returns:
    list: A list containing n_subsets tuples (X_subset, y_subset), each
    converted to plain Python lists via ndarray.tolist().
    """
    n, _ = X.shape  # unpacking also rejects an X that is not 2D

    subset_size = n if replacements else n // 2

    # A private RandomState produces the same draws as np.random.seed(seed)
    # followed by np.random.choice, but leaves the caller's global NumPy
    # random state untouched.
    rng = np.random.RandomState(seed)

    subsets = []
    for _ in range(n_subsets):
        rows = rng.choice(n, subset_size, replace=replacements)
        # Index once per subset instead of rebuilding X[idx] / y[idx] for
        # all subsets on every iteration.
        subsets.append((X[rows].tolist(), y[rows].tolist()))
    return subsets
