In [1]:
# import libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

import numpy as np

import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline

# notebook display config: ask the inline backend to emit figures as SVG
matplotlib_inline.backend_inline.set_matplotlib_formats("svg")

In [2]:
# a function that creates data


def createSomeData(nPerClust, batchsize=8, train_size=0.9):
    """Create a three-cluster 2-D dataset plus train/test DataLoaders.

    Points are scattered around the centers (1, 1), (5, 1) and (4, 4) by
    adding standard-normal noise (``np.random.randn``) to each coordinate.
    Cluster ``i`` (in that order) receives the integer label ``i``.

    Parameters
    ----------
    nPerClust : int
        Number of points generated for each of the three clusters.
    batchsize : int, optional
        Mini-batch size of the training loader. Defaults to 8.
    train_size : float or int, optional
        Passed straight to ``train_test_split`` as its ``train_size``
        argument. Defaults to 0.9.

    Returns
    -------
    dict
        ``"data"``       : float32 tensor of shape ``(3 * nPerClust, 2)``.
        ``"labels"``     : int64 tensor of shape ``(3 * nPerClust,)``.
        ``"train_data"`` : shuffled ``DataLoader`` over the training split.
        ``"test_data"``  : ``DataLoader`` that returns the whole test split
                           as a single batch.
    """
    # cluster centers, one (x, y) row per class
    centers = np.array([[1, 1], [5, 1], [4, 4]], dtype=float)
    nClust = centers.shape[0]

    # generate data: a single draw shaped (cluster, coordinate, sample).
    # The array is filled in C order, so the global NumPy RNG is consumed
    # in the same sequence as drawing x then y for each cluster in turn.
    noise = np.random.randn(nClust, 2, nPerClust)

    # concatenate into a (3 * nPerClust, 2) matrix, clusters stacked in order
    data_np = (centers[:, :, None] + noise).transpose(0, 2, 1).reshape(-1, 2)

    # true labels: 0, 1, 2 -- each repeated nPerClust times
    labels_np = np.repeat(np.arange(nClust), nPerClust)

    # put all outputs into a dictionary
    output = {
        "data": torch.tensor(data_np).float(),
        # note: "long" format (integers) for labels
        "labels": torch.tensor(labels_np).long(),
    }

    # use scikit-learn to split the data
    train_X, test_X, train_y, test_y = train_test_split(
        output["data"], output["labels"], train_size=train_size
    )

    # then convert them into PyTorch Datasets (note: already tensors)
    train_ds = TensorDataset(train_X, train_y)
    test_ds = TensorDataset(test_X, test_y)

    # finally, translate into dataloader objects.
    # Incomplete trailing batches are dropped, but only when at least one
    # full batch exists; otherwise the loader would yield nothing at all.
    output["train_data"] = DataLoader(
        train_ds,
        batch_size=batchsize,
        shuffle=True,
        drop_last=len(train_ds) >= batchsize,
    )
    # the whole test split is served as one batch
    output["test_data"] = DataLoader(test_ds, batch_size=len(test_ds))

    return output