In [1]:
import json
import math
import os

import numpy as np
import torch
import torchvision
from sklearn.model_selection import train_test_split

### Load Semeion Dataset

In [8]:
def load_dataset(root='../data', max_angle=math.pi/4):
    """Load the SEMEION dataset and return flattened, rescaled images with labels.

    Parameters
    ----------
    root : str, optional
        Directory passed to ``torchvision.datasets.SEMEION``; the dataset is
        downloaded there when it is not already present.
    max_angle : float, optional
        Scale applied to the pixels: every value is mapped to
        ``pixel / 255 * max_angle``. The default reproduces the previously
        hard-coded ``math.pi / 4`` factor.

    Returns
    -------
    data : torch.Tensor
        One flattened image per row, shape ``(n_samples, height * width)``.
    label : torch.Tensor
        The dataset's ``labels`` array converted to a tensor.
    """
    semeion_data = torchvision.datasets.SEMEION(root=root, download=True)
    data, label = semeion_data.data, semeion_data.labels

    # Linear rescale; assumes raw intensities lie in [0, 255] -- TODO confirm.
    data = data / 255 * max_angle

    # Collapse the two image axes into a single feature axis.
    data = data.reshape(-1, data.shape[1] * data.shape[2])

    return torch.from_numpy(data), torch.from_numpy(label)

### Create permutation

In [13]:
def create_permutation(seed, data):
    """Shuffle the columns of ``data`` with a seeded random permutation.

    Note that this reseeds torch's *global* random number generator with
    ``seed`` as a side effect.

    Parameters
    ----------
    seed : int
        Seed handed to ``torch.manual_seed`` before drawing the permutation.
    data : 2-D array-like indexable as ``data[:, idx]``
        Samples along the first axis, features along the second.

    Returns
    -------
    perm_indices : torch.Tensor
        The drawn permutation of ``range(data.shape[1])``.
    permuted_data
        ``data`` with its columns reordered according to ``perm_indices``.
    """
    torch.manual_seed(seed)

    n_features = data.shape[1]
    column_order = torch.randperm(n_features)

    return column_order, data[:, column_order]

### Create train/test partition

In [17]:
def create_train_test_partition(data, label, test_size, random_seed):
    """Split samples and labels into stratified train and test subsets.

    Thin wrapper around ``sklearn.model_selection.train_test_split`` that
    stratifies on ``label`` and fixes the random state.

    Parameters
    ----------
    data : array-like
        Samples, one per row.
    label : array-like
        Class label of each sample; also used as the stratification target.
    test_size : float or int
        Forwarded unchanged to ``train_test_split``.
    random_seed : int
        Forwarded as ``random_state`` so the split is reproducible.

    Returns
    -------
    tuple
        ``(data_tr, data_te, label_tr, label_te)``.
    """
    split_options = {
        "test_size": test_size,
        "stratify": label,
        "random_state": random_seed,
    }
    train_data, test_data, train_label, test_label = train_test_split(
        data, label, **split_options
    )
    return train_data, test_data, train_label, test_label


## Create and save the experiment data

In [44]:
def create_n_save_data(save_path, permutation_seed=42, test_size=0.2, partition_seed=42):
    """Build the permuted train/test split and write it to ``save_path``.

    Two files are written into ``save_path`` (created if it does not exist):

    * ``data_tensors.pt`` -- a dict with the keys ``data_tr``, ``label_tr``,
      ``data_te`` and ``label_te``, saved with ``torch.save``.
    * ``data_config.json`` -- the seeds, the test size, the number of classes
      and the path of the tensor file.

    Parameters
    ----------
    save_path : str
        Directory receiving both output files.
    permutation_seed : int, optional
        Seed for the feature (column) permutation.
    test_size : float, optional
        Test-set size forwarded to the train/test split.
    partition_seed : int, optional
        Random state of the train/test split.
    """
    # Make the function usable on its own, not only after the caller created the folder.
    os.makedirs(save_path, exist_ok=True)

    # Load dataset
    data, label = load_dataset()

    # Create permuted dataset; the indices themselves are not stored because
    # they can be regenerated from ``permutation_seed``.
    _perm_indices, permuted_data = create_permutation(permutation_seed, data)

    # Create train/test partition
    data_tr, data_te, label_tr, label_te = create_train_test_partition(
        permuted_data,
        label,
        test_size=test_size,
        random_seed=partition_seed,
    )

    # Save data tensors into a .pt file
    data_tensors_path = os.path.join(save_path, "data_tensors.pt")
    torch.save({
        "data_tr": data_tr,
        "label_tr": label_tr,
        "data_te": data_te,
        "label_te": label_te,
    }, data_tensors_path)

    params = {
        "permutation_seed": permutation_seed,
        "test_size": test_size,
        "partition_seed": partition_seed,
        # Count classes on the full label set rather than on the test split only.
        "n_class": len(np.unique(label)),
        "data_tensors": data_tensors_path,
    }

    # Save the settings that produced the tensors next to them
    data_config_path = os.path.join(save_path, "data_config.json")
    with open(data_config_path, "w") as f:
        json.dump(params, f, indent=4)

    print(f"Data settings files saved in {save_path}")

In [45]:
# Each experiment setting gets its own sub-folder below the experiments root.
root = "../exps"
setting = "Setting_1"
path = os.path.join(root, setting)

# Ensure the target folder exists before anything is written into it.
os.makedirs(path, exist_ok=True)

# Generate the data files with the default seeds and test size.
create_n_save_data(path)

Files already downloaded and verified
Data settings files saved in ../exps/Setting_1
