In [2]:
import numpy as np
import pandas as pd
import h5py

Please download the dataset archive from Zenodo and decompress it to obtain the `.h5` file:  
[https://zenodo.org/records/6371680](https://zenodo.org/records/6371680)
```bash
xz -dv /path/to/dataset.tar.xz
```

You may also need to install `h5py`:
```bash
sudo apt update -y && sudo apt install python3-h5py
```

## Load data

In [3]:
# Location of the decompressed HDF5 dataset; edit this to point at your own
# copy (e.g. "path/to/dataset.h5").
dataset_path = "/workspace/dataset.h5"

In [4]:
# Read the table stored under the "meta" key of the HDF5 file into a DataFrame.
df_meta = pd.read_hdf(dataset_path, key="meta")

In [5]:
# Keep only the rows whose "Task" is "Classification". The explicit .copy()
# detaches the result from df_meta so it is an independent frame, not a slice.
is_classification = df_meta["Task"] == "Classification"
df_meta_classif = df_meta.loc[is_classification].copy()

# The full metadata table is no longer needed; drop the name to release it.
del df_meta

In [6]:
num_model_samples = 500
random_seed = 0  # fixed seed so "Restart & Run All" draws the same rows every time

# Draw distinct row positions from the classification metadata using a seeded
# generator; replace=False guarantees no row is selected twice.
rng = np.random.default_rng(random_seed)
rand_models_ind = rng.choice(len(df_meta_classif), size=num_model_samples, replace=False)

# Select first, then copy: only the sampled rows are duplicated instead of the
# whole classification table.
df_meta_sample = df_meta_classif.iloc[rand_models_ind].copy()
del df_meta_classif  # remove from mem

In [7]:
def expand_filter_ids(spec):
    """Expand an inclusive ``"start:stop"`` id range into a list of ids.

    Parameters
    ----------
    spec : str or other
        A string of the form ``"start:stop"`` where both ends are integers.
        Any non-string value is assumed to be already expanded.

    Returns
    -------
    list
        ``start, start + 1, ..., stop`` (both ends included) for string input;
        the input itself, unchanged, for non-string input.
    """
    # Pass through values that were expanded by a previous run of this cell,
    # so re-running the cell is a no-op rather than an AttributeError.
    if not isinstance(spec, str):
        return spec
    parts = spec.split(":")  # split once instead of once per bound
    start, stop = int(parts[0]), int(parts[1])
    return list(np.arange(start, stop + 1))  # +1 because the stop bound is inclusive


df_meta_sample["filter_ids"] = df_meta_sample["filter_ids"].apply(expand_filter_ids)
df_meta_sample

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,layer_depth,filter_ids,model,path,producer,op_set,Transpose,Conv,"(3, 3) filters",Clip,...,Accessible,Dataset URL,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,total_filters,3x3_filter_share
model_id,conv_depth,conv_depth_norm,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
294,20,0.833333,58,"[998059162, 998059163, 998059164, 998059165, 9...",robustbench_wu2020adversarial_extra_linf_cifar...,/data/onnx_zoo/robustbench/robustbench_wu2020a...,pytorch 1.9,11,,28.0,4021808.0,,...,https://github.com/RobustBench/robustbench,,,,,,,,4021808.0,1.000000
57,104,0.654088,317,"[40282954, 40282955, 40282956, 40282957, 40282...",hso_lowres_densenet161_fashionmnist_11,/data/onnx_zoo/hso/hso_lowres_densenet161_fash...,pytorch 1.9,11,,160.0,718944.0,,...,https://github.com/paulgavrikov/pytorch-pretra...,,,,,,,,718944.0,1.000000
403,35,0.777778,54,"[1143012822, 1143012823, 1143012824, 114301282...",timm_hrnet_w18_small_imagenet_11,/data/onnx_zoo/timm_2/timm_hrnet_w18_small_ima...,pytorch 1.10,11,,91.0,934592.0,,...,https://rwightman.github.io/pytorch-image-models/,,,,,,,,934592.0,1.000000
91,5,0.312500,13,"[73806698, 73806699, 73806700, 73806701, 73806...",hso_lowres_resnet18_fashionmnist_11,/data/onnx_zoo/hso/hso_lowres_resnet18_fashion...,pytorch 1.9,11,,20.0,1220672.0,,...,https://github.com/paulgavrikov/pytorch-pretra...,,,,,,,,1220672.0,1.000000
36,50,0.420168,155,"[29335914, 29335915, 29335916, 29335917, 29335...",hso_lowres_densenet121_cifar10_11,/data/onnx_zoo/hso/hso_lowres_densenet121_cifa...,pytorch 1.9,11,,120.0,237760.0,,...,https://github.com/paulgavrikov/pytorch-pretra...,,,,,,,,237760.0,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,137,0.861635,418,"[41485290, 41485291, 41485292, 41485293, 41485...",hso_lowres_densenet161_kmnist_11,/data/onnx_zoo/hso/hso_lowres_densenet161_kmni...,pytorch 1.9,11,,160.0,718944.0,,...,https://github.com/paulgavrikov/pytorch-pretra...,,,,,,,,718944.0,1.000000
631,95,0.633333,222,"[1409699152, 1409699153, 1409699154, 140969915...",resnet152_imagenet_11,/data/onnx_zoo/torchvision_official/resnet152_...,pytorch 1.7,11,,155.0,3289088.0,,...,https://pytorch.org/docs/stable/model_zoo.html...,,,,,,,,3289280.0,0.999942
234,8,0.333333,24,"[423170186, 423170187, 423170188, 423170189, 4...",robustbench_hendrycks2019using_linf_cifar10_11,/data/onnx_zoo/robustbench/robustbench_hendryc...,pytorch 1.9,11,,28.0,4021808.0,,...,https://github.com/RobustBench/robustbench,,,,,,,,4021808.0,1.000000
482,65,0.541667,195,"[1268530726, 1268530727, 1268530728, 126853072...",timm_res2net50_26w_8s_imagenet_11,/data/onnx_zoo/timm_2/timm_res2net50_26w_8s_im...,pytorch 1.10,11,,149.0,1452724.0,,...,https://rwightman.github.io/pytorch-image-models/,,,,,,,,1452916.0,0.999868


In [8]:
# Flatten the per-row id lists of df_meta_sample["filter_ids"] into a single
# 1-D array of filter indices.
filter_id_lists = df_meta_sample["filter_ids"].tolist()
dX_inds = np.concatenate(filter_id_lists)
dX_inds

array([ 998059162,  998059163,  998059164, ..., 1203877029, 1203877030,
       1203877031])

In [9]:
# h5py only accepts index lists in increasing order, so the sampled indices
# have to be sorted before they are used to read from the file.
dX_inds = np.sort(dX_inds)

In [10]:
# Read only the sampled entries of the "filters" dataset, then lay them out as
# rows of 9 values (presumably one flattened 3x3 kernel per row — confirm
# against the dataset description) stored as float16.
with h5py.File(dataset_path, mode="r") as h5_file:
    sampled_filters = h5_file["filters"][dX_inds]
    filters_flat = sampled_filters.reshape(-1, 9)
    dX = filters_flat.astype(np.float16)

In [11]:
# Write the sampled filter array to disk in NumPy's .npy format.
np.save(file='sampled_cnn_models.npy', arr=dX)