### This notebook builds a sample JSON payload (one batch of feature rows) from the scikit-learn breast cancer dataset

In [9]:
import numpy as np
import pandas as pd
import json
from sklearn.datasets import load_breast_cancer

In [10]:
# Prepare the dataset used for the test payload.

# The data comes from scikit-learn; as_frame=True exposes it as pandas objects.
data = load_breast_cancer(as_frame=True)

# Column names must not contain spaces, otherwise we get problems with ONNX.
# Map every original column name to a copy in which each space is an underscore.
dict_columns = {name: name.replace(" ", "_") for name in data.frame.columns}

# data.frame holds the feature columns plus the target column in one DataFrame;
# apply the renaming in a single step.
orig_df = data.frame.rename(columns=dict_columns)

In [11]:
# show the first 10 rows to check the renamed (space-free) column names
orig_df.head(10)

Unnamed: 0,mean_radius,mean_texture,mean_perimeter,mean_area,mean_smoothness,mean_compactness,mean_concavity,mean_concave_points,mean_symmetry,mean_fractal_dimension,...,worst_texture,worst_perimeter,worst_area,worst_smoothness,worst_compactness,worst_concavity,worst_concave_points,worst_symmetry,worst_fractal_dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0
5,12.45,15.7,82.57,477.1,0.1278,0.17,0.1578,0.08089,0.2087,0.07613,...,23.75,103.4,741.6,0.1791,0.5249,0.5355,0.1741,0.3985,0.1244,0
6,18.25,19.98,119.6,1040.0,0.09463,0.109,0.1127,0.074,0.1794,0.05742,...,27.66,153.2,1606.0,0.1442,0.2576,0.3784,0.1932,0.3063,0.08368,0
7,13.71,20.83,90.2,577.9,0.1189,0.1645,0.09366,0.05985,0.2196,0.07451,...,28.14,110.6,897.0,0.1654,0.3682,0.2678,0.1556,0.3196,0.1151,0
8,13.0,21.82,87.5,519.8,0.1273,0.1932,0.1859,0.09353,0.235,0.07389,...,30.73,106.2,739.3,0.1703,0.5401,0.539,0.206,0.4378,0.1072,0
9,12.46,24.04,83.97,475.9,0.1186,0.2396,0.2273,0.08543,0.203,0.08243,...,40.68,97.65,711.4,0.1853,1.058,1.105,0.221,0.4366,0.2075,0


In [12]:
# Shuffle the rows. frac=1.0 returns every row in random order; the fixed
# random_state makes the shuffle (and therefore the rows that end up in the
# sample batch) identical on every Restart & Run All.
orig_df = orig_df.sample(frac=1.0, random_state=42)

In [17]:
# number of rows to put in the sample
BATCH_SIZE = 16

# keep only the leading BATCH_SIZE rows of the shuffled frame
sample_df = orig_df.head(BATCH_SIZE)

In [24]:
# Turn sample_df into a plain dict: one key per column, each holding that
# column's values as a Python list. The 'target' column is left out.
input_dict = {
    name: sample_df[name].values.tolist()
    for name in sample_df.columns
    if name != 'target'
}

In [25]:
# inspect the payload (column name -> list of values) before it is written to disk
input_dict

{'mean_radius': [25.22,
  10.48,
  18.81,
  12.63,
  14.34,
  12.58,
  10.48,
  12.72,
  11.14,
  19.21,
  13.74,
  17.02,
  14.25,
  12.68,
  12.89,
  14.59],
 'mean_texture': [24.91,
  14.98,
  19.98,
  20.76,
  13.47,
  18.4,
  19.86,
  17.67,
  14.07,
  18.57,
  17.91,
  23.98,
  21.72,
  23.84,
  13.12,
  22.68],
 'mean_perimeter': [171.5,
  67.49,
  120.9,
  82.15,
  92.51,
  79.83,
  66.72,
  80.98,
  71.24,
  125.5,
  88.12,
  112.8,
  93.63,
  82.69,
  81.89,
  96.39],
 'mean_area': [1878.0,
  333.6,
  1102.0,
  480.4,
  641.2,
  489.0,
  337.7,
  501.3,
  384.6,
  1152.0,
  585.0,
  899.3,
  633.0,
  499.0,
  515.9,
  657.1],
 'mean_smoothness': [0.1063,
  0.09816,
  0.08923,
  0.09933,
  0.09906,
  0.08393,
  0.107,
  0.07896,
  0.07274,
  0.1053,
  0.07944,
  0.1197,
  0.09823,
  0.1122,
  0.06955,
  0.08473],
 'mean_compactness': [0.2665,
  0.1013,
  0.05884,
  0.1209,
  0.07624,
  0.04216,
  0.05971,
  0.04522,
  0.06064,
  0.1267,
  0.06376,
  0.1496,
  0.1098,
  0.1262,

In [26]:
# Serialize the payload to sample1.json. The context manager guarantees the
# file is flushed and closed, even if json.dump raises part-way through
# (the bare open() used before left the handle open).
with open("sample1.json", "w") as json_file:
    json.dump(input_dict, json_file)