In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
%cd drive/MyDrive/D5/phase2\ public
%ls

/content/drive/.shortcut-targets-by-id/1exPdvEPc0bY2bSmEUBsKg5fzhOH39uXh/D5/phase2 public
domain_digit_classification_features.npz  phase_2_code.ipynb  train_data.npz
load_features.py                          test_data.npz


In [3]:
import numpy as np
from typing import Tuple
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score


from sklearn.metrics import classification_report

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import roc_curve, roc_auc_score, confusion_matrix

import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import random
import os
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pickle
import time
start_time = time.time()

In [4]:
seed = 57

random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)

In [5]:
def load_data(file_name):
    data = np.load(file_name)
    features = data['feats']
    domains = data['domain_labels']
    digits = data['digits_labels']
    print(features.shape, domains.shape, digits.shape)
    return features, domains, digits

In [6]:
def count_domains_digits(domains, digits):
    my_count = np.zeros((5, 10))
    for i in range(5):
        for j in range(10):
            for z in range(domains.shape[0]):
                if (i == domains[z]) & (j == digits[z]):
                    my_count[i][j] += 1
    print([(f'dom{item}:', np.count_nonzero(domains == item)) for item in range(5)])
    # for i in range(5):
    #     print(f'dom{i}:', [(f'dig{item}', my_count[i][item]) for item in range(10)])

    return my_count


# train -- test

In [7]:
def generate_train_test_data(features, domains, digits, my_count, domains_no=5, classes_no=10, data_class_no=2000):


    # calculate the maximum amount of data per class, based on the minimum class count
    min_class_count = (np.amin(my_count))
    max_data_class_no = int((min_class_count / 6) * 5)

    # adjust data_class_no if it exceeds the maximum
    data_class_no = min(data_class_no, max_data_class_no)

    # calculate the number of data points for training and testing
    train_data_no = domains_no * classes_no * data_class_no
    test_data_no = domains_no * classes_no * int(data_class_no * 0.2)

    # initialize arrays for training and testing data and labels
    x_train = np.zeros((train_data_no, features.shape[1]))
    y_train_domain = np.zeros(train_data_no)
    y_train_digits = np.zeros(train_data_no)
    x_test = np.zeros((test_data_no, features.shape[1]))
    y_test_domain = np.zeros(test_data_no)
    y_test_digits = np.zeros(test_data_no)

    # iterate over domains, classes, and data points to populate train and test data arrays
    for domain_idx in range(domains_no):
        for class_idx in range(classes_no):
            # get indices of data points with the current domain and class label
            indices = np.where((domains == domain_idx) & (digits == class_idx))[0]

            # shuffle the indices and split into train and test
            np.random.shuffle(indices)
            train_indices = indices[:data_class_no]
            test_indices = indices[data_class_no:data_class_no+int(data_class_no*0.2)]

            # populate train and test data arrays with features and labels
            for i, idx in enumerate(train_indices):
                x_train[domain_idx * classes_no * data_class_no + class_idx * data_class_no + i] = features[idx]
                y_train_domain[domain_idx * classes_no * data_class_no + class_idx * data_class_no + i] = domain_idx
                y_train_digits[domain_idx * classes_no * data_class_no + class_idx * data_class_no + i] = class_idx

            for i, idx in enumerate(test_indices):
                x_test[domain_idx * classes_no * int(data_class_no*0.2) + class_idx * int(data_class_no*0.2) + i] = features[idx]
                y_test_domain[domain_idx * classes_no * int(data_class_no*0.2) + class_idx * int(data_class_no*0.2) + i] = domain_idx
                y_test_digits[domain_idx * classes_no * int(data_class_no*0.2) + class_idx * int(data_class_no*0.2) + i] = class_idx

    # return train and test data and labels
    return x_train, y_train_domain, y_train_digits, x_test, y_test_domain, y_test_digits, data_class_no


In [8]:
def display_confusion_matrix(y_true, y_pred, s='classifier'):
    # compute confusion matrix and accuracy score
    cm = confusion_matrix(y_true, y_pred)
    acc = accuracy_score(y_true, y_pred)
    print('accuracy_score', acc)

    # plot confusion matrix
    sns.set(rc={'figure.figsize':(5,2)})
    plt.subplots_adjust(top=1)
    ax = plt.subplot()
    sns.heatmap(cm, annot=True, fmt='g', ax=ax)
    ax.set_xlabel('Predicted labels')
    ax.set_ylabel('True labels')
    ax.set_title(f'Confusion Matrix - {s} ')
    plt.show()


In [9]:
def random_forest_classification(xdata, y_data, xdata_test, y_data_test, plot_CM=False):
    # initialize random forest classifier with max depth of 2
    # n_estimators = [5, 10, 15, 50, 100]
    # max_features = ['log2', 'sqrt']
    # max_depth = [5, 20, 50, None]
    # bootstrap = [True, False]
    clf = RandomForestClassifier(n_estimators=100, max_depth=50, random_state=seed,max_features='sqrt', bootstrap=True)
    clf.fit(xdata, y_data)

    # predict labels for test data
    y_pred = clf.predict(xdata_test)

    if(plot_CM):
        # compute and display confusion matrix
        display_confusion_matrix(y_data_test, y_pred)

    # compute and return accuracy score
    return accuracy_score(y_data_test, y_pred), y_pred

# spliting train dataset

In [10]:
def split_unpure_data_train(xdata, y_digits_data, scale, domain_data_no, domains_no):
    extra_len_dom = domain_data_no // scale
    extra_len = 4 * extra_len_dom
    class_extra_no = extra_len_dom // 10

    xdata_dom_list = []
    ydig_dom_list = []
    for i in range(domains_no):
        xdata_dom = np.zeros((domain_data_no + extra_len, 2048))
        ydig_dom = np.zeros((domain_data_no + extra_len))

        xdata_dom[:domain_data_no] = xdata[i*domain_data_no:(i+1)*domain_data_no]
        ydig_dom[:domain_data_no] = y_digits_data[i*domain_data_no:(i+1)*domain_data_no]

        pointer = domain_data_no
        for j in range(domains_no):
            if j != i:
                xdum, x_test, ydum, y_test  = train_test_split(
                    xdata[j*domain_data_no:(j+1)*domain_data_no],
                    y_digits_data[j*domain_data_no:(j+1)*domain_data_no],
                    random_state=seed, test_size=(1/scale))

                xdata_dom[pointer:pointer+extra_len_dom] = x_test
                ydig_dom[pointer:pointer+extra_len_dom] = y_test
                pointer += extra_len_dom

        xdata_dom_list.append(xdata_dom)
        ydig_dom_list.append(ydig_dom)

    return xdata_dom_list, ydig_dom_list

In [11]:
def split_pure_data_train(xdata, y_digits_data, domain_data_no, domains_no):
    xdata_dom_list = []
    ydig_dom_list = []

    for i in range(domains_no):
        start = i * domain_data_no
        end = start + domain_data_no

        xdata_dom_list.append(xdata[start:end])
        ydig_dom_list.append(y_digits_data[start:end])

    return xdata_dom_list, ydig_dom_list


# compuing test data set

In [12]:
def split_test_data_by_prediction(xdata_test, y_digits_data_test, y_pred):


    xdom1_test = []; ydom1_test = []
    xdom2_test = []; ydom2_test = []
    xdom3_test = []; ydom3_test = []
    xdom4_test = []; ydom4_test = []
    xdom5_test = []; ydom5_test = []
    for i, data_pred in enumerate(y_pred):

        if(data_pred == 0):
            xdom1_test.append(xdata_test[i])
            ydom1_test.append(y_digits_data_test[i])
        elif(data_pred == 1):
            xdom2_test.append(xdata_test[i])
            ydom2_test.append(y_digits_data_test[i])
        elif(data_pred == 2):
            xdom3_test.append(xdata_test[i])
            ydom3_test.append(y_digits_data_test[i])
        elif(data_pred == 3):
            xdom4_test.append(xdata_test[i])
            ydom4_test.append(y_digits_data_test[i])
        else:
            xdom5_test.append(xdata_test[i])
            ydom5_test.append(y_digits_data_test[i])

    xdom_test_list = [xdom1_test, xdom2_test, xdom3_test, xdom4_test, xdom5_test]
    ydom_test_list = [ydom1_test, ydom2_test, ydom3_test, ydom4_test, ydom5_test]
    xdom_test_np_list = [np.asarray(x, dtype=float) for x in xdom_test_list]
    ydom_test_np_list = [np.asarray(y, dtype=float) for y in ydom_test_list]

    return xdom_test_np_list, ydom_test_np_list

In [13]:
def split_data_by_domain(xdata, y_digits_data, data_test_class_no, classes_no=10):

    domain_data_test_no = classes_no * data_test_class_no
    xdata_test_dom1_p = xdata[0:domain_data_test_no]
    xdata_test_dom2_p = xdata[1*domain_data_test_no:2*domain_data_test_no]
    xdata_test_dom3_p = xdata[2*domain_data_test_no:3*domain_data_test_no]
    xdata_test_dom4_p = xdata[3*domain_data_test_no:4*domain_data_test_no]
    xdata_test_dom5_p = xdata[4*domain_data_test_no:5*domain_data_test_no]

    ydig_test_dom1_p = y_digits_data[0:domain_data_test_no]
    ydig_test_dom2_p = y_digits_data[1*domain_data_test_no:2*domain_data_test_no]
    ydig_test_dom3_p = y_digits_data[2*domain_data_test_no:3*domain_data_test_no]
    ydig_test_dom4_p = y_digits_data[3*domain_data_test_no:4*domain_data_test_no]
    ydig_test_dom5_p = y_digits_data[4*domain_data_test_no:5*domain_data_test_no]

    xdata_test_list_p =  [xdata_test_dom1_p, xdata_test_dom2_p, xdata_test_dom3_p, xdata_test_dom4_p, xdata_test_dom5_p]
    xdata_test_list_p = [ydig_test_dom1_p, ydig_test_dom2_p, ydig_test_dom3_p, ydig_test_dom4_p, ydig_test_dom5_p]
    return xdata_test_list_p, xdata_test_list_p

In [14]:
def run_random_forest(xdata, y_data, xdata_test, y_domain_data_test, bootstrap, n_estimators, max_features, max_depth):
    acc_list = []
    boot = bootstrap[0]
    for n_est in n_estimators:
        for max_feat in max_features:
            for max_dep in max_depth:
                clf = RandomForestClassifier(n_estimators=n_est, max_features=max_feat, max_depth=max_dep, bootstrap= boot)
                clf.fit(xdata, y_data)
                y_pred = clf.predict(xdata_test)
                acc = accuracy_score(y_domain_data_test,y_pred)
                cm = confusion_matrix(y_domain_data_test,y_pred)
                acc_list.append([n_est, max_feat, max_dep, acc, cm])
    return acc_list


In [15]:
def save_results(acc_list, file_name):
    %cd features/
    pkl_name = file_name +'.pkl'
    with open(pkl_name, 'wb') as f:
        pickle.dump(acc_list, f)
    return pkl_name


In [16]:
def load_results(pkl_name):
    %cd features/
    # pkl_name = file_name + ' --- ' + str(xdata.shape[0]) +  'domain c .pkl'
    with open(pkl_name, 'rb') as f:
        mynewlist = pickle.load(f)
    return mynewlist


# digit classifier on all domain using methods

In [17]:
def domain_to_digit_classifier(file_name, scale, data_class_no=500, domains_no = 5):
    acc_list = []
    param_list = [file_name, data_class_no, scale]###
    domain_data_no = data_class_no * domains_no

    features, domains, digits = load_data(file_name)
    my_count = count_domains_digits(domains, digits)

    x_train, y_train_domain, y_train_digits, x_test, y_test_domain, y_test_digits, data_class_no  = \
    generate_train_test_data(features, domains,
                            digits, my_count,
                            domains_no=domains_no,
                            classes_no=10,
                            data_class_no=data_class_no)

    acc_domain, y_pred = random_forest_classification(
                                    xdata=x_train,
                                    y_data= y_train_domain,
                                    xdata_test=x_test,
                                    y_data_test=y_test_domain,
                                    plot_CM=False)
    cm = confusion_matrix(y_test_domain, y_pred)
    acc_DomC_list = [x_train.shape[0], round(acc_domain, 2), cm]###

    xdom_test_np_list, ydom_test_np_list = split_test_data_by_prediction(
                                                xdata_test=x_test,
                                                y_digits_data_test=y_test_digits,
                                                y_pred=y_pred)


    acc_DigC_pure_list = []
    xdata_dom_list, ydig_dom_list= split_pure_data_train(xdata=x_train,
                                                        y_digits_data=y_train_digits,
                                                        domain_data_no=domain_data_no,
                                                        domains_no=domains_no)

    for i in range(len(xdata_dom_list)):
        acc_DC, y_pred_DC = random_forest_classification( #digit classifier trained by pure data
                                    xdata=xdata_dom_list[i],
                                    y_data= ydig_dom_list[i],
                                    xdata_test=xdom_test_np_list[i],
                                    y_data_test=ydom_test_np_list[i],
                                    plot_CM=False)
        cm = confusion_matrix(ydom_test_np_list[i], y_pred_DC)
        acc_DigC_pure_list.append([f'pure_DigC{i+1}', (xdata_dom_list[i]).shape[0], round(acc_DC, 2)])###

    # acc_list.append(acc_DigC_pure_list)
    acc_DigC_unpure_list = []
    xdata_dom_list, ydig_dom_list = split_unpure_data_train(xdata=x_train,
                                                    y_digits_data=y_train_digits,
                                                    scale=scale,
                                                    domain_data_no=domain_data_no,
                                                    domains_no=domains_no)

    for i in range(len(xdata_dom_list)):
        acc_DC, y_pred_DC = random_forest_classification( #digit classifier
                                    xdata=xdata_dom_list[i],
                                    y_data= ydig_dom_list[i],
                                    xdata_test=xdom_test_np_list[i],
                                    y_data_test=ydom_test_np_list[i],
                                    plot_CM=False)
        cm = confusion_matrix(ydom_test_np_list[i], y_pred_DC)
        acc_DigC_unpure_list.append([f'DigC{i+1}', (xdata_dom_list[i]).shape[0], round(acc_DC, 2)])###

    acc_list= [param_list, acc_DomC_list, [acc_DigC_pure_list, acc_DigC_unpure_list]]

    print(acc_list)
    return(acc_list)

In [18]:
# file_name = 'domain_digit_classification_features.npz'
# file_name = 'features.npz'
# n_estimators = [5, 10, 15, 50, 100]
# max_features = ['log2', 'sqrt']
# max_depth = [5, 20, 50, None]
# bootstrap = [True, False]
# data_class_no = 2000

# features, domains, digits = load_data(file_name)
# my_count = count_domains_digits(domains, digits)


# x_train, y_train_domain, y_train_digits, x_test, y_test_domain, y_test_digits, data_class_no  = \
# generate_train_test_data(features, domains,
#                          digits, my_count,
#                          domains_no=5,
#                          classes_no=10,
#                          data_class_no=data_class_no)

# acc_list =  run_random_forest(xdata=x_train,
#                                y_data=y_train_domain,
#                                xdata_test=x_test,
#                                y_domain_data_test=y_test_domain,
#                                bootstrap=bootstrap,
#                                n_estimators=n_estimators,
#                                max_features=max_features,
#                                max_depth=max_depth)
# result_file_name = file_name + '---' + str(x_train.shape[0]) + '---domain'
# pkl_name = save_results(acc_list, result_file_name)
# mynewlist = load_results(pkl_name)


In [19]:
def domain_to_digit_classifier_2(file_name, scale, data_class_no=500, domains_no = 5, isEqual=False, time_show=False):
    if(time_show):
        start_time =time.time()
    acc_list = []

    features, domains, digits = load_data(file_name)
    my_count = count_domains_digits(domains, digits)
    min_class_count = (np.amin(my_count))
    n_unpure = data_class_no
    n_pure = data_class_no

    if(isEqual):
        max_data_no_confus = int (data_class_no + 4*data_class_no/scale)
        if(min_class_count < max_data_no_confus):
            n_unpure = data_class_no=int((min_class_count*scale)/((domains_no-1)+scale))
            n_pure = min_class_count
            data_class_no=n_pure

    # ####### tempoary_break
    print(data_class_no)
    # return(data_class_no)
    # #######
    x_train, y_train_domain, y_train_digits, x_test, y_test_domain, y_test_digits, data_class_no  = \
    generate_train_test_data(features, domains,
                            digits, my_count,
                            domains_no=domains_no,
                            classes_no=10,
                            data_class_no=data_class_no)

    param_list = [file_name, data_class_no, scale]############
    domain_data_no = data_class_no * domains_no

    if(time_show):
        print("---seconds ", round((time.time() - start_time), 2),  "--- train-test data generated")
        start_time = time.time()
    # acc_dig_all, y_pred = random_forest_classification(
    #                                 xdata=x_train,
    #                                 y_data= y_train_digits,
    #                                 xdata_test=x_test,
    #                                 y_data_test=y_test_digits,
    #                                 plot_CM=False)
    # cm = confusion_matrix(y_test_digits, y_pred)
    # acc_DigC_all_list = [x_train.shape[0], round(acc_dig_all, 2), cm]############

    if(time_show):
        print("---seconds ", round((time.time() - start_time), 2),  "--- digit classifier on all datas finished")
        start_time = time.time()

    acc_domain, y_pred = random_forest_classification(
                                    xdata=x_train,
                                    y_data= y_train_domain,
                                    xdata_test=x_test,
                                    y_data_test=y_test_domain,
                                    plot_CM=False)
    cm = confusion_matrix(y_test_domain, y_pred)
    acc_DomC_list = [x_train.shape[0], round(acc_domain, 2), cm]############
    if(time_show):
        print("---seconds ", round((time.time() - start_time), 2),  "--- domain classifier on all datas finished")
        start_time = time.time()
    xdom_test_np_list, ydom_test_np_list = split_test_data_by_prediction(
                                                xdata_test=x_test,
                                                y_digits_data_test=y_test_digits,
                                                y_pred=y_pred)


    acc_DigC_pure_list = []
    xdata_dom_list, ydig_dom_list= split_pure_data_train(xdata=x_train,
                                                        y_digits_data=y_train_digits,
                                                        domain_data_no=domain_data_no,
                                                        domains_no=domains_no)

    for i in range(len(xdata_dom_list)):
        acc_DC, y_pred_DC = random_forest_classification( #digit classifier trained by pure data
                                    xdata=xdata_dom_list[i],
                                    y_data= ydig_dom_list[i],
                                    xdata_test=xdom_test_np_list[i],
                                    y_data_test=ydom_test_np_list[i],
                                    plot_CM=False)
        cm = confusion_matrix(ydom_test_np_list[i], y_pred_DC)
        acc_DigC_pure_list.append([f'pure_DigC{i+1}', (xdata_dom_list[i]).shape[0], round(acc_DC, 2)])############

    if(time_show):
        print("---seconds ", round((time.time() - start_time), 2),  "--- digit classifing trained by pure data  finished")
        start_time = time.time()

    # acc_list.append(acc_DigC_pure_list)
    acc_DigC_unpure_list = []
    if(isEqual):
        data_class_no = data_class_no
    xdata_dom_list, ydig_dom_list = split_unpure_data_train(xdata=x_train,
                                                    y_digits_data=y_train_digits,
                                                    scale=scale,
                                                    domain_data_no=domain_data_no,
                                                    domains_no=domains_no)

    for i in range(len(xdata_dom_list)):
        acc_DC, y_pred_DC = random_forest_classification( #digit classifier
                                    xdata=xdata_dom_list[i],
                                    y_data= ydig_dom_list[i],
                                    xdata_test=xdom_test_np_list[i],
                                    y_data_test=ydom_test_np_list[i],
                                    plot_CM=False)
        cm = confusion_matrix(ydom_test_np_list[i], y_pred_DC)
        acc_DigC_unpure_list.append([f'DigC{i+1}', (xdata_dom_list[i]).shape[0], round(acc_DC, 2)])###
    if(time_show):
        print("---seconds ", round((time.time() - start_time), 2),  "--- digit classifing trained by unpure data finished")
        start_time = time.time()
    acc_list= [param_list, acc_DigC_all_list, acc_DomC_list,
               [acc_DigC_pure_list, acc_DigC_unpure_list]]

    print(acc_list)
    return(acc_list)

In [20]:
def show_result(acc_list):
      print('--param_list--',
            '\nfile_name:', acc_list[0][0],
            '\ndata_class_no:', acc_list[0][1],
            '\nscale:', acc_list[0][2])
      print('--acc_DigC_all_list--', '\nx_train.shape[0]:', acc_list[1][0], ' ---- acc_dig_all:', acc_list[1][1])
      print('--acc_DomC_list--', '\nx_train.shape[0]:', acc_list[2][0], ' ---- acc_dom_all:', acc_list[2][1])
      print('-- acc_DigC_pure_list --')
      for res in acc_list[3][0]:
            print( 'x_train.shape[0]:', res[1], ' ---- acc_DC:', res[2])
      print('--acc_DigC_unpure_list--')
      for res in acc_list[3][1]:
            print( 'x_train.shape[0]:', res[1], ' ---- acc_DC:', res[2])


# calling methods

In [36]:
file_name = 'domain_digit_classification_features.npz'
# file_name = 'features.npz'
classes_no = 10
scale = 5

data_class_no = 1700
domains_no = 5

acc_list = domain_to_digit_classifier_2(file_name, scale, data_class_no=data_class_no, domains_no = 5,isEqual=False,time_show=True)

(50000, 1024) (50000,) (50000,)
[('dom0:', 10000), ('dom1:', 10000), ('dom2:', 10000), ('dom3:', 10000), ('dom4:', 10000)]
1700
---seconds  1.46 --- train-test data generated
---seconds  0.0 --- digit classifier on all datas finished
---seconds  195.9 --- domain classifier on all datas finished
---seconds  31.14 --- digit classifing trained by pure data  finished


ValueError: ignored

In [None]:
# print(acc_list)
show_result(acc_list)

--param_list-- 
file_name: domain_digit_classification_features.npz 
data_class_no: 1266 
scale: 5
--acc_DigC_all_list-- 
x_train.shape[0]: 63300  ---- acc_dig_all: 0.93
--acc_DomC_list-- 
x_train.shape[0]: 63300  ---- acc_dom_all: 0.95
-- acc_DigC_pure_list --
x_train.shape[0]: 6330  ---- acc_DC: 0.5
x_train.shape[0]: 6330  ---- acc_DC: 0.43
x_train.shape[0]: 6330  ---- acc_DC: 0.44
x_train.shape[0]: 6330  ---- acc_DC: 0.45
x_train.shape[0]: 6330  ---- acc_DC: 0.5
--acc_DigC_unpure_list--
x_train.shape[0]: 11394  ---- acc_DC: 0.97
x_train.shape[0]: 11394  ---- acc_DC: 0.89
x_train.shape[0]: 11394  ---- acc_DC: 0.61
x_train.shape[0]: 11394  ---- acc_DC: 0.87
x_train.shape[0]: 11394  ---- acc_DC: 0.97


In [None]:
file_name = 'domain_digit_classification_features.npz --- 95000.pkl'
mynewlist = load_results(file_name)
sorted_list = sorted(mynewlist, key=lambda x: x[3], reverse=True)
sorted_list
random_forest_classification(xdata, y_data, xdata_test, y_data_test, plot_CM=False)

[Errno 2] No such file or directory: 'features/'
/media/storage/senpai/university/term8/TA/CAI/phase2/features


[[100,
  'sqrt',
  50,
  0.9316842105263158,
  array([[1820,   13,   12,    7,   10,    1,   12,    4,    9,   12],
         [  11, 1781,   15,   15,   31,    2,    3,   30,   11,    1],
         [   9,   20, 1807,   13,   12,    2,    1,   12,   15,    9],
         [   6,   15,   24, 1747,    5,   39,    7,    7,   37,   13],
         [   4,   38,    5,    8, 1783,    2,   10,    6,    7,   37],
         [   2,   10,    5,   56,   12, 1751,   19,    4,   22,   19],
         [  22,   12,    3,   12,   21,   27, 1770,    2,   27,    4],
         [   8,   37,   17,    8,   13,    4,    1, 1790,    2,   20],
         [  17,   28,   14,   24,   21,   17,   28,    7, 1713,   31],
         [  27,    5,   11,   22,   35,   13,    3,   20,   24, 1740]])],
 [100,
  'sqrt',
  None,
  0.9316315789473684,
  array([[1819,   15,    8,    3,   11,    3,   11,    6,   14,   10],
         [  14, 1780,   16,   16,   24,    1,    3,   31,   11,    4],
         [  10,   15, 1806,   12,   14,    2,    1,  