In [4]:
import csv
import numpy as np
import time
import random

In [9]:
filename='featuresKrK'
data_file_name=filename+".csv" # diversity selection filename

 
diverse_ratio=0.8 # training set ratio, 
remaining_ratio=1-diverse_ratio # test set ratio

In [10]:
with open(data_file_name) as f:
        data_file = csv.reader(f)
        temp = next(data_file)
        n_samples = int(temp[0])
        N_features = int(temp[1])
        data = np.empty((n_samples, N_features))
        temp = next(data_file)
        feature_names = np.array(temp)
        
        for i, d in enumerate(data_file):
            data[i] = np.asarray(d[2:2+N_features], dtype=np.float64)

print("Total number of materials : ", data.shape[0])
N_materials = data.shape[0]
N_features = 6
print("Number of features: ", N_features)

feature_0 = data.T[0]
feature_1 = data.T[1]
feature_2 = data.T[2]
feature_3 = data.T[3]
feature_4 = data.T[4]
feature_5 = data.T[5]

# rescale
feature_0 = (feature_0 - np.min(feature_0))/(np.max(feature_0) - np.min(feature_0))
feature_1 = (feature_1 - np.min(feature_1))/(np.max(feature_1) - np.min(feature_1))
feature_2 = (feature_2 - np.min(feature_2))/(np.max(feature_2) - np.min(feature_2))
feature_3 = (feature_3 - np.min(feature_3))/(np.max(feature_3) - np.min(feature_3))
feature_4 = (feature_4 - np.min(feature_4))/(np.max(feature_4) - np.min(feature_4))
feature_5 = (feature_5 - np.min(feature_5))/(np.max(feature_5) - np.min(feature_5))

if N_features == 6:
    x = np.concatenate((feature_0.reshape(1,N_materials),feature_1.reshape(1,N_materials),feature_2.reshape(1,N_materials),
                        feature_3.reshape(1,N_materials),feature_4.reshape(1,N_materials),feature_5.reshape(1,N_materials),))
    
    
print("Shape of feature x: ", np.shape(x))
print("Example feature vector = " , x[:,0])
N_sample = int(N_materials * diverse_ratio)-1

print("Sampling %d diverse structures out of %d" % (N_sample+1,N_materials))
print("total accessible materials considered: ", N_materials)



print("Starting diversity selection. Seeking %d points" % (N_sample+1))
time.sleep(1)
# store indices of x here for the diverse and non-diverse sets.
diverse_set = []
remaining_set = list(range(N_materials))
### INITIALIZE WITH RANDOMLY SELECTED POINT
idx_init = random.sample(list(np.arange(N_materials)),1)[0]
diverse_set.append(idx_init)
remaining_set.remove(idx_init)
N_diverse = 1
while N_diverse <= N_sample:
        print("Selecting point ", N_diverse)
        min_d_to_diverse_set = np.zeros((N_materials-N_diverse,))
        # for every candidate point not in diverse set...
        for i in range(N_materials - N_diverse):
            # get the distance of this point to each point in the diverse set
            d_from_each_diverse_pt = np.linalg.norm(x[:,diverse_set] - x[:,remaining_set[i]].reshape(N_features,1),axis=0)
            # get the closest distance that this point is to the diverse set
            min_d_to_diverse_set[i] = np.min(d_from_each_diverse_pt)
        # select point that has the largest distance from the diverse set
        idx_select = remaining_set[np.argmax(min_d_to_diverse_set)]
        assert (len(remaining_set) == np.size(min_d_to_diverse_set))
        print("\tSelected point " , idx_select)
        # add point to diverse set; remove it from remaining set
        diverse_set.append(idx_select)
        remaining_set.remove(idx_select)
        print("\tPts in diverse set: ", len(diverse_set))
        print("\tPts in remaining set: ", len(remaining_set))
        print(diverse_set[N_diverse-1])
        N_diverse += 1

with open("divided_set_"+str(diverse_ratio)+"_"+str("%.1f"%remaining_ratio)+"_"+"Kr-K.txt", "w") as f:
    f.write(str(diverse_set)+" "+str(remaining_set))
    print("Save file name : divided_set_"+str(diverse_ratio)+"_"+str("%.1f"%remaining_ratio)+"_"+"Kr-K.txt")

Total number of materials :  1243
Number of features:  6
Shape of feature x:  (6, 1243)
Example feature vector =  [0.04842407 0.05382044 0.26062098 0.32749834 0.31130952 0.58526657]
Sampling 994 diverse structures out of 1243
total accessible materials considered:  1243
Starting diversity selection. Seeking 994 points
Selecting point  1
	Selected point  876
	Pts in diverse set:  2
	Pts in remaining set:  1241
367
Selecting point  2
	Selected point  1143
	Pts in diverse set:  3
	Pts in remaining set:  1240
876
Selecting point  3
	Selected point  35
	Pts in diverse set:  4
	Pts in remaining set:  1239
1143
Selecting point  4
	Selected point  297
	Pts in diverse set:  5
	Pts in remaining set:  1238
35
Selecting point  5
	Selected point  848
	Pts in diverse set:  6
	Pts in remaining set:  1237
297
Selecting point  6
	Selected point  478
	Pts in diverse set:  7
	Pts in remaining set:  1236
848
Selecting point  7
	Selected point  40
	Pts in diverse set:  8
	Pts in remaining set:  1235
478
Se

	Selected point  1065
	Pts in diverse set:  83
	Pts in remaining set:  1160
1171
Selecting point  83
	Selected point  343
	Pts in diverse set:  84
	Pts in remaining set:  1159
1065
Selecting point  84
	Selected point  55
	Pts in diverse set:  85
	Pts in remaining set:  1158
343
Selecting point  85
	Selected point  1145
	Pts in diverse set:  86
	Pts in remaining set:  1157
55
Selecting point  86
	Selected point  251
	Pts in diverse set:  87
	Pts in remaining set:  1156
1145
Selecting point  87
	Selected point  776
	Pts in diverse set:  88
	Pts in remaining set:  1155
251
Selecting point  88
	Selected point  1156
	Pts in diverse set:  89
	Pts in remaining set:  1154
776
Selecting point  89
	Selected point  36
	Pts in diverse set:  90
	Pts in remaining set:  1153
1156
Selecting point  90
	Selected point  318
	Pts in diverse set:  91
	Pts in remaining set:  1152
36
Selecting point  91
	Selected point  449
	Pts in diverse set:  92
	Pts in remaining set:  1151
318
Selecting point  92
	Select

	Selected point  30
	Pts in diverse set:  167
	Pts in remaining set:  1076
761
Selecting point  167
	Selected point  533
	Pts in diverse set:  168
	Pts in remaining set:  1075
30
Selecting point  168
	Selected point  72
	Pts in diverse set:  169
	Pts in remaining set:  1074
533
Selecting point  169
	Selected point  371
	Pts in diverse set:  170
	Pts in remaining set:  1073
72
Selecting point  170
	Selected point  568
	Pts in diverse set:  171
	Pts in remaining set:  1072
371
Selecting point  171
	Selected point  1142
	Pts in diverse set:  172
	Pts in remaining set:  1071
568
Selecting point  172
	Selected point  92
	Pts in diverse set:  173
	Pts in remaining set:  1070
1142
Selecting point  173
	Selected point  434
	Pts in diverse set:  174
	Pts in remaining set:  1069
92
Selecting point  174
	Selected point  579
	Pts in diverse set:  175
	Pts in remaining set:  1068
434
Selecting point  175
	Selected point  54
	Pts in diverse set:  176
	Pts in remaining set:  1067
579
Selecting point 

	Selected point  98
	Pts in diverse set:  251
	Pts in remaining set:  992
76
Selecting point  251
	Selected point  59
	Pts in diverse set:  252
	Pts in remaining set:  991
98
Selecting point  252
	Selected point  60
	Pts in diverse set:  253
	Pts in remaining set:  990
59
Selecting point  253
	Selected point  855
	Pts in diverse set:  254
	Pts in remaining set:  989
60
Selecting point  254
	Selected point  822
	Pts in diverse set:  255
	Pts in remaining set:  988
855
Selecting point  255
	Selected point  293
	Pts in diverse set:  256
	Pts in remaining set:  987
822
Selecting point  256
	Selected point  524
	Pts in diverse set:  257
	Pts in remaining set:  986
293
Selecting point  257
	Selected point  377
	Pts in diverse set:  258
	Pts in remaining set:  985
524
Selecting point  258
	Selected point  1027
	Pts in diverse set:  259
	Pts in remaining set:  984
377
Selecting point  259
	Selected point  37
	Pts in diverse set:  260
	Pts in remaining set:  983
1027
Selecting point  260
	Selec

	Selected point  25
	Pts in diverse set:  335
	Pts in remaining set:  908
50
Selecting point  335
	Selected point  191
	Pts in diverse set:  336
	Pts in remaining set:  907
25
Selecting point  336
	Selected point  203
	Pts in diverse set:  337
	Pts in remaining set:  906
191
Selecting point  337
	Selected point  1192
	Pts in diverse set:  338
	Pts in remaining set:  905
203
Selecting point  338
	Selected point  847
	Pts in diverse set:  339
	Pts in remaining set:  904
1192
Selecting point  339
	Selected point  338
	Pts in diverse set:  340
	Pts in remaining set:  903
847
Selecting point  340
	Selected point  468
	Pts in diverse set:  341
	Pts in remaining set:  902
338
Selecting point  341
	Selected point  1010
	Pts in diverse set:  342
	Pts in remaining set:  901
468
Selecting point  342
	Selected point  1006
	Pts in diverse set:  343
	Pts in remaining set:  900
1010
Selecting point  343
	Selected point  757
	Pts in diverse set:  344
	Pts in remaining set:  899
1006
Selecting point  3

	Selected point  595
	Pts in diverse set:  417
	Pts in remaining set:  826
586
Selecting point  417
	Selected point  354
	Pts in diverse set:  418
	Pts in remaining set:  825
595
Selecting point  418
	Selected point  950
	Pts in diverse set:  419
	Pts in remaining set:  824
354
Selecting point  419
	Selected point  1034
	Pts in diverse set:  420
	Pts in remaining set:  823
950
Selecting point  420
	Selected point  28
	Pts in diverse set:  421
	Pts in remaining set:  822
1034
Selecting point  421
	Selected point  1040
	Pts in diverse set:  422
	Pts in remaining set:  821
28
Selecting point  422
	Selected point  189
	Pts in diverse set:  423
	Pts in remaining set:  820
1040
Selecting point  423
	Selected point  320
	Pts in diverse set:  424
	Pts in remaining set:  819
189
Selecting point  424
	Selected point  867
	Pts in diverse set:  425
	Pts in remaining set:  818
320
Selecting point  425
	Selected point  109
	Pts in diverse set:  426
	Pts in remaining set:  817
867
Selecting point  42

	Selected point  223
	Pts in diverse set:  500
	Pts in remaining set:  743
1242
Selecting point  500
	Selected point  1069
	Pts in diverse set:  501
	Pts in remaining set:  742
223
Selecting point  501
	Selected point  147
	Pts in diverse set:  502
	Pts in remaining set:  741
1069
Selecting point  502
	Selected point  1014
	Pts in diverse set:  503
	Pts in remaining set:  740
147
Selecting point  503
	Selected point  677
	Pts in diverse set:  504
	Pts in remaining set:  739
1014
Selecting point  504
	Selected point  467
	Pts in diverse set:  505
	Pts in remaining set:  738
677
Selecting point  505
	Selected point  336
	Pts in diverse set:  506
	Pts in remaining set:  737
467
Selecting point  506
	Selected point  1107
	Pts in diverse set:  507
	Pts in remaining set:  736
336
Selecting point  507
	Selected point  444
	Pts in diverse set:  508
	Pts in remaining set:  735
1107
Selecting point  508
	Selected point  823
	Pts in diverse set:  509
	Pts in remaining set:  734
444
Selecting poin

	Selected point  44
	Pts in diverse set:  583
	Pts in remaining set:  660
803
Selecting point  583
	Selected point  685
	Pts in diverse set:  584
	Pts in remaining set:  659
44
Selecting point  584
	Selected point  333
	Pts in diverse set:  585
	Pts in remaining set:  658
685
Selecting point  585
	Selected point  1089
	Pts in diverse set:  586
	Pts in remaining set:  657
333
Selecting point  586
	Selected point  141
	Pts in diverse set:  587
	Pts in remaining set:  656
1089
Selecting point  587
	Selected point  646
	Pts in diverse set:  588
	Pts in remaining set:  655
141
Selecting point  588
	Selected point  774
	Pts in diverse set:  589
	Pts in remaining set:  654
646
Selecting point  589
	Selected point  978
	Pts in diverse set:  590
	Pts in remaining set:  653
774
Selecting point  590
	Selected point  980
	Pts in diverse set:  591
	Pts in remaining set:  652
978
Selecting point  591
	Selected point  966
	Pts in diverse set:  592
	Pts in remaining set:  651
980
Selecting point  592


	Selected point  704
	Pts in diverse set:  666
	Pts in remaining set:  577
1208
Selecting point  666
	Selected point  946
	Pts in diverse set:  667
	Pts in remaining set:  576
704
Selecting point  667
	Selected point  638
	Pts in diverse set:  668
	Pts in remaining set:  575
946
Selecting point  668
	Selected point  269
	Pts in diverse set:  669
	Pts in remaining set:  574
638
Selecting point  669
	Selected point  1032
	Pts in diverse set:  670
	Pts in remaining set:  573
269
Selecting point  670
	Selected point  916
	Pts in diverse set:  671
	Pts in remaining set:  572
1032
Selecting point  671
	Selected point  123
	Pts in diverse set:  672
	Pts in remaining set:  571
916
Selecting point  672
	Selected point  33
	Pts in diverse set:  673
	Pts in remaining set:  570
123
Selecting point  673
	Selected point  1045
	Pts in diverse set:  674
	Pts in remaining set:  569
33
Selecting point  674
	Selected point  869
	Pts in diverse set:  675
	Pts in remaining set:  568
1045
Selecting point  6

	Selected point  804
	Pts in diverse set:  748
	Pts in remaining set:  495
11
Selecting point  748
	Selected point  1168
	Pts in diverse set:  749
	Pts in remaining set:  494
804
Selecting point  749
	Selected point  431
	Pts in diverse set:  750
	Pts in remaining set:  493
1168
Selecting point  750
	Selected point  784
	Pts in diverse set:  751
	Pts in remaining set:  492
431
Selecting point  751
	Selected point  846
	Pts in diverse set:  752
	Pts in remaining set:  491
784
Selecting point  752
	Selected point  650
	Pts in diverse set:  753
	Pts in remaining set:  490
846
Selecting point  753
	Selected point  766
	Pts in diverse set:  754
	Pts in remaining set:  489
650
Selecting point  754
	Selected point  1215
	Pts in diverse set:  755
	Pts in remaining set:  488
766
Selecting point  755
	Selected point  547
	Pts in diverse set:  756
	Pts in remaining set:  487
1215
Selecting point  756
	Selected point  144
	Pts in diverse set:  757
	Pts in remaining set:  486
547
Selecting point  7

	Selected point  590
	Pts in diverse set:  831
	Pts in remaining set:  412
284
Selecting point  831
	Selected point  257
	Pts in diverse set:  832
	Pts in remaining set:  411
590
Selecting point  832
	Selected point  829
	Pts in diverse set:  833
	Pts in remaining set:  410
257
Selecting point  833
	Selected point  754
	Pts in diverse set:  834
	Pts in remaining set:  409
829
Selecting point  834
	Selected point  858
	Pts in diverse set:  835
	Pts in remaining set:  408
754
Selecting point  835
	Selected point  1231
	Pts in diverse set:  836
	Pts in remaining set:  407
858
Selecting point  836
	Selected point  1160
	Pts in diverse set:  837
	Pts in remaining set:  406
1231
Selecting point  837
	Selected point  964
	Pts in diverse set:  838
	Pts in remaining set:  405
1160
Selecting point  838
	Selected point  913
	Pts in diverse set:  839
	Pts in remaining set:  404
964
Selecting point  839
	Selected point  1198
	Pts in diverse set:  840
	Pts in remaining set:  403
913
Selecting point 

	Selected point  1235
	Pts in diverse set:  914
	Pts in remaining set:  329
1139
Selecting point  914
	Selected point  1233
	Pts in diverse set:  915
	Pts in remaining set:  328
1235
Selecting point  915
	Selected point  345
	Pts in diverse set:  916
	Pts in remaining set:  327
1233
Selecting point  916
	Selected point  175
	Pts in diverse set:  917
	Pts in remaining set:  326
345
Selecting point  917
	Selected point  783
	Pts in diverse set:  918
	Pts in remaining set:  325
175
Selecting point  918
	Selected point  746
	Pts in diverse set:  919
	Pts in remaining set:  324
783
Selecting point  919
	Selected point  1174
	Pts in diverse set:  920
	Pts in remaining set:  323
746
Selecting point  920
	Selected point  238
	Pts in diverse set:  921
	Pts in remaining set:  322
1174
Selecting point  921
	Selected point  253
	Pts in diverse set:  922
	Pts in remaining set:  321
238
Selecting point  922
	Selected point  267
	Pts in diverse set:  923
	Pts in remaining set:  320
253
Selecting poin