## Cosine similarity anisotropy

In [1]:
import os
import torch
import torch.nn.functional as F
import pickle
import numpy as np
import random
import argparse
from tqdm import tqdm

In [2]:
def cos_contrib(emb1, emb2):
    numerator_terms = emb1 * emb2
    denom = np.linalg.norm(emb1) * np.linalg.norm(emb2)
    return np.array(numerator_terms / denom)


def measure_anisotropy(filepath):
    with open(filepath, "rb") as f:
        embeddings, _ = pickle.load(f)
    
    indices = torch.randperm(embeddings.size(0))
    embeddings = embeddings[indices]
    
    layer_cosine_contribs = []

    for i in tqdm(range(embeddings.shape[0] - 1)):
        emb1, emb2 = embeddings[i, :], embeddings[i+1, :]
        layer_cosine_contribs.append(cos_contrib(emb1, emb2))
    
    layer_cosine_contribs = np.stack(layer_cosine_contribs)
    layer_cosine_contribs_mean = layer_cosine_contribs.mean(axis=0)
    
    aniso = layer_cosine_contribs_mean.sum()    
    top_dims = np.argsort(layer_cosine_contribs_mean)[-10:]
    top_dims = np.flip(top_dims)
    
    print(f"### {filepath} ###")
    print(f"Top 10 dims: {top_dims}")
    print(f"Estimated anisotropy: {aniso}")
    for i in range(10):
        d = top_dims[i]
        print(d, layer_cosine_contribs_mean[d])

### YaTC

In [3]:
directory = '../data/yatc/'
for filename in os.listdir(directory):
    filepath = os.path.join(directory, filename)
    result = measure_anisotropy(filepath)
    print(f"{filename} - {result}")

100%|██████████| 1351679/1351679 [01:12<00:00, 18583.78it/s]


### ../data/yatc/caida_emb.pkl ###
Top 10 dims: [129 137 146 191  98  99 170  28  25 157]
Estimated anisotropy: 0.865411102771759
129 0.21263592
137 0.05699241
146 0.043920062
191 0.023646057
98 0.021721823
99 0.020705616
170 0.017779438
28 0.016885027
25 0.01393512
157 0.013082485
caida_emb.pkl - None


100%|██████████| 1576959/1576959 [01:30<00:00, 17366.74it/s]


### ../data/yatc/cicapt_emb.pkl ###
Top 10 dims: [129 137  99 191 146 170 190  53 157  98]
Estimated anisotropy: 0.8659031391143799
129 0.26673865
137 0.039190803
99 0.03370274
191 0.027015366
146 0.023962041
170 0.019764489
190 0.014813196
53 0.014804101
157 0.013966684
98 0.013241946
cicapt_emb.pkl - None


100%|██████████| 557055/557055 [00:32<00:00, 16987.07it/s]


### ../data/yatc/cicids_emb.pkl ###
Top 10 dims: [129 137 146 191  99  98 170 157  91  25]
Estimated anisotropy: 0.8516557216644287
129 0.21515563
137 0.055743594
146 0.038011402
191 0.027106835
99 0.026548881
98 0.01980882
170 0.017351635
157 0.015405835
91 0.012069315
25 0.0116455965
cicids_emb.pkl - None


100%|██████████| 43007/43007 [00:02<00:00, 18122.05it/s]


### ../data/yatc/cross_emb.pkl ###
Top 10 dims: [129 137 146  99 191 170  98 157 190  53]
Estimated anisotropy: 0.8554381132125854
129 0.24075688
137 0.05156711
146 0.03332429
99 0.027809046
191 0.027096847
170 0.018212706
98 0.017234754
157 0.013460992
190 0.0125210695
53 0.0123032
cross_emb.pkl - None


100%|██████████| 999423/999423 [00:56<00:00, 17776.35it/s]


### ../data/yatc/mawi_emb.pkl ###
Top 10 dims: [129 137 146  98 191  99  28 170  25 157]
Estimated anisotropy: 0.8780504465103149
129 0.1916993
137 0.054223947
146 0.044488553
98 0.024154905
191 0.023772154
99 0.021879213
28 0.020735603
170 0.017948812
25 0.015574772
157 0.013815002
mawi_emb.pkl - None


100%|██████████| 39/39 [00:00<00:00, 13005.08it/s]


### ../data/yatc/perf_emb.pkl ###
Top 10 dims: [146 137  99  53 151 191   8   0 177 157]
Estimated anisotropy: 0.4664914011955261
146 0.073350415
137 0.05819031
99 0.03429897
53 0.014116148
151 0.013221607
191 0.0120427115
8 0.010854108
0 0.010630346
177 0.010072563
157 0.010047817
perf_emb.pkl - None


100%|██████████| 471/471 [00:00<00:00, 16764.54it/s]

### ../data/yatc/synth_emb.pkl ###
Top 10 dims: [129 146  99 137 104  38 151 191 144  91]
Estimated anisotropy: 0.8937067985534668
129 0.2717587
146 0.039089907
99 0.035860244
137 0.0332283
104 0.020407695
38 0.017156394
151 0.016437395
191 0.01504674
144 0.0148752
91 0.013876657
synth_emb.pkl - None





### ET-BERT

In [4]:
directory = '../data/etbert/'
for filename in os.listdir(directory):
    filepath = os.path.join(directory, filename)
    result = measure_anisotropy(filepath)
    print(f"{filename} - {result}")

100%|██████████| 990721/990721 [00:55<00:00, 17964.30it/s]


### ../data/etbert/caida_emb.pkl ###
Top 10 dims: [732 527 434 146 554 273 658 487 731   1]
Estimated anisotropy: 0.71210777759552
732 0.010904367
527 0.010602097
434 0.010409167
146 0.009280721
554 0.008162248
273 0.0073591
658 0.0070569064
487 0.007018146
731 0.0067071207
1 0.006614727
caida_emb.pkl - None


100%|██████████| 1232895/1232895 [01:07<00:00, 18179.33it/s]


### ../data/etbert/cicapt_emb.pkl ###
Top 10 dims: [732  98 434 527 658 273 146 487 731 424]
Estimated anisotropy: 0.8833835124969482
732 0.018100841
98 0.012479185
434 0.011423393
527 0.010416415
658 0.010103782
273 0.009998572
146 0.009342802
487 0.009291889
731 0.009282329
424 0.008284853
cicapt_emb.pkl - None


100%|██████████| 438271/438271 [00:24<00:00, 18162.69it/s]


### ../data/etbert/cicids_emb.pkl ###
Top 10 dims: [527 732 434 146 554 273 658 487 731   1]
Estimated anisotropy: 0.737087607383728
527 0.012040578
732 0.011936383
434 0.011218723
146 0.0090443725
554 0.008604147
273 0.0076598055
658 0.0076558427
487 0.0073213046
731 0.0070838463
1 0.0068670968
cicids_emb.pkl - None


100%|██████████| 28184/28184 [00:01<00:00, 17676.75it/s]


### ../data/etbert/cross_emb.pkl ###
Top 10 dims: [732  98 434 487 273 527 146 658 424 731]
Estimated anisotropy: 0.8808528780937195
732 0.017033136
98 0.012608572
434 0.011787814
487 0.010169675
273 0.0097851185
527 0.009545998
146 0.009055334
658 0.008846505
424 0.008018802
731 0.007752925
cross_emb.pkl - None


100%|██████████| 700415/700415 [00:39<00:00, 17912.77it/s]


### ../data/etbert/mawi_emb.pkl ###
Top 10 dims: [527 434 130 146 554  65  32 475   1 732]
Estimated anisotropy: 0.7753531336784363
527 0.013091619
434 0.011235626
130 0.0103749
146 0.0103641115
554 0.008638456
65 0.008511971
32 0.0075308434
475 0.007415808
1 0.0068034553
732 0.006760994
mawi_emb.pkl - None


100%|██████████| 39/39 [00:00<00:00, 9758.27it/s]


### ../data/etbert/perf_emb.pkl ###
Top 10 dims: [732 434  98 487 527 146 273 556 731 424]
Estimated anisotropy: 0.955191433429718
732 0.018963823
434 0.01442529
98 0.013029771
487 0.012491687
527 0.01097663
146 0.010929242
273 0.010522216
556 0.0087153325
731 0.008630252
424 0.008346031
perf_emb.pkl - None


100%|██████████| 471/471 [00:00<00:00, 13626.42it/s]

### ../data/etbert/synth_emb.pkl ###
Top 10 dims: [732  98 434 487 658 273 527 424 146 731]
Estimated anisotropy: 0.9682331085205078
732 0.019504761
98 0.014760234
434 0.011639453
487 0.010605966
658 0.010585682
273 0.010231343
527 0.010223525
424 0.010026148
146 0.00992473
731 0.009624093
synth_emb.pkl - None





### netFound

In [5]:
directory = '../data/netfound/'
for filename in os.listdir(directory):
    filepath = os.path.join(directory, filename)
    result = measure_anisotropy(filepath)
    print(f"{filename} - {result}")

100%|██████████| 1351679/1351679 [01:15<00:00, 17823.14it/s]


### ../data/netfound/caida_emb.pkl ###
Top 10 dims: [ 537  282  816  354  309  131  450  339 1008  220]
Estimated anisotropy: 0.8578384518623352
537 0.014706018
282 0.014426841
816 0.00922521
354 0.008765003
309 0.008046439
131 0.0073407227
450 0.006339016
339 0.0057218834
1008 0.005690359
220 0.005683669
caida_emb.pkl - None


100%|██████████| 1576959/1576959 [01:28<00:00, 17770.26it/s]


### ../data/netfound/cicapt_emb.pkl ###
Top 10 dims: [ 309 1008  537  450  429  816  804  282  339  639]
Estimated anisotropy: 0.8244582414627075
309 0.010214825
1008 0.008676095
537 0.00812383
450 0.007712792
429 0.0075981515
816 0.007508918
804 0.0074566356
282 0.007193122
339 0.0069275443
639 0.006654771
cicapt_emb.pkl - None


100%|██████████| 557055/557055 [00:31<00:00, 17900.57it/s]


### ../data/netfound/cicids_emb.pkl ###
Top 10 dims: [ 537  282  816  490 1008  309  354  186  339  131]
Estimated anisotropy: 0.6974029541015625
537 0.010298992
282 0.009330481
816 0.008544019
490 0.0065027713
1008 0.006348744
309 0.0058152177
354 0.0055949176
186 0.005393271
339 0.0052996716
131 0.005242066
cicids_emb.pkl - None


100%|██████████| 44543/44543 [00:02<00:00, 17707.68it/s]


### ../data/netfound/cross_emb.pkl ###
Top 10 dims: [1008  282  816  537  186  462  490  131  133  246]
Estimated anisotropy: 0.6873654723167419
1008 0.00936622
282 0.008608188
816 0.0077941893
537 0.0075720083
186 0.006471811
462 0.005828882
490 0.0056572915
131 0.0048241275
133 0.00453444
246 0.004444408
cross_emb.pkl - None


100%|██████████| 997375/997375 [00:55<00:00, 17898.32it/s]


### ../data/netfound/mawi_emb.pkl ###
Top 10 dims: [ 282  537  354  816  309  131  450  339 1008  639]
Estimated anisotropy: 0.9407020807266235
282 0.01601847
537 0.016012745
354 0.0105300415
816 0.009666093
309 0.0077765035
131 0.0076370384
450 0.006957253
339 0.006576104
1008 0.006571175
639 0.0065308856
mawi_emb.pkl - None


100%|██████████| 39/39 [00:00<00:00, 8356.47it/s]


### ../data/netfound/perf_emb.pkl ###
Top 10 dims: [599 981 415 519 462  22 331 325 395 878]
Estimated anisotropy: 0.7089725732803345
599 0.010789762
981 0.010519536
415 0.007023431
519 0.0069862986
462 0.006646792
22 0.0063646343
331 0.0059830863
325 0.005647886
395 0.0053256126
878 0.005261731
perf_emb.pkl - None


100%|██████████| 471/471 [00:00<00:00, 10510.70it/s]

### ../data/netfound/synth_emb.pkl ###
Top 10 dims: [1007  429  636  599  519  878  123  768  415  480]
Estimated anisotropy: 0.9443901777267456
1007 0.012096071
429 0.009647986
636 0.00963402
599 0.0086316345
519 0.0077462075
878 0.0070843208
123 0.006704348
768 0.0066913865
415 0.00603047
480 0.005978826
synth_emb.pkl - None





### netMamba

In [6]:
directory = '../data/netmamba/'
for filename in os.listdir(directory):
    filepath = os.path.join(directory, filename)
    result = measure_anisotropy(filepath)
    print(f"{filename} - {result}")

100%|██████████| 1351679/1351679 [01:11<00:00, 18881.51it/s]


### ../data/netmamba/netmamba_caida_emb.pkl ###
Top 10 dims: [ 94 165 175 177 254 242 222  84 225 207]
Estimated anisotropy: 0.9928917288780212
94 0.026014712
165 0.024533119
175 0.02285657
177 0.021446897
254 0.021139132
242 0.020985968
222 0.019559007
84 0.019452285
225 0.019061534
207 0.018741526
netmamba_caida_emb.pkl - None


100%|██████████| 1578553/1578553 [01:25<00:00, 18523.25it/s]


### ../data/netmamba/netmamba_cicapt_emb.pkl ###
Top 10 dims: [ 94  37 222 165 175 177  84 225 242 254]
Estimated anisotropy: 0.9827951192855835
94 0.025563367
37 0.022351263
222 0.02173882
165 0.021533655
175 0.021470066
177 0.02115106
84 0.020247802
225 0.02001214
242 0.019052235
254 0.018873153
netmamba_cicapt_emb.pkl - None


100%|██████████| 558883/558883 [00:30<00:00, 18569.59it/s]


### ../data/netmamba/netmamba_cicids_emb.pkl ###
Top 10 dims: [ 94 165 175 177 254 222  37 242  84 225]
Estimated anisotropy: 0.9207777976989746
94 0.025498515
165 0.022008564
175 0.020857379
177 0.020856973
254 0.02031274
222 0.018931473
37 0.018813564
242 0.018800039
84 0.018328073
225 0.018085778
netmamba_cicids_emb.pkl - None


100%|██████████| 44646/44646 [00:02<00:00, 18449.36it/s]


### ../data/netmamba/netmamba_cross_emb.pkl ###
Top 10 dims: [ 94 175 177 165 254 225 222  37  84 242]
Estimated anisotropy: 0.9251422882080078
94 0.023438862
175 0.021288972
177 0.021196235
165 0.021158399
254 0.02069964
225 0.020363314
222 0.020335509
37 0.020237874
84 0.019994263
242 0.018872496
netmamba_cross_emb.pkl - None


100%|██████████| 999423/999423 [00:53<00:00, 18543.36it/s]


### ../data/netmamba/netmamba_mawi_emb.pkl ###
Top 10 dims: [165  94 175 242 254 177 225  84 222 207]
Estimated anisotropy: 0.9977025985717773
165 0.025580527
94 0.025553748
175 0.023789516
242 0.021901743
254 0.021429295
177 0.02139116
225 0.01939068
84 0.019385757
222 0.018896874
207 0.018696602
netmamba_mawi_emb.pkl - None


100%|██████████| 39/39 [00:00<00:00, 15519.72it/s]


### ../data/netmamba/netmamba_perf_emb.pkl ###
Top 10 dims: [ 60 254 222 218  37 225 165  47 169 177]
Estimated anisotropy: 0.7180217504501343
60 0.02292091
254 0.019717531
222 0.01814854
218 0.016579213
37 0.016463697
225 0.016382962
165 0.016339542
47 0.016228337
169 0.015916266
177 0.012348879
netmamba_perf_emb.pkl - None


100%|██████████| 471/471 [00:00<00:00, 16809.05it/s]

### ../data/netmamba/netmamba_synth_emb.pkl ###
Top 10 dims: [165 225 254  37  60  94 177 222  84 175]
Estimated anisotropy: 0.954470157623291
165 0.025490442
225 0.024499414
254 0.023187943
37 0.022753246
60 0.021905668
94 0.021697994
177 0.021658178
222 0.021163434
84 0.020285957
175 0.02023076
netmamba_synth_emb.pkl - None



