In [1]:
from neo4j import GraphDatabase
import pandas as pd

# Init the connection to the database
uri = "bolt://localhost:7687"
driver = GraphDatabase.driver(uri, auth=("neo4j", "neuroinformatics_orc_id"), encrypted=False)
pd.set_option('display.max_columns', None)

def cyperQueryToDataFrame(query):
  with driver.session() as session:
    result = session.run(query)
    return pd.DataFrame(result.data(), columns=result.keys())



In [5]:
resources = cyperQueryToDataFrame("MATCH (n:Resource) RETURN n.id, n.main_title, n.__fastrp_embedding, n.__fastrp_resources_and_keyword ") # main title può essere la descrizione in caso non c'è il titolo (prendendo solo quelli con orc id siamo a circa 34k di risorse (~15%))

In [7]:
!pip install -U sentence-transformers



In [13]:
from sentence_transformers import SentenceTransformer, util
model = SentenceTransformer('stsb-roberta-base') # roberta base good and not too large

# Two lists of sentences
sentences1 = ['The cat sits outside',
             'A man is playing guitar',
             'The new movie is awesome']

sentences2 = ['The dog plays in the garden',
              'A woman watches TV',
              'The new movie is so great']

#Compute embedding for both lists
embeddings1 = model.encode(sentences1, convert_to_tensor=True)
embeddings2 = model.encode(sentences2, convert_to_tensor=True)

#Compute cosine-similarits
cosine_scores = util.pytorch_cos_sim(embeddings1, embeddings2)

#Output the pairs with their score
for i in range(len(sentences1)):
    print("{} \t\t {} \t\t Score: {:.4f}".format(sentences1[i], sentences2[i], cosine_scores[i][i]))

The cat sits outside 		 The dog plays in the garden 		 Score: -0.0686
A man is playing guitar 		 A woman watches TV 		 Score: 0.0891
The new movie is awesome 		 The new movie is so great 		 Score: 0.9907


In [14]:
sentences = resources['n.main_title']

In [15]:
sentences

0        Theoretical Limitations on Functional Imaging ...
1        Head model based on the shape of the subject’s...
2        How Pleasant and Unpleasant Stimuli Combine in...
3        Genetic Contributions to Human Gyrification: S...
4        Prediction error and repetition suppression ha...
                               ...                        
34307    Functionally and Spatially Distinct Modes of m...
34308    Air Pollution, Stress, and Allostatic Load: Li...
34309    The enigma of multiple sclerosis: inflammation...
34310    Functional Networks in Disorders of Consciousness
34311    Optical brain imaging in vivo: techniques and ...
Name: n.main_title, Length: 34312, dtype: object

In [16]:
embeddings = model.encode(sentences, convert_to_tensor=True)

In [17]:
json_to_import = []
for i in range(len(sentences)):
    json_to_import.append({ "id": resources['n.id'][i], "embeddings": embeddings[i].tolist(),  })

In [18]:
pd.DataFrame(json_to_import).to_csv('embeddings_resources.csv')

In [19]:
from neo4j import GraphDatabase

# Init the connection to the database
uri = "bolt://localhost:7687"
driver = GraphDatabase.driver(uri, auth=("neo4j", "neuroinformatics_orc_id"), encrypted=False)
session = driver.session()
for i in range(len(sentences)):
    session.run("MATCH (n:Resource) WHERE n.id = $id SET n.r_embeddings = $embeddings RETURN n", { "id": resources['n.id'][i], "embeddings": embeddings[i].tolist() })

In [6]:
resources = cyperQueryToDataFrame("MATCH (n:Resource) RETURN n.id, n.main_title, n.__fastrp_embedding, n.__fastrp_resources_and_keyword, n.r_embeddings ") # 

In [1]:
import numpy as np

from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import LocalOutlierFactor, NearestNeighbors
import pandas as pd
from sklearn.manifold import TSNE



In [11]:
X = np.array(resources["n.r_embeddings"].to_list())
unq, count = np.unique(X, axis=0, return_counts=True)
count.max() # non molti con lo stesso embeddings

5

In [17]:
l2_norm = np.sum(np.abs(X)**2,axis=-1)**(1./2)
l2_norm[l2_norm == 0] = 1
l2_norm.max()
X_norm = X / l2_norm[:, np.newaxis]
l2_norm = np.sum(np.abs(X_norm)**2,axis=-1)**(1./2)
l2_norm.max()
neigh = NearestNeighbors(n_neighbors=2, metric='euclidean')
neigh.fit(X_norm)
distances = neigh.kneighbors(X_norm, 2, return_distance=True)
distances_second = distances[0][:, 1]
indices_second = distances[1][:, 1]
nearest_neighbour_df = resources[['n.id', 'n.main_title']].iloc[indices_second]
resources['nearest_neighbour_re'] = nearest_neighbour_df['n.id'].to_list()
resources['nearest_neighbour_re_main_title'] = nearest_neighbour_df['n.main_title'].to_list()
resources['distance_to_nearest_neighbour_re'] = distances_second

In [18]:
resources.sort_values(by=['distance_to_nearest_neighbour_re'], ascending=True)

Unnamed: 0,n.id,n.main_title,n.__fastrp_embedding,n.__fastrp_resources_and_keyword,n.r_embeddings,nearest_neighbour_re,distance_to_nearest_neighbour_re,nearest_neighbour_re_main_title
11783,50|dedup_wf_001::0b3de9422ad6df8ca4f2588d1f2a361a,Clinical Policy: Neuroimaging and Decisionmaki...,"[0.0, -0.7715167999267578, -0.7715167999267578...","[-0.1034482792019844, -0.1034482792019844, 0.0...","[-0.4298747777938843, 0.17372481524944305, 0.5...",50|dedup_wf_001::0b3de9422ad6df8ca4f2588d1f2a361a,0.000000,Clinical Policy: Neuroimaging and Decisionmaki...
11134,50|doiboost____::25f6adf3f27cc267478bc175477b764e,The NeuroDevNet Neuroinformatics Core,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, -0.053916387259960175, 0.053916387259960...","[0.3671809434890747, -0.218675434589386, -0.20...",50|doiboost____::25f6adf3f27cc267478bc175477b764e,0.000000,The NeuroDevNet Neuroinformatics Core
29328,50|doiboost____::55e064077df362827e8f465ad7d7be82,Neuroimaging,"[0.694023072719574, -0.04507644474506378, 0.07...","[-0.050572171807289124, -0.050572171807289124,...","[0.16414006054401398, -0.5832186937332153, 0.2...",50|doiboost____::55e064077df362827e8f465ad7d7be82,0.000000,Neuroimaging
24449,50|dedup_wf_001::28e9cd9610d2c7d5846a795c40bc6993,White Matter Microstructure in Adolescents and...,"[-2.032072067260742, 0.45756861567497253, 1.68...","[0.09928550571203232, -0.1323806792497635, 0.0...","[-0.20027559995651245, 0.09026981890201569, -4...",50|dedup_wf_001::71ad43a16a421609505b013a4736a0e8,0.000000,White Matter Microstructure in Adolescents and...
13204,50|dedup_wf_001::981024fa77ebaeb33c4afc179e7d8c58,a Systematic Review,"[-1.0014089345932007, 0.27566030621528625, 0.4...","[0.03227486088871956, 0.0, 0.0, 0.129099443554...","[0.4015590250492096, -1.0903278589248657, 0.51...",50|dedup_wf_001::af75a83a9eabcbb5cafe1e203192cde0,0.000000,A systematic review
...,...,...,...,...,...,...,...,...
33806,50|dedup_wf_001::78c963a7201f265a84fddadb56b7dbe6,The GAS trial,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.12237256020307541, -0.12237256020307541, 0....","[-1.1110875606536865, -0.12367327511310577, -0...",50|dedup_wf_001::ff32d0133659ed59da33d230847d0e89,1.071714,Pulmonary hyperinflation due to gas trapping a...
3054,50|dedup_wf_001::756653113595672bb9bf18bd79cbe38c,Just Say No to Testing,"[0.8333333730697632, 0.0, -0.8219949007034302,...","[0.09938079863786697, -0.04969039931893349, 0....","[0.0841265395283699, 0.07913415879011154, 0.02...",50|dedup_wf_001::45aecc96141403ea43e08990b9e6e644,1.077585,Null results in TMS: From absence of evidence ...
4216,50|dedup_wf_001::2026282d02dd1dddfe3be0b8f59830b0,Holding a stick at both ends: on faces and exp...,"[-0.3441852033138275, -0.3037493824958801, 0.7...","[-0.18823674321174622, -0.0627455860376358, 0....","[1.222137689590454, -0.6082175374031067, 0.695...",50|dedup_wf_001::4f444a035bc81506da0151ffa9b54745,1.081691,The Role of Experience in the Face-Selective R...
14150,50|dedup_wf_001::cfad20e6b7a4ddc8bb3754f5e8f438ae,Is that a bathtub in your kitchen?,"[-0.8089836239814758, -0.806226372718811, 0.95...","[-0.07838618755340576, 0.15677236020565033, 0....","[-0.4029924273490906, -1.069010615348816, -0.4...",50|doiboost____::d9fece1e68477ac28fe124ca863e8107,1.103685,What constitutes the M1 segment of the middle ...


In [19]:
resources.sort_values(by=['distance_to_nearest_neighbour_re'], ascending=True).to_csv('resource_embeddings_similarity.csv')

In [20]:
X = np.array(resources["n.__fastrp_embedding"].to_list())
unq, count = np.unique(X, axis=0, return_counts=True)
print(count.max()) # davvero tanti con lo stesso embeddings
l2_norm = np.sum(np.abs(X)**2,axis=-1)**(1./2)
l2_norm[l2_norm == 0] = 1
l2_norm.max()
X_norm = X / l2_norm[:, np.newaxis]
l2_norm = np.sum(np.abs(X_norm)**2,axis=-1)**(1./2)
l2_norm.max()
neigh = NearestNeighbors(n_neighbors=2, metric='euclidean')
neigh.fit(X_norm)
distances = neigh.kneighbors(X_norm, 2, return_distance=True)
distances_second = distances[0][:, 1]
indices_second = distances[1][:, 1]
nearest_neighbour_df = resources[['n.id', 'n.main_title']].iloc[indices_second]
resources['nearest_neighbour_rp_a'] = nearest_neighbour_df['n.id'].to_list()
resources['nearest_neighbour_rp_a_main_title'] = nearest_neighbour_df['n.main_title'].to_list()
resources['distance_to_nearest_neighbour_rp_a'] = distances_second

4240


In [22]:
resources.sort_values(by=['distance_to_nearest_neighbour_rp_a'], ascending=True) # i primi titoli sono comunque simili

Unnamed: 0,n.id,n.main_title,n.__fastrp_embedding,n.__fastrp_resources_and_keyword,n.r_embeddings,nearest_neighbour_re,distance_to_nearest_neighbour_re,nearest_neighbour_re_main_title,nearest_neighbour_rp_a,nearest_neighbour_rp_a_main_title,distance_to_nearest_neighbour_rp_a
17155,50|dedup_wf_001::fb289c8a3cc400f779332b97946a5608,The Eye as a Biomarker for Alzheimer’s Disease,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.06700251996517181, -6.521621465083172e-09, ...","[-0.46835947036743164, -1.1381034851074219, 0....",50|dedup_wf_001::8c24023a132b0d6122b94d4b53eb98a5,4.528645e-01,Ocular biomarkers of Alzheimer’s disease,50|dedup_wf_001::1e9fe1b7eea6b50785e6ec61b91fa29f,White-matter structure in the right hemisphere...,0.000000
11045,50|dedup_wf_001::62e6e092431be32638ba562b6ac1e04e,The Flavonoid Isoquercitrin Precludes Initiati...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.10585122555494308, -0.026462800800800323, 0...","[0.24246859550476074, -0.4322826564311981, 0.5...",50|dedup_wf_001::db769a33d3858cd2ccc6f7349ed189ce,8.052525e-01,Infection-induced resistance to experimental c...,50|dedup_wf_001::1e9fe1b7eea6b50785e6ec61b91fa29f,White-matter structure in the right hemisphere...,0.000000
11042,50|dedup_wf_001::5e96c8bc0f0320047766d591e995b6bd,GABA Modulating Bacteria of the Human Gut Micr...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.05248638242483139, 5.108708567291842e-09, ...","[0.36844417452812195, 0.2876003682613373, -0.2...",50|dedup_wf_001::9c55711230da264e622c9ae473e885c8,7.901516e-01,Cognitive impairment by antibiotic-induced gut...,50|dedup_wf_001::1e9fe1b7eea6b50785e6ec61b91fa29f,White-matter structure in the right hemisphere...,0.000000
29791,50|dedup_wf_001::d30fe0360a8694e19660e4736e03e87d,Identificación y caracterización de un ADNc qu...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.03242722153663635, 0.0648544430732727, 0.06...","[0.2074400633573532, -0.3540390729904175, -0.3...",50|dedup_wf_001::f50a2532e43a41e7f4828fce8c9dad8d,8.542215e-01,Structural and Functional Characterization of ...,50|dedup_wf_001::1e9fe1b7eea6b50785e6ec61b91fa29f,White-matter structure in the right hemisphere...,0.000000
11029,50|dedup_wf_001::4b0e729996535e7c5f568f5ab27cd0b6,Eulerian videography technology improves class...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.13461071252822876, 0.10095804184675217, -0...","[-0.2558005154132843, -0.2689337730407715, 0.1...",50|doiboost____::0c70f4fe3e2b6ab68394bf0438ac0415,7.453155e-01,Deep learning enables sleep staging from photo...,50|dedup_wf_001::1e9fe1b7eea6b50785e6ec61b91fa29f,White-matter structure in the right hemisphere...,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
29286,50|dedup_wf_001::cce4f2758e12bd25dce981fd7058fd7e,"ECoG STUDIES OF VALPROATE, CARBAMAZEPINE AND H...","[0.09599552303552628, 0.20644429326057434, -0....","[-0.16874182224273682, 0.03374836593866348, 0....","[-0.317645400762558, -0.15578943490982056, 0.0...",50|dedup_wf_001::12d0e53af5647ad0590dcda0d20e42c4,6.808906e-01,Neuroprotective effect of pyruvate and oxaloac...,50|dedup_wf_001::63f499d252c997dfc7abe60c025f96e4,Altered functional and structural brain networ...,0.752503
11171,50|doiboost____::fb44aeb6d15c0c0e2a0e98940729b371,Neuroactivation imaging using a monogenic fram...,"[-0.049654994159936905, 0.5624501705169678, -0...","[-0.14586499333381653, -0.18233123421669006, 0...","[0.1833518147468567, -0.1455278992652893, 0.11...",50|dedup_wf_001::67c8a4d9d91d5f3c79c6159bfe4bc14b,7.340466e-01,Monoaminergic control of brain states and sens...,50|doiboost____::81d5e94e714a66cfdd679dcb06215e04,Electrocorticography Evidence of Tactile Respo...,0.773966
12121,50|dedup_wf_001::90161836ce116af83341770f4c70d6e4,Toward discovery science of human brain function,"[0.5331141352653503, -0.8062261343002319, 1.11...","[0.03874921426177025, -0.0774984285235405, 0.1...","[0.2769312262535095, 0.14422354102134705, 0.32...",50|dedup_wf_001::488ec5aeeec65b48ef0bf4be0a6b4544,7.279696e-01,Cognitive neuroscience 2.0: building a cumulat...,50|dedup_wf_001::af0c1bf885ee42361d792f3ca7f0ba66,MVPA to enhance the study of rare cognitive ev...,0.783141
18606,50|doiboost____::e81a780969f06251017b6ff200173006,Image processing and analysis methods for the ...,"[1.3218730688095093, 0.2391142100095749, 0.713...","[-0.0789337009191513, -0.11840055882930756, -0...","[-0.1874198168516159, -0.8372016549110413, 0.0...",50|dedup_wf_001::1dc38e723f4731107c70ec6aa4ee598f,2.107342e-08,Image processing and analysis methods for the ...,50|dedup_wf_001::d928f3e2cc0eef119321ec977e8d6593,Adolescents' Neural Processing of Risky Decisi...,0.785923


In [23]:
resources.sort_values(by=['distance_to_nearest_neighbour_rp_a'], ascending=True).to_csv('resource_embeddings_similarity_rp.csv')

In [24]:
X = np.array(resources["n.__fastrp_resources_and_keyword"].to_list())
unq, count = np.unique(X, axis=0, return_counts=True)
print(count.max()) # davvero tanti con lo stesso embeddings
l2_norm = np.sum(np.abs(X)**2,axis=-1)**(1./2)
l2_norm[l2_norm == 0] = 1
l2_norm.max()
X_norm = X / l2_norm[:, np.newaxis]
l2_norm = np.sum(np.abs(X_norm)**2,axis=-1)**(1./2)
l2_norm.max()
neigh = NearestNeighbors(n_neighbors=2, metric='euclidean')
neigh.fit(X_norm)
distances = neigh.kneighbors(X_norm, 2, return_distance=True)
distances_second = distances[0][:, 1]
indices_second = distances[1][:, 1]
nearest_neighbour_df = resources[['n.id', 'n.main_title']].iloc[indices_second]
resources['nearest_neighbour_rp_keyword'] = nearest_neighbour_df['n.id'].to_list()
resources['nearest_neighbour_rp_keyword_main_title'] = nearest_neighbour_df['n.main_title'].to_list()
resources['distance_to_nearest_neighbour_rp_keyword'] = distances_second

12


In [25]:
resources.sort_values(by=['distance_to_nearest_neighbour_rp_keyword'], ascending=True) # i primi titoli sono comunque simili

Unnamed: 0,n.id,n.main_title,n.__fastrp_embedding,n.__fastrp_resources_and_keyword,n.r_embeddings,nearest_neighbour_re,distance_to_nearest_neighbour_re,nearest_neighbour_re_main_title,nearest_neighbour_rp_a,nearest_neighbour_rp_a_main_title,distance_to_nearest_neighbour_rp_a,nearest_neighbour_rp_keyword,nearest_neighbour_rp_keyword_main_title,distance_to_nearest_neighbour_rp_keyword
1655,50|dedup_wf_001::373f6f113e02d1d3749e4cfa174f7c6a,11. Machine Consciousness,"[0.46713292598724365, 1.3510477542877197, 0.93...","[0.0, 0.10127393156290054, -0.0506369657814502...","[0.15762940049171448, -0.04268881306052208, -0...",50|dedup_wf_001::304a8e4179dbb520a571331c37d2b936,0.914100,Characterization of A11 Neurons Projecting to ...,50|dedup_wf_001::1d6e29c516c426097505f2594da0ce16,10. Modification and Enhancement of Consciousness,0.034696,50|dedup_wf_001::2f5230708a1f33604175514a3f28f023,5. From Correlates to Theories of Consciousness,0.0
33922,50|doiboost____::afaba98ddd8df05df622113e3460248d,Image segmentation based on Frequency domain o...,"[-0.651212215423584, -2.0908501148223877, -0.6...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[1.133258581161499, -0.3103930354118347, -0.22...",50|dedup_wf_001::558c4a58a2a47524db9569eef5994db2,0.786584,Comparison and Evaluation of Retrospective Int...,50|doiboost____::e29443e0cf7d7ad41dead03b741c16e0,Classification of Cells Based on Scale-space M...,0.053737,50|doiboost____::58e6df3250ada7006fdcfd5687f51505,Accurate detection of dysmorphic nuclei in neu...,0.0
7042,50|doiboost____::29f0237f080f89bd290566a880c1377c,Applications of multiscale segmentation and cl...,"[-0.7148295044898987, -2.153642416000366, -0.7...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.8204248547554016, -0.9004565477371216, 0.24...",50|doiboost____::a62f3885332127b2dddcdd2e6f02df76,0.803135,Back-Projection Cortical Potential Imaging Usi...,50|doiboost____::3e20075c2d870a91e8a376d711c3d95e,Scale-space based segmentation of cells in fun...,0.052096,50|doiboost____::58e6df3250ada7006fdcfd5687f51505,Accurate detection of dysmorphic nuclei in neu...,0.0
6551,50|dedup_wf_001::9ee88460b95f7faf614a1dbed0ca3ec7,7. Information Theories of Consciousness,"[0.46717774868011475, 1.3511306047439575, 0.97...","[0.0, 0.10127393156290054, -0.0506369657814502...","[-0.12249121069908142, -0.37973785400390625, 0...",50|dedup_wf_001::4accfaa9095fc6c8bbcd70653b84b7ca,0.773848,Prestimulus Oscillatory Phase at 7 Hz Gates Co...,50|dedup_wf_001::1d6e29c516c426097505f2594da0ce16,10. Modification and Enhancement of Consciousness,0.034438,50|dedup_wf_001::2f5230708a1f33604175514a3f28f023,5. From Correlates to Theories of Consciousness,0.0
22932,50|dedup_wf_001::1ce25fec8783a3bc13c979fb715a80d6,4. The Measurement of Consciousness,"[0.4691685438156128, 1.355076551437378, 0.9819...","[0.0, 0.10127393156290054, -0.0506369657814502...","[0.3090241551399231, 0.2599884271621704, 0.767...",50|doiboost____::01b3c1647a242535fe482968e1e7a1dc,0.857560,Calculations of consciousness,50|dedup_wf_001::9232911fb4ccc1e460a645c95bf7b67d,Human and Machine Consciousness,0.035988,50|dedup_wf_001::2f5230708a1f33604175514a3f28f023,5. From Correlates to Theories of Consciousness,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3693,50|dedup_wf_001::37f7c97d283afe26e6d529ab2ba9f10a,When family looks strange and strangers look n...,"[-0.2911837100982666, 0.11807271838188171, -0....","[-0.15321284532546997, -0.09192771464586258, 0...","[-0.022974306717514992, -0.25471898913383484, ...",50|dedup_wf_001::5a24a85c39210ab754cb2072ded03669,0.889139,Event-related potential (ERP) correlates of fa...,50|dedup_wf_001::ffd01c8514f98f3594e35ceedf94678c,The Thatcher illusion reveals orientation depe...,0.028754,50|doiboost____::29f0237f080f89bd290566a880c1377c,Applications of multiscale segmentation and cl...,1.0
27613,50|dedup_wf_001::e7e4643d7896701958a06ac5f512bc91,"Uniformity, specificity and variability of cor...","[0.9031261801719666, -0.773048460483551, 1.088...","[-0.0855921059846878, 0.042796049267053604, 0....","[0.8312994837760925, -0.5692006349563599, 0.43...",50|doiboost____::f76f8a3408a3ec4cf44258e8f83b9909,0.678657,Modulation of corticomuscular coherence by per...,50|doiboost____::fcf76fe716aac8ce83451de71b24310c,The PUPS-MOSIX Environment: A Homeostatic Envi...,0.025613,50|doiboost____::29f0237f080f89bd290566a880c1377c,Applications of multiscale segmentation and cl...,1.0
393,50|dedup_wf_001::49ebd76ebf4813f4a83b0481ef58ab97,The BBSome Controls Energy Homeostasis by Medi...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.19129681587219238, -0.13664059340953827, -...","[-0.2960711419582367, 0.3843090236186981, 0.08...",50|dedup_wf_001::767b8713548854d534495e030ad0c0d5,0.772233,Automated Protein Localization of Blood Brain ...,50|dedup_wf_001::1e9fe1b7eea6b50785e6ec61b91fa29f,White-matter structure in the right hemisphere...,0.000000,50|doiboost____::29f0237f080f89bd290566a880c1377c,Applications of multiscale segmentation and cl...,1.0
33096,50|dedup_wf_001::b63a8519c331096899c9c1369028c6a4,Scale-Free Brain Functional Networks,"[-0.5553407669067383, -0.00024266322725452483,...","[0.0708436369895935, -0.03542181849479675, 0.1...","[-0.024111540988087654, -0.5054394602775574, -...",50|dedup_wf_001::267e20137420bb8a731a84da7186769f,0.684575,"Scale-free brain activity: past, present, and ...",50|dedup_wf_001::a2aacba033d97ee3b560dad5adcbb55f,Ising-like dynamics in large-scale functional ...,0.027545,50|doiboost____::29f0237f080f89bd290566a880c1377c,Applications of multiscale segmentation and cl...,1.0


In [26]:
resources['combined_distance'] = resources['distance_to_nearest_neighbour_re'] + resources['distance_to_nearest_neighbour_rp_a'] + resources['distance_to_nearest_neighbour_rp_keyword']

In [27]:
resources.sort_values(by=['combined_distance'], ascending=True)

Unnamed: 0,n.id,n.main_title,n.__fastrp_embedding,n.__fastrp_resources_and_keyword,n.r_embeddings,nearest_neighbour_re,distance_to_nearest_neighbour_re,nearest_neighbour_re_main_title,nearest_neighbour_rp_a,nearest_neighbour_rp_a_main_title,distance_to_nearest_neighbour_rp_a,nearest_neighbour_rp_keyword,nearest_neighbour_rp_keyword_main_title,distance_to_nearest_neighbour_rp_keyword,combined_distance
8569,50|doiboost____::dd70a3bccb243d9298cfeca323f8ab16,The role of placental growth factor in regulat...,"[0.8333333730697632, 0.0, 0.8333333730697632, ...","[-0.05564148724079132, -0.05564148724079132, 0...","[0.08699651062488556, -0.1518295854330063, 0.5...",50|doiboost____::81a857f0f402530093c9e7a6d419148f,0.000000e+00,The role of placental growth factor in regulat...,50|doiboost____::81a857f0f402530093c9e7a6d419148f,The role of placental growth factor in regulat...,0.000000e+00,50|doiboost____::81a857f0f402530093c9e7a6d419148f,The role of placental growth factor in regulat...,0.000000e+00,0.000000e+00
9428,50|doiboost____::81a857f0f402530093c9e7a6d419148f,The role of placental growth factor in regulat...,"[0.8333333730697632, 0.0, 0.8333333730697632, ...","[-0.05564148724079132, -0.05564148724079132, 0...","[0.08699651062488556, -0.1518295854330063, 0.5...",50|doiboost____::81a857f0f402530093c9e7a6d419148f,0.000000e+00,The role of placental growth factor in regulat...,50|doiboost____::81a857f0f402530093c9e7a6d419148f,The role of placental growth factor in regulat...,0.000000e+00,50|doiboost____::81a857f0f402530093c9e7a6d419148f,The role of placental growth factor in regulat...,0.000000e+00,0.000000e+00
17010,50|doiboost____::a0be171fd857a4f9bb6d476a953768d6,Functional Connectivity of Resting State as a ...,"[0.0, 0.0, 0.0, -0.8111070990562439, -0.811107...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.25695669651031494, -0.0634072944521904, 0.4...",50|doiboost____::0ef4c17bc43087471b973c2d6ad3457c,2.980232e-08,Functional Connectivity of Resting State as a ...,50|doiboost____::0ef4c17bc43087471b973c2d6ad3457c,Functional Connectivity of Resting State as a ...,2.107342e-08,50|doiboost____::58e6df3250ada7006fdcfd5687f51505,Accurate detection of dysmorphic nuclei in neu...,0.000000e+00,5.087575e-08
17350,50|doiboost____::2e9b05720375a4fb67bb490f9f399e8a,The Neuroscience Information Framework (NIF): ...,"[-1.4971646070480347, 0.1738317310810089, -0.5...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.31248366832733154, -0.3532136082649231, -0....",50|doiboost____::ceefa59cafb16c4a0ac40ec2160c8585,0.000000e+00,The Neuroscience Information Framework (NIF): ...,50|doiboost____::ceefa59cafb16c4a0ac40ec2160c8585,The Neuroscience Information Framework (NIF): ...,2.240835e-02,50|doiboost____::58e6df3250ada7006fdcfd5687f51505,Accurate detection of dysmorphic nuclei in neu...,0.000000e+00,2.240835e-02
12551,50|doiboost____::1fd4a6cdf8d1f93f91669986da6f00c1,Neonatal Neuroimaging,"[-0.5997130870819092, 0.8413398861885071, -0.1...","[0.07647190988063812, -0.15294381976127625, 0....","[0.568478524684906, -0.07862906157970428, 0.72...",50|doiboost____::1fd4a6cdf8d1f93f91669986da6f00c1,0.000000e+00,Neonatal Neuroimaging,50|doiboost____::d5d77ff2b14ec97668176717f465a1b5,CT versus MR in neonatal brain imaging at term,4.261900e-02,50|dedup_wf_001::d55e654f5a6d41a99f88c712c696f62a,Neonatal Neuroimaging,2.980232e-08,4.261903e-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12121,50|dedup_wf_001::90161836ce116af83341770f4c70d6e4,Toward discovery science of human brain function,"[0.5331141352653503, -0.8062261343002319, 1.11...","[0.03874921426177025, -0.0774984285235405, 0.1...","[0.2769312262535095, 0.14422354102134705, 0.32...",50|dedup_wf_001::488ec5aeeec65b48ef0bf4be0a6b4544,7.279696e-01,Cognitive neuroscience 2.0: building a cumulat...,50|dedup_wf_001::af0c1bf885ee42361d792f3ca7f0ba66,MVPA to enhance the study of rare cognitive ev...,7.831409e-01,50|dedup_wf_001::fd963875b2df1de23b2f0dfca36bdfdb,Investigating the electrophysiological basis o...,8.984456e-01,2.409556e+00
25831,50|dedup_wf_001::140f9a8e2eeac3877cae567af21c1b02,"Children can implicitly, but not voluntarily, ...","[-0.07994960248470306, 0.6832262277603149, -1....","[-0.12451456487178802, -0.09338591992855072, 0...","[-0.7888246774673462, -0.5435617566108704, -0....",50|dedup_wf_001::90465d301ab97c71cde4a2467c3b3bed,8.599042e-01,Connectivity supporting attention in children ...,50|dedup_wf_001::eff24f276804841331d50a3c0ae15232,The Supplementary Motor Area in motor and perc...,5.580953e-01,50|doiboost____::29f0237f080f89bd290566a880c1377c,Applications of multiscale segmentation and cl...,1.000000e+00,2.418000e+00
4401,50|dedup_wf_001::1bf03096711a6d7ee5779aee970006cb,The dopaminergic reward system underpins gende...,"[0.7894926071166992, -0.32077810168266296, 1.4...","[0.035691529512405396, -0.035691529512405396, ...","[0.0968027263879776, 0.2506863474845886, 0.632...",50|doiboost____::5b6857fa5395be0c4c4694e1ccf489bc,8.444976e-01,Frequency-spatial organization of brain electr...,50|dedup_wf_001::6680e2159d67b20ff898cecd723c02d5,Clinical and molecular phenotype of Aicardi-Go...,5.880277e-01,50|doiboost____::29f0237f080f89bd290566a880c1377c,Applications of multiscale segmentation and cl...,1.000000e+00,2.432525e+00
26510,50|dedup_wf_001::066ff09e16f41d80992dc9ce8ee1acb4,Personal Reflections on James S. Hyde,"[1.3078356981277466, 0.795092761516571, 0.7788...","[0.0, 0.12194215506315231, 0.06097107753157616...","[1.1492505073547363, 0.03476341813802719, 0.22...",50|dedup_wf_001::5216ff08653220d6c39c33a13f546f4b,9.638719e-01,Neural Correlates of Reflection on Present and...,50|dedup_wf_001::804fd6afcc8f6b6a01b708fe1e042ed7,Mapping cortical representations of the rodent...,4.870534e-01,50|doiboost____::29f0237f080f89bd290566a880c1377c,Applications of multiscale segmentation and cl...,1.000000e+00,2.450925e+00


In [28]:
resources.sort_values(by=['combined_distance'], ascending=True).to_csv('resource_similarity_combined_distance.csv') # sembrano interessanti queste combined distances

# TSNE

In [6]:
resources = cyperQueryToDataFrame("MATCH (n:Resource) RETURN n.id, n.main_title, n.__fastrp_embedding, n.__fastrp_resources_and_keyword, n.r_embeddings ") # 

NameError: name 'cyperQueryToDataFrame' is not defined

In [45]:
X = np.array(resources["n.__fastrp_embedding"].to_list()).astype(float)
l2_norm = np.sum(np.abs(X)**2,axis=-1)**(1./2)
l2_norm[l2_norm == 0] = 1
X = X / l2_norm[:, np.newaxis]
X_embedded = TSNE(n_components=2).fit_transform(X)

In [13]:
with open('tsne_fastrp_embedding_by_author_resource.npy', 'wb') as f:
    np.save(f, X_embedded)

In [2]:
with open('tsne_fastrp_embedding_by_author_resource.npy', 'rb') as f:
    X_imp = np.load(f)

In [3]:
import matplotlib.pyplot as plt
%matplotlib widget
plt.scatter(X_imp[:, 0], X_imp[:, 1], s=0.5)
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [39]:
a = np.logical_and(X_imp[:, 0] > 1.25, X_imp[:, 0] < 1.5)

In [43]:
b = np.logical_and(X_imp[:, 1] > 2, X_imp[:, 1] < 2.2)

In [46]:
X[np.logical_and(a, b)]

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [48]:
resources[np.logical_and(a, b)]

Unnamed: 0,n.id,n.main_title,n.__fastrp_embedding,n.__fastrp_resources_and_keyword,n.r_embeddings
18,50|dedup_wf_001::1e9fe1b7eea6b50785e6ec61b91fa29f,White-matter structure in the right hemisphere...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.029148206114768982, -0.029148206114768982, ...","[-0.5000877380371094, -0.11486520618200302, 0...."
24,50|dedup_wf_001::2d42fb543c8b4c0ef6229a0e64c7839f,Neuropathogenic human coronaviruses: A review,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.05763904005289078, -0.11527808010578156, 0....","[-0.06657435745000839, -0.07711543887853622, 0..."
43,50|dedup_wf_001::57bbb12b9fc101b9b5bb894183de8489,The imitation game: Effects of social cues on ...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.06526753306388855, -0.09790130704641342, 0....","[-0.009520758874714375, -0.18153658509254456, ..."
59,50|dedup_wf_001::6b610acd1a3918097f845a792bc31798,In Vivo MRI of Functionalized Iron Oxide Nanop...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.09415023028850555, 0.07061267644166946, -0...","[0.13570387661457062, -0.06682687997817993, 0...."
60,50|dedup_wf_001::6d0efcd327660ca150fcd00c7eccfe78,Isometric Exercise Training for Managing Vascu...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.06482037156820297, -0.06482037901878357, 0...","[-0.8796325325965881, 0.4016982913017273, 0.26..."
...,...,...,...,...,...
34251,50|dedup_wf_001::eb17f393e6c1d6de6c37d5ed8ee9371b,Human 3D Cellular Model of Hypoxic Brain Injur...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.10540925711393356, 0.02635231986641884, -0...","[-0.8065544962882996, -0.044503044337034225, 0..."
34258,50|dedup_wf_001::f7a4a8cb0a85789dc68887e476bb962c,The Brazilian Zika virus strain causes birth d...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.02579411305487156, 0.0, -0.128970533609390...","[0.34966498613357544, -0.24529807269573212, -0..."
34300,50|doiboost____::e34db052cc539fc6c1b33e4329ee1c8a,Microelectrode array electrical impedance tomo...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.1671023815870285, 0.041775595396757126, 0.0...","[0.21709617972373962, -0.11356403678655624, -0..."
34301,50|doiboost____::e7bc63098f8b6e6c9f6d09c6f697f526,Aberrant Dynamical Connectivity in Autism Spec...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.1393282264471054, -0.15116360783576965, -0..."


In [4]:
a = np.logical_and(X_imp[:, 0] > -1.6, X_imp[:, 0] < -1.5)
b = np.logical_and(X_imp[:, 1] > 3.4, X_imp[:, 1] < 3.5)

In [5]:
X[np.logical_and(a, b)]

NameError: name 'X' is not defined

In [16]:
plt.close('all')
X = np.array(resources["n.__fastrp_resources_and_keyword"].to_list()).astype(float)
l2_norm = np.sum(np.abs(X)**2,axis=-1)**(1./2)
l2_norm[l2_norm == 0] = 1
X = X / l2_norm[:, np.newaxis]
X_embedded = TSNE(n_components=2).fit_transform(X)
with open('tsne_fastrp_embedding_by_keyword_resource.npy', 'wb') as f:
    np.save(f, X_embedded)

In [23]:
plt.close('all')
with open('tsne_fastrp_embedding_by_keyword_resource.npy', 'rb') as f:
    X_imp = np.load(f)
plt.scatter(X_imp[:, 0], X_imp[:, 1], s=0.5)
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [18]:
plt.close('all')
X = np.array(resources["n.r_embeddings"].to_list()).astype(float)
l2_norm = np.sum(np.abs(X)**2,axis=-1)**(1./2)
l2_norm[l2_norm == 0] = 1
X = X / l2_norm[:, np.newaxis]
X_embedded = TSNE(n_components=2).fit_transform(X)
with open('tsne_bert_embedding_resource.npy', 'wb') as f:
    np.save(f, X_embedded)

In [24]:
plt.close('all')
with open('tsne_bert_embedding_resource.npy', 'wb') as f:
    np.save(f, X_embedded)
plt.scatter(X_embedded[:, 0], X_embedded[:, 1], s=0.5)
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …