In [1]:
from bertopic import BERTopic
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial.distance import jensenshannon
import pandas as pd
import numpy as np
import json



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
with open('dim_sample.json') as f:
    all_data = json.load(f)

def get_unique_dis(data):
    disciplines = set()
    for pro_id in data.keys():
        disciplines.update(data[pro_id]['disciplines'])
    return disciplines

In [3]:
split = 180
train_data = {k: all_data[k] for k in list(all_data.keys())[:split]}
test_data = {k: all_data[k] for k in list(all_data.keys())[split:]}

train_project_data = {}
train_pub_data = []
for pro_id in train_data.keys():
    train_project_data[pro_id] = (train_data[pro_id]['title'], train_data[pro_id]['disciplines'])
    for pub_id in train_data[pro_id]['publications'].keys():
        train_pub_data.append((pub_id, train_data[pro_id]['publications'][pub_id]['title'], train_data[pro_id]['publications'][pub_id]['disciplines']))

test_project_data = {}
test_pub_data = []
for pro_id in test_data.keys():
    test_project_data[pro_id] = (test_data[pro_id]['title'], test_data[pro_id]['disciplines'])
    for pub_id in test_data[pro_id]['publications'].keys():
        test_pub_data.append((pub_id, test_data[pro_id]['publications'][pub_id]['title'], test_data[pro_id]['publications'][pub_id]['disciplines']))

print(len(train_project_data), len(train_pub_data))
print(len(test_project_data), len(test_pub_data))
print(len(get_unique_dis(train_data)), len(get_unique_dis({pub[0]: {'disciplines': pub[2]} for pub in train_pub_data})))

180 506
45 129
170 170


In [4]:
train_docs = [project[0] for project in train_project_data.values()] + [pub[1] for pub in train_pub_data]
test_docs = [project[0] for project in test_project_data.values()] + [pub[1] for pub in test_pub_data]

In [5]:
topic_model = BERTopic(language="english", calculate_probabilities=True, verbose=True)
topics, probs = topic_model.fit_transform(train_docs)

2024-06-16 19:28:51,630 - BERTopic - Embedding - Transforming documents to embeddings.
Batches: 100%|██████████| 22/22 [00:03<00:00,  6.68it/s]
2024-06-16 19:28:57,327 - BERTopic - Embedding - Completed ✓
2024-06-16 19:28:57,327 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm




2024-06-16 19:29:02,407 - BERTopic - Dimensionality - Completed ✓
2024-06-16 19:29:02,408 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-06-16 19:29:02,446 - BERTopic - Cluster - Completed ✓
2024-06-16 19:29:02,449 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-06-16 19:29:02,475 - BERTopic - Representation - Completed ✓


In [7]:
project_abstracts = [project[0] for project in train_project_data.values()] + [project[0] for project in test_project_data.values()]
pub_abstracts = [pub[1] for pub in train_pub_data] + [pub[1] for pub in test_pub_data]

project_matrix = topic_model.approximate_distribution(project_abstracts)[0]

pub_matrix = topic_model.approximate_distribution(pub_abstracts)[0]

100%|██████████| 1/1 [00:00<00:00, 64.04it/s]
100%|██████████| 1/1 [00:00<00:00, 26.97it/s]


In [8]:
distance_matrix = np.zeros((len(project_matrix), len(pub_matrix)))

for i in range(len(project_matrix)):
    for j in range(len(pub_matrix)):
        distance_matrix[i, j] = 1 - cosine_similarity([project_matrix[i]], [pub_matrix[j]])[0][0]

distance_matrix.shape

(225, 635)

In [11]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# create a dataframe with row indices as project ids and column indices as publication ids
# if publication is in project, then 1, else 0
# if publication date is before project start date, then 0, else 1
# ratio of intersection of organisations from project and publication to organisations from publication
# ratio of intersection of authors from project and publication to authors from publication

all_data = {**train_data, **test_data}
pub_data = []
for pro_id in all_data.keys():
    for pub_id in all_data[pro_id]['publications'].keys():
        pub_data.append((pub_id, all_data[pro_id]['publications'][pub_id]))

data_matrix = [[0.] * len(pub_data) for _ in range(len(all_data))]
for i, pro_id in enumerate(all_data.keys()):
    for j, pub_id in enumerate([pub[0] for pub in pub_data]):
        belongs = 1 if pub_id in all_data[pro_id]['publications'].keys() else 0
        distance = distance_matrix[i, j]
        data_matrix[i][j] = [belongs, distance]

df = pd.DataFrame(distance_matrix, index=all_data.keys(), columns=[pub[0] for pub in pub_data])
df.to_csv('distance_bert_dim_title.csv')
df.head()

Unnamed: 0,10.3389/fmars.2023.1254461,10.1007/978-3-031-49133-7_8,10.1007/978-3-031-43619-2_15,10.1080/11663081.2023.2246863,10.1177/03057356231165327,10.1016/j.jconrel.2024.05.037,10.1007/s13346-024-01618-6,10.1002/smll.202303682,10.1371/journal.pone.0298895,10.1007/s00334-024-00992-y,10.1038/s41598-023-44445-4,10.1101/2023.09.15.557939,10.1016/j.jasrep.2023.104204,10.1016/j.palaeo.2023.111655,10.1007/jhep05(2024)154,10.1140/epjc/s10052-024-12711-y,10.1140/epjc/s10052-024-12416-2,10.1140/epjc/s10052-024-12415-3,10.1007/jhep02(2024)124,10.1007/jhep11(2023)103,10.1007/jhep06(2023)086,10.1093/iob/obad026,10.1007/s00359-023-01629-7,10.1101/2023.03.11.531676,10.1093/mnras/stae997,10.1093/mnras/stae616,10.1051/0004-6361/202348958,10.1093/mnras/stad3163,10.1093/mnras/stad2183,10.1101/2024.01.14.574481,10.1007/s40313-024-01095-9,10.1109/tpwrs.2023.3321700,10.1109/tits.2024.3392914,10.1007/978-981-97-0989-2_24,10.1109/tsc.2023.3332701,10.1007/978-3-031-48424-7_15,10.1007/978-3-031-48424-7_13,10.1145/3631353,10.1145/3629517,10.1109/icdcs57875.2023.00111,10.1109/icws60048.2023.00056,10.1109/icws60048.2023.00031,10.1109/icws60048.2023.00068,10.1109/tsc.2023.3307143,10.1145/3600232,10.1109/mic.2023.3267266,10.1109/tsc.2023.3241975,10.1109/tmc.2022.3230856,10.1007/978-3-031-20984-0_9,10.1109/tsc.2022.3160469,10.3389/fneur.2023.1182561,10.1038/s41598-024-57498-w,10.1016/j.jmrt.2023.11.014,10.1016/j.jmst.2023.05.020,10.1016/j.fuel.2023.130187,10.1017/9781009297349,10.1007/s11229-023-04282-4,10.3390/rel14060758,10.1002/smsc.202400096,10.1016/j.actpsy.2024.104252,10.1080/02691728.2024.2326828,10.5964/ejop.12031,10.1111/jopy.12805,10.1109/lsp.2023.3277792,10.21203/rs.3.rs-3644422/v1,10.1016/j.renene.2024.120101,10.1093/rheumatology/kead301,10.1097/mlr.0000000000002000,10.1111/tmi.13827,10.1063/5.0167167,10.1021/acsami.2c19376,10.1021/acsanm.2c05047,10.1101/2024.05.21.595254,10.1038/s41467-024-47964-4,10.1093/jb/mvae021,10.1016/j.neuron.2024.01.014,10.1016/j.isci.2023.107385,10.1016/j.peptides.2023.171051,10.3389/fphar.2023.1138666,10.3389/fphar.2023.1098976,10.1186/s43058-023-00521-4,10.1177/26334895231199467,10.1002/adom.202303199,10.1002/adom.202301232,10.1016/j.energy.2024.130322,10.1126/sciimmunol.abo4365,10.1515/sem-2023-0178,10.1364/boe.506664,10.1364/boe.494585,10.3847/1538-4357/ac4601,10.1371/journal.pone.0284416,10.1080/07350015.2023.2271039,10.1093/mnras/stae385,10.1093/mnras/stad2760,10.1002/ange.202320045,10.1002/anie.202320045,10.1016/j.chembiol.2024.02.012,10.1101/2023.11.29.569293,10.1038/s41467-023-41197-7,10.35772/ghm.2023.01065,10.3390/v15040896,10.1111/dar.13836,10.1063/5.0153862,10.1109/imoc57131.2023.10379741,10.1109/imoc57131.2023.10379729,10.1109/access.2023.3339563,10.1109/ojpel.2023.3339014,10.1109/csci58124.2022.00203,10.1016/j.ssci.2022.105967,10.1007/978-3-031-06015-1_8,10.1101/2023.01.19.524726,10.1007/s00382-023-07058-1,10.1002/oa.3284,10.1136/bmjopen-2023-080603,10.1101/2023.08.02.551649,10.1099/mgen.0.000965,10.2140/ant.2022.16.2005,10.1103/physrevlett.132.103402,10.21468/scipostphys.15.2.068,10.1080/02606755.2023.2213550,10.3390/rs15204928,10.1063/5.0203603,10.1103/physrevapplied.21.054027,10.1109/icecom58258.2023.10367924,10.1109/icecom58258.2023.10367934,10.1109/icecom58258.2023.10367938,10.1109/icecom58258.2023.10367923,10.1109/metamaterials58257.2023.10289546,10.23919/ursigass57860.2023.10265525,10.1109/elmar59410.2023.10253914,10.1002/pen.26771,10.1016/j.canep.2023.102451,10.3390/biomedicines11051344,10.3390/pharmaceutics15010181,10.1037/fam0001189,10.3390/su15129371,10.1038/s44221-024-00208-7,10.1093/evolut/qpae036,10.1093/evolut/qpae024,10.3390/micro4010008,10.3390/molecules28196822,10.1007/s11673-023-10285-0,10.1021/acsnano.3c09776,10.1101/2023.09.08.556607,10.1016/j.snb.2023.133733,10.1016/j.ocecoaman.2023.106961,10.1016/j.tre.2023.103169,10.1016/j.elerap.2023.101336,10.1590/0001-3765202320230351,10.1590/s1678-3921.pab2023.v58.03246,10.1038/s41598-024-56540-1,10.1364/ol.494369,10.1017/s0963180124000112,10.1136/jme-2023-109568,10.1111/japp.12655,10.1038/s41539-023-00170-1,10.1016/j.neubiorev.2023.105153,10.1007/s00158-023-03663-0,10.3390/ma16145076,10.1007/978-3-031-55260-1_15,10.1007/s11228-024-00719-1,10.1007/s00205-023-01948-8,10.1142/s0219199723500657,10.1007/s12220-023-01484-7,10.1007/s10957-023-02344-8,10.1007/978-3-031-46359-4_2,10.1007/s10208-023-09629-4,10.1007/s00245-023-10029-x,10.1007/s00028-023-00902-1,10.1016/j.jde.2023.06.004,10.1007/s00030-023-00857-9,10.1007/s11579-023-00333-z,10.1061/jmenea.meeng-5794,10.1177/21695067231194338,10.1038/s41597-024-03034-4,10.1039/d3ew00347g,10.1101/2024.05.17.24307550,10.1101/2024.01.09.24301073,10.1038/s41598-023-41853-4,10.1111/sjos.12685,10.1038/s41593-023-01321-8,10.1016/j.jclinepi.2023.02.020,10.1371/journal.pone.0280192,10.1016/j.jbi.2022.104243,10.1016/j.eclinm.2022.101724,10.38107/037,10.1002/adfm.202314696,10.1063/5.0168129,10.1103/physrevmaterials.7.064202,10.1038/s41699-023-00404-1,10.1061/jsendh.steng-12655,10.1186/s40623-023-01940-2,10.1029/2022ja031231,10.1029/2023ja031663,10.1029/2022gl101465,10.21203/rs.3.rs-4345687/v1,10.1093/toxsci/kfae021,10.1101/2023.08.28.555101,10.1103/physrevb.109.205125,10.1111/cgf.14942,10.1109/tpami.2022.3163720,10.1016/j.csl.2024.101623,10.1039/d0sc01737j,10.1186/s12889-023-17570-y,10.1080/13218719.2023.2243303,10.5694/mja2.52089,10.1177/00938548231170799,10.1007/s11150-024-09710-z,10.1016/j.jbankfin.2023.107047,10.1016/j.euroecorev.2023.104632,10.1038/s41598-023-30724-7,10.1038/s41598-023-29536-6,10.1111/obes.12545,10.1016/j.jebo.2022.11.027,10.3389/fimmu.2023.1060258,10.1016/j.jhydrol.2024.130983,10.1101/2024.03.18.585583,10.1016/j.celrep.2024.113675,10.1103/physrevx.14.021017,10.1038/s41559-024-02331-w,10.1016/j.gloplacha.2024.104354,10.1038/s41467-023-41685-w,10.1126/sciadv.adg7679,10.1144/sp532-2022-1,10.1523/jneurosci.0226-23.2023,10.1101/2023.01.11.523491,10.1073/pnas.2309379120,10.1073/pnas.2301947120,10.1038/s41467-024-48673-8,10.1038/s41467-024-47953-7,10.1016/j.fsigen.2024.103048,10.1093/bib/bbad163,10.1101/2023.12.12.570420,10.1016/j.celrep.2023.113331,10.1103/physrevd.108.105013,10.1007/jhep11(2023)005,10.1101/2024.01.29.574039,10.1126/scitranslmed.adi9711,10.3390/cancers15143723,10.1021/acsnano.3c11337,10.1038/s41467-023-36133-8,10.1016/j.cobme.2024.100547,10.1002/ange.202319567,10.1002/anie.202319567,10.1016/j.jfca.2023.105901,10.1016/j.foodcont.2023.109969,10.1016/j.respol.2024.104954,10.1371/journal.pbio.3002513,10.1109/qce57702.2023.00111,10.1109/iccad57390.2023.10323863,10.1016/j.ic.2023.105077,10.1145/3582016.3582039,10.1109/tcad.2022.3179223,10.1145/3517145,10.1016/j.tcs.2022.02.017,10.1145/3514355,10.1145/3514239,10.1038/s41598-024-51799-w,10.3390/cells12242815,10.5771/9783748935544,10.1016/j.eswa.2024.124100,10.1016/j.frl.2024.105308,10.12688/openreseurope.16436.1,10.12688/openreseurope.16278.1,10.12688/openreseurope.15386.1,10.3390/foods12244484,10.1057/s42984-024-00090-3,10.1002/anie.202315985,10.1002/ange.202315985,10.1161/atvbaha.123.319821,10.1093/cvr/cvad149,10.1161/strokeaha.123.042714,10.1016/j.jtha.2023.04.002,10.1182/bloodadvances.2023010100,10.3389/fphar.2024.1379236,10.1101/2023.07.03.547532,10.1103/physreve.107.l052602,10.1063/5.0133871,10.1109/icnwc57852.2023.10127328,10.1016/j.atech.2023.100265,10.1016/j.cct.2023.107387,10.1155/2023/2397564,10.1007/s10502-024-09441-1,10.1111/gcb.16637,10.3390/ma16051978,10.21203/rs.3.rs-4271873/v1,10.1038/s41419-024-06693-8,10.1038/s41598-024-56928-z,10.1016/j.chemgeo.2024.122157,10.1016/j.chemgeo.2022.121064,10.2343/geochemj.gj22004,10.1016/j.jwpe.2023.104395,10.1016/j.jece.2023.111107,10.1080/02614367.2022.2162109,10.1109/twc.2024.3400601,10.1109/globecom54140.2023.10437972,10.1109/twc.2023.3344802,10.1109/tifs.2023.3268443,10.1093/ptep/ptae021,10.1021/accountsmr.4c00071,10.26434/chemrxiv-2024-hvxml,10.1039/d4nr01024h,10.1038/s42004-024-01143-0,10.1039/d3dd00236e,10.1039/d3tc03606e,10.1021/acs.chemmater.3c01744,10.1021/acs.inorgchem.3c02337,10.1063/5.0144827,10.1002/anie.202314378,10.1002/ange.202314378,10.1039/d3ta04707e,10.1039/d3ce00881a,10.1002/asia.202300673,10.1038/s41467-023-38737-6,10.26434/chemrxiv-2023-d1s8w,10.1002/adma.202211478,10.1145/3589334.3645329,10.1029/2023gl104396,10.20396/cel.v65i00.8673331,10.1016/j.procs.2023.08.230,10.1371/journal.pbio.3001984,10.1101/2022.04.22.489138,10.1002/brb3.3490,10.1016/j.clinph.2024.04.002,10.1111/ejn.16001,10.3389/fneur.2023.1177589,10.1016/j.clinph.2022.08.015,10.1101/2024.05.23.24307833,10.1093/ofid/ofad655,10.1093/ofid/ofae018,10.1371/journal.pone.0297775,10.21203/rs.3.rs-2640564/v3,10.1002/saj2.20688,10.1071/sr23164,10.1029/2023gl104852,10.1038/s41598-022-10957-8,10.1186/s40623-022-01584-8,10.1111/nph.19336,10.3389/fpsyg.2022.1014713,10.3390/v15102029,10.3389/fnut.2023.1215768,10.3390/educsci13080838,10.12688/routledgeopenres.17798.2,10.12688/routledgeopenres.17798.1,10.1080/00031305.2023.2232006,10.1080/0969725x.2023.2270357,10.1177/00345237231183343,10.1016/j.ijheatmasstransfer.2023.124665,10.1039/d3qm01106b,10.1016/j.mechmachtheory.2023.105508,10.1016/j.scib.2024.02.010,10.1088/1674-4527/ace179,10.1126/science.abl7759,10.1016/j.scitotenv.2024.172840,10.1016/j.envint.2024.108601,10.1016/j.envint.2024.108454,10.1016/j.envint.2023.108375,10.1016/j.envres.2023.117611,10.1016/j.envres.2023.117832,10.2337/dc23-0327,10.1021/acs.est.3c02765,10.1021/acs.est.3c00848,10.1007/s40726-023-00269-4,10.1021/acs.analchem.3c00376,10.1289/ehp11372,10.1101/2023.02.09.527886,10.3390/make6020035,10.1016/j.micromeso.2024.113042,10.3390/make6010014,10.1016/j.compag.2024.108650,10.1038/s41598-023-34320-7,10.1016/j.compag.2022.107325,10.1145/3615894.3628507,10.1016/j.scs.2023.104435,10.1093/jipm/pmac028,10.1145/3651171,10.1016/j.clsr.2024.105934,10.1145/3594739.3605113,10.1093/lambio/ovae041,10.1007/s12155-024-10734-7,10.1016/j.carres.2024.109068,10.1007/s00253-023-12908-3,10.1007/s00289-024-05152-w,10.3390/polysaccharides4020013,10.3389/fagro.2023.1148969,10.1038/s41586-024-07473-2,10.1101/2023.11.30.569198,10.1186/s13015-024-00254-6,10.1007/978-1-0716-3989-4_3,10.4230/lipics.wabi.2023.17,10.1186/s13015-024-00261-7,10.1101/2023.11.22.568259,10.1093/gbe/evad205,10.1101/gr.277679.123,10.1101/2023.06.21.545862,10.1145/3571723,10.1101/2023.05.12.540616,10.1101/2023.03.02.530874,10.1101/2023.01.30.526175,10.1101/2022.12.23.521809,10.1038/s41390-023-02992-6,10.1001/jama.2023.20358,10.1542/hpeds.2023-007246,10.1016/j.jpeds.2023.113499,10.1093/jamia/ocad053,10.1093/jamia/ocac251,10.1038/s41598-024-54655-z,10.1172/jci175031,10.3389/fgene.2023.1304425,10.1186/s13578-023-01102-7,10.1016/j.gendis.2022.11.003,10.1002/mp.16880,10.1002/mp.16352,10.1016/j.scitotenv.2022.158267,10.1016/j.foreco.2022.120301,10.1016/j.ecolind.2022.109004,10.1073/pnas.2318333121,10.1063/5.0158930,10.1145/3588432.3591521,10.1063/5.0142166,10.1038/s41467-024-46494-3,10.1128/mbio.02479-23,10.1101/2023.09.08.556855,10.1101/2023.08.31.555785,10.1186/s40168-022-01378-w,10.1101/2023.12.18.572279,10.1093/nar/gkad1164,10.1101/2023.12.03.569795,10.1101/2023.07.26.550759,10.1186/s12864-023-09265-w,10.1021/acsanm.3c05701,10.1016/j.apsusc.2023.158316,10.1080/03057240.2024.2333577,10.3390/rel14121517,10.3390/rel14111427,10.1109/icops45740.2023.10480948,10.1109/icops45740.2023.10481052,10.1109/icops45740.2023.10481130,10.1038/s41598-024-51298-y,10.1109/ppc47928.2023.10311041,10.1109/ppc47928.2023.10310918,10.1049/hve2.12382,10.21203/rs.3.rs-3143506/v1,10.4271/03-16-08-0061,10.1080/02560046.2024.2316302,10.1177/01634437231219141,10.1177/16094069231182015,10.1177/00113921231182182,10.1002/cjce.25286,10.1080/07373937.2024.2318437,10.1016/j.etap.2024.104382,10.1080/19440049.2023.2283769,10.3390/ani13152499,10.1016/j.jet.2024.105798,10.1038/s41467-023-41107-x,10.1016/j.oneear.2023.05.006,10.1088/1748-9326/acd5e8,10.1088/1748-9326/aca815,10.1145/3617232.3624856,10.1109/cloudcom59040.2023.00047,10.1007/s11761-024-00391-1,10.1109/percomworkshops56833.2023.10150370,10.1016/j.comcom.2023.04.027,10.1109/apsec57359.2022.00034,10.1109/cloudcom55334.2022.00016,10.1371/journal.pone.0285434,10.1186/s13578-023-01012-8,10.1161/strokeaha.122.041651,10.1080/15534510.2023.2279662,10.1111/nana.12994,10.1080/14680777.2023.2258298,10.1038/s41598-023-35252-y,10.1177/10596011231193176,10.1016/j.jhin.2023.03.002,10.1177/10596011221150756,10.1080/08039488.2024.2322495,10.1007/s00787-024-02406-w,10.1142/s0219877024500275,10.1155/2023/2516746,10.1080/08039488.2023.2222698,10.3389/fpsyg.2023.1183704,10.1111/acer.15119,10.1080/20008066.2023.2191396,10.1007/s00787-022-02107-2,10.1007/s11218-022-09734-2,10.1016/j.rssm.2023.100876,10.3390/f15020354,10.1007/s11629-023-8914-3,10.14746/quageo-2023-0016,10.18146/view.297,10.18146/tmg.835,10.5117/9789463727815,10.1177/08912424231163485,10.1007/978-94-6265-515-7,10.1038/s41467-023-41057-4,10.1093/gigascience/giad047,10.1038/s41592-023-01886-z,10.1093/nar/gkad289,10.1101/2023.01.21.525030,10.1016/j.gpb.2022.09.011,10.1021/acs.jpcb.4c01987,10.1021/acs.jctc.3c00663,10.1021/acs.jpcb.3c04473,10.26434/chemrxiv-2023-w0h1p,10.1073/pnas.2305899120,10.1101/2023.05.01.538927,10.1101/2023.12.22.23300430,10.1002/agt2.386,10.1002/adfm.202214179,10.1109/milcom58377.2023.10356332,10.1101/2024.05.03.24306699,10.1101/2024.04.16.24305925,10.1101/2023.10.25.23297571,10.3389/fphys.2023.1202186,10.1103/physrevb.109.205301,10.1103/physrevb.108.195303,10.1523/eneuro.0197-23.2023,10.3390/v15091897,10.1016/j.prdoa.2024.100251,10.3390/ijms24108995,10.3390/ijms24032247,10.1016/j.isci.2023.108171,10.1158/2326-6066.cir-23-0496,10.1101/2023.12.07.570715,10.1016/j.cell.2023.11.008,10.1093/abt/tbad025,10.1016/j.celrep.2023.113280,10.1101/2023.06.18.545488,10.3390/cancers15102788,10.1101/2023.03.07.531525,10.1080/13543784.2023.2179482,10.1016/j.bbrc.2022.09.019,10.3389/fimmu.2022.996026,10.1111/liv.15927,10.1101/2023.11.17.23298672,10.1111/liv.15665,10.1016/j.cgh.2023.04.016,10.1016/j.jhepr.2022.100667,10.1007/s11914-023-00843-1,10.1007/s11914-023-00842-2,10.1007/s11914-023-00855-x,10.3390/rs15235545,10.1029/2022gl102689,10.3390/rs14236069,10.1038/s41467-024-46327-3,10.1002/cam4.6761,10.3389/fonc.2023.1100585,10.1109/vtc2023-fall60731.2023.10333522,10.1128/jvi.00100-23,10.1021/acs.jpcc.3c05477,10.1021/acsaem.3c01370,10.3389/fnins.2023.1217993,10.1016/j.visres.2023.108296,10.1016/j.jad.2023.07.014,10.1001/jamanetworkopen.2023.17905,10.1007/s00371-024-03454-8,10.1109/smc53992.2023.10394105,10.1007/s00521-023-09367-2,10.1145/3582700.3582707,10.1007/s10803-024-06302-9,10.1016/j.psychres.2024.115838,10.1016/j.rasd.2024.102339,10.1177/13623613231213543,10.1007/s10803-023-06039-x,10.1016/j.jplph.2023.154138,10.1016/j.freeradbiomed.2024.04.217,10.1016/j.freeradbiomed.2023.06.021,10.1093/jncimonographs/lgad014,10.1038/s41467-023-37269-3,10.1111/liv.15893,10.1101/2023.04.25.538102,10.1063/5.0142200,10.1016/b978-0-323-95124-1.00017-6,10.1101/2023.05.17.541187,10.1101/2023.05.17.541174,10.1007/s00415-024-12430-0,10.1093/ntr/ntae084,10.1093/schbul/sbae024,10.1038/s41386-024-01842-1,10.21203/rs.3.rs-3168186/v1,10.1101/2023.08.21.23294348,10.1038/s41380-023-02165-1,10.1136/bmjopen-2022-066249,10.1038/s41585-024-00869-9,10.1158/0008-5472.can-23-0555,10.1101/2022.11.02.514910,10.3390/s24072335,10.1109/sensors56945.2023.10324963,10.1038/s41528-023-00258-z,10.3390/bios12050322,10.1371/journal.pone.0288803,10.3389/fpubh.2023.1219676,10.1016/j.soard.2023.09.026,10.3389/fpsyg.2023.1106571,10.2196/46415,10.1101/2023.04.20.23288698,10.1111/joes.12620,10.1101/2024.04.22.24306094,10.1007/s00401-024-02721-1,10.1101/2024.02.28.582621,10.1016/j.neuron.2024.01.008,10.1002/alz.13576,10.1001/jamaneurol.2023.3599,10.1186/s40478-023-01626-6,10.1111/acel.13938,10.1101/2023.07.20.23292771,10.1101/2023.07.06.23292312,10.1101/2023.04.21.23288938,10.1186/s13024-022-00590-4,10.1186/s13058-024-01777-x,10.1117/1.jmi.10.5.054503,10.1002/mp.17040,10.1016/j.brat.2024.104494,10.1176/appi.ajp.20230272,10.1001/jamapsychiatry.2023.5138,10.1016/j.addbeh.2023.107911,10.1016/j.explore.2023.11.001,10.1038/s44220-023-00084-2,10.1007/s40429-023-00501-7,10.1016/j.drugalcdep.2023.109890,10.1145/3631461.3631550,10.1016/j.jcss.2023.02.004,10.1080/15384101.2021.2023304,10.3390/ijms241713272,10.1016/j.jval.2024.01.020,10.1038/s41366-023-01396-0,10.1021/acs.jpcc.3c04680,10.1021/acs.jpcc.4c01814,10.4025/actascilangcult.v45i2.67723,10.1007/s10915-023-02353-9,10.1007/s42967-023-00287-5,10.1007/s10543-023-00954-2,10.1128/jvi.00398-22,10.1093/genetics/iyad216,10.1101/2023.10.17.562696
9852805,0.18304,1.0,1.0,1.0,1.0,1.0,0.578558,1.0,0.002003,1.0,1.0,1.0,1.0,1.0,0.0,0.005932,0.031721,0.041795,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.189134,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.862298,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.839794,1.0,1.0,1.0,1.0,0.41178,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.720063,1.0,1.0,1.0,1.0,0.2628,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.024808,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.958479,1.0,1.0,1.0,1.0,1.0,0.0,0.026933,1.0,1.0,1.0,1.0,1.0,1.0,0.361941,1.0,1.0,1.0,1.0,0.72822,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.559965,1.0,1.0,0.0,0.114089,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.495507,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.74273,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.677209,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.851777,1.0,1.0,0.722752,1.0,1.0,0.775327,1.0,1.0,1.0,1.0,1.0,0.064848,1.0,1.0,1.0,0.0,0.212334,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.002653,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.644091,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.82401,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.663191,1.0,0.473082,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.371788,1.0,1.0,0.794493,1.0,0.866216,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.774252,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.585339,1.0,1.0,1.0,1.0,1.0,0.941629,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.888686,1.0,1.0,1.0,1.0,1.0,1.0,0.046197,1.0,1.0,1.0,0.919727,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.46988,1.0,0.691049,1.0,1.0,0.964955,0.900226,1.0,1.0,1.0,0.79422,1.0,1.0,1.0,0.861779,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.859181,1.0,1.0,1.0,1.0,1.0,1.0,0.704838,1.0,0.666003,1.0,1.0,1.0,1.0,1.0,0.699469,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.334718,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.864522,1.0,1.0,1.0,1.0,1.0,1.0,0.871405,1.0,0.710479,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.908845,1.0,0.643826,0.636778,1.0,0.685452,1.0,0.687222,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.637248,1.0,0.687222,1.0,1.0,0.611779,0.77737,0.640163,1.0,0.635022,1.0,1.0,0.696762,1.0,1.0,1.0,0.20433,0.56469,0.607473,0.278527,0.476196,0.057919,1.0,1.0,1.0,1.0,0.746188,1.0,1.0,1.0,0.723547,1.0,1.0,1.0,1.0,1.0,0.727228,1.0,1.0,0.857256,0.50453,0.635022,1.0,0.650498,0.694075,1.0,1.0,1.0,1.0,0.47,1.0,0.744526,0.763876,1.0,0.75388,0.75388,1.0,1.0,0.495393,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.523874,1.0,1.0,1.0,1.0,0.701563,1.0,0.812952,1.0,0.658031,0.726516,1.0,0.812952,0.663144,0.726516,1.0,1.0,0.801702,1.0,0.849996,1.0,0.827167,1.0,1.0,0.669526,0.677358,0.687222,1.0,1.0,1.0,1.0,1.0,0.315663,1.0,0.709515,1.0,1.0,1.0,0.823515,0.603985,0.724322,0.719421
13062422,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
13528070,0.897364,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.391084,1.0,1.0,1.0,0.705617,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.451574,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.883294,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.452141,1.0,1.0,1.0,1.0,0.764884,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.88757,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.924297,0.005325,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.834617,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.860295,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.804092,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.568551,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.560579,1.0,1.0,0.852516,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.837989,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.619812,1.0,0.663131,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.009599,0.0,0.00356,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.684292,1.0,0.743546,1.0,1.0,1.0,1.0,1.0,0.156814,0.00182,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.902773,0.0,0.010022,0.0,1.0,0.0,1.0,1.0,1.0,0.754623,1.0,1.0,1.0,0.833806,1.0,1.0,1.0,1.0,1.0,0.789278,0.917767,1.0,0.862445,0.662063,1.0,0.892848,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.692558,1.0,1.0,1.0,1.0,0.670972,1.0,0.324882,1.0,0.728601,1.0,1.0,0.740193,1.0,1.0,1.0,0.791537,0.687595,0.854365,0.785997,0.627358,1.0,1.0,1.0,1.0,1.0,0.882516,1.0,0.0,1.0,0.794428,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.876762,0.50519,0.728601,1.0,0.764292,0.770996,1.0,1.0,1.0,1.0,0.65776,1.0,1.0,0.795729,1.0,0.667978,0.667978,1.0,1.0,0.631567,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.636377,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.710171,0.768215,1.0,1.0,1.0,0.768215,1.0,1.0,0.913867,1.0,1.0,0.0,1.0,1.0,1.0,0.719914,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.902855,0.808921,0.875171,0.872952
13057717,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
13308163,0.18304,1.0,1.0,1.0,1.0,1.0,0.578558,1.0,0.002003,1.0,1.0,1.0,1.0,1.0,0.0,0.005932,0.031721,0.041795,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.189134,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.862298,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.839794,1.0,1.0,1.0,1.0,0.41178,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.720063,1.0,1.0,1.0,1.0,0.2628,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.024808,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.958479,1.0,1.0,1.0,1.0,1.0,0.0,0.026933,1.0,1.0,1.0,1.0,1.0,1.0,0.361941,1.0,1.0,1.0,1.0,0.72822,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.559965,1.0,1.0,0.0,0.114089,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.495507,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.74273,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.677209,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.851777,1.0,1.0,0.722752,1.0,1.0,0.775327,1.0,1.0,1.0,1.0,1.0,0.064848,1.0,1.0,1.0,0.0,0.212334,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.002653,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.644091,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.82401,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.663191,1.0,0.473082,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.371788,1.0,1.0,0.794493,1.0,0.866216,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.774252,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.585339,1.0,1.0,1.0,1.0,1.0,0.941629,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.888686,1.0,1.0,1.0,1.0,1.0,1.0,0.046197,1.0,1.0,1.0,0.919727,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.46988,1.0,0.691049,1.0,1.0,0.964955,0.900226,1.0,1.0,1.0,0.79422,1.0,1.0,1.0,0.861779,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.859181,1.0,1.0,1.0,1.0,1.0,1.0,0.704838,1.0,0.666003,1.0,1.0,1.0,1.0,1.0,0.699469,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.334718,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.864522,1.0,1.0,1.0,1.0,1.0,1.0,0.871405,1.0,0.710479,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.908845,1.0,0.643826,0.636778,1.0,0.685452,1.0,0.687222,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.637248,1.0,0.687222,1.0,1.0,0.611779,0.77737,0.640163,1.0,0.635022,1.0,1.0,0.696762,1.0,1.0,1.0,0.20433,0.56469,0.607473,0.278527,0.476196,0.057919,1.0,1.0,1.0,1.0,0.746188,1.0,1.0,1.0,0.723547,1.0,1.0,1.0,1.0,1.0,0.727228,1.0,1.0,0.857256,0.50453,0.635022,1.0,0.650498,0.694075,1.0,1.0,1.0,1.0,0.47,1.0,0.744526,0.763876,1.0,0.75388,0.75388,1.0,1.0,0.495393,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.523874,1.0,1.0,1.0,1.0,0.701563,1.0,0.812952,1.0,0.658031,0.726516,1.0,0.812952,0.663144,0.726516,1.0,1.0,0.801702,1.0,0.849996,1.0,0.827167,1.0,1.0,0.669526,0.677358,0.687222,1.0,1.0,1.0,1.0,1.0,0.315663,1.0,0.709515,1.0,1.0,1.0,0.823515,0.603985,0.724322,0.719421


In [12]:
data_matrix = np.array(data_matrix)

gb_raw_shape = data_matrix.shape
print(gb_raw_shape)

gb_data = []
for i in range(gb_raw_shape[1]):
    gb_data.extend(data_matrix[:, i, :])
gb_data = np.array(gb_data)
print(gb_data.shape)

project_numbers = np.array(list(range(1, gb_raw_shape[0] + 1)) * gb_raw_shape[1])

project_numbers = np.array(list(range(1, gb_raw_shape[0] + 1)) * gb_raw_shape[1])

# get the projects for lowest predictions for each publication with distance matrix
for num_top in range(10, 0, -1):
    project_numbers_pred = []
    for i in range(len(train_pub_data), distance_matrix.shape[1]):
        project_numbers_pred.append(list(np.argsort(distance_matrix[:, i] + 1))[:num_top])

    project_lists = []
    for i in range(0, gb_data.shape[0], gb_raw_shape[0]):
        pub_belongs = gb_data[i:i + gb_raw_shape[0], 0] * project_numbers[i:i + gb_raw_shape[0]]
        project = sorted(list(pub_belongs[pub_belongs != 0].astype(int)), reverse=True)
        project_lists.append(project)

    project_lists_test = project_lists[len(train_pub_data):]

    # calculate accuracy
    # use y_test as mask for project_numbers_test
    accuracy_list = []
    for i, project_list in enumerate(list(project_lists_test)):
        ratio = len(set(project_numbers_pred[i]) & set(project_list)) / len(set(project_list))
        accuracy_list.append(ratio)

    accuracy = sum(accuracy_list) / len(accuracy_list)
    print(accuracy)

(225, 635, 2)
(142875, 2)
0.03875968992248062
0.03875968992248062
0.03875968992248062
0.031007751937984496
0.023255813953488372
0.023255813953488372
0.023255813953488372
0.015503875968992248
0.007751937984496124
0.007751937984496124
