# Cosine Similarity

In [1]:
import nltk
import PyPDF2
import string
import math
from collections import defaultdict
from ordered_set import OrderedSet

In [2]:
documents_folder_name = fr"..\Documents"
documents_name = "Document"
documents_count = 20
all_doc_ids = list()

''' 
dictionary to store the total words in each documents
# keys document names and values are the total words are there in the document
'''
documents_words_num = defaultdict(int)

''' 
a dictionary to store the term frequencies (TF)
keys are words and values are defaultdict that keys are document name and
values are the word TF in the document
'''
tf = defaultdict(lambda: defaultdict(float))

''' 
a dictionary to store the Inverse Document Frequency (IDF)
keys are words and values are the word idf in the collection
'''
idf = defaultdict(float)

''' 
a dictionary to store the document frequencies (DF)
keys are words and values are the set of documents name that contain the word
'''
df = defaultdict(OrderedSet)

''' 
a dictionary to store the TF-IDF scores
keys are words and values are defaultdict that keys are document name and
values are the word TF_IDF in the document
'''
tfidf = defaultdict(lambda: defaultdict(float))

In [3]:
for i in range(documents_count):
    doc_name = f"{documents_name}{i + 1}"
    doc_address = fr"{documents_folder_name}\{doc_name}.pdf"
    all_doc_ids.append(doc_name)

    number_of_words_in_doc = 0
    with open(doc_address, "rb") as pdf_file:
        read_pdf = PyPDF2.PdfReader(pdf_file)
        number_of_pages = len(read_pdf.pages)
        for page_num in range(number_of_pages):
            page = read_pdf.pages[page_num]
            page_content = page.extract_text()

            lower_text = page_content.lower()
            text_without_punctuation_marks = lower_text.translate(str.maketrans('', '', string.punctuation))

            tokens = nltk.word_tokenize(text_without_punctuation_marks)
            
            for term in tokens:
                number_of_words_in_doc += 1
                tf[term][doc_name] += 1
                df[term].append(doc_name)
                
    documents_words_num[doc_name] = number_of_words_in_doc

In [4]:
for iden, words in documents_words_num.items():
    print(iden, words)

Document1 7621
Document2 1394
Document3 7380
Document4 4005
Document5 3434
Document6 4474
Document7 3351
Document8 4009
Document9 7129
Document10 4107
Document11 10184
Document12 7497
Document13 58356
Document14 4468
Document15 7689
Document16 1883
Document17 3012
Document18 2986
Document19 6825
Document20 3643


In [5]:
# calculate tf and idf for each words
for term, doc_tf in tf.items():
    for doc_id, term_freq in doc_tf.items():
        # calculate tf 
        tf[term][doc_id] /= documents_words_num[doc_id]
        
        # calculate idf 
        idf[term] = math.log(documents_count / (len(df[term]) + 1))
        print(term, doc_id, tf[term][doc_id], idf[term])

efficient Document1 0.003017976643485107 0.7985076962177716
efficient Document3 0.00027100271002710027 0.7985076962177716
efficient Document4 0.00024968789013732833 0.7985076962177716
efficient Document5 0.0037856726849155504 0.7985076962177716
efficient Document8 0.0002494387627837366 0.7985076962177716
efficient Document13 0.00022277058057440537 0.7985076962177716
efficient Document14 0.00022381378692927484 0.7985076962177716
efficient Document15 0.00013005592404734037 0.7985076962177716
video Document1 0.012203122949744127 1.3862943611198906
video Document2 0.0014347202295552368 1.3862943611198906
video Document3 0.0008130081300813008 1.3862943611198906
video Document6 0.0049172999552972736 1.3862943611198906
object Document1 0.009841228185277523 1.0498221244986776
object Document3 0.0004065040650406504 1.0498221244986776
object Document6 0.00022351363433169424 1.0498221244986776
object Document8 0.0004988775255674732 1.0498221244986776
object Document9 0.00014027212792818068 1.0498

model Document13 0.0007711289327575571 0.0
model Document14 0.00693822739480752 0.0
model Document15 0.00013005592404734037 0.0
model Document16 0.011152416356877323 0.0
model Document17 0.0026560424966799467 0.0
model Document18 0.0056932350971198925 0.0
model Document19 0.007619047619047619 0.0
model Document20 0.0008234971177600879 0.0
temporal Document1 0.0015745965096444037 1.8971199848858813
temporal Document6 0.001117568171658471 1.8971199848858813
smoothness Document1 0.0005248655032148012 2.302585092994046
without Document1 0.0009185146306259021 0.22314355131420976
without Document2 0.0007173601147776184 0.22314355131420976
without Document3 0.0004065040650406504 0.22314355131420976
without Document4 0.0009987515605493133 0.22314355131420976
without Document5 0.00145602795573675 0.22314355131420976
without Document6 0.00022351363433169424 0.22314355131420976
without Document7 0.0008952551477170994 0.22314355131420976
without Document8 0.0002494387627837366 0.22314355131420976


keywords Document1 0.0001312163758037003 1.2039728043259361
keywords Document6 0.00022351363433169424 1.2039728043259361
keywords Document9 0.00014027212792818068 1.2039728043259361
keywords Document10 0.00024348672997321646 1.2039728043259361
keywords Document14 0.00022381378692927484 1.2039728043259361
·memory Document1 0.0001312163758037003 2.302585092994046
seg Document1 0.0006560818790185015 1.8971199848858813
seg Document6 0.003576218149307108 1.8971199848858813
mentation Document1 0.0005248655032148012 1.2039728043259361
mentation Document6 0.000894054537326777 1.2039728043259361
mentation Document7 0.0002984183825723665 1.2039728043259361
mentation Document13 3.427239701144698e-05 1.2039728043259361
mentation Document19 0.00014652014652014652 1.2039728043259361
·realtime Document1 0.0001312163758037003 2.302585092994046
1 Document1 0.0031491930192888073 -0.048790164169432056
1 Document2 0.0007173601147776184 -0.048790164169432056
1 Document3 0.0035230352303523035 -0.04879016416

terms Document15 0.0023410066328521262 0.43078291609245434
terms Document18 0.0003348961821835231 0.43078291609245434
terms Document20 0.0019214932747735384 0.43078291609245434
along Document1 0.0002624327516074006 0.5108256237659907
along Document3 0.0008130081300813008 0.5108256237659907
along Document4 0.00024968789013732833 0.5108256237659907
along Document5 0.0020384391380314504 0.5108256237659907
along Document9 0.0005610885117127227 0.5108256237659907
along Document10 0.00024348672997321646 0.5108256237659907
along Document11 0.0002945797329143755 0.5108256237659907
along Document12 0.00013338668800853674 0.5108256237659907
along Document13 0.00039413256563164027 0.5108256237659907
along Document15 0.00013005592404734037 0.5108256237659907
along Document17 0.00033200531208499334 0.5108256237659907
consumption Document1 0.0006560818790185015 1.6094379124341003
consumption Document9 0.00042081638378454203 1.6094379124341003
consumption Document10 0.0004869734599464329 1.6094379124

when Document9 0.0005610885117127227 0.05129329438755048
when Document10 0.0007304601899196494 0.05129329438755048
when Document11 0.0002945797329143755 0.05129329438755048
when Document12 0.0008003201280512205 0.05129329438755048
when Document13 0.0010624443073548565 0.05129329438755048
when Document14 0.00022381378692927484 0.05129329438755048
when Document15 0.0005202236961893615 0.05129329438755048
when Document16 0.002124269782262347 0.05129329438755048
when Document18 0.0006697923643670462 0.05129329438755048
when Document20 0.0002744990392533626 0.05129329438755048
processing Document1 0.0003936491274111009 0.6931471805599453
processing Document2 0.0007173601147776184 0.6931471805599453
processing Document3 0.0008130081300813008 0.6931471805599453
processing Document4 0.0009987515605493133 0.6931471805599453
processing Document5 0.0029120559114735 0.6931471805599453
processing Document6 0.0006705409029950827 0.6931471805599453
processing Document7 0.000596836765144733 0.69314718

transformers Document1 0.0009185146306259021 1.3862943611198906
transformers Document3 0.0005420054200542005 1.3862943611198906
transformers Document5 0.0023296447291788003 1.3862943611198906
transformers Document7 0.000596836765144733 1.3862943611198906
several Document1 0.0002624327516074006 0.5978370007556204
several Document3 0.0006775067750677507 0.5978370007556204
several Document8 0.0002494387627837366 0.5978370007556204
several Document9 0.00014027212792818068 0.5978370007556204
several Document13 0.0006511755432174926 0.5978370007556204
several Document14 0.0006714413607878246 0.5978370007556204
several Document15 0.00013005592404734037 0.5978370007556204
several Document17 0.00033200531208499334 0.5978370007556204
several Document18 0.0003348961821835231 0.5978370007556204
several Document19 0.00014652014652014652 0.5978370007556204
vision Document1 0.0005248655032148012 0.9162907318741551
vision Document2 0.0007173601147776184 0.9162907318741551
vision Document3 0.0013550135

across Document6 0.0004470272686633885 0.5108256237659907
across Document7 0.001193673530289466 0.5108256237659907
across Document8 0.001995510102269893 0.5108256237659907
across Document12 0.0009337068160597573 0.5108256237659907
across Document13 0.00013708958804578791 0.5108256237659907
across Document14 0.00022381378692927484 0.5108256237659907
across Document15 0.00013005592404734037 0.5108256237659907
across Document16 0.0005310674455655868 0.5108256237659907
lengths Document1 0.0001312163758037003 1.8971199848858813
lengths Document3 0.0004065040650406504 1.8971199848858813
sacrificing Document1 0.0001312163758037003 1.8971199848858813
sacrificing Document13 3.427239701144698e-05 1.8971199848858813
contributions Document1 0.0002624327516074006 0.7985076962177716
contributions Document3 0.00013550135501355014 0.7985076962177716
contributions Document6 0.00022351363433169424 0.7985076962177716
contributions Document7 0.0002984183825723665 0.7985076962177716
contributions Document1

works Document7 0.0002984183825723665 0.9162907318741551
works Document8 0.0002494387627837366 0.9162907318741551
works Document13 1.713619850572349e-05 0.9162907318741551
annotated Document1 0.0009185146306259021 1.3862943611198906
annotated Document2 0.0007173601147776184 1.3862943611198906
annotated Document6 0.0013410818059901655 1.3862943611198906
annotated Document7 0.0008952551477170994 1.3862943611198906
alignment Document1 0.0001312163758037003 1.8971199848858813
alignment Document7 0.0017905102954341987 1.8971199848858813
osmn Document1 0.0001312163758037003 2.302585092994046
58 Document1 0.0002624327516074006 1.2039728043259361
58 Document11 0.00019638648860958367 1.2039728043259361
58 Document13 5.140859551717047e-05 1.2039728043259361
58 Document15 0.00013005592404734037 1.2039728043259361
58 Document19 0.00029304029304029304 1.2039728043259361
utilizes Document1 0.0001312163758037003 1.3862943611198906
utilizes Document4 0.0004993757802746567 1.3862943611198906
utilizes D

its Document3 0.001084010840108401 -0.048790164169432056
its Document4 0.0032459425717852684 -0.048790164169432056
its Document5 0.0005824111822947001 -0.048790164169432056
its Document6 0.00022351363433169424 -0.048790164169432056
its Document7 0.000596836765144733 -0.048790164169432056
its Document8 0.001746071339486156 -0.048790164169432056
its Document9 0.0008416327675690841 -0.048790164169432056
its Document10 0.0014609203798392988 -0.048790164169432056
its Document11 0.00216025137470542 -0.048790164169432056
its Document12 0.0038682139522475656 -0.048790164169432056
its Document13 0.002450476386318459 -0.048790164169432056
its Document14 0.0020143240823634737 -0.048790164169432056
its Document15 0.0005202236961893615 -0.048790164169432056
its Document16 0.0005310674455655868 -0.048790164169432056
its Document17 0.00099601593625498 -0.048790164169432056
its Document18 0.0003348961821835231 -0.048790164169432056
its Document19 0.00014652014652014652 -0.048790164169432056
its Docume

new Document5 0.00029120559114735004 0.10536051565782635
new Document6 0.00022351363433169424 0.10536051565782635
new Document7 0.000596836765144733 0.10536051565782635
new Document8 0.0004988775255674732 0.10536051565782635
new Document9 0.00028054425585636136 0.10536051565782635
new Document10 0.00024348672997321646 0.10536051565782635
new Document11 0.0016692851531814612 0.10536051565782635
new Document12 0.0009337068160597573 0.10536051565782635
new Document13 0.0014908492699979437 0.10536051565782635
new Document14 0.0006714413607878246 0.10536051565782635
new Document17 0.00033200531208499334 0.10536051565782635
new Document18 0.0003348961821835231 0.10536051565782635
new Document19 0.00029304029304029304 0.10536051565782635
projected Document1 0.0006560818790185015 1.8971199848858813
projected Document17 0.00033200531208499334 1.8971199848858813
queries Document1 0.0005248655032148012 1.8971199848858813
queries Document3 0.0005420054200542005 1.8971199848858813
keys Document1 0.

47 Document1 0.0005248655032148012 0.9162907318741551
47 Document3 0.00027100271002710027 0.9162907318741551
47 Document12 0.00013338668800853674 0.9162907318741551
47 Document13 0.00013708958804578791 0.9162907318741551
47 Document14 0.00022381378692927484 0.9162907318741551
47 Document15 0.0007803355442840422 0.9162907318741551
47 Document19 0.00014652014652014652 0.9162907318741551
focal Document1 0.002624327516074006 2.302585092994046
modulation Document1 0.001312163758037003 2.302585092994046
57 Document1 0.0005248655032148012 1.2039728043259361
57 Document13 5.140859551717047e-05 1.2039728043259361
57 Document15 0.00013005592404734037 1.2039728043259361
57 Document18 0.0003348961821835231 1.2039728043259361
57 Document19 0.0005860805860805861 1.2039728043259361
choices Document1 0.0001312163758037003 1.8971199848858813
choices Document13 0.00015422578655151142 1.8971199848858813
bidirectional Document1 0.0001312163758037003 1.8971199848858813
bidirectional Document5 0.00058241118

superior Document8 0.0002494387627837366 1.3862943611198906
lev Document1 0.0001312163758037003 2.302585092994046
els Document1 0.0001312163758037003 1.6094379124341003
els Document3 0.00013550135501355014 1.6094379124341003
els Document6 0.00022351363433169424 1.6094379124341003
helps Document1 0.0001312163758037003 1.8971199848858813
helps Document13 8.568099252861745e-05 1.8971199848858813
facilitating Document1 0.0001312163758037003 1.8971199848858813
facilitating Document4 0.00024968789013732833 1.8971199848858813
situations Document1 0.0001312163758037003 1.6094379124341003
situations Document3 0.00027100271002710027 1.6094379124341003
situations Document13 8.568099252861745e-05 1.6094379124341003
interpret Document1 0.0001312163758037003 1.8971199848858813
interpret Document17 0.0006640106241699867 1.8971199848858813
show Document1 0.0007872982548222018 0.22314355131420976
show Document3 0.0012195121951219512 0.22314355131420976
show Document4 0.0004993757802746567 0.22314355131

stateoftheart Document7 0.0002984183825723665 0.7985076962177716
stateoftheart Document8 0.0002494387627837366 0.7985076962177716
stateoftheart Document17 0.00033200531208499334 0.7985076962177716
validation Document1 0.0007872982548222018 0.7985076962177716
validation Document3 0.00027100271002710027 0.7985076962177716
validation Document4 0.000749063670411985 0.7985076962177716
validation Document6 0.0004470272686633885 0.7985076962177716
validation Document7 0.0002984183825723665 0.7985076962177716
validation Document13 1.713619850572349e-05 0.7985076962177716
validation Document14 0.0006714413607878246 0.7985076962177716
validation Document18 0.0003348961821835231 0.7985076962177716
mb Document1 0.0001312163758037003 2.302585092994046
denotes Document1 0.0001312163758037003 1.2039728043259361
denotes Document8 0.0002494387627837366 1.2039728043259361
denotes Document12 0.00013338668800853674 1.2039728043259361
denotes Document17 0.00033200531208499334 1.2039728043259361
denotes Doc

highquality Document1 0.0001312163758037003 1.6094379124341003
highquality Document7 0.0002984183825723665 1.6094379124341003
highquality Document8 0.0002494387627837366 1.6094379124341003
interest Document1 0.0001312163758037003 0.7985076962177716
interest Document3 0.00013550135501355014 0.7985076962177716
interest Document10 0.0004869734599464329 0.7985076962177716
interest Document12 0.0002667733760170735 0.7985076962177716
interest Document13 0.0001884981835629584 0.7985076962177716
interest Document14 0.00022381378692927484 0.7985076962177716
interest Document15 0.00013005592404734037 0.7985076962177716
interest Document17 0.00033200531208499334 0.7985076962177716
accompanied Document1 0.0002624327516074006 1.8971199848858813
accompanied Document20 0.0002744990392533626 1.8971199848858813
pixelaccurate Document1 0.0001312163758037003 2.302585092994046
compare Document1 0.0003936491274111009 1.0498221244986776
compare Document4 0.00024968789013732833 1.0498221244986776
compare Doc

795 Document1 0.0001312163758037003 1.8971199848858813
795 Document19 0.00014652014652014652 1.8971199848858813
425 Document1 0.0001312163758037003 2.302585092994046
32 Document1 0.0003936491274111009 0.28768207245178085
32 Document3 0.000948509485094851 0.28768207245178085
32 Document4 0.00024968789013732833 0.28768207245178085
32 Document5 0.0011648223645894002 0.28768207245178085
32 Document6 0.0004470272686633885 0.28768207245178085
32 Document8 0.0002494387627837366 0.28768207245178085
32 Document9 0.0009819048954972647 0.28768207245178085
32 Document11 0.0002945797329143755 0.28768207245178085
32 Document12 0.00013338668800853674 0.28768207245178085
32 Document13 8.568099252861745e-05 0.28768207245178085
32 Document14 0.00022381378692927484 0.28768207245178085
32 Document15 0.001040447392378723 0.28768207245178085
32 Document17 0.0006640106241699867 0.28768207245178085
32 Document19 0.00014652014652014652 0.28768207245178085
746 Document1 0.0001312163758037003 2.302585092994046
3

d Document1 0.0009185146306259021 0.10536051565782635
d Document3 0.0024390243902439024 0.10536051565782635
d Document4 0.0022471910112359553 0.10536051565782635
d Document5 0.00145602795573675 0.10536051565782635
d Document6 0.0033527045149754136 0.10536051565782635
d Document7 0.0002984183825723665 0.10536051565782635
d Document9 0.0008416327675690841 0.10536051565782635
d Document10 0.00024348672997321646 0.10536051565782635
d Document11 0.001767478397486253 0.10536051565782635
d Document12 0.0008003201280512205 0.10536051565782635
d Document13 0.00015422578655151142 0.10536051565782635
d Document15 0.002210950708804786 0.10536051565782635
d Document16 0.0010621348911311736 0.10536051565782635
d Document17 0.0026560424966799467 0.10536051565782635
d Document18 0.002344273275284662 0.10536051565782635
d Document19 0.0019047619047619048 0.10536051565782635
d Document20 0.0016469942355201758 0.10536051565782635
van Document1 0.0006560818790185015 0.6931471805599453
van Document3 0.0001

state Document20 0.0041174855888004395 0.28768207245178085
art Document1 0.0001312163758037003 1.6094379124341003
art Document3 0.00027100271002710027 1.6094379124341003
art Document4 0.00024968789013732833 1.6094379124341003
arxiv170405519 Document1 0.0001312163758037003 2.302585092994046
lan Document1 0.0001312163758037003 1.8971199848858813
lan Document3 0.00013550135501355014 1.8971199848858813
tao Document1 0.0001312163758037003 1.6094379124341003
tao Document5 0.00029120559114735004 1.6094379124341003
tao Document8 0.0002494387627837366 1.6094379124341003
aaai Document1 0.0002624327516074006 2.302585092994046
li Document1 0.0009185146306259021 0.6931471805599453
li Document3 0.0016260162601626016 0.6931471805599453
li Document4 0.00024968789013732833 0.6931471805599453
li Document5 0.0005824111822947001 0.6931471805599453
li Document7 0.0008952551477170994 0.6931471805599453
li Document13 0.0001713619850572349 0.6931471805599453
li Document15 0.00013005592404734037 0.693147180559

ak Document1 0.0001312163758037003 2.302585092994046
collaborative Document1 0.0001312163758037003 1.8971199848858813
collaborative Document13 3.427239701144698e-05 1.8971199848858813
foregroundbackground Document1 0.0002624327516074006 2.302585092994046
integration Document1 0.0002624327516074006 1.0498221244986776
integration Document3 0.00013550135501355014 1.0498221244986776
integration Document10 0.00024348672997321646 1.0498221244986776
integration Document13 5.140859551717047e-05 1.0498221244986776
integration Document15 0.0006502796202367018 1.0498221244986776
integration Document17 0.0013280212483399733 1.0498221244986776
yangzweiyyangycollaborativevideoobjectsegmentationbymultiscale Document1 0.0001312163758037003 2.302585092994046
yangzyangydecouplingfeaturesinhierarchicalpropagationforvideoobject Document1 0.0001312163758037003 2.302585092994046
63 Document1 0.0001312163758037003 0.9162907318741551
63 Document4 0.00024968789013732833 0.9162907318741551
63 Document12 0.00026

city Document3 0.0004065040650406504 1.3862943611198906
city Document12 0.00013338668800853674 1.3862943611198906
city Document13 0.00039413256563164027 1.3862943611198906
ireland Document2 0.0007173601147776184 1.8971199848858813
ireland Document9 0.00014027212792818068 1.8971199848858813
2school Document2 0.0007173601147776184 2.302585092994046
sciences Document2 0.0007173601147776184 1.2039728043259361
sciences Document4 0.00024968789013732833 1.2039728043259361
sciences Document10 0.0004869734599464329 1.2039728043259361
sciences Document11 9.819324430479184e-05 1.2039728043259361
sciences Document13 0.0001884981835629584 1.2039728043259361
brac Document2 0.0007173601147776184 2.302585092994046
dhaka Document2 0.0007173601147776184 2.302585092994046
bangladesh Document2 0.0007173601147776184 1.8971199848858813
bangladesh Document10 0.00024348672997321646 1.8971199848858813
1meemmanab2maildcuie Document2 0.0007173601147776184 2.302585092994046
2meemarafatbracuacbd Document2 0.000717

correct Document14 0.00022381378692927484 1.2039728043259361
correct Document15 0.00013005592404734037 1.2039728043259361
response Document2 0.0007173601147776184 1.2039728043259361
response Document8 0.0002494387627837366 1.2039728043259361
response Document13 0.0001884981835629584 1.2039728043259361
response Document17 0.004316069057104913 1.2039728043259361
response Document18 0.0010046885465505692 1.2039728043259361
topﬁve Document2 0.0007173601147776184 2.302585092994046
reported Document2 0.0007173601147776184 1.0498221244986776
reported Document11 0.000589159465828751 1.0498221244986776
reported Document12 0.00013338668800853674 1.0498221244986776
reported Document13 0.00010281719103434094 1.0498221244986776
reported Document14 0.00022381378692927484 1.0498221244986776
reported Document15 0.0005202236961893615 1.0498221244986776
top5 Document2 0.0007173601147776184 2.302585092994046
rate Document2 0.0007173601147776184 0.5978370007556204
rate Document3 0.00027100271002710027 0.5

internal Document2 0.0007173601147776184 1.2039728043259361
internal Document4 0.00024968789013732833 1.2039728043259361
internal Document10 0.0004869734599464329 1.2039728043259361
internal Document12 0.00013338668800853674 1.2039728043259361
internal Document13 1.713619850572349e-05 1.2039728043259361
biases Document2 0.0007173601147776184 1.8971199848858813
biases Document13 3.427239701144698e-05 1.8971199848858813
tasks Document2 0.0007173601147776184 1.0498221244986776
tasks Document3 0.0005420054200542005 1.0498221244986776
tasks Document5 0.0017472335468841002 1.0498221244986776
tasks Document6 0.0006705409029950827 1.0498221244986776
tasks Document8 0.005986530306809678 1.0498221244986776
tasks Document13 1.713619850572349e-05 1.0498221244986776
pattern Document2 0.0007173601147776184 0.6931471805599453
pattern Document3 0.000948509485094851 0.6931471805599453
pattern Document4 0.0004993757802746567 0.6931471805599453
pattern Document5 0.0020384391380314504 0.6931471805599453
p

yet Document6 0.00022351363433169424 0.9162907318741551
yet Document7 0.0002984183825723665 0.9162907318741551
yet Document8 0.0002494387627837366 0.9162907318741551
yet Document11 9.819324430479184e-05 0.9162907318741551
yet Document12 0.00013338668800853674 0.9162907318741551
yet Document13 0.00015422578655151142 0.9162907318741551
find Document3 0.0004065040650406504 0.5108256237659907
find Document8 0.0002494387627837366 0.5108256237659907
find Document11 9.819324430479184e-05 0.5108256237659907
find Document13 0.00010281719103434094 0.5108256237659907
find Document14 0.0011190689346463742 0.5108256237659907
find Document15 0.0009103914683313824 0.5108256237659907
find Document16 0.0010621348911311736 0.5108256237659907
find Document17 0.0006640106241699867 0.5108256237659907
find Document18 0.0003348961821835231 0.5108256237659907
find Document19 0.00014652014652014652 0.5108256237659907
find Document20 0.0005489980785067252 0.5108256237659907
widespread Document3 0.00027100271002

others Document14 0.0011190689346463742 0.9162907318741551
others Document18 0.0003348961821835231 0.9162907318741551
scenario Document3 0.0005420054200542005 1.2039728043259361
scenario Document4 0.00024968789013732833 1.2039728043259361
scenario Document7 0.0002984183825723665 1.2039728043259361
scenario Document13 5.140859551717047e-05 1.2039728043259361
scenario Document15 0.00013005592404734037 1.2039728043259361
pioneering Document3 0.00013550135501355014 1.6094379124341003
pioneering Document8 0.0002494387627837366 1.6094379124341003
pioneering Document13 1.713619850572349e-05 1.6094379124341003
conditioned Document3 0.000948509485094851 1.8971199848858813
conditioned Document8 0.0002494387627837366 1.8971199848858813
rasterbased Document3 0.00013550135501355014 2.302585092994046
head Document3 0.0004065040650406504 1.6094379124341003
head Document7 0.001193673530289466 1.6094379124341003
head Document13 3.427239701144698e-05 1.6094379124341003
importantly Document3 0.0004065040

defined Document12 0.00040016006402561027 0.5978370007556204
defined Document13 0.0001713619850572349 0.5978370007556204
defined Document15 0.0005202236961893615 0.5978370007556204
defined Document16 0.0005310674455655868 0.5978370007556204
defined Document17 0.00033200531208499334 0.5978370007556204
defined Document18 0.0010046885465505692 0.5978370007556204
defined Document20 0.0005489980785067252 0.5978370007556204
2d Document3 0.0012195121951219512 0.9162907318741551
2d Document4 0.0009987515605493133 0.9162907318741551
2d Document5 0.00029120559114735004 0.9162907318741551
2d Document6 0.00022351363433169424 0.9162907318741551
2d Document7 0.0002984183825723665 0.9162907318741551
2d Document15 0.00026011184809468074 0.9162907318741551
2d Document16 0.002124269782262347 0.9162907318741551
position Document3 0.0006775067750677507 0.5978370007556204
position Document7 0.0002984183825723665 0.5978370007556204
position Document8 0.0002494387627837366 0.5978370007556204
position Documen

coordinates Document3 0.00013550135501355014 1.0498221244986776
coordinates Document4 0.00024968789013732833 1.0498221244986776
coordinates Document14 0.00022381378692927484 1.0498221244986776
coordinates Document15 0.00026011184809468074 1.0498221244986776
coordinates Document19 0.00029304029304029304 1.0498221244986776
coordinates Document20 0.0002744990392533626 1.0498221244986776
6dimensional Document3 0.00013550135501355014 2.302585092994046
descriptor Document3 0.00013550135501355014 2.302585092994046
existence Document3 0.0005420054200542005 1.0498221244986776
existence Document9 0.0005610885117127227 1.0498221244986776
existence Document11 0.00019638648860958367 1.0498221244986776
existence Document13 3.427239701144698e-05 1.0498221244986776
existence Document19 0.00029304029304029304 1.0498221244986776
existence Document20 0.0002744990392533626 1.0498221244986776
p∈01 Document3 0.00013550135501355014 2.302585092994046
pline Document3 0.00013550135501355014 2.302585092994046
pb

main Document15 0.0013005592404734035 0.28768207245178085
main Document17 0.00033200531208499334 0.28768207245178085
main Document18 0.0003348961821835231 0.28768207245178085
main Document19 0.00014652014652014652 0.28768207245178085
main Document20 0.0002744990392533626 0.28768207245178085
addi Document3 0.00013550135501355014 1.8971199848858813
addi Document13 1.713619850572349e-05 1.8971199848858813
tional Document3 0.00013550135501355014 0.7985076962177716
tional Document6 0.00022351363433169424 0.7985076962177716
tional Document7 0.0002984183825723665 0.7985076962177716
tional Document8 0.0002494387627837366 0.7985076962177716
tional Document9 0.00014027212792818068 0.7985076962177716
tional Document11 0.0002945797329143755 0.7985076962177716
tional Document13 0.00010281719103434094 0.7985076962177716
tional Document16 0.0005310674455655868 0.7985076962177716
comprises Document3 0.00013550135501355014 1.8971199848858813
comprises Document6 0.00022351363433169424 1.8971199848858813

considering Document3 0.00013550135501355014 0.7985076962177716
considering Document4 0.00024968789013732833 0.7985076962177716
considering Document6 0.00022351363433169424 0.7985076962177716
considering Document8 0.0002494387627837366 0.7985076962177716
considering Document9 0.00014027212792818068 0.7985076962177716
considering Document10 0.0007304601899196494 0.7985076962177716
considering Document13 0.00010281719103434094 0.7985076962177716
considering Document14 0.0006714413607878246 0.7985076962177716
sizesditb Document3 0.00013550135501355014 2.302585092994046
sizes Document3 0.00013550135501355014 1.2039728043259361
sizes Document4 0.00024968789013732833 1.2039728043259361
sizes Document5 0.0005824111822947001 1.2039728043259361
sizes Document8 0.0002494387627837366 1.2039728043259361
sizes Document14 0.00022381378692927484 1.2039728043259361
1× Document3 0.00013550135501355014 2.302585092994046
05× Document3 0.00013550135501355014 2.302585092994046
025 Document3 0.0009485094850

ng Document9 0.00014027212792818068 1.3862943611198906
ng Document13 1.713619850572349e-05 1.3862943611198906
ramesh Document3 0.00013550135501355014 2.302585092994046
httpsopenaicomresearchvideo Document3 0.00013550135501355014 2.302585092994046
generationmodelsasworldsimulators Document3 0.00013550135501355014 2.302585092994046
caesar Document3 0.00027100271002710027 2.302585092994046
kabzan Document3 0.00013550135501355014 2.302585092994046
tan Document3 0.0004065040650406504 1.8971199848858813
tan Document13 5.140859551717047e-05 1.8971199848858813
ks Document3 0.00013550135501355014 2.302585092994046
fong Document3 0.00027100271002710027 2.302585092994046
wk Document3 0.00013550135501355014 1.8971199848858813
wk Document6 0.00022351363433169424 1.8971199848858813
wolff Document3 0.00013550135501355014 1.8971199848858813
wolff Document18 0.0003348961821835231 1.8971199848858813
em Document3 0.00013550135501355014 1.3862943611198906
em Document6 0.00022351363433169424 1.386294361119

highdynamic Document4 0.000749063670411985 2.302585092994046
astronomical Document4 0.0012484394506866417 2.302585092994046
formed Document4 0.0004993757802746567 1.2039728043259361
formed Document11 0.00019638648860958367 1.2039728043259361
formed Document12 0.0002667733760170735 1.2039728043259361
formed Document13 6.854479402289396e-05 1.2039728043259361
formed Document19 0.00014652014652014652 1.2039728043259361
taking Document4 0.0004993757802746567 0.7985076962177716
taking Document7 0.0002984183825723665 0.7985076962177716
taking Document9 0.00028054425585636136 0.7985076962177716
taking Document11 0.00039277297721916735 0.7985076962177716
taking Document13 0.00010281719103434094 0.7985076962177716
taking Document14 0.00022381378692927484 0.7985076962177716
taking Document15 0.00013005592404734037 0.7985076962177716
taking Document18 0.0003348961821835231 0.7985076962177716
iteration Document4 0.00149812734082397 1.3862943611198906
iteration Document6 0.00022351363433169424 1.38

normalized Document20 0.0002744990392533626 1.6094379124341003
denormalized Document4 0.00024968789013732833 2.302585092994046
accordingly Document4 0.00024968789013732833 1.0498221244986776
accordingly Document9 0.00014027212792818068 1.0498221244986776
accordingly Document11 9.819324430479184e-05 1.0498221244986776
accordingly Document12 0.0002667733760170735 1.0498221244986776
accordingly Document14 0.00022381378692927484 1.0498221244986776
accordingly Document20 0.0002744990392533626 1.0498221244986776
aim Document4 0.00024968789013732833 1.3862943611198906
aim Document7 0.0002984183825723665 1.3862943611198906
aim Document9 0.00014027212792818068 1.3862943611198906
aim Document13 0.00020563438206868187 1.3862943611198906
solely Document4 0.00024968789013732833 1.2039728043259361
solely Document6 0.00022351363433169424 1.2039728043259361
solely Document8 0.0002494387627837366 1.2039728043259361
solely Document11 9.819324430479184e-05 1.2039728043259361
solely Document13 0.000154225

ones Document7 0.0002984183825723665 0.9162907318741551
ones Document10 0.00024348672997321646 0.9162907318741551
ones Document11 9.819324430479184e-05 0.9162907318741551
ones Document13 0.0001884981835629584 0.9162907318741551
ones Document15 0.00013005592404734037 0.9162907318741551
ones Document16 0.0005310674455655868 0.9162907318741551
163 Document4 0.00024968789013732833 1.8971199848858813
163 Document19 0.00014652014652014652 1.8971199848858813
nufftrelated Document4 0.00024968789013732833 2.302585092994046
attributed Document4 0.00024968789013732833 1.0498221244986776
attributed Document6 0.0006705409029950827 1.0498221244986776
attributed Document7 0.0002984183825723665 1.0498221244986776
attributed Document13 6.854479402289396e-05 1.0498221244986776
attributed Document14 0.00022381378692927484 1.0498221244986776
attributed Document17 0.0006640106241699867 1.0498221244986776
onclusion Document4 0.00024968789013732833 2.302585092994046
endto Document4 0.00024968789013732833 2.3

capable Document6 0.000894054537326777 1.2039728043259361
capable Document7 0.0002984183825723665 1.2039728043259361
capable Document8 0.0002494387627837366 1.2039728043259361
capable Document13 5.140859551717047e-05 1.2039728043259361
intricate Document5 0.00029120559114735004 1.6094379124341003
intricate Document6 0.00022351363433169424 1.6094379124341003
intricate Document10 0.00024348672997321646 1.6094379124341003
principles Document5 0.00029120559114735004 1.8971199848858813
principles Document13 0.0004969497566659812 1.8971199848858813
processes Document5 0.0008736167734420501 0.9162907318741551
processes Document6 0.0004470272686633885 0.9162907318741551
processes Document9 0.00042081638378454203 0.9162907318741551
processes Document11 9.819324430479184e-05 0.9162907318741551
processes Document13 8.568099252861745e-05 0.9162907318741551
processes Document15 0.00013005592404734037 0.9162907318741551
processes Document19 0.00029304029304029304 0.9162907318741551
multiscale Docume

seidenari Document5 0.00029120559114735004 2.302585092994046
marco Document5 0.00029120559114735004 2.302585092994046
bertini Document5 0.00029120559114735004 2.302585092994046
alberto Document5 0.00029120559114735004 2.302585092994046
del Document5 0.00029120559114735004 1.3862943611198906
del Document6 0.0004470272686633885 1.3862943611198906
del Document9 0.00014027212792818068 1.3862943611198906
del Document13 3.427239701144698e-05 1.3862943611198906
bimbo Document5 0.00029120559114735004 2.302585092994046
proceedings Document5 0.0020384391380314504 1.2039728043259361
proceedings Document6 0.00022351363433169424 1.2039728043259361
proceedings Document8 0.001247193813918683 1.2039728043259361
proceedings Document13 5.140859551717047e-05 1.2039728043259361
proceedings Document15 0.00013005592404734037 1.2039728043259361
pages Document5 0.0029120559114735 1.8971199848858813
pages Document8 0.001746071339486156 1.8971199848858813
4826–4835 Document5 0.00029120559114735004 2.30258509299

concerns Document7 0.0002984183825723665 1.2039728043259361
concerns Document10 0.0007304601899196494 1.2039728043259361
concerns Document11 9.819324430479184e-05 1.2039728043259361
concerns Document13 0.00025704297758585235 1.2039728043259361
intricacies Document6 0.00022351363433169424 1.8971199848858813
intricacies Document13 1.713619850572349e-05 1.8971199848858813
col Document6 0.00022351363433169424 1.2039728043259361
col Document8 0.0002494387627837366 1.2039728043259361
col Document11 9.819324430479184e-05 1.2039728043259361
col Document13 5.140859551717047e-05 1.2039728043259361
col Document19 0.0008791208791208791 1.2039728043259361
lection Document6 0.00022351363433169424 2.302585092994046
cialized Document6 0.00022351363433169424 2.302585092994046
controlled Document6 0.00022351363433169424 1.8971199848858813
controlled Document11 9.819324430479184e-05 1.8971199848858813
ly Document6 0.0004470272686633885 1.8971199848858813
ly Document9 0.00014027212792818068 1.897119984885

co Document13 3.427239701144698e-05 1.3862943611198906
co Document19 0.00014652014652014652 1.3862943611198906
nsecutive Document6 0.0004470272686633885 2.302585092994046
usion Document6 0.00022351363433169424 2.302585092994046
happens Document6 0.00022351363433169424 1.3862943611198906
happens Document13 5.140859551717047e-05 1.3862943611198906
happens Document15 0.00013005592404734037 1.3862943611198906
happens Document18 0.0003348961821835231 1.3862943611198906
generat Document6 0.0004470272686633885 2.302585092994046
indepen Document6 0.00022351363433169424 1.8971199848858813
indepen Document18 0.0006697923643670462 1.8971199848858813
tf0andtfnegationslash0are Document6 0.00022351363433169424 2.302585092994046
later Document6 0.00022351363433169424 1.2039728043259361
later Document13 0.0005140859551717047 1.2039728043259361
later Document14 0.00022381378692927484 1.2039728043259361
later Document15 0.00013005592404734037 1.2039728043259361
later Document20 0.0005489980785067252 1.2

paramount Document7 0.0002984183825723665 1.8971199848858813
paramount Document13 3.427239701144698e-05 1.8971199848858813
efcacy Document7 0.0002984183825723665 2.302585092994046
safety Document7 0.0002984183825723665 1.8971199848858813
safety Document13 3.427239701144698e-05 1.8971199848858813
sophistication Document7 0.0002984183825723665 2.302585092994046
lineation Document7 0.0002984183825723665 2.302585092994046
fraught Document7 0.0002984183825723665 2.302585092994046
interobserver Document7 0.0002984183825723665 2.302585092994046
variability Document7 0.0008952551477170994 2.302585092994046
challenged Document7 0.0002984183825723665 1.8971199848858813
challenged Document13 1.713619850572349e-05 1.8971199848858813
necessity Document7 0.0002984183825723665 1.3862943611198906
necessity Document10 0.00024348672997321646 1.3862943611198906
necessity Document12 0.00013338668800853674 1.3862943611198906
necessity Document13 5.140859551717047e-05 1.3862943611198906
extensive Document7

transner Document7 0.000596836765144733 2.302585092994046
informal Document7 0.0002984183825723665 2.302585092994046
discoveries Document7 0.0002984183825723665 1.8971199848858813
discoveries Document13 5.140859551717047e-05 1.8971199848858813
variance Document7 0.0002984183825723665 1.2039728043259361
variance Document9 0.001262449151353626 1.2039728043259361
variance Document12 0.00013338668800853674 1.2039728043259361
variance Document14 0.00022381378692927484 1.2039728043259361
variance Document19 0.00014652014652014652 1.2039728043259361
cause Document7 0.0002984183825723665 1.3862943611198906
cause Document9 0.00014027212792818068 1.3862943611198906
cause Document13 0.00020563438206868187 1.3862943611198906
cause Document15 0.00013005592404734037 1.3862943611198906
adversar Document7 0.0002984183825723665 1.8971199848858813
adversar Document8 0.0002494387627837366 1.8971199848858813
auto Document7 0.0002984183825723665 2.302585092994046
overtrained Document7 0.000298418382572366

sible Document8 0.0002494387627837366 1.8971199848858813
sible Document13 3.427239701144698e-05 1.8971199848858813
historically Document8 0.0002494387627837366 1.8971199848858813
historically Document13 3.427239701144698e-05 1.8971199848858813
dedicated Document8 0.0004988775255674732 1.6094379124341003
dedicated Document9 0.00014027212792818068 1.6094379124341003
dedicated Document13 5.140859551717047e-05 1.6094379124341003
machinery Document8 0.0002494387627837366 2.302585092994046
envelope Document8 0.0002494387627837366 2.302585092994046
imageconditioned Document8 0.0002494387627837366 2.302585092994046
onesizefitsall Document8 0.0002494387627837366 2.302585092994046
stands Document8 0.0007483162883512097 1.8971199848858813
stands Document13 8.568099252861745e-05 1.8971199848858813
universality Document8 0.0002494387627837366 2.302585092994046
solving Document8 0.0002494387627837366 1.6094379124341003
solving Document13 1.713619850572349e-05 1.6094379124341003
solving Document15 0.

perspective Document8 0.0002494387627837366 1.6094379124341003
perspective Document13 0.0003255877716087463 1.6094379124341003
perspective Document17 0.00033200531208499334 1.6094379124341003
ij Document8 0.0002494387627837366 2.302585092994046
cai Document8 0.0002494387627837366 2.302585092994046
3300–3306 Document8 0.0002494387627837366 2.302585092994046
hongchen Document8 0.0002494387627837366 2.302585092994046
hengshu Document8 0.0002494387627837366 2.302585092994046
exploiting Document8 0.0002494387627837366 1.6094379124341003
exploiting Document19 0.00014652014652014652 1.6094379124341003
exploiting Document20 0.0002744990392533626 1.6094379124341003
relation Document8 0.0002494387627837366 1.0498221244986776
relation Document11 9.819324430479184e-05 1.0498221244986776
relation Document12 0.0002667733760170735 1.0498221244986776
relation Document13 5.140859551717047e-05 1.0498221244986776
relation Document17 0.00033200531208499334 1.0498221244986776
relation Document19 0.00014652

∗π∗−12parenrightig13 Document9 0.00014027212792818068 2.302585092994046
ε13−1−γπ∗ Document9 0.00014027212792818068 2.302585092994046
γparenleftig Document9 0.00014027212792818068 2.302585092994046
γπ∗π∗−1 Document9 0.00028054425585636136 2.302585092994046
6parenrightig13 Document9 0.00028054425585636136 2.302585092994046
ε23oε22 Document9 0.00014027212792818068 2.302585092994046
vthe Document9 0.00014027212792818068 2.302585092994046
esrrγσ2 Document9 0.00042081638378454203 2.302585092994046
∗−γσ2 Document9 0.00042081638378454203 2.302585092994046
2parenleftig Document9 0.00028054425585636136 2.302585092994046
∗π∗−12parenrightig23 Document9 0.00028054425585636136 2.302585092994046
ε23oε Document9 0.00028054425585636136 2.302585092994046
ﬁnitevariation Document9 0.00014027212792818068 2.302585092994046
hedging Document9 0.00042081638378454203 2.302585092994046
1εvariance Document9 0.00014027212792818068 2.302585092994046
swaps9on Document9 0.00014027212792818068 2.302585092994046
s

xtϕttildewidest≥ϕtst Document9 0.00014027212792818068 2.302585092994046
xtϕtstϕtst Document9 0.00014027212792818068 2.302585092994046
wtπt Document9 0.00014027212792818068 2.302585092994046
xtϕttildewidest≤1−εϕtst Document9 0.00014027212792818068 2.302585092994046
xtϕt1−εst1−επt Document9 0.00014027212792818068 2.302585092994046
bounds Document9 0.00014027212792818068 1.8971199848858813
bounds Document15 0.0003901677721420211 1.8971199848858813
π±are Document9 0.00014027212792818068 2.302585092994046
obvious Document9 0.00014027212792818068 1.3862943611198906
obvious Document13 1.713619850572349e-05 1.3862943611198906
obvious Document14 0.0004476275738585497 1.3862943611198906
obvious Document16 0.0005310674455655868 1.3862943611198906
inequality Document9 0.00014027212792818068 2.302585092994046
knowing Document9 0.00014027212792818068 2.302585092994046
επ1 Document9 0.00014027212792818068 2.302585092994046
xcess Document9 0.00014027212792818068 2.302585092994046
wtbracketrightigg Do

2γsπ∗c1−εs Document9 0.00014027212792818068 2.302585092994046
a10 Document9 0.0007013606396409033 1.8971199848858813
a10 Document15 0.00013005592404734037 1.8971199848858813
ized Document9 0.00028054425585636136 2.302585092994046
inc Document9 0.00028054425585636136 2.302585092994046
c1−εs1−2γ Document9 0.00014027212792818068 2.302585092994046
1−2γs1−2γπ∗−1 Document9 0.00014027212792818068 2.302585092994046
tains Document9 0.00014027212792818068 2.302585092994046
c1−εs2γs−2γπ∗ Document9 0.00014027212792818068 2.302585092994046
2γths Document9 0.00014027212792818068 2.302585092994046
root Document9 0.00014027212792818068 1.8971199848858813
root Document13 0.00030845157310302283 1.8971199848858813
converse Document9 0.00014027212792818068 2.302585092994046
impli Document9 0.00014027212792818068 2.302585092994046
cation Document9 0.00014027212792818068 1.6094379124341003
cation Document11 9.819324430479184e-05 1.6094379124341003
cation Document13 5.140859551717047e-05 1.6094379124341003
γ

focusing Document10 0.0004869734599464329 1.8971199848858813
focusing Document13 0.00011995338954006444 1.8971199848858813
female Document10 0.0004869734599464329 2.302585092994046
revealing Document10 0.00024348672997321646 1.8971199848858813
revealing Document13 1.713619850572349e-05 1.8971199848858813
correlatio Document10 0.00024348672997321646 1.8971199848858813
correlatio Document12 0.00013338668800853674 1.8971199848858813
enhanced Document10 0.00024348672997321646 1.6094379124341003
enhanced Document11 9.819324430479184e-05 1.6094379124341003
enhanced Document19 0.00014652014652014652 1.6094379124341003
finding Document10 0.0004869734599464329 1.2039728043259361
finding Document13 3.427239701144698e-05 1.2039728043259361
finding Document14 0.00022381378692927484 1.2039728043259361
finding Document15 0.00013005592404734037 1.2039728043259361
finding Document20 0.0002744990392533626 1.2039728043259361
ingenuity Document10 0.00024348672997321646 2.302585092994046
nurturing Documen

ney Document13 0.00010281719103434094 1.6094379124341003
maryam Document10 0.00024348672997321646 2.302585092994046
masoumik Document10 0.00024348672997321646 2.302585092994046
ezutah Document10 0.00024348672997321646 2.302585092994046
malaysian Document10 0.00024348672997321646 2.302585092994046
procedia Document10 0.0004869734599464329 2.302585092994046
cirp Document10 0.00024348672997321646 2.302585092994046
646 Document10 0.00024348672997321646 2.302585092994046
652 Document10 0.00024348672997321646 2.302585092994046
moqadam Document10 0.00024348672997321646 2.302585092994046
aio Document10 0.00024348672997321646 2.302585092994046
behnoush Document10 0.00024348672997321646 2.302585092994046
economics Document10 0.0004869734599464329 1.6094379124341003
economics Document12 0.00013338668800853674 1.6094379124341003
economics Document13 0.0009596271163205155 1.6094379124341003
tehran Document10 0.00024348672997321646 2.302585092994046
prospect Document10 0.00024348672997321646 1.89711

α Document17 0.00033200531208499334 1.8971199848858813
teleported Document11 9.819324430479184e-05 2.302585092994046
1−α Document11 9.819324430479184e-05 2.302585092994046
damping Document11 9.819324430479184e-05 2.302585092994046
085 Document11 9.819324430479184e-05 2.302585092994046
tial Document11 9.819324430479184e-05 1.6094379124341003
tial Document13 1.713619850572349e-05 1.6094379124341003
tial Document15 0.00013005592404734037 1.6094379124341003
perron Document11 9.819324430479184e-05 2.302585092994046
ψ Document11 0.0009819324430479183 1.8971199848858813
ψ Document15 0.0005202236961893615 1.8971199848858813
gψ Document11 0.00019638648860958367 2.302585092994046
ψcof Document11 0.00019638648860958367 2.302585092994046
journey Document11 9.819324430479184e-05 2.302585092994046
forever Document11 9.819324430479184e-05 2.302585092994046
wandering Document11 9.819324430479184e-05 2.302585092994046
jour Document11 0.0002945797329143755 1.8971199848858813
jour Document15 0.0002601118

principle Document18 0.0003348961821835231 1.2039728043259361
gqrma Document11 9.819324430479184e-05 2.302585092994046
trix Document11 9.819324430479184e-05 2.302585092994046
forbidden Document11 9.819324430479184e-05 2.302585092994046
grelements Document11 9.819324430479184e-05 2.302585092994046
gqrsee Document11 9.819324430479184e-05 2.302585092994046
negativity Document11 9.819324430479184e-05 2.302585092994046
swingfig Document11 0.00019638648860958367 2.302585092994046
opt Document11 9.819324430479184e-05 2.302585092994046
extensions Document11 9.819324430479184e-05 1.8971199848858813
extensions Document16 0.0005310674455655868 1.8971199848858813
bilities Document11 9.819324430479184e-05 2.302585092994046
gconstructed Document11 9.819324430479184e-05 2.302585092994046
moderate Document11 0.00019638648860958367 1.8971199848858813
moderate Document12 0.00013338668800853674 1.8971199848858813
gold Document11 0.0007855459544383347 1.8971199848858813
gold Document13 1.713619850572349e-

178701 Document11 9.819324430479184e-05 2.302585092994046
watts Document11 9.819324430479184e-05 1.8971199848858813
watts Document13 3.427239701144698e-05 1.8971199848858813
dodds Document11 9.819324430479184e-05 2.302585092994046
influentials Document11 9.819324430479184e-05 2.302585092994046
441 Document11 9.819324430479184e-05 2.302585092994046
2007 Document11 9.819324430479184e-05 1.0498221244986776
2007 Document12 0.0002667733760170735 1.0498221244986776
2007 Document13 0.0003769963671259168 1.0498221244986776
2007 Document18 0.0003348961821835231 1.0498221244986776
2007 Document19 0.00029304029304029304 1.0498221244986776
2007 Document20 0.0002744990392533626 1.0498221244986776
httpsdoiorg101086518527 Document11 9.819324430479184e-05 2.302585092994046
sociophysics Document11 9.819324430479184e-05 2.302585092994046
409 Document11 9.819324430479184e-05 1.8971199848858813
409 Document17 0.00033200531208499334 1.8971199848858813
httpsdoiorg101142s0129183108012297 Document11 9.8193244

shops Document12 0.00013338668800853674 2.302585092994046
launched Document12 0.00013338668800853674 1.8971199848858813
launched Document13 3.427239701144698e-05 1.8971199848858813
traders Document12 0.00013338668800853674 2.302585092994046
cutting Document12 0.00013338668800853674 2.302585092994046
urgeoning Document12 0.00013338668800853674 2.302585092994046
entrepreneurial Document12 0.00013338668800853674 2.302585092994046
youths15 Document12 0.00013338668800853674 2.302585092994046
population16 Document12 0.00013338668800853674 2.302585092994046
discussions Document12 0.00013338668800853674 1.3862943611198906
discussions Document13 1.713619850572349e-05 1.3862943611198906
discussions Document19 0.00014652014652014652 1.3862943611198906
discussions Document20 0.0002744990392533626 1.3862943611198906
attracting Document12 0.0002667733760170735 2.302585092994046
nominally Document12 0.00040016006402561027 2.302585092994046
whom Document12 0.0002667733760170735 1.8971199848858813
whom

stateowned Document12 0.00013338668800853674 2.302585092994046
benefited Document12 0.00013338668800853674 1.8971199848858813
benefited Document13 3.427239701144698e-05 1.8971199848858813
cheap Document12 0.00013338668800853674 2.302585092994046
fdi40 Document12 0.00013338668800853674 2.302585092994046
cores Document12 0.00013338668800853674 1.8971199848858813
cores Document13 1.713619850572349e-05 1.8971199848858813
651million Document12 0.00013338668800853674 2.302585092994046
slid Document12 0.00013338668800853674 2.302585092994046
insertion Document12 0.00013338668800853674 2.302585092994046
investmen Document12 0.00013338668800853674 2.302585092994046
regarded Document12 0.00013338668800853674 1.8971199848858813
regarded Document20 0.0002744990392533626 1.8971199848858813
appraisals Document12 0.00013338668800853674 2.302585092994046
worthiness Document12 0.00013338668800853674 2.302585092994046
ultimatel Document12 0.00013338668800853674 2.302585092994046
distil Document12 0.0001

nobel Document13 0.00013708958804578791 2.302585092994046
lloyd Document13 6.854479402289396e-05 2.302585092994046
shapley Document13 0.00023990677908012888 2.302585092994046
allocations Document13 1.713619850572349e-05 2.302585092994046
clark Document13 1.713619850572349e-05 2.302585092994046
medal Document13 1.713619850572349e-05 2.302585092994046
paul Document13 0.0001713619850572349 2.302585092994046
milgrom Document13 0.00030845157310302283 2.302585092994046
robert Document13 0.00030845157310302283 2.302585092994046
wilson Document13 0.00022277058057440537 2.302585092994046
inventions Document13 1.713619850572349e-05 2.302585092994046
formats Document13 1.713619850572349e-05 2.302585092994046
cording Document13 1.713619850572349e-05 2.302585092994046
microeconomic Document13 3.427239701144698e-05 2.302585092994046
contributed Document13 6.854479402289396e-05 2.302585092994046
relevance Document13 6.854479402289396e-05 1.8971199848858813
relevance Document20 0.0002744990392533626 1

ambition Document13 1.713619850572349e-05 2.302585092994046
water Document13 3.427239701144698e-05 2.302585092994046
airtight Document13 1.713619850572349e-05 2.302585092994046
israeli Document13 3.427239701144698e-05 2.302585092994046
internship Document13 3.427239701144698e-05 2.302585092994046
designers Document13 0.00013708958804578791 2.302585092994046
alon Document13 3.427239701144698e-05 2.302585092994046
really Document13 5.140859551717047e-05 2.302585092994046
utilitari Document13 1.713619850572349e-05 2.302585092994046
ambi Document13 1.713619850572349e-05 2.302585092994046
tious Document13 1.713619850572349e-05 2.302585092994046
uninvited Document13 1.713619850572349e-05 2.302585092994046
actual Document13 0.00010281719103434094 1.6094379124341003
actual Document14 0.0004476275738585497 1.6094379124341003
actual Document15 0.00013005592404734037 1.6094379124341003
desiderata Document13 0.0003255877716087463 2.302585092994046
util Document13 1.713619850572349e-05 2.3025850929

centrally Document13 1.713619850572349e-05 2.302585092994046
allo Document13 3.427239701144698e-05 2.302585092994046
prioritybased Document13 1.713619850572349e-05 2.302585092994046
unitdemand Document13 1.713619850572349e-05 2.302585092994046
indivisible Document13 3.427239701144698e-05 2.302585092994046
formalized Document13 8.568099252861745e-05 2.302585092994046
axiom Document13 0.0003427239701144698 2.302585092994046
17if Document13 1.713619850572349e-05 2.302585092994046
stu Document13 5.140859551717047e-05 2.302585092994046
dents Document13 1.713619850572349e-05 2.302585092994046
ijwhich Document13 1.713619850572349e-05 2.302585092994046
iwould Document13 1.713619850572349e-05 2.302585092994046
concern Document13 0.00015422578655151142 2.302585092994046
litigation Document13 1.713619850572349e-05 2.302585092994046
rect Document13 3.427239701144698e-05 2.302585092994046
18this Document13 1.713619850572349e-05 2.302585092994046
prestigious Document13 1.713619850572349e-05 2.302585

horan Document13 3.427239701144698e-05 2.302585092994046
chief Document13 3.427239701144698e-05 2.302585092994046
staff Document13 3.427239701144698e-05 2.302585092994046
intrigued Document13 3.427239701144698e-05 2.302585092994046
suggestion Document13 3.427239701144698e-05 2.302585092994046
soughtafter Document13 5.140859551717047e-05 2.302585092994046
relief Document13 3.427239701144698e-05 2.302585092994046
alienation Document13 3.427239701144698e-05 2.302585092994046
attributable Document13 1.713619850572349e-05 2.302585092994046
sullivan Document13 1.713619850572349e-05 2.302585092994046
father Document13 5.140859551717047e-05 2.302585092994046
tended Document13 1.713619850572349e-05 2.302585092994046
critique Document13 3.427239701144698e-05 2.302585092994046
resonated Document13 3.427239701144698e-05 2.302585092994046
reinforced Document13 1.713619850572349e-05 2.302585092994046
belief Document13 1.713619850572349e-05 2.302585092994046
manipulability Document13 5.14085955171704

partial Document13 3.427239701144698e-05 1.8971199848858813
partial Document20 0.0002744990392533626 1.8971199848858813
fourway Document13 3.427239701144698e-05 2.302585092994046
exchanges47 Document13 1.713619850572349e-05 2.302585092994046
200748 Document13 1.713619850572349e-05 2.302585092994046
altruistic Document13 8.568099252861745e-05 2.302585092994046
neys Document13 1.713619850572349e-05 2.302585092994046
200049however Document13 1.713619850572349e-05 2.302585092994046
initiate Document13 3.427239701144698e-05 1.8971199848858813
initiate Document14 0.00022381378692927484 1.8971199848858813
ndd Document13 0.00013708958804578791 2.302585092994046
volunteering Document13 6.854479402289396e-05 2.302585092994046
47for Document13 1.713619850572349e-05 2.302585092994046
pools Document13 5.140859551717047e-05 2.302585092994046
48with Document13 1.713619850572349e-05 2.302585092994046
maximiza Document13 1.713619850572349e-05 2.302585092994046
49data Document13 1.713619850572349e-05 2.

67the Document13 1.713619850572349e-05 2.302585092994046
proper Document13 3.427239701144698e-05 1.6094379124341003
proper Document14 0.00022381378692927484 1.6094379124341003
proper Document15 0.00013005592404734037 1.6094379124341003
paragraph Document13 5.140859551717047e-05 2.302585092994046
68this Document13 1.713619850572349e-05 2.302585092994046
economically Document13 3.427239701144698e-05 2.302585092994046
weak Document13 1.713619850572349e-05 2.302585092994046
69this Document13 1.713619850572349e-05 2.302585092994046
analyzes Document13 1.713619850572349e-05 2.302585092994046
70see Document13 1.713619850572349e-05 2.302585092994046
categorysc Document13 1.713619850572349e-05 2.302585092994046
phasize Document13 1.713619850572349e-05 2.302585092994046
164 Document13 5.140859551717047e-05 1.6094379124341003
164 Document18 0.0006697923643670462 1.6094379124341003
164 Document19 0.00014652014652014652 1.6094379124341003
communal Document13 1.713619850572349e-05 2.302585092994046


alaska Document13 5.140859551717047e-05 2.302585092994046
3frontline Document13 1.713619850572349e-05 2.302585092994046
4people Document13 1.713619850572349e-05 2.302585092994046
1664 Document13 1.713619850572349e-05 2.302585092994046
comorbidities Document13 1.713619850572349e-05 2.302585092994046
86categoryspecific Document13 1.713619850572349e-05 2.302585092994046
uninsured Document13 1.713619850572349e-05 2.302585092994046
communicated Document13 1.713619850572349e-05 2.302585092994046
emily Document13 8.568099252861745e-05 2.302585092994046
hospi Document13 1.713619850572349e-05 2.302585092994046
brigham Document13 3.427239701144698e-05 2.302585092994046
chaotic Document13 1.713619850572349e-05 2.302585092994046
tremely Document13 1.713619850572349e-05 2.302585092994046
materialize Document13 5.140859551717047e-05 2.302585092994046
authorized Document13 1.713619850572349e-05 2.302585092994046
eua Document13 1.713619850572349e-05 2.302585092994046
monoclonal Document13 6.8544794022

homogeneous Document13 1.713619850572349e-05 2.302585092994046
candidly Document13 1.713619850572349e-05 2.302585092994046
disclose Document13 1.713619850572349e-05 2.302585092994046
astrategyproof Document13 1.713619850572349e-05 2.302585092994046
minimizing Document13 1.713619850572349e-05 1.8971199848858813
minimizing Document20 0.0002744990392533626 1.8971199848858813
misinformation Document13 1.713619850572349e-05 2.302585092994046
vide Document13 1.713619850572349e-05 2.302585092994046
evaluates Document13 3.427239701144698e-05 1.6094379124341003
evaluates Document14 0.00022381378692927484 1.6094379124341003
evaluates Document15 0.00013005592404734037 1.6094379124341003
icymakers Document13 1.713619850572349e-05 2.302585092994046
utilitarianism—the Document13 1.713619850572349e-05 2.302585092994046
—was Document13 1.713619850572349e-05 2.302585092994046
inherits Document13 1.713619850572349e-05 2.302585092994046
antecedent Document13 1.713619850572349e-05 2.302585092994046
benign

caterina Document13 1.713619850572349e-05 2.302585092994046
bella Document13 1.713619850572349e-05 2.302585092994046
massimo Document13 1.713619850572349e-05 2.302585092994046
cardillo Document13 1.713619850572349e-05 2.302585092994046
emanuele Document13 3.427239701144698e-05 2.302585092994046
cozzi Document13 3.427239701144698e-05 2.302585092994046
rigotti Document13 3.427239701144698e-05 2.302585092994046
donorinitiated Document13 1.713619850572349e-05 2.302585092994046
transpl Document13 1.713619850572349e-05 2.302585092994046
3310 Document13 1.713619850572349e-05 2.302585092994046
1177– Document13 1.713619850572349e-05 2.302585092994046
1184 Document13 1.713619850572349e-05 2.302585092994046
cristina Document13 3.427239701144698e-05 2.302585092994046
cornelio Document13 1.713619850572349e-05 2.302585092994046
silvestre Document13 1.713619850572349e-05 2.302585092994046
flavia Document13 1.713619850572349e-05 2.302585092994046
neri Document13 1.713619850572349e-05 2.302585092994046

hammond Document13 1.713619850572349e-05 2.302585092994046
inga Document13 1.713619850572349e-05 2.302585092994046
lennes Document13 1.713619850572349e-05 2.302585092994046
alyssa Document13 1.713619850572349e-05 2.302585092994046
letourneau Document13 1.713619850572349e-05 2.302585092994046
sonmez Document13 3.427239701144698e-05 2.302585092994046
2324–2331 Document13 1.713619850572349e-05 2.302585092994046
139 Document13 1.713619850572349e-05 2.302585092994046
threeway Document13 1.713619850572349e-05 2.302585092994046
815 Document13 1.713619850572349e-05 2.302585092994046
773–782 Document13 1.713619850572349e-05 2.302585092994046
saad Document13 1.713619850572349e-05 2.302585092994046
arsalan Document13 1.713619850572349e-05 2.302585092994046
faisal Document13 1.713619850572349e-05 2.302585092994046
saud Document13 1.713619850572349e-05 2.302585092994046
dar Document13 1.713619850572349e-05 2.302585092994046
1582 Document13 1.713619850572349e-05 2.302585092994046
210–211 Document13 

sectorsthen Document14 0.00022381378692927484 2.302585092994046
behaved Document14 0.00022381378692927484 2.302585092994046
divide Document14 0.0004476275738585497 2.302585092994046
acquirement Document14 0.00022381378692927484 2.302585092994046
msci Document14 0.0006714413607878246 2.302585092994046
industries Document14 0.0013428827215756492 2.302585092994046
158 Document14 0.00022381378692927484 1.8971199848858813
158 Document15 0.00013005592404734037 1.8971199848858813
subindustries Document14 0.00022381378692927484 2.302585092994046
industrials Document14 0.0004476275738585497 2.302585092994046
discretionary Document14 0.0017905102954341987 2.302585092994046
staples Document14 0.0008952551477170994 2.302585092994046
financials Document14 0.0008952551477170994 2.302585092994046
utilities Document14 0.001566696508504924 2.302585092994046
estate Document14 0.0017905102954341987 2.302585092994046
sp500 Document14 0.0004476275738585497 2.302585092994046
gics Document14 0.00022381378692

d3rϕ∗rnlψr Document15 0.00013005592404734037 2.302585092994046
d3rϕ∗rψr−nl Document15 0.00013005592404734037 2.302585092994046
ˆtnˆtm Document15 0.00013005592404734037 2.302585092994046
ˆtnm Document15 0.00013005592404734037 2.302585092994046
copies Document15 0.0009103914683313824 2.302585092994046
ofψasm⟩to Document15 0.00013005592404734037 2.302585092994046
ψlasm⟩x Document15 0.00013005592404734037 2.302585092994046
nˆtnψasm⟩ Document15 0.00013005592404734037 2.302585092994046
linearity Document15 0.00026011184809468074 2.302585092994046
commutes Document15 0.00013005592404734037 2.302585092994046
ψlasm⟩also Document15 0.00013005592404734037 2.302585092994046
nite Document15 0.00013005592404734037 2.302585092994046
cfigure Document15 0.00013005592404734037 2.302585092994046
interior Document15 0.00026011184809468074 2.302585092994046
r∈a Document15 0.00013005592404734037 2.302585092994046
r∈b Document15 0.00013005592404734037 2.302585092994046
cover Document15 0.00013005592404734037

atic Document15 0.00013005592404734037 2.302585092994046
simplifications Document15 0.00013005592404734037 2.302585092994046
578866122009667 Document15 0.00013005592404734037 2.302585092994046
59273 Document15 0.00013005592404734037 2.302585092994046
589849429592643 Document15 0.00013005592404734037 2.302585092994046
595278 Document15 0.00013005592404734037 2.302585092994046
594046766392319 Document15 0.00013005592404734037 2.302585092994046
596072 Document15 0.00013005592404734037 2.302585092994046
595575413626522 Document15 0.00013005592404734037 2.302585092994046
5963055 Document15 0.00013005592404734037 2.302585092994046
596112250896048 Document15 0.00013005592404734037 2.302585092994046
5963661 Document15 0.00013005592404734037 2.302585092994046
596295400058964 Document15 0.00013005592404734037 2.302585092994046
59638142 Document15 0.00013005592404734037 2.302585092994046
5963857 Document15 0.00013005592404734037 2.302585092994046
v0−14 Document15 0.00013005592404734037 2.30258509

snellings Document16 0.0005310674455655868 1.8971199848858813
snellings Document19 0.00014652014652014652 1.8971199848858813
202301 Document16 0.0010621348911311736 2.302585092994046
schenke Document16 0.002124269782262347 1.8971199848858813
schenke Document19 0.0013186813186813187 1.8971199848858813
024907 Document16 0.0005310674455655868 1.8971199848858813
024907 Document19 0.00014652014652014652 1.8971199848858813
171000881 Document16 0.0005310674455655868 2.302585092994046
064905 Document16 0.0010621348911311736 2.302585092994046
044912 Document16 0.0005310674455655868 2.302585092994046
soeder Document16 0.0005310674455655868 2.302585092994046
230608665 Document16 0.0005310674455655868 2.302585092994046
cpod2017 Document16 0.0005310674455655868 2.302585092994046
171110544 Document16 0.0005310674455655868 2.302585092994046
bearden Document16 0.0005310674455655868 2.302585092994046
014904 Document16 0.0005310674455655868 1.8971199848858813
014904 Document19 0.00014652014652014652 1.8

kand Document18 0.0006697923643670462 2.302585092994046
π Document18 0.0013395847287340924 1.8971199848858813
π Document19 0.0016117216117216117 1.8971199848858813
ckπ Document18 0.0006697923643670462 2.302585092994046
acbdτ Document18 0.0013395847287340924 2.302585092994046
mρ† Document18 0.0003348961821835231 2.302585092994046
kmac Document18 0.0010046885465505692 2.302585092994046
τρkmbd Document18 0.0003348961821835231 2.302585092994046
ρkmac Document18 0.0006697923643670462 2.302585092994046
 Document18 0.0003348961821835231 2.302585092994046
a† Document18 0.0003348961821835231 2.302585092994046
a×˜ackm Document18 0.0003348961821835231 2.302585092994046
ajcmc Document18 0.0003348961821835231 2.302585092994046
−jc−mcajc−mc Document18 0.0003348961821835231 2.302585092994046
ρ† Document18 0.0010046885465505692 2.302585092994046
eτˆhρ† Document18 0.0003348961821835231 2.302585092994046
kmace−τˆh Document18 0.0003348961821835231 2.302585092994046
β Document18 0.002344273275284662 2.3

00000020040060080100120140 Document19 0.00014652014652014652 2.302585092994046
00020040060080100 Document19 0.00014652014652014652 2.302585092994046
0v2ptnp Document19 0.00014652014652014652 2.302585092994046
tngevchydrocoalfrag Document19 0.00029304029304029304 2.302585092994046
aalice Document19 0.00014652014652014652 2.302585092994046
tngevchydrofrag Document19 0.00014652014652014652 2.302585092994046
bcms Document19 0.00014652014652014652 2.302585092994046
250c Document19 0.00014652014652014652 2.302585092994046
150v2ptnp Document19 0.00014652014652014652 2.302585092994046
tngevcdpp Document19 0.00014652014652014652 2.302585092994046
ppb Document19 0.002490842490842491 2.302585092994046
ofπ Document19 0.00014652014652014652 2.302585092994046
2ptn2 Document19 0.00014652014652014652 2.302585092994046
slight Document19 0.00014652014652014652 2.302585092994046
mediate Document19 0.00014652014652014652 2.302585092994046
ptare Document19 0.00014652014652014652 2.302585092994046
scal Docu

p0 Document20 0.0002744990392533626 2.302585092994046
e−p22mriϵ Document20 0.0002744990392533626 2.302585092994046
gce Document20 0.0002744990392533626 2.302585092994046
⟨rgcer′⟩zd3p Document20 0.0002744990392533626 2.302585092994046
2π3ψprψ∗ Document20 0.0002744990392533626 2.302585092994046
pr′ Document20 0.0002744990392533626 2.302585092994046
e−p2 Document20 0.0008234971177600879 2.302585092994046
2mriϵ Document20 0.0008234971177600879 2.302585092994046
andψpr Document20 0.0002744990392533626 2.302585092994046
ψpr Document20 0.0002744990392533626 2.302585092994046
∞x Document20 0.0002744990392533626 2.302585092994046
l02l Document20 0.0002744990392533626 2.302585092994046
1ileiσlflη Document20 0.0002744990392533626 2.302585092994046
prplˆp·ˆr Document20 0.0002744990392533626 2.302585092994046
ηkcpflare Document20 0.0002744990392533626 2.302585092994046
eq9 Document20 0.0002744990392533626 2.302585092994046
pds Document20 0.0002744990392533626 2.302585092994046
−g2 Document20 0.0002

In [6]:
df["efficient"]

OrderedSet(['Document1', 'Document3', 'Document4', 'Document5', 'Document8', 'Document13', 'Document14', 'Document15'])

In [7]:
# Calculate the TF-IDF scores
for term, doc_tf in tf.items():
    for doc_id, term_freq in doc_tf.items():
        tfidf[term][doc_id] = term_freq * idf[term]

In [8]:
for term, doc_id in tfidf.items():
    print(term+ " => ",tfidf[term])

efficient =>  defaultdict(<class 'float'>, {'Document1': 0.002409877576828336, 'Document3': 0.00021639774965251263, 'Document4': 0.00019937770192703411, 'Document5': 0.003022888774266462, 'Document8': 0.00019917877181785276, 'Document13': 0.0001778840230795639, 'Document14': 0.00017871703138267047, 'Document15': 0.00010385065629051524})
video =>  defaultdict(<class 'float'>, {'Document1': 0.01691712053328301, 'Document2': 0.00198894456401706, 'Document3': 0.0011270685862763339, 'Document6': 0.0068168251999637})
object =>  defaultdict(<class 'float'>, {'Document1': 0.010331539081144314, 'Document3': 0.00042675696117832425, 'Document6': 0.00023464955844851982, 'Document8': 0.0005237326637558881, 'Document9': 0.00014726078334951294, 'Document11': 0.00010308544034747424})
segmentation =>  defaultdict(<class 'float'>, {'Document1': 0.011848571096240023, 'Document4': 0.00030061742929486543, 'Document5': 0.0007012072244181341, 'Document6': 0.004843878068365411, 'Document7': 0.0097007656570576

convolution =>  defaultdict(<class 'float'>, {'Document1': 0.0002111846099506758, 'Document4': 0.0008037143133253934, 'Document5': 0.0018747092748213165})
obtained =>  defaultdict(<class 'float'>, {'Document1': 7.549719786164043e-05, 'Document5': 8.377462797081562e-05, 'Document6': 6.430086554577132e-05, 'Document8': 0.00014351812045486698, 'Document9': 8.070755293920069e-05, 'Document10': 0.0002801870683728082, 'Document11': 0.000367229668290765, 'Document12': 0.00019186479421887478, 'Document13': 3.450843970050151e-05, 'Document15': 3.741475776457028e-05, 'Document17': 9.551197624561117e-05, 'Document18': 0.0004817181387337255, 'Document19': 0.0002107560970342717, 'Document20': 0.00031587380999372036})
average =>  defaultdict(<class 'float'>, {'Document1': 0.00039223002280253273, 'Document6': 0.00013362472077684855, 'Document8': 0.0002982474436296435, 'Document9': 0.0005870190777513457, 'Document11': 0.00011740710933928132, 'Document12': 7.974349749974928e-05, 'Document13': 1.0244653

katsaggelos =>  defaultdict(<class 'float'>, {'Document1': 0.00030213687088230496})
ak =>  defaultdict(<class 'float'>, {'Document1': 0.00030213687088230496})
collaborative =>  defaultdict(<class 'float'>, {'Document1': 0.00024893320888149604, 'Document13': 6.501884930035921e-05})
foregroundbackground =>  defaultdict(<class 'float'>, {'Document1': 0.0006042737417646099})
integration =>  defaultdict(<class 'float'>, {'Document1': 0.00027550770883051504, 'Document3': 0.00014225232039277475, 'Document10': 0.00025561775614771793, 'Document13': 5.396988096332909e-05, 'Document15': 0.0006826779324350875, 'Document17': 0.0013941860883116568})
yangzweiyyangycollaborativevideoobjectsegmentationbymultiscale =>  defaultdict(<class 'float'>, {'Document1': 0.00030213687088230496})
yangzyangydecouplingfeaturesinhierarchicalpropagationforvideoobject =>  defaultdict(<class 'float'>, {'Document1': 0.00030213687088230496})
63 =>  defaultdict(<class 'float'>, {'Document1': 0.00012023234901904673, 'Docume

infig2 =>  defaultdict(<class 'float'>, {'Document3': 0.00031200340013469457})
ρmapsthethreepolylineentitytypes =>  defaultdict(<class 'float'>, {'Document3': 0.00031200340013469457})
lrgandthreeboundingbox =>  defaultdict(<class 'float'>, {'Document3': 0.00031200340013469457})
entitytypes =>  defaultdict(<class 'float'>, {'Document3': 0.00031200340013469457})
pvotoimagepixellocationsassigning2channelstoeachentity =>  defaultdict(<class 'float'>, {'Document3': 0.00031200340013469457})
attributes =>  defaultdict(<class 'float'>, {'Document3': 0.001560017000673473})
directional =>  defaultdict(<class 'float'>, {'Document3': 0.00031200340013469457})
∆ =>  defaultdict(<class 'float'>, {'Document3': 0.00014225232039277475, 'Document5': 0.0003057140723642043, 'Document9': 0.00014726078334951294, 'Document15': 0.004505674354071578, 'Document18': 0.0014063256858656097, 'Document20': 0.000576350329123622})
dx =>  defaultdict(<class 'float'>, {'Document3': 0.00025706232857532267, 'Document20': 0

random =>  defaultdict(<class 'float'>, {'Document4': 0.00022878669959404622, 'Document6': 0.0004096069431712808, 'Document7': 0.001914065987203547, 'Document8': 0.00045711685301778757, 'Document11': 0.0009897091565804895, 'Document13': 6.280695948140071e-05, 'Document19': 0.0001342550522892535})
n0 =>  defaultdict(<class 'float'>, {'Document4': 0.0005749276137313472})
τ2φcan =>  defaultdict(<class 'float'>, {'Document4': 0.0005749276137313472})
implemented =>  defaultdict(<class 'float'>, {'Document4': 0.0006012348585897309, 'Document7': 0.00035928761692806215, 'Document10': 0.00029315140110200535, 'Document11': 0.00011822199571150198, 'Document13': 0.0004332618563788584})
nufft =>  defaultdict(<class 'float'>, {'Document4': 0.01264840750208964})
ieas =>  defaultdict(<class 'float'>, {'Document4': 0.0005749276137313472})
product =>  defaultdict(<class 'float'>, {'Document4': 0.000346140914137301, 'Document9': 0.0001944584599691248, 'Document10': 0.006075797053849046, 'Document15': 0.0

raveling =>  defaultdict(<class 'float'>, {'Document6': 0.0005146591624930814})
tumor =>  defaultdict(<class 'float'>, {'Document6': 0.00042403218258513215, 'Document7': 0.003396812864612142})
progres =>  defaultdict(<class 'float'>, {'Document6': 0.0005146591624930814})
sion =>  defaultdict(<class 'float'>, {'Document6': 0.00042403218258513215, 'Document19': 0.00027796629815177745})
identifying =>  defaultdict(<class 'float'>, {'Document6': 0.0003098556909074409, 'Document9': 0.0001944584599691248, 'Document10': 0.0003375442807693914, 'Document13': 0.000166290707516609})
abnormal =>  defaultdict(<class 'float'>, {'Document6': 0.00042403218258513215, 'Document7': 0.000566135477435357})
morphol =>  defaultdict(<class 'float'>, {'Document6': 0.0005146591624930814})
ogy =>  defaultdict(<class 'float'>, {'Document6': 0.00042403218258513215, 'Document13': 3.250942465017961e-05})
growth =>  defaultdict(<class 'float'>, {'Document6': 0.00026910433713141175, 'Document9': 0.00016888382723045817

revalidation =>  defaultdict(<class 'float'>, {'Document7': 0.0006871337191865253})
clinical =>  defaultdict(<class 'float'>, {'Document7': 0.000566135477435357, 'Document13': 6.501884930035921e-05})
436– =>  defaultdict(<class 'float'>, {'Document7': 0.0006871337191865253})
438 =>  defaultdict(<class 'float'>, {'Document7': 0.0006871337191865253})
july =>  defaultdict(<class 'float'>, {'Document7': 0.001698406432306071, 'Document13': 0.0005526602190530534})
carlos =>  defaultdict(<class 'float'>, {'Document7': 0.0006871337191865253})
cardenas =>  defaultdict(<class 'float'>, {'Document7': 0.0006871337191865253})
jinzhong =>  defaultdict(<class 'float'>, {'Document7': 0.0006871337191865253})
anderson =>  defaultdict(<class 'float'>, {'Document7': 0.0004802858586792302, 'Document13': 8.273894265032388e-05, 'Document15': 0.00020931693489843937})
laurence =>  defaultdict(<class 'float'>, {'Document7': 0.0006871337191865253})
kristy =>  defaultdict(<class 'float'>, {'Document7': 0.00068713

sha =>  defaultdict(<class 'float'>, {'Document9': 0.00032298851072998263})
dow =>  defaultdict(<class 'float'>, {'Document9': 0.00032298851072998263})
expo =>  defaultdict(<class 'float'>, {'Document9': 0.00032298851072998263})
tildewidest =>  defaultdict(<class 'float'>, {'Document9': 0.0006459770214599653})
tildewidestgπtst =>  defaultdict(<class 'float'>, {'Document9': 0.00032298851072998263})
wheregsatisﬁes =>  defaultdict(<class 'float'>, {'Document9': 0.00032298851072998263})
gπ− =>  defaultdict(<class 'float'>, {'Document9': 0.0009689655321899478})
gπ =>  defaultdict(<class 'float'>, {'Document9': 0.0009689655321899478})
1−ε =>  defaultdict(<class 'float'>, {'Document9': 0.0019379310643798957})
reﬂecting =>  defaultdict(<class 'float'>, {'Document9': 0.0016149425536499129})
π± =>  defaultdict(<class 'float'>, {'Document9': 0.0006459770214599653})
tildewidesprecisely =>  defaultdict(<class 'float'>, {'Document9': 0.00032298851072998263})
1−εs =>  defaultdict(<class 'float'>, {'D

slovak =>  defaultdict(<class 'float'>, {'Document10': 0.0005606489147781948})
219 =>  defaultdict(<class 'float'>, {'Document10': 0.00046192354148670106, 'Document13': 3.250942465017961e-05})
226 =>  defaultdict(<class 'float'>, {'Document10': 0.00046192354148670106, 'Document15': 0.00024673169266300967})
hakkak =>  defaultdict(<class 'float'>, {'Document10': 0.0005606489147781948})
aeiny =>  defaultdict(<class 'float'>, {'Document10': 0.0005606489147781948})
vafaei =>  defaultdict(<class 'float'>, {'Document10': 0.0011212978295563896})
zadeh =>  defaultdict(<class 'float'>, {'Document10': 0.0011212978295563896})
hanifah =>  defaultdict(<class 'float'>, {'Document10': 0.0011212978295563896})
smes =>  defaultdict(<class 'float'>, {'Document10': 0.0005606489147781948})
mediating =>  defaultdict(<class 'float'>, {'Document10': 0.0005606489147781948})
productivity =>  defaultdict(<class 'float'>, {'Document10': 0.0005606489147781948})
394 =>  defaultdict(<class 'float'>, {'Document10': 0.

registering =>  defaultdict(<class 'float'>, {'Document12': 0.0009214025982369132})
getting =>  defaultdict(<class 'float'>, {'Document12': 0.00018491321343469262, 'Document13': 4.7511630719031134e-05, 'Document14': 0.0003102717907609424, 'Document15': 0.00018029579413706472})
credit =>  defaultdict(<class 'float'>, {'Document12': 0.001771353860771131, 'Document13': 3.250942465017961e-05})
protecting =>  defaultdict(<class 'float'>, {'Document12': 0.00030713419941230435})
paying =>  defaultdict(<class 'float'>, {'Document12': 0.000214677592694958, 'Document13': 5.515929510021592e-05, 'Document14': 0.0003602143940094226})
taxes =>  defaultdict(<class 'float'>, {'Document12': 0.0006142683988246087})
borders =>  defaultdict(<class 'float'>, {'Document12': 0.0006142683988246087})
enforcing =>  defaultdict(<class 'float'>, {'Document12': 0.000214677592694958, 'Document13': 5.515929510021592e-05, 'Document16': 0.0008547200809527882})
contracts =>  defaultdict(<class 'float'>, {'Document12': 

identified =>  defaultdict(<class 'float'>, {'Document13': 0.0003901130958021553, 'Document19': 0.0038915281741248847})
design3 =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
articulated =>  defaultdict(<class 'float'>, {'Document13': 0.000157830220919463})
intuition =>  defaultdict(<class 'float'>, {'Document13': 0.0003901130958021553, 'Document15': 0.00024673169266300967})
repeatedly =>  defaultdict(<class 'float'>, {'Document13': 7.89151104597315e-05})
emphasized =>  defaultdict(<class 'float'>, {'Document13': 0.0003551179970687918})
policymaking =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
circum =>  defaultdict(<class 'float'>, {'Document13': 7.89151104597315e-05})
stances =>  defaultdict(<class 'float'>, {'Document13': 7.89151104597315e-05})
tradeoffs =>  defaultdict(<class 'float'>, {'Document13': 0.00011837266568959725})
altering =>  defaultdict(<class 'float'>, {'Document13': 7.89151104597315e-05})
advising =>  defaultdict(<c

precautions =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
quently =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
viding =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
2005b44 =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
arranging =>  defaultdict(<class 'float'>, {'Document13': 7.89151104597315e-05})
eligibility =>  defaultdict(<class 'float'>, {'Document13': 0.000157830220919463})
feasible =>  defaultdict(<class 'float'>, {'Document13': 0.0003551179970687918})
rtoc =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
renal =>  defaultdict(<class 'float'>, {'Document13': 0.00011837266568959725})
oversight =>  defaultdict(<class 'float'>, {'Document13': 0.00011837266568959725})
43unos =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
nonprofit =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
44according =>  defaultdict(<cla

sequent =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
363 =>  defaultdict(<class 'float'>, {'Document13': 7.89151104597315e-05})
fatality =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
fueled =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
trig =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
gered =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
ldlt =>  defaultdict(<class 'float'>, {'Document13': 0.00019728777614932876})
josefson =>  defaultdict(<class 'float'>, {'Document13': 7.89151104597315e-05})
allure =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
plainly =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
practices—a =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
design—liver =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
detailing =>  defaultdict(<class 'float'>, {

lauronen =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
jørn =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
petter =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
lindahl =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
karin =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
skov =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
søren =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
schwarz =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
sørensen =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
wennberg =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
lindner =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
scandiatransplant =>  defaultdict(<class 'float'>, {'Document13': 3.945755522986575e-05})
e1549 =>  defaultdict(<class 'float'>,

γ2e−κl =>  defaultdict(<class 'float'>, {'Document15': 0.0002994648319669718})
8κ3 =>  defaultdict(<class 'float'>, {'Document15': 0.0002994648319669718})
8κ4lr =>  defaultdict(<class 'float'>, {'Document15': 0.0002994648319669718})
−48κ =>  defaultdict(<class 'float'>, {'Document15': 0.0002994648319669718})
3α =>  defaultdict(<class 'float'>, {'Document15': 0.0002994648319669718})
a13 =>  defaultdict(<class 'float'>, {'Document15': 0.0002994648319669718})
sates =>  defaultdict(<class 'float'>, {'Document15': 0.0002994648319669718})
athat =>  defaultdict(<class 'float'>, {'Document15': 0.0002994648319669718})
fitted =>  defaultdict(<class 'float'>, {'Document15': 0.00020931693489843937, 'Document17': 0.0005343419363991037, 'Document20': 0.00044178916070109804})
l¨ =>  defaultdict(<class 'float'>, {'Document15': 0.0012336584633150483, 'Document18': 0.0006353382400823447})
uscher =>  defaultdict(<class 'float'>, {'Document15': 0.000740195077989029, 'Document18': 0.0006353382400823447})

092 =>  defaultdict(<class 'float'>, {'Document19': 0.0006747502104011856})
fm =>  defaultdict(<class 'float'>, {'Document19': 0.0010121253156017784})
wc =>  defaultdict(<class 'float'>, {'Document19': 0.0006747502104011856})
ηs =>  defaultdict(<class 'float'>, {'Document19': 0.0010121253156017784})
003ζs =>  defaultdict(<class 'float'>, {'Document19': 0.0003373751052005928})
tswitch =>  defaultdict(<class 'float'>, {'Document19': 0.0006747502104011856})
502 =>  defaultdict(<class 'float'>, {'Document19': 0.008097002524814228})
thetrento =>  defaultdict(<class 'float'>, {'Document19': 0.0003373751052005928})
tuned =>  defaultdict(<class 'float'>, {'Document19': 0.0013495004208023713})
nonflow =>  defaultdict(<class 'float'>, {'Document19': 0.0003373751052005928})
subtraction =>  defaultdict(<class 'float'>, {'Document19': 0.0005559325963035549, 'Document20': 0.0005207576131995282})
magnitudes =>  defaultdict(<class 'float'>, {'Document19': 0.0003373751052005928})
peripheral =>  default

In [9]:
def get_query_tfidf(query):
    query_terms = query.lower().split() # Tokenize the query
    
    '''
    a dictionary to store TF for each term are there in query
    keys are terms in query and values are term TF
    '''
    tf_query_term = defaultdict(float)
    
    '''
    a dictionary to store count duplicate terms are there in query
    keys are terms in query and values are counts
    '''
    term_count = defaultdict(float)
    
    '''
    a dictionary to store TF_IDF for each term are there in query
    keys are terms in query and values are term TF_IDF
    '''
    tfidf_query_term = defaultdict(float)
    
    '''
    a dictionary to store TF_IDF score for each query term
    keys are query terms and values are terms TF_IDF
    '''
    result = defaultdict(float)

    # calculate tf_idf for query tokens
    for term in query_terms:
        term_count[term] += 1
    for term, count in term_count.items():
        tf_query_term[term] = count / len(query_terms)
    for term, tf_value in tf_query_term.items():
        idf_value = idf[term]
        tfidf_query_term[term] = tf_value * idf_value

    query_terms = set(query_terms) # ignore same and duplicate words
    for term in query_terms:
        result[term] = tfidf_query_term[term]
    return result

In [10]:
def cosine_similarity(query_dic_tfidf, tfidf):
    similarities = defaultdict(float)
    query_magnitude = math.sqrt(sum(score ** 2 for score in query_dic_tfidf.values()))
    
    for word, doc_tfidf in tfidf.items():
        if word in query_dic_tfidf:
            for doc_name, doc_score in doc_tfidf.items():
                doc_magnitude = math.sqrt(sum(val ** 2 for val in doc_tfidf.values()))
                if doc_magnitude > 0:
                    similarities[doc_name] = \
                    query_dic_tfidf[word] * doc_score / (query_magnitude * doc_magnitude)
    similarities = sorted(similarities.items(), key=lambda x: x[1], reverse=True)
    return similarities

In [11]:
n = 20 # return Top n of documents
# query = "the"
# query = "kasra"
query = "γSF"

query_tfidf_results = get_query_tfidf(query)
results = cosine_similarity(query_tfidf_results, tfidf)
if len(results) > 0:
    print(f"Top {n} most relevant documents:")
    for doc_id, score in results[:n]:
        if score != 0:
            print(f"{doc_id}: {score}")
else :
    print(f"'{query}' NOT FOUND!")

Top 20 most relevant documents:
Document17: 1.0
