
# TFIDF and cosine similarity - toy example
#### Inspired by, and partly taken from the contributions of <a href="https://markhneedham.com/blog/2016/07/27/scitkit-learn-tfidf-and-cosine-similarity-for-computer-science-papers/">Mark Needham</a>  and <a href="https://towardsdatascience.com/tf-idf-for-document-ranking-from-scratch-in-python-on-real-world-dataset-796d339a4089">William Scott</a>

### This Notebook demonstrate the use of TFIDF in retrieval. <br> ~6000 very short documents (stored in the papers/ directory) are read into memory, preprocessed to various degrees and are indexed for retrieval.<br> 
#### A toy of a toy (10 documents) are available in the directory <b>papers1/</b>

In [140]:

from IPython.display import HTML, display
import tabulate
import glob
#
corpus = [] # A list of tuples

i=0
for file in glob.glob("papers/*.txt"): #"papers1/*.txt" - 10 documents ...
    with open(file, "r") as paper:
#        filesfile.write(file[7:-4]+":  "+paper.read()+"\n")
        corpus.append((file, paper.read()))
        i+=1

#Define N, the number of documents
N=len(corpus)
print(corpus[0])


('papers/28418.txt', 'The complexity of perfect zero-knowledge')


In [141]:
def token_split(doc_or_query):
    tokens = doc_or_query.lower().split()
    processed_doc_or_query = []
    for w in tokens:

      
        if w not in stopwords.words("english"):
            processed_doc_or_query.append(w)
    return processed_doc_or_query

# Preprocessing
## we introduce preprocessing in two steps that use the nltk-package to different degrees
### 1. simple_preprocess() which only uses stop-words and punct. removal
### 2. preprocess():             here we can comment in / out different steps, to see the effect
### remember to call the correct function both for texts AND queries when experimenting with different preprocessing

### Define the simple preprocessing.

In [142]:
#### 1. simple_preprocess:
#### HERE WE ONLY IMPORT STOPWORDS LIST FROM NLTK, AND HANDLE PUNCTUATION

#### The nltk-package has a lot of useful tools for language technology.<br> 
from nltk.corpus import stopwords

symbols = r"!\"#$%&()*+-—.,/:;<=>?@[\]^_`{|}~"

# HERE WE USE THE STOPWORDS (NO Stemming, Lemmatization or any other stuff)
def simple_preprocess(doc_or_query):
    # returns a list of tokens
    txt = doc_or_query

    # REMOVE PUNCTUATION
    for ch in symbols:
        txt = txt.replace(ch, " ")  # re.sub(string.punctuation, " ", doc[1])
    return token_split(txt)
    # txt.lower() standardizes to low-case characters


### Define the more elaborate preprocessing

In [143]:
#### 2. preprocess:
#### MORE ELABORATE PREPROCESSING WHERE STEPS CAN BE SWITCHED OUT
#### BY COMMENTING OUT LINE

import preprocess as pp  # We import the python file preprocess.py with preprocessing function


def preprocess(doc_or_query):
    print("before:",doc_or_query)
    doc_or_query = pp.convert_lower_case(doc_or_query)
    
    doc_or_query = pp.remove_punctuation(
        doc_or_query
    )  # remove comma seperately
    
    doc_or_query = pp.remove_apostrophe(doc_or_query)
    doc_or_query = pp.remove_stop_words(doc_or_query)
    doc_or_query = pp.convert_numbers(doc_or_query)
    doc_or_query = pp.stemming(doc_or_query)
    doc_or_query = pp.remove_punctuation(doc_or_query)
    doc_or_query = pp.convert_numbers(doc_or_query)
    doc_or_query = pp.stemming(
        doc_or_query
    )  
    # needed again as we need to stem the words
    doc_or_query = pp.remove_punctuation(
        doc_or_query
    )  
    # needed again as num2word is giving few hypens and commas fourty-one
    doc_or_query = pp.remove_stop_words(
        doc_or_query
    )
    print("after:",doc_or_query)

    return token_split(doc_or_query)

In [144]:
import sys
import re
import numpy as np
import string
### aDF calculated in advance
symbols = r"!\"#$%&()*+-—.,/:;<=>?@[\]^_`{|}~"
  
DF = {}
c=0
processed_corpus=[]#An array of token arrays
ctr=0
for doc in corpus:
    processed_text=""
    txt=doc[1]
    processed_tokens=preprocess(txt)
    
    #DF includes actually our vocabulary, and for each word its global weight 
    for w in processed_tokens:
        try:
            # DF[w] is a set, and each document will only be added once.
            DF[w].add(ctr)
        except:
            DF[w] = {ctr}
                
    processed_corpus.append(processed_tokens)
    ctr += 1
print("ctr",ctr)
# At the end ctr = N

# WE only need the number of distinct documents indexed  by each word.
for j in DF:
    DF[j]=len(DF[j])

    #Print the first token array in processed_corpus
processed_corpus[0]
DF

before: The complexity of perfect zero-knowledge
after:  complex perfect zero knowledg
before: Lottery and stride scheduling: flexibile proportional-share resource management
after:  lotteri stride schedul flexibil proport share resourc manag
before: An investigation of documents from the World Wide Web
after:  investig document world wide web
before: Performance of multiversion and distributed two-phase locking concurrency control mechanisms in distributed databases
after:  perform multiv distribut two phase lock concurr control mechan distribut databa
before: Benchmarking Anomaly-Based Detection Systems
after:  benchmark anomali base detect system
before: Detecting causal relationships in distributed computations: in search of the holy grail
after:  detect causal relationship distribut comput search holi grail
before: Research problems in data warehousing
after:  research problem data wareh
before: Main Memory Database Systems: An Overview
after:  main memori databa system overview
b

before: On the encipherment of search trees and random access files
after:  enciph search tree random access file
before: THE MIT ALEWIFE MACHINE: A LARGE-SCALE DISTRIBUTED-MEMORY MULTIPROCESSOR
after:  mit alewif machin larg scale distribut memori multiprocessor
before: BASE: Using abstraction to improve fault tolerance
after:  base use abstract improv fault toler
before: A Software Platform for Testing Intrusion Detection Systems
after:  softwar platform test intru detect system
before: Embedded inodes and explicit grouping: exploiting disk bandwidth for small files
after:  emb inod explicit group exploit disk bandwidth small file
before: A decade of software model checking with SLAM
after:  decad softwar model check slam
before: Fast Replicated State Machines Over Partitionable Networks
after:  fast replic state machin partit network
before: Operating System Concepts, 4th Ed.
after:  oper system concept 4th ed
before: StackGuard: automatic adaptive detection and prevention of buffer

after:  unif base pointer analysi direct assign
before: Weak-consistency group communication and membership
after:  weak consist group commun membership
before: An Attack on the Last Two Rounds of MD4
after:  attack last two round md4
before: Free transactions with Rio Vista
after:  free transact rio vista
before: Using queries for distributed monitoring and forensics
after:  use queri distribut monitor foren
before: Knowledge and common knowledge in a distributed environment
after:  knowledg common knowledg distribut environ
before: Processor scheduling in shared memory multiprocessors
after:  processor schedul share memori multiprocessor
before: Removing backing store administration from the CAP operating system
after:  remov back store administr cap oper system
before: An Experimental Analysis of BGP Convergence Time
after:  experi analysi bgp converg time
before: Performance analysis and visualization
after:  perform analysi visual
before: Towards a Theory of Abstract Data Types: A

after:  gain effici transport servic appropri design implement choic
before: Protocol Verification as a Hardware Design Aid
after:  protocol verif hardwar design aid
before: The design, implementation and evaluation of SMART: a scheduler for multimedia applications
after:  design implement evalu smart schedul multimedia applic
before: A hybrid quarantine defense
after:  hybrid quarantin defen
before: Contention in shared memory algorithms
after:  content share memori algorithm
before: Life, death, and the critical transition: finding liveness bugs in systems code
after:  life death critic transit find live bug system code
before: The future of magnetic data storage technology
after:  futur magnet data storag technolog
before: Simulation-based `STRESS' Testing Case Study: A Multicast Routing Protocol
after:  simul base stress test case studi multicast rout protocol
before: An IPv6 Provider-Based Unicast Address Format
after:  ipv6 provid base unicast address format
before: Fine-grained 

after:  charact refer local www
before: Java Security: Present and Near Future
after:  java secur present near futur
before: Fast isolation of arbitrary forwarding faults
after:  fast isol arbitrari forward fault
before: Design and implementation of the Wisconsin storage system
after:  design implement wisconsin storag system
before: Cryptographic Computation: Secure Faut-Tolerant Protocols and the Public-Key Model
after:  cryptograph comput secur faut toler protocol public key model
before: Replicated distributed programs (fault tolerance, communication protocols, operating systems, remote procedure call, computer networks)
after:  replic distribut program fault toler commun protocol oper system remot procedur call comput network
before: Modeling concepts for VLSI CAD objects
after:  model concept vlsi cad object
before: A garbage-collecting typed assembly language
after:  garbag collect type assembl languag
before: Spawn: A Distributed Computational Economy
after:  spawn distribut co

after:  journal versu soft updat asynchron meta data protect file system
before: Direct Minimum-Knowledge Computations
after:  direct minimum knowledg comput
before: Freeblock Scheduling Outside of Disk Firmware
after:  freeblock schedul outsid disk firmwar
before: Providing high availability using lazy replication
after:  provid high avail use lazi replic
before: Generalized Isolation Level Definitions
after:  gener isol level definit
before: Real-time causal message ordering in multimedia systems
after:  real time causal messag order multimedia system
before: P-Store: Genuine Partial Replication in Wide Area Networks
after:  store genuin partial replic wide area network
before: “One size fits all” database architectures do not work for DSS
after:  one size fit databa architectur work dss
before: On the cost of fault-tolerant consensus when there are no faults: preliminary version
after:  cost fault toler consensu fault preliminari version
before: Scheduling algorithms for modern disk

after:  interfac execut model fluke kernel
before: Protecting Free Expression Online with Freenet
after:  protect free express onlin freenet
before: Synchronous and asynchronous
after:  synchron asynchron
before: Composite registers
after:  composit regist
before: Parametric polymorphism for XML
after:  parametr polymorph xml
before: Accelerating Nonnumerical Processing by an Extended Vector Processor
after:  accel nonnum process extend vector processor
before: Matchmaking frameworks for distributed resource management
after:  matchmak framework distribut resourc manag
before: Linearizability: a correctness condition for concurrent objects
after:  lineariz correct condit concurr object
before: Transactional file systems can be fast
after:  transact file system fast
before: NADIR: An automated system for detecting network intrusion and misuse
after:  nadir autom system detect network intru misu
before: Amoeba: A Distributed Operating System for the 1990s
after:  amoeba distribut oper sy

before: The JPEG still picture compression standard
after:  jpeg still pictur compress standard
before: ANTLR: a predicated-LL(k) parser generator
after:  antlr predic parser gener
before: Myriad: Cost-Effective Disaster Tolerance
after:  myriad cost effect disast toler
before: Computers and epidemiology
after:  comput epidemiolog
before: Flow synchronization protocol
after:  flow synchron protocol
before: An in-cache address translation mechanism
after:  cach address translat mechan
before: Packing Messages as a Tool for Boosting the Performance of Total Ordering Protocls
after:  pack messag tool boost perform total order protocl
before: Optimizing the performance of a relational algebra database interface
after:  optim perform relat algebra databa interfac
before: The phoenix recovery system: rebuilding from the ashes of an internet catastrophe
after:  phoenix recoveri system rebuild ash internet catastroph
before: Programming constructs for database system implementation in EXODUS
a

after:  heterogen make gnutella scalabl
before: Network Time Protocol (version 2) specification and implementation
after:  network time protocol version specif implement
before: Issues in the design and use of a distributed file system
after:  issu design use distribut file system
before: Reducing risks from poorly chosen keys
after:  reduc risk poorli chosen key
before: Graceful Quorum Reconfiguration in a Robust Emulation of Shared Memory
after:  grace quorum reconfigur robust emul share memori
before: Thrifty Generic Broadcast
after:  thrifti gener broadcast
before: Parallelism in relational database management systems
after:  parallel relat databa manag system
before: Authenticated Byzantine Fault Tolerance Without Public-Key Cryptography
after:  authent byzantin fault toler without public key cryptographi
before: Cut-and-paste file-systems: integrating simulators and file-systems
after:  cut past file system integr simul file system
before: Routing with polynomial communication-sp

after:  implement perform stabl storag servic unix
before: The Design of XPRS
after:  design xpr
before: Compilers: principles, techniques, and tools
after:  compil principl techniqu tool
before: Queue response to input correlation functions: discrete spectral analysis
after:  queue respon input correl function discret spectral analysi
before: The Bayou Architecture: Support for Data Sharing Among Mobile Users
after:  bayou architectur support data share among mobil user
before: Verifiable secret-ballot elections
after:  verifi secret ballot elect
before: On the performance of object clustering techniques
after:  perform object cluster techniqu
before: Overview of multidatabase transaction management
after:  overview multidataba transact manag
before: Experiences with the Amoeba distributed operating system
after:  experi amoeba distribut oper system
before: Shared virtual memory on loosely coupled multiprocessors
after:  share virtual memori loo coupl multiprocessor
before: Single ter

after:  gener processor share approach flow control integr servic network singl node case
before: Ordered and reliable multicast communication
after:  order reliabl multicast commun
before: Log files: an extended file service exploiting write-once storage
after:  log file extend file servic exploit write storag
before: Disk cache—miss ratio analysis and design considerations
after:  disk cache—miss ratio analysi design consid
before: Conditions on input vectors for consensus solvability in asynchronous distributed systems
after:  condit input vector consensu solvabl asynchron distribut system
before: Expert Oracle Database Architecture: Oracle Database Programming 9i, 10g, and 11g Techniques and Solutions, Second Edition
after:  expert oracl databa architectur oracl databa program 9i 10g 11g techniqu solut second edit
before: Proving sequential consistency of high-performance shared memories (extended abstract)
after:  prove sequenti consist high perform share memori extend abstract
be

after:  map internet
before: Recovery semantics for a DB/DC system
after:  recoveri semant db dc system
before: Distributed Concurrency Control Performance: A Study of Algorithms, Distribution, and Replication
after:  distribut concurr control perform studi algorithm distribut replic
before: Assertations about past and future in Highways: Global flush broadcast and flush-vector-time
after:  assert past futur highway global flush broadcast flush vector time
before: A new look at fault tolerant network routing
after:  new look fault toler network rout
before: Issues of fault tolerance in concurrent computations (databases, reliability, transactions, agreement protocols, distributed computing)
after:  issu fault toler concurr comput databa reliabl transact agreement protocol distribut comput
before: Synchronous atomic broadcast for redundant broadcast for redundant channels
after:  synchron atom broadcast redund broadcast redund channel
before: Additional comments on a problem in concurre

before: UIO: a uniform I/O system interface for distributed systems
after:  uio uniform system interfac distribut system
before: “Data in your face”: push technology in perspective
after:  data face push technolog perspect
before: Preliminary Ada reference manual
after:  preliminari ada refer manual
before: A mean value performance model for locking in databases: the no-waiting case
after:  mean valu perform model lock databa wait case
before: An Evaluation of Starburst's Memory Resident Storage Component
after:  evalu starburst memori resid storag compon
before: Mimicry attacks on host-based intrusion detection systems
after:  mimicri attack host base intru detect system
before: Communicating sequential processes
after:  commun sequenti process
before: Comparison of rate-based service disciplines
after:  comparison rate base servic disciplin
before: Efficient solutions to the replicated log and dictionary problems
after:  effici solut replic log dictionari problem
before: Improving th

after:  award best paper venti new approach archiv data storag
before: High level programming for distributed computing
after:  high level program distribut comput
before: The structuring of systems using upcalls
after:  structur system use upcal
before: Analyzing peer-to-peer traffic across large networks
after:  analyz peer peer traffic across larg network
before: Symbolic security analysis of ruby-on-rails web applications
after:  symbol secur analysi rubi rail web applic
before: Atomic Data Access in Distributed Hash Tables
after:  atom data access distribut hash tabl
before: Protecting Key Exchange and Management Protocols Against Resource Clogging Attacks
after:  protect key exchang manag protocol resourc clog attack
before: Safety, Visibility, and Performance in a Wide-Area File System
after:  safeti visibl perform wide area file system
before: The Mini and Micro Industries
after:  mini micro industri
before: Increasing availability in partitioned database systems
after:  increa

after:  load balanc heurist process behavior
before: MODIST: transparent model checking of unmodified distributed systems
after:  modist transpar model check unmodifi distribut system
before: Distributed Computing with Load-Managed Active Storage
after:  distribut comput load manag activ storag
before: Process logic: preliminary report
after:  process logic preliminari report
before: Low contention linearizable counting
after:  low content lineariz count
before: Functional programs that explain their work
after:  function program explain work
before: Analysis and simulation of congestion in packet-switched networks.
after:  analysi simul congest packet switch network
before: Tangler: a censorship-resistant publishing system based on document entanglements
after:  tangler censorship resist publish system base document entangl
before: Benchmarking Database Systems A Systematic Approach
after:  benchmark databa system systemat approach
before: Iterators, schedulers, and distributed-memory

after:  stronger semant low latenc geo replic storag
before: The peer sampling service: experimental evaluation of unstructured gossip-based implementations
after:  peer sampl servic experi evalu unstructur gossip base implement
before: Specification and execution of transactional workflows
after:  specif execut transact workflow
before: A comparison of mechanisms for improving TCP performance over wireless links
after:  comparison mechan improv tcp perform wireless link
before: The YAGS branch prediction scheme
after:  yag branch predict scheme
before: Attested append-only memory: making adversaries stick to their word
after:  attest append memori make adversari stick word
before: Structuring computer-mediated communication systems to avoid information overload
after:  structur comput mediat commun system avoid inform overload
before: Impact of Deep Submicron Technology on Dependability of VLSI Circuits
after:  impact deep submicron technolog depend vlsi circuit
before: Fault-tolerant

before: Heavy-tailed probability distributions in the World Wide Web
after:  heavi tail probabl distribut world wide web
before: Models of LCF.
after:  model lcf
before: A Tour Through Cedar
after:  tour cedar
before: Metric (Extended Abstract): A kernel instrumentation system for distributed environments
after:  metric extend abstract kernel instrument system distribut environ
before: General purpose parallel architectures
after:  gener purpo parallel architectur
before: Analysis of distributed commit protocols
after:  analysi distribut commit protocol
before: Petal: distributed virtual disks
after:  petal distribut virtual disk
before: Fine-grained mobility in the Emerald system
after:  fine grain mobil emerald system
before: Terra: a virtual machine-based platform for trusted computing
after:  terra virtual machin base platform trust comput
before: Models for studying concurrency control performance: alternatives and implications
after:  model studi concurr control perform altern im

after:  eventu consist transact
before: Characteristics of user file-usage patterns
after:  characterist user file usag pattern
before: The Imposition of Protocols Over Open Distributed Systems
after:  imposit protocol open distribut system
before: Art of Software Testing
after:  art softwar test
before: The impact of operating system structure on memory system performance
after:  impact oper system structur memori system perform
before: How to write parallel programs: a guide to the perplexed
after:  write parallel program guid perplex
before: STRIDER: A Black-box, State-based Approach to Change and Configuration Management and Support
after:  strider black box state base approach chang configur manag support
before: Eventually Consistent
after:  eventu consist
before: Input-output performance evaluation: self-scaling benchmarks, predicted performance
after:  input output perform evalu self scale benchmark predict perform
before: Self-stabilizing systems in spite of distributed contro

before: Software-controlled caches in the VMP multiprocessor
after:  softwar control cach vmp multiprocessor
before: Dynamic Programming
after:  dynam program
before: Fair Public-Key Cryptosystems
after:  fair public key cryptosystem
before: Merging Partitioned Databases
after:  merg partit databa
before: Multicast routing for multimedia communication
after:  multicast rout multimedia commun
before: Multiprocessing compactifying garbage collection
after:  multiprocess compactifi garbag collect
before: Scale and performance in the Denali isolation kernel
after:  scale perform denali isol kernel
before: Further comments on Dijkstra's concurrent programming control problem
after:  comment dijkstra concurr program control problem
before: Concurrent algorithms for search structures (parallel, database)
after:  concurr algorithm search structur parallel databa
before: An ad hoc approach to the implementation of polymorphism
after:  ad hoc approach implement polymorph
before: Public-key crypt

after:  experi evalu assumpt independ multiv program
before: A precise inter-procedural data flow algorithm
after:  preci inter procedur data flow algorithm
before: Fbufs: a high-bandwidth cross-domain transfer facility
after:  fbuf high bandwidth cross domain transfer facil
before: Elements of discrete mathematics (McGraw-Hill computer science series)
after:  element discret mathemat mcgraw hill comput scienc seri
before: Resource management for a medium scale time-sharing operating system
after:  resourc manag medium scale time share oper system
before: Building a Java virtual machine for server applications: the Jvm on 0S/390
after:  build java virtual machin server applic jvm 0s three hundr nineti
before: A framework for alternate queueing: towards traffic management by PC-UNIX based routers
after:  framework altern queue toward traffic manag pc unix base router
before: Simulating synchronized clocks and common knowledge in distributed systems
after:  simul synchron clock common kn

after:  reach agreement presenc fault
before: Lattice basis reduction: improved practical algorithms and solving subset sum problems
after:  lattic basi reduct improv practic algorithm solv subset sum problem
before: Specifying concurrent objects as communicating processes
after:  specifi concurr object commun process
before: Designing disk arrays for high data reliability
after:  design disk array high data reliabl
before: Ubiquitous B-Tree
after:  ubiquit tree
before: AnnoDomini: from type theory to Year 2000 conversion tool
after:  annodomini type theori year two thousand conver tool
before: Compiler transformations for high-performance computing
after:  compil transform high perform comput
before: WebL - a programming language for the Web
after:  webl program languag web
before: A proposal for a new block encryption standard
after:  propo new block encrypt standard
before: Experience with modularity in consul
after:  experi modular consul
before: The Gamma Database Machine Project


before: A simple typed intermediate language for object-oriented languages
after:  simpl type intermedi languag object orient languag
before: One-way functions are essential for complexity based cryptography
after:  one way function essenti complex base cryptographi
before: A Structured Approach to Redundant Disk Array Implementation
after:  structur approach redund disk array implement
before: The vulnerability of vote assignments
after:  vulner vote assign
before: Inside ODBC
after:  insid odbc
before: Computer networks
after:  comput network
before: The Interfaces Group MIB
after:  interfac group mib
before: Dynamo: amazon's highly available key-value store
after:  dynamo amazon highli avail key valu store
before: Reliability Issues in Computing System Design
after:  reliabl issu comput system design
before: Wait Depth Limited Concurrency Control
after:  wait depth limit concurr control
before: Consistency in Hierarchical Database Systems
after:  consist hierarch databa system
befor

before: How to generate cryptographically strong sequences of pseudo-random bits
after:  gener cryptograph strong sequenc pseudo random bit
before: Simulation Modeling and Analysis
after:  simul model analysi
before: The complexity of parallel algorithms
after:  complex parallel algorithm
before: Space-efficient online computation of quantile summaries
after:  space effici onlin comput quantil summari
before: Datagram routing for internet multicasting
after:  datagram rout internet multicast
before: Partitioned operation and distributed data base management system catalogs
after:  partit oper distribut data base manag system catalog
before: On Concurrency Control by Multiple Versions
after:  concurr control multipl version
before: Observations on the dynamics of a congestion control algorithm: the effects of two-way traffic
after:  observ dynam congest control algorithm effect two way traffic
before: Federated database systems for managing distributed, heterogeneous, and autonomous dat

after:  join data redistribut algorithm hypercub
before: Niagara: A 32-Way Multithreaded Sparc Processor
after:  niagara thirti two way multithread sparc processor
before: On rigorous Transaction Scheduling
after:  rigor transact schedul
before: Performance problems in BSD4. 4TCP
after:  perform problem bsd4 4tcp
before: Converting a swap-based system to do paging in an architecture lacking page-referenced bits
after:  convert swap base system page architectur lack page referenc bit
before: Prefetching from Broadcast Disks
after:  prefetch broadcast disk
before: Minimum disclosure proofs of knowledge
after:  minimum disclosur proof knowledg
before: Lifetime analysis of dynamically allocated objects
after:  lifetim analysi dynam alloc object
before: Selected results from the latest decade of quorum systems research
after:  select result latest decad quorum system research
before: Rethinking eventual consistency
after:  rethink eventu consist
before: Effects of communication latency, ove

after:  db2 parallel edit
before: A Non-Blocking Lightweight Implementation of Causal Order Message Delivery
after:  non block lightweight implement causal order messag deliveri
before: On key distribution systems
after:  key distribut system
before: Introduction to algorithms
after:  introduct algorithm
before: Trace-based mobile network emulation
after:  trace base mobil network emul
before: A Value Transmission Method for Abstract Data Types
after:  valu transmiss method abstract data type
before: Graph Theory With Applications
after:  graph theori applic
before: On understanding types, data abstraction, and polymorphism
after:  understand type data abstract polymorph
before: Public-key systems based on the difficulty of tampering (Is there a difference between DES and RSA?)
after:  public key system base difficulti tamper differ de rsa
before: On routes and multicast trees in the Internet
after:  rout multicast tree internet
before: Self-Monitoring and Self-Adapting Operating Syste

after:  consensu membership synchron asynchron distribut system
before: Bigtable: a distributed storage system for structured data
after:  bigtabl distribut storag system structur data
before: Drawing the Red Line in Java
after:  draw red line java
before: Design and evaluation of a conit-based continuous consistency model for replicated services
after:  design evalu conit base continu consist model replic servic
before: Marmot: an optimizing compiler for Java
after:  marmot optim compil java
before: The Probability of Load Balancing Success in a Homogeneous Network
after:  probabl load balanc success homogen network
before: Improving the start-up behavior of a congestion control scheme for TCP
after:  improv start behavior congest control scheme tcp
before: Update propagation protocols for replicated databates
after:  updat propag protocol replic datab
before: How to Make a Multiprocessor Computer That Correctly Executes Multiprocess Programs
after:  make multiprocessor comput correct

after:  applic parallel process numer weather predict
before: Resource overbooking and application profiling in shared hosting platforms
after:  resourc overbook applic profil share host platform
before: Distributed match-making for processes in computer networks (preliminary version)
after:  distribut match make process comput network preliminari version
before: Concurrent timestamping made simple
after:  concurr timestamp made simpl
before: Addition Chain Heuristics
after:  addit chain heurist
before: An Architecture for Differentiated Service
after:  architectur differenti servic
before: Server operating systems
after:  server oper system
before: Cache Performance of the SPEC92 Benchmark Suite
after:  cach perform spec92 benchmark suit
before: Run-time Support for Dynamic Load Balancing and Debugging in Paralex
after:  run time support dynam load balanc debug paralex
before: On hiding information from an oracle
after:  hide inform oracl
before: Basic Techniques for the Efficient Coo

before: Software Rejuvenation: Analysis, Module and Applications
after:  softwar rejuven analysi modul applic
before: Graph-theoretic analysis of structured peer-to-peer systems: routing distances and fault resilience
after:  graph theoret analysi structur peer peer system rout distanc fault resili
before: Alpha architecture reference manual
after:  alpha architectur refer manual
before: WebOS: Operating System Services for Wide Area Applications
after:  webo oper system servic wide area applic
before: Secure deletion of data from magnetic and solid-state memory
after:  secur delet data magnet solid state memori
before: Internetworking with TCP/IP (2nd ed.), vol. I
after:  internetwork tcp ip 2nd ed vol
before: Synchronization with eventcounts and sequencers
after:  synchron eventcount sequenc
before: An Overview of AspectJ
after:  overview aspectj
before: Classic Paxos vs. fast Paxos: caveat emptor
after:  classic paxo vs fast paxo caveat emptor
before: A unified approach to loop-free

after:  survey model parallel comput
before: A Performance Study of Alternative Object Faulting and Pointer Swizzling Strategies
after:  perform studi altern object fault pointer swizzl strategi
before: Making Pointer-Based Data Structures Cache Conscious
after:  make pointer base data structur cach consciou
before: Impact of Checkpoint Latency on Overhead Ratio of a Checkpointing Scheme
after:  impact checkpoint latenc overhead ratio checkpoint scheme
before: Witness-based cryptographic program checking and robust function sharing
after:  wit base cryptograph program check robust function share
before: Type dispatch for named hierarchical types
after:  type dispatch name hierarch type
before: An  Introduction to Database Systems
after:  introduct databa system
before: Computer architecture: a quantitative approach
after:  comput architectur quantit approach
before: Performance Modeling of Distributed and Replicated Databases
after:  perform model distribut replic databa
before: Hierar

after:  messag integr cryptograph protocol
before: Dataflow query processing using multiprocessor hash-partitioned algorithms (database, pipeline, parallelism)
after:  dataflow queri process use multiprocessor hash partit algorithm databa pipelin parallel
before: The eclipse operating system: providing quality of service via reservation domains
after:  eclip oper system provid qualiti servic via reserv domain
before: Symbolic model checking for asynchronous boolean programs
after:  symbol model check asynchron boolean program
before: Performance Evaluation of Multiversion Database Systems
after:  perform evalu multiv databa system
before: Replica determinism in distributed real-time systems: a brief survey
after:  replica determin distribut real time system brief survey
before: Finer Grained Concurrency for the Database Cache
after:  finer grain concurr databa cach
before: Collision-Resistant Hashing: Towards Making UOWHFs Practical
after:  colli resist hash toward make uowhf practic
b

before: Eventually consistent
after:  eventu consist
before: The R*-tree: an efficient and robust access method for points and rectangles
after:  tree effici robust access method point rectangl
before: WEAK-CONSISTENCY GROUP COMMUNICATION AND MEMBERSHIP (Ph.D. dissertation)
after:  weak consist group commun membership ph dissert
before: Astrolabe: A robust and scalable technology for distributed system monitoring, management, and data mining
after:  astrolab robust scalabl technolog distribut system monitor manag data mine
before: On the possibility and impossibility of achieving clock synchronization
after:  possibl imposs achiev clock synchron
before: Cooperative Task Management Without Manual Stack Management
after:  cooper task manag without manual stack manag
before: Synchronization in actor systems
after:  synchron actor system
before: A Methodology for Testing Intrusion Detection Systems
after:  methodolog test intru detect system
before: NFS version 4 Protocol
after:  nf versio

after:  approxim distanc oracl
before: The Soft Error Problem: An Architectural Perspective
after:  soft error problem architectur perspect
before: Relations Among Complexity Measures
after:  relat among complex measur
before: Predicate abstraction via symbolic decision procedures
after:  predic abstract via symbol deci procedur
before: Founding crytpography on oblivious transfer
after:  found crytpographi oblivi transfer
before: Data speculation support for a chip multiprocessor
after:  data specul support chip multiprocessor
before: Understanding Protocols for Byzantine Clock Synchronization
after:  understand protocol byzantin clock synchron
before: A Performance Comparison of Two Architectures for Fast Transaction Processing
after:  perform comparison two architectur fast transact process
before: Disk Paxos
after:  disk paxo
before: Kqueue - A Generic and Scalable Event Notification Facility
after:  kqueue gener scalabl event notif facil
before: Hybrid concurrency control for abstr

after:  proactiv recoveri byzantin fault toler system
before: Putting consistency back into eventual consistency
after:  put consist back eventu consist
before: Don't Be Lazy, Be Consistent: Postgres-R, A New Way to Implement Database Replication
after:  dont lazi consist postgr new way implement databa replic
before: A Case for NOW (Networks of Workstations)
after:  case network workstat
before: The Brave Little Toaster Meets Usenet
after:  brave littl toaster meet usenet
before: Bandera: extracting finite-state models from Java source code
after:  bandera extract finit state model java sourc code
before: An experiment in microprocessor-based distributed digital simulation
after:  experi microprocessor base distribut digit simul
before: NFS Version 3 Protocol Specification
after:  nf version protocol specif
before: Non-transitive transfer of confidence: A perfect zero-knowledge interactive protocol for SAT and beyond
after:  non transit transfer confid perfect zero knowledg interact p

before: Interconnection of broadband local area networks
after:  interconnect broadband local area network
before: Optimistic Virtual Synchrony
after:  optimist virtual synchroni
before: VMTP: a transport protocol for the next generation of communication systems
after:  vmtp transport protocol next gener commun system
before: TCP Congestion Control
after:  tcp congest control
before: BACKUP AND RECOVERY OF ON-LINE INFORMATION IN A COMPUTER
after:  backup recoveri line inform comput
before: The ethernet: a local area network: data link layer and physical layer specifications
after:  ethernet local area network data link layer physic layer specif
before: Writing Apache Modules with PERL and C
after:  write apach modul perl
before: Using & managing UUCP
after:  use manag uucp
before: A Dynamic Probe Strategy for Quorum Systems
after:  dynam probe strategi quorum system
before: A formal approach to recovery by compensating transactions
after:  formal approach recoveri compen transact
befor

after:  locat independ remot execut nest
before: Derflow: distributed deterministic dataflow programming for erlang
after:  derflow distribut determinist dataflow program erlang
before: Implementing a cache consistency protocol
after:  implement cach consist protocol
before: Some requirements for architectural support of software debugging
after:  requir architectur support softwar debug
before: Sequential consistency versus linearizability
after:  sequenti consist versu lineariz
before: Hash Functions Based on Block Ciphers and Quaternary Codes
after:  hash function base block cipher quaternari code
before: Component database systems: introduction, foundations, and overview
after:  compon databa system introduct foundat overview
before: Scalable Secure Storage when Half the System Is Faulty
after:  scalabl secur storag half system faulti
before: IPng: Internet protocol next generation
after:  ipng internet protocol next gener
before: Thread scheduling for multiprogrammed multiprocesso

before: Nsclick:: bridging network simulation and deployment
after:  nsclick bridg network simul deploy
before: Making tuple spaces safe for heterogeneous distributed systems
after:  make tupl space safe heterogen distribut system
before: Computation & communication in R: a distributed database manager
after:  comput commun distribut databa manag
before: Large Parallel Computers
after:  larg parallel comput
before: Report on the programming language Haskell: a non-strict, purely functional language version 1.2
after:  report program languag haskel non strict pure function languag version
before: Atomic Broadcast
after:  atom broadcast
before: Integrity constraints: semantics and applications
after:  integr constraint semant applic
before: Protocol boosters
after:  protocol booster
before: Computer Interconnection Structures: Taxonomy, Characteristics, and Examples
after:  comput interconnect structur taxonomi characterist exampl
before: Server recovery using naturally replicated state:

before: Interconnection networks: a survey and assessment
after:  interconnect network survey assess
before: Scalable Management and Data Mining Using Astrolabe
after:  scalabl manag data mine use astrolab
before: Building secure and reliable network applications
after:  build secur reliabl network applic
before: Computing on an anonymous ring
after:  comput anonym ring
before: Congestion avoidance in computer networks with a connectionless network layer
after:  congest avoid comput network connectionless network layer
before: Tag-free garbage collection using explicit type parameters
after:  tag free garbag collect use explicit type paramet
before: A combinatorial characterization of the distributed tasks which are solvable in the presence of one faulty processor
after:  combinatori charact distribut task solvabl presenc one faulti processor
before: Impact of communication networks on fault-tolerant distributed computing
after:  impact commun network fault toler distribut comput
befor

before: Dynamics of distributed shortest-path routing algorithms
after:  dynam distribut shortest path rout algorithm
before: Reaching consistency in unreliable distributed systems (synchronization, data consistency)
after:  reach consist unr distribut system synchron data consist
before: Molecular Objects, Abstract Data Types, and Data Models: A Framework
after:  molecular object abstract data type data model framework
before: Dedalus: datalog in time and space
after:  dedalu datalog time space
before: Distributed processes: a concurrent programming concept
after:  distribut process concurr program concept
before: Workshop on Run-Time Systems for Parallel Programming (RTSPP)
after:  workshop run time system parallel program rtspp
before: Fault Detection for Byzantine Quorum Systems
after:  fault detect byzantin quorum system
before: Building intrusion tolerant applications
after:  build intru toler applic
before: Internet X.509 Public Key Infrastructure Certificate Management Protocol

before: The importance of long-range dependence of VBR video traffic in ATM traffic engineering: myths and realities
after:  import long rang depend vbr video traffic atm traffic engin myth realiti
before: PNUTS: Yahoo!'s hosted data serving platform
after:  pnut yahoo host data serv platform
before: Guava: a dialect of Java without data races
after:  guava dialect java without data race
before: How slow is one gigabit per second?
after:  slow one gigabit per second
before: Secret sharing homomorphisms: keeping shares of a secret secret
after:  secret share homomorph keep share secret secret
before: Merging and extending the PGP and PEM trust models-the ICE-TEL trust model
after:  merg extend pgp pem trust model ice tel trust model
before: Simics: A Full System Simulation Platform
after:  simic full system simul platform
before: Practical guidelines for boosting Java server performance
after:  practic guidelin boost java server perform
before: ALMI: an application level multicast infra

after:  cookbook use model view control user interfac paradigm smalltalk eighti
before: The TWA reservation system
after:  twa reserv system
before: Total Order Multicast to Multiple Groups
after:  total order multicast multipl group
before: On the use and performance of content distribution networks
after:  use perform content distribut network
before: Secure Intrusion-tolerant Replication on the Internet
after:  secur intru toler replic internet
before: A history and evaluation of System R
after:  histori evalu system
before: Resilient distributed computing
after:  resili distribut comput
before: The consensus problem in fault-tolerant computing
after:  consensu problem fault toler comput
before: Trading packet headers for packet processing
after:  trade packet header packet process
before: The millicent protocols for electronic commerce
after:  millic protocol electron commerc
before: Improved algorithms for synchronizing computer network clocks
after:  improv algorithm synchron com

after:  analyz stabil wide area network perform
before: Software Model Checking
after:  softwar model check
before: The object database standard: ODMG 2.0
after:  object databa standard odmg
before: Estimating model discrepancy
after:  estim model discrep
before: Cilk: an efficient multithreaded runtime system
after:  cilk effici multithread runtim system
before: Preemptable remote execution facilities for the V-system
after:  preemptabl remot execut facil system
before: Piranha: a scalable architecture based on single-chip multiprocessing
after:  piranha scalabl architectur base singl chip multiprocess
before: Life or death at block-level
after:  life death block level
before: Composing Specifications
after:  compo specif
before: Parity-based loss recovery for reliable multicast transmission
after:  pariti base loss recoveri reliabl multicast transmiss
before: Necessary and sufficient conditions on information for causal message ordering and their optimal implementation
after:  necess

after:  distribut commun via global buffer
before: The Performance of Spin Lock Alternatives for Shared-Memory Multiprocessors
after:  perform spin lock altern share memori multiprocessor
before: Using MS-DOS 6.22
after:  use ms twenti two
before: A fast file system for UNIX
after:  fast file system unix
before: On network-aware clustering of Web clients
after:  network awar cluster web client
before: Analysis of the evolution of peer-to-peer systems
after:  analysi evolut peer peer system
before: A practical protocol for large group oriented networks
after:  practic protocol larg group orient network
before: A Chip Multithreaded Processor for Network-Facing Workloads
after:  chip multithread processor network face workload
before: Difficulties in simulating the internet
after:  difficulti simul internet
before: Riemann's Hypothesis and tests for primality
after:  riemann hypothesi test primal
before: Mitigating routing misbehavior in mobile ad hoc networks
after:  mitig rout misbehavi

after:  reduc instruct set comput
before: SafeStore: a durable and practical storage system
after:  safestor durabl practic storag system
before: Flash crowds and denial of service attacks: characterization and implications for CDNs and web sites
after:  flash crowd denial servic attack charact implic cdn web site
before: Dynamics of IP traffic: a study of the role of variability and the impact of control
after:  dynam ip traffic studi role variabl impact control
before: Concurrency in database systems: a simulation study
after:  concurr databa system simul studi
before: Some new attacks upon security protocols
after:  new attack upon secur protocol
before: Optimizing the migration of virtual computers
after:  optim migrat virtual comput
before: Effectiveness of Parallel Joins
after:  effect parallel join
before: Matching events in a content-based subscription system
after:  match event content base subscript system
before: Non-Interactive Generation of Shared Pseudorandom Sequences
af

after:  logic reason likelihood
before: An architecture for reliable decentralized systems
after:  architectur reliabl decentr system
before: The IceCube approach to the reconciliation of divergent replicas
after:  icecub approach reconcili diverg replica
before: Designing a Coordination Model for Open Systems
after:  design coordin model open system
before: Demonstrating that a public predicate can be satisfied without revealing any information about how
after:  demonstr public predic satisfi without reveal inform
before: Simulating (log/sup c/n)-wise independence in NC
after:  simul log sup wise independ nc
before: Plutus: Scalable Secure File Sharing on Untrusted Storage
after:  plutu scalabl secur file share untrust storag
before: Camelot and Avalon: a distributed transaction facility
after:  camelot avalon distribut transact facil
before: New Hybrid Fault Models for Asynchronous Approximate Agreement
after:  new hybrid fault model asynchron approxim agreement
before: Software Depe

after:  architectur surviv coordin larg distribut system
before: A lazy cache algorithm
after:  lazi cach algorithm
before: Generalized secret sharing and monotone functions
after:  gener secret share monoton function
before: Regeneration with Virtual Copies for Distributed Computing Systems
after:  regen virtual copi distribut comput system
before: Probability and Statistics with Reliability, Queuing and Computer Science Applications
after:  probabl statist reliabl queu comput scienc applic
before: RSA and Rabin functions: certain parts are as hard as the whole
after:  rsa rabin function certain part hard whole
before: PADS: a policy architecture for distributed storage systems
after:  pad polici architectur distribut storag system
before: Exploiting inter-operation parallelism in XPRS
after:  exploit inter oper parallel xpr
before: The Grid Protocol: A High Performance Scheme for Maintaining Replicated Data
after:  grid protocol high perform scheme maintain replic data
before: A new 

after:  effici line electron cash system base repres problem
before: How to play ANY mental game
after:  play mental game
before: Distributed FIFO allocation of identical resources using small shared space
after:  distribut fifo alloc ident resourc use small share space
before: Reliable broadband communication using a burst erasure correcting code
after:  reliabl broadband commun use burst erasur correct code
before: The modified object buffer: a storage management technique for object-oriented databases
after:  modifi object buffer storag manag techniqu object orient databa
before: Improving the performance of log-structured file systems with adaptive methods
after:  improv perform log structur file system adapt method
before: A methodology for database system performance evaluation
after:  methodolog databa system perform evalu
before: Deconstructing Commodity Storage Clusters
after:  deconstruct commod storag cluster
before: Atomic shared register access by asynchronous hardware
aft

before: Power-Aware Epidemics
after:  power awar epidem
before: The  MOSIX Distributed Operating System: Load Balancing for UNIX
after:  mosix distribut oper system load balanc unix
before: A study of three alternative workstation server architectures for object-oriented database systems
after:  studi three altern workstat server architectur object orient databa system
before: Disk Infant Mortality in Large Storage Systems
after:  disk infant mortal larg storag system
before: Fast and secure distributed read-only file system
after:  fast secur distribut read file system
before: TED—a language for modeling telecommunication networks
after:  ted—a languag model telecommun network
before: Riak DT map: a composable, convergent replicated dictionary
after:  riak dt map compo converg replic dictionari
before: An expressive, scalable type theory for certified code
after:  express scalabl type theori certifi code
before: Scalable kernel performance for internet servers under realistic loads
af

after:  size determin degr topolog
before: The SecureRing group communication system
after:  secur group commun system
before: A system for interprocess communication in a resource sharing computer network
after:  system interprocess commun resourc share comput network
before: WAVES: Automatic Synthesis of Client-Side Validation Code for Web Applications
after:  wave automat synthesi client side valid code web applic
before: The invisible computer
after:  invi comput
before: Counterexamples to a one writer multireader atomic variable construction of Burns and Peterson
after:  counterexampl one writer multiread atom variabl construct burn peterson
before: Paralex: an environment for parallel programming in distributed systems
after:  paralex environ parallel program distribut system
before: An Eye on Network Intruder-Administrator Shootouts
after:  eye network intrud administr shootout
before: High performance messaging on workstations: Illinois fast messages (FM) for Myrinet
after:  hi

after:  tempest typhoon user level share memori
before: Declarative routing: extensible routing with declarative queries
after:  declar rout exten rout declar queri
before: The case for profile-directed selection of garbage collectors
after:  case profil direct select garbag collector
before: Protocols for secure computations
after:  protocol secur comput
before: Experience with topaz telebugging
after:  experi topaz telebug
before: The &Ω key management service
after:  key manag servic
before: Wait-freedom vs. t-resiliency and the robustness of wait-free hierarchies (extended abstract)
after:  wait freedom vs resili robust wait free hierarchi extend abstract
before: Videoconferencing on the Internet
after:  videoconferenc internet
before: Delayed Internet routing convergence
after:  delay internet rout converg
before: Coyote: a system for constructing fine-grain configurable communication services
after:  coyot system construct fine grain configur commun servic
before: A Large Scale, 

after:  wireless data system standard servic
before: Hierarchical clustering: a structure for scalable multiprocessor operating system design
after:  hierarch cluster structur scalabl multiprocessor oper system design
before: Hardware support for fast capability-based addressing
after:  hardwar support fast capabl base address
before: Reducing &Ω to   ◊W 
after:  reduc ◊w
before: Query by Image and Video Content: The QBIC System
after:  queri imag video content qbic system
before: A Heuristically-Aided Algorithm for Mutual Exclusion in Distributed Systems
after:  heurist aid algorithm mutual exclu distribut system
before: Exploiting IP multicast in content-based publish-subscribe systems
after:  exploit ip multicast content base publish subscrib system
before: Sustaining cooperation in multi-hop wireless networks
after:  sustain cooper multi hop wireless network
before: Multicoordinated Paxos
after:  multicoordin paxo
before: Concepts and Notations for Concurrent Programming
after:  co

after:  lazi receiv process lrp network subsystem architectur server system
before: Parallel database systems: the future of high performance database systems
after:  parallel databa system futur high perform databa system
before: Boosting System Performance with Optimistic Distributed Protocols
after:  boost system perform optimist distribut protocol
before: Using fault injection and modeling to evaluate the performability of cluster-based services
after:  use fault inject model evalu perform cluster base servic
before: Statistics of mixed data traffic on a local area network
after:  statist mix data traffic local area network
before: Tussle in cyberspace: defining tomorrow's internet
after:  tussl cyberspac defin tomorrow internet
before: RMTP: a reliable multicast transport protocol
after:  rmtp reliabl multicast transport protocol
before: Factors in the performance of the AN1 computer network
after:  factor perform an1 comput network
before: Practical implementations of non-blockin

after:  share memori vs messag pass asynchron distribut environ
before: Parallel graph processing using depth-first search and breadth-first search.
after:  parallel graph process use depth first search breadth first search
before: High-speed switch scheduling for local-area networks
after:  high speed switch schedul local area network
before: Specifying Concurrent Program Modules
after:  specifi concurr program modul
before: Observing the effects of multi-zone disks
after:  observ effect multi zone disk
before: Tcl and the Tk toolkit
after:  tcl tk toolkit
before: The nesC language: A holistic approach to networked embedded systems
after:  nesc languag holist approach network emb system
before: Lazy consistency using loosely synchronized clocks
after:  lazi consist use loo synchron clock
before: Convertible Undeniable Signatures
after:  convert undeni signatur
before: Censorship resistant peer-to-peer content addressable networks
after:  censorship resist peer peer content address net

before: To CATOCS or not to CATOCS, that is the …
after:  catoc catoc
before: Grapevine: an exercise in distributed computing
after:  grapevin exerci distribut comput
before: Exokernel: an operating system architecture for application-level resource management
after:  exokernel oper system architectur applic level resourc manag
before: Managing Wire Delay in Large Chip-Multiprocessor Caches
after:  manag wire delay larg chip multiprocessor cach
before: The ensemble system
after:  ensembl system
before: On the Time Required to Perform Addition
after:  time requir perform addit
before: HFS: a performance-oriented flexible file system based on building-block compositions
after:  hf perform orient flexibl file system base build block composit
before: Fault Tolerant Video on Demand Services
after:  fault toler video demand servic
before: Performance of multihop wireless networks: shortest path is not enough
after:  perform multihop wireless network shortest path enough
before: Hancock: a la

after:  transact chop algorithm perform studi
before: Generation and analysis of very long address traces
after:  gener analysi long address trace
before: Scalable Update Propagation in Epidemic Replicated Databases
after:  scalabl updat propag epidem replic databa
before: Predictive Application-Performance Modeling in a Computational Grid Environment
after:  predict applic perform model comput grid environ
before: Locking performance in centralized databases
after:  lock perform central databa
before: Stochastic models for the Web graph
after:  stochast model web graph
before: Using Programmer-Written Compiler Extensions to Catch Security Holes
after:  use programm written compil exten catch secur hole
before: Software structures for ultraparallel computing
after:  softwar structur ultraparallel comput
before: Application-level document caching in the Internet
after:  applic level document cach internet
before: Statistical analysis of simulation output data
after:  statist analysi sim

after:  agil applic awar adapt mobil
before: Concurrent Certifications by Intervals of Timestamps in Distributed Database Systems
after:  concurr certif interv timestamp distribut databa system
before: Detection of Mutual Inconsistency in Distributed Systems
after:  detect mutual inconsist distribut system
before: The case for geographical push-caching
after:  case geograph push cach
before: The Virtue of Patience: Concurrent Programming with and Without Waiting
after:  virtu patienc concurr program without wait
before: Converting Lamport's regular register to atomic register
after:  convert lamport regular regist atom regist
before: The fuzzball
after:  fuzzbal
before: Efficient and accurate cost models for parallel query optimization (extended abstract)
after:  effici accur cost model parallel queri optim extend abstract
before: Implementing Distributed Read-Only Transactions
after:  implement distribut read transact
before: Interconnecting a distributed processor system for avionics

before: Design alternatives for scalable Web server accelerators
after:  design altern scalabl web server accel
before: User-mode Linux
after:  user mode linux
before: Pushing the CAP: Strategies for Consistency and Availability
after:  push cap strategi consist avail
before: Protocol Complications with the IP Network Address Translator
after:  protocol complic ip network address translat
before: QuickCheck: a lightweight tool for random testing of Haskell programs
after:  quickcheck lightweight tool random test haskel program
before: Routing, merging, and sorting on parallel models of computation
after:  rout merg sort parallel model comput
before: On the inclusion properties for multi-level cache hierarchies
after:  inclu properti multi level cach hierarchi
before: System Deadlocks
after:  system deadlock
before: On per-session end-to-end delay distributions and the call admission problem for real-time applications with QOS requirements
after:  per session end end delay distribut cal

before: Parallel algorithms and architectures for rule-based systems
after:  parallel algorithm architectur rule base system
before: Digital Signatures for Flows and Multicasts
after:  digit signatur flow multicast
before: An overview of the mesa processor architecture
after:  overview mesa processor architectur
before: FATE and DESTINI: a framework for cloud recovery testing
after:  fate destini framework cloud recoveri test
before: Parallel Programming Using Shared Objects and Broadcasting
after:  parallel program use share object broadcast
before: Preemptive scheduling under time and resource constraints
after:  preemptiv schedul time resourc constraint
before: NAMING AND SYNCHRONIZATION IN A DECENTRALIZED COMPUTER SYSTEM
after:  name synchron decentr comput system
before: Communicating sequential processes
after:  commun sequenti process
before: LFSR-based Hashing and Authentication
after:  lfsr base hash authent
before: Understanding transactions in the operating system context
af

after:  transact process concept techniqu
before: Correctness of a gossip based membership protocol
after:  correct gossip base membership protocol
before: Counting networks and multi-processor coordination
after:  count network multi processor coordin
before: The primary-backup approach
after:  primari backup approach
before: OS Support for General-Purpose Routers
after:  os support gener purpo router
before: Optimality of wait-free atomic multiwriter variables
after:  optim wait free atom multiwrit variabl
before: A generalization of the divide-sort-merge strategy for sorting networks
after:  gener divid sort merg strategi sort network
before: Filing and printing services on a local-area network
after:  file print servic local area network
before: The anatomy of a large-scale hypertextual Web search engine
after:  anatomi larg scale hypertextu web search engin
before: End-to-end routing behavior in the Internet
after:  end end rout behavior internet
before: SESAME V2 public key and a

after:  flip coin mani pocket byzantin agreement uniformli random valu
before: Directory Reference Patterns in Hierarchical File Systems
after:  directori refer pattern hierarch file system
before: Scalability and failure recovery in a linux cluster file system
after:  scalabl failur recoveri linux cluster file system
before: Simulating fail-stop in asynchronous distributed systems
after:  simul fail stop asynchron distribut system
before: The synthesis of digital machines with provable epistemic properties
after:  synthesi digit machin provabl epistem properti
before: Memory system characterization of commercial workloads
after:  memori system charact commerci workload
before: Experimental queueing analysis with long-range dependent packet traffic
after:  experi queue analysi long rang depend packet traffic
before: Anatomy of the NHFSSTONES benchmarks
after:  anatomi nhfsstone benchmark
before: Concurrent manipulation of binary search trees
after:  concurr manipul binari search tree
b

after:  analysi oper system behavior simultan multithread architectur
before: Soft updates: a technique for eliminating most synchronous writes in the fast filesystem
after:  soft updat techniqu elimin synchron write fast filesystem
before: On the use of registers in achieving wait-free consensus
after:  use regist achiev wait free consensu
before: Loadable Kernel Modules
after:  loadabl kernel modul
before: Hashing Methods and Relational Algebra Operations
after:  hash method relat algebra oper
before: Analysis of query processing in distributed database systems with fully replicated files: hierarchical approach
after:  analysi queri process distribut databa system fulli replic file hierarch approach
before: Automatic node selection for high performance applications on networks
after:  automat node select high perform applic network
before: Realistic BGP traffic for test labs
after:  realist bgp traffic test lab
before: Inductive Definitions in the system Coq - Rules and Properties
af

after:  jdbc databa access java tutori annot refer
before: Cyber defense technology networking and evaluation
after:  cyber defen technolog network evalu
before: The available capacity of a privately owned workstation environment
after:  avail capac privat workstat environ
before: The ANSA project and standards
after:  ansa project standard
before: Boxwood: abstractions as the foundation for storage infrastructure
after:  boxwood abstract foundat storag infrastructur
before: The Delay Due to Dynamic Two-Phase Locking
after:  delay due dynam two phase lock
before: Memory coherence in shared virtual memory systems
after:  memori coher share virtual memori system
before: Direct spatial search on pictorial databases using packed R-trees
after:  direct spatial search pictori databa use pack tree
before: Language support for fast and reliable message-based communication in singularity OS
after:  languag support fast reliabl messag base commun singular os
before: Complete Freebsd
after:  comp

after:  design implement ddh distribut dynam hash algorithm
before: Generalized FLP impossibility result for t-resilient asynchronous computations
after:  gener flp imposs result resili asynchron comput
before: Time-adaptive algorithms for synchronization
after:  time adapt algorithm synchron
before: An integrated experimental environment for distributed systems and networks
after:  integr experi environ distribut system network
before: Incremental cryptography and application to virus protection
after:  increment cryptographi applic viru protect
before: Object and File Management in the EXODUS Extensible Database System
after:  object file manag exodu exten databa system
before: A new scheme for memory-efficient probabilistic verification
after:  new scheme memori effici probabilist verif
before: The CRISIS wide area security architecture
after:  crisi wide area secur architectur
before: Symmetric multiprocessing in Solaris 2.0
after:  symmetr multiprocess solari
before: Data caching 

after:  work set model program behavior
before: An Evaluation of Linear Models for Host Load Prediction
after:  evalu linear model host load predict
before: Superdatabases for Composition of Heterogeneous Databases
after:  superdataba composit heterogen databa
before: A theory of atomic transactions
after:  theori atom transact
before: An Implementation of a Remote Procedure Call Protocol in the Berkeley
after:  implement remot procedur call protocol berkeley
before: Termination proofs for systems code
after:  termin proof system code
before: The Totem multiple-ring ordering and topology maintenance protocol
after:  totem multipl ring order topolog mainten protocol
before: Specification-based anomaly detection: a new approach for detecting network intrusions
after:  specif base anomali detect new approach detect network intru
before: Maintaining order in a generalized linked list
after:  maintain order gener link list
before: TCP/IP illustrated (vol. 3): TCP for transactions, HTTP, NNT

after:  self secur storag protect data compromi system
before: Characteristics of files in NFS environments
after:  characterist file nf environ
before: Lazy caching
after:  lazi cach
before: Bounded quantification is undecidable
after:  bound quantif undecid
before: Improving fast mutual exclusion
after:  improv fast mutual exclu
before: Automatic Event-Stream Notarization Using Digital Signatures
after:  automat event stream notar use digit signatur
before: Specifying Reusable Components Using Contracts
after:  specifi reusabl compon use contract
before: Limitations of the Kerberos authentication system
after:  limit kerbero authent system
before: S/390 CMOS server I/O: the continuing evolution
after:  three hundr nineti cmo server continu evolut
before: Towards a theory of type structure
after:  toward theori type structur
before: Consistency and orderability: semantics-based correctness criteria for databases
after:  consist order semant base correct criteria databa
before: KLEE: u

before: Devil: an IDL for hardware programming
after:  devil idl hardwar program
before: Protection in an information processing utility
after:  protect inform process util
before: On the cryptographic applications of random functions
after:  cryptograph applic random function
before: NBMA Next Hop Resolution Protocol (NHRP)
after:  nbma next hop resolut protocol nhrp
before: A Continuous Media I/O Server and Its Synchronization Mechanism
after:  continu media server synchron mechan
before: Lightweight causal and atomic group multicast
after:  lightweight causal atom group multicast
before: Using process groups to implement failure detection in asynchronous environments
after:  use process group implement failur detect asynchron environ
before: Advances in Network Simulation
after:  advanc network simul
before: Concurrency control in groupware systems
after:  concurr control groupwar system
before: ZooKeeper: wait-free coordination for internet-scale systems
after:  zookeep wait free c

before: An experiment using registers for fast message-based interprocess communication
after:  experi use regist fast messag base interprocess commun
before: Towards robust distributed systems (abstract)
after:  toward robust distribut system abstract
before: Using Failure Detectors to Solve Consensus in Asynchronous Sharde-Memory Systems (Extended Abstract)
after:  use failur detector solv consensu asynchron shard memori system extend abstract
before: A Modular Approach to Fault-Tolerant Broadcasts and Related Problems
after:  modular approach fault toler broadcast relat problem
before: Working with Persistent Objects: To Swizzle or Not to Swizzle
after:  work persist object swizzl swizzl
before: The cache coherence problem in shared-memory multiprocessors
after:  cach coher problem share memori multiprocessor
before: Understanding The Linux Kernel
after:  understand linux kernel
before: Sticky bits and universality of consensus
after:  sticki bit univ consensu
before: The parallel I

after:  totem reliabl order deliveri protocol interconnect local area network
before: Using pathchar to estimate Internet link characteristics
after:  use pathchar estim internet link characterist
before: A digital signature scheme secure against adaptive chosen-message attacks
after:  digit signatur scheme secur adapt chosen messag attack
before: Towards an evolvable internet architecture
after:  toward evolv internet architectur
before: A Case Study of CES: A Distributed Collaborative Editing System Implemented in Argus
after:  case studi ce distribut collabor edit system implement argu
before: Atomicity versus Anonymity: Distributed Transactions for Electronic Commerce
after:  atom versu anonym distribut transact electron commerc
before: Antisocial Agents and Vickrey Auctions
after:  antisoci agent vickrey auction
before: Modeling and Analysis of Distributed Database Concurrency Control Algorithms Using an Extended Petri Net Formalism
after:  model analysi distribut databa concurr c

before: Distributed genetic algorithms
after:  distribut genet algorithm
before: Resource pricing and the evolution of congestion control
after:  resourc price evolut congest control
before: ACC: using active networking to enhance feedback congestion control mechanisms
after:  acc use activ network enhanc feedback congest control mechan
before: Speeding Lamport's fast mutual exclusion algorithm
after:  speed lamport fast mutual exclu algorithm
before: The implementation and performance of compressed databases
after:  implement perform compress databa
before: Lasp: a language for distributed, coordination-free programming
after:  lasp languag distribut coordin free program
before: A comparison of file system workloads
after:  comparison file system workload
before: Stable Leader Election
after:  stabl leader elect
before: Replica control protocols that guarantee high availability and low access cost
after:  replica control protocol guarant high avail low access cost
before: The Performa

before: Type-preserving garbage collectors
after:  type preserv garbag collector
before: Performance of the Firefly RPC
after:  perform firefli rpc
before: Policies and Patterns for High-Performance, Real-Time Object Request Brokers
after:  polici pattern high perform real time object request broker
before: An Optimal Probabilistic Protocol for Synchronous Byzantine Agreement
after:  optim probabilist protocol synchron byzantin agreement
before: Avoiding deadlock in distributed data bases
after:  avoid deadlock distribut data base
before: Windows NT/2000 Native API Reference
after:  window nt two thousand nativ api refer
before: A Secure Group Membership Protocol
after:  secur group membership protocol
before: Transaction security system
after:  transact secur system
before: Understanding and using asynchronous message passing (Preliminary Version)
after:  understand use asynchron messag pass preliminari version
before: A dynamic network architecture
after:  dynam network architectur
b

before: A “paradoxical” identity-based signature scheme resulting from zero-knowledge
after:  paradox ident base signatur scheme result zero knowledg
before: A remark on signature scheme where forgery can be proved
after:  remark signatur scheme forgeri prove
before: A cost-effective, high-bandwidth storage architecture
after:  cost effect high bandwidth storag architectur
before: Secure History Preservation Through Timeline Entanglement
after:  secur histori preserv timelin entangl
before: Queue-based multi-processing LISP
after:  queue base multi process lisp
before: Self-testing/correcting with applications to numerical problems
after:  self test correct applic numer problem
before: Axioms for concurrent objects
after:  axiom concurr object
before: Sharing memory robustly in message-passing systems
after:  share memori robustli messag pass system
before: The Globus toolkit
after:  globu toolkit
before: Relational Database Systems: Where We Stand Today
after:  relat databa system sta

after:  system type assembl languag
before: Attacking the process migration bottleneck
after:  attack process migrat bottleneck
before: A Mechanism for Establishing Policies for Electronic Commerce
after:  mechan establish polici electron commerc
before: On the space complexity of randomized synchronization
after:  space complex random synchron
before: Active Database Systems: Triggers and Rules for Advanced Database Processing
after:  activ databa system trigger rule advanc databa process
before: Using stålmarck’s algorithm to prove inequalities
after:  use stålmarck algorithm prove inequ
before: A lattice model of secure information flow
after:  lattic model secur inform flow
before: Measurement, modeling, and analysis of a peer-to-peer file-sharing workload
after:  measur model analysi peer peer file share workload
before: Performance isolation: sharing and isolation in shared-memory multiprocessors
after:  perform isol share isol share memori multiprocessor
before: Cello: a disk sc

after:  secret share infinit domain extend abstract
before: BGP Route Flap Damping
after:  bgp rout flap damp
before: Improved query performance with variant indexes
after:  improv queri perform variant index
before: Query Optimization in Database Systems
after:  queri optim databa system
before: Achieving scalability and expressiveness in an Internet-scale event notification service
after:  achiev scalabl express internet scale event notif servic
before: Simplifying fault-tolerance: providing the abstraction of crash failures
after:  simplifi fault toler provid abstract crash failur
before: Skew-aware automatic database partitioning in shared-nothing, parallel OLTP systems
after:  skew awar automat databa partit share noth parallel oltp system
before: Design & analysis of fault tolerant digital systems
after:  design analysi fault toler digit system
before: An evaluation of redundant arrays of disks using an Amdahl 5890
after:  evalu redund array disk use amdahl five thousand eight hu

after:  univ oper unari versu binari
before: The Cedar file system
after:  cedar file system
before: Distributed RAID - A New Multiple Copy Algorithm
after:  distribut raid new multipl copi algorithm
before: Correct memory operation of cache-based multiprocessors
after:  correct memori oper cach base multiprocessor
before: Reducing multidatabase query response time by tree balancing
after:  reduc multidataba queri respon time tree balanc
before: A Study of Index Structures for Main Memory Database Management Systems
after:  studi index structur main memori databa manag system
before: Real-time computing with lock-free shared objects
after:  real time comput lock free share object
before: Replication Techniques in Distributed Systems
after:  replic techniqu distribut system
before: Client-Server Paradise
after:  client server paradi
before: Holding intruders accountable on the Internet
after:  hold intrud account internet
before: Operating system protection for fine-grained programs
aft

after:  exploit punctuat semant continu data stream
before: IRM Enforcement of Java Stack Inspection
after:  irm enforc java stack inspect
before: A universal access smart-card-based secure file system
after:  univ access smart card base secur file system
before: Design and Implementation of a Distributed X-Multiplexor
after:  design implement distribut multiplexor
before: Main memory database recovery
after:  main memori databa recoveri
before: Coherent functions and program checkers
after:  coher function program checker
before: The Smalltalk-76 programming system design and implementation
after:  smalltalk seventi six program system design implement
before: A scalable content-addressable network
after:  scalabl content address network
before: An Improvement of the Fiat-Shamir Identification and Signature Scheme
after:  improv fiat shamir identif signatur scheme
before: Asynchronous fault-tolerant total ordering algorithms
after:  asynchron fault toler total order algorithm
before: A

after:  framework interprocedur optim presenc dynam class load
before: Query processing in main memory database management systems
after:  queri process main memori databa manag system
before: Economical solutions for the critical section problem in a distributed system (Extended Abstract)
after:  econom solut critic section problem distribut system extend abstract
before: Design and evaluation of a continuous consistency model for replicated services
after:  design evalu continu consist model replic servic
before: Combinatorics: set systems, hypergraphs, families of vectors, and combinatorial probability
after:  combin set system hypergraph famili vector combinatori probabl
before: An adaptive quality of service aware middleware for replicated services
after:  adapt qualiti servic awar middlewar replic servic
before: Concurrent Timestamping Made Simple
after:  concurr timestamp made simpl
before: Towards a theory of software protection and simulation by oblivious RAMs
after:  toward t

{'complex': 56,
 'perfect': 4,
 'zero': 21,
 'knowledg': 56,
 'lotteri': 2,
 'stride': 3,
 'schedul': 61,
 'flexibil': 1,
 'proport': 5,
 'share': 202,
 'resourc': 70,
 'manag': 200,
 'investig': 5,
 'document': 15,
 'world': 39,
 'wide': 62,
 'web': 80,
 'perform': 310,
 'multiv': 11,
 'distribut': 670,
 'two': 43,
 'phase': 13,
 'lock': 50,
 'concurr': 182,
 'control': 189,
 'mechan': 67,
 'databa': 362,
 'benchmark': 37,
 'anomali': 10,
 'base': 259,
 'detect': 76,
 'system': 1156,
 'causal': 22,
 'relationship': 7,
 'comput': 344,
 'search': 42,
 'holi': 1,
 'grail': 1,
 'research': 15,
 'problem': 97,
 'data': 327,
 'wareh': 8,
 'main': 14,
 'memori': 174,
 'overview': 30,
 'valuat': 1,
 'trust': 26,
 'open': 21,
 'network': 438,
 'reput': 9,
 'approach': 131,
 'choo': 4,
 'reliabl': 111,
 'peer': 59,
 'design': 172,
 'computer—th': 1,
 'six': 6,
 'thousand': 20,
 'hundr': 37,
 'fail': 14,
 'stop': 7,
 'faulti': 12,
 'program': 274,
 'empir': 15,
 'studi': 59,
 'competitv': 1,
 's

# We Form the TFIDF valued TD-matrix

In [145]:
from collections import Counter

doc = 0
tf_idf = {}  # Initializing the matrix
showed = {}
for d in range(N):  # For all documents
    tokens = processed_corpus[d]
    counter = Counter(tokens)  # counts unique tokens in the tokens array
    # and creates a dictionary of tokens counts
    words_count = len(tokens)
    for token in np.unique(tokens):  # sorted unique tokens
        tf = counter[token]  # /words_count
        logtf = 1 + np.log2(tf)  # log
        if token in DF:
            df = DF[token]
        else:
            df = 0
        idf = np.log2(1 + (N / df))  # log

        tf_idf[d, token] = (
            logtf * idf
        )  # tf_idf is implemented as tuple-keyed dictionary
doc += 1
# Printing an example from the Matrix: the TFIDF value of the word "Automated" in document 1
print(tf_idf)

{(0, 'complex'): 6.708591257051913, (0, 'knowledg'): 6.708591257051913, (0, 'perfect'): 10.50308035175293, (0, 'zero'): 8.114981701344005, (1, 'flexibil'): 12.502334580197997, (1, 'lotteri'): 11.502583213553354, (1, 'manag'): 4.9071310247467945, (1, 'proport'): 10.1814007617393, (1, 'resourc'): 6.3901075244029935, (1, 'schedul'): 6.586439915126655, (1, 'share'): 4.893256469875102, (1, 'stride'): 10.917869303345588, (2, 'document'): 8.598920958311076, (2, 'investig'): 10.1814007617393, (2, 'web'): 6.199917680274569, (2, 'wide'): 6.563227031009598, (2, 'world'): 7.2263504349051315, (3, 'concurr'): 5.038858707456259, (3, 'control'): 4.986097863455075, (3, 'databa'): 4.0895711886854045, (3, 'distribut'): 6.543511353072951, (3, 'lock'): 6.870611313044545, (3, 'mechan'): 6.452563965215617, (3, 'multiv'): 9.045387368995543, (3, 'perform'): 4.301068360412807, (3, 'phase'): 8.804875637995146, (3, 'two'): 7.086475708101401, (4, 'anomali'): 9.182642644437047, (4, 'base'): 4.5482937906737595, (4, 

# Simple matching of a query 
## add up term - TFIDF scores for each doc

In [146]:
from collections import deque

outtable_simple = []


def matching_score(query):
    # Process the query just like you processed the documents
    # remove punctuation
    processed_tokens = preprocess(query)

    tokens = processed_tokens
    print("Matching Score")
    print("\nQuery:", query)
    print("tokens:")
    print(tokens)

    query_weights = {}
    # Simply add up the tfidfs for the words in the documents they index.
    for key in tf_idf:
        # remember, key is composed of
        if key[1] in tokens:
            try:
                print("key[1]", key[1])
                if key[0] not in query_weights:
                    query_weights[key[0]]=0.0
                query_weights[key[0]] += tf_idf[key]  # Accummulate and add tfidf-values for the term in each document
            except:
                print("exception:", key, tf_idf[key])
                query_weights[key[0]] = tf_idf[key]  #
            # print("query_weights["+str(key[0])+"]="+str(query_weights[key[0]]))
    # Sort the resulting weights to give us a ranked list
    query_weights = sorted(query_weights.items(), key=lambda x: x[1], reverse=True)

    # print("query_weights", query_weights)

    l = []
    qw = []
    # List the first 10 matching documents
    for i in query_weights[:10]:
        l.append(i[0])
        qw.append(i[1])
    queue = deque(qw)
    outtable_simple.append(["Query: ", "'"+query+"'", ""])
    outtable_simple.append(["doc_nr", "doc", "score"])
    for d in l:
        score = queue.popleft()
        outtable_simple.append([d, corpus[d], score])
        # print(d,corpus[d], score)

In [147]:
#matching_score("Without the drive of Rebeccah's insistence, Kate lost her momentum. She stood next a slatted oak bench, canisters still clutched, surveying")
query="Operating system"
matching_score(query)

display(HTML(tabulate.tabulate(outtable_simple, tablefmt='html')))


before: Operating system
after:  oper system
Matching Score

Query: Operating system
tokens:
['oper', 'system']
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] oper
key[1] system
key[1] system
key[1] system
key[1] oper
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] oper
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] oper
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] oper
key[1] system
key[1] system
key[1] system
ke

key[1] system
key[1] system
key[1] oper
key[1] system
key[1] system
key[1] system
key[1] system
key[1] oper
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] oper
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] oper
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] oper
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] oper
key[1] system
key[1] system
key[1] system
key[1] system
key[1] oper
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] system
key[1] oper
key[1] system
key[1] system
key[1] system
key[1] oper
key[1] sys

0,1,2
Query:,'Operating system',
doc_nr,doc,score
4161,"('papers/1098638.txt', 'The Cambridge CAP computer and its operating system (Operating and programming systems series)')",15.256365260645481
1315,"('papers/1268686.txt', 'Extending the operating system at the user level: the Ufo global file system')",10.21750655318922
1330,"('papers/168629.txt', 'The impact of operating system structure on memory system performance')",10.21750655318922
1516,"('papers/121911.txt', 'LOCUS operating system, a transparent system')",10.21750655318922
4976,"('papers/325090.txt', 'Pros and cons of operating system transactions for data base systems')",10.21750655318922
72,"('papers/822430.txt', 'Operating System Directions for the Next Millennium')",7.6281826303227405
84,"('papers/562353.txt', 'Operating System Concepts, 4th Ed.')",7.6281826303227405
110,"('papers/808449.txt', 'On attaining reliable software for a secure operating system')",7.6281826303227405


In [148]:
# Using the numpy.linalg package to multiply the lengths of the vectors
def cosine_sim(a, b):
    cos_sim = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
    return cos_sim

# creating a TD numpy-matrix D, with tfidf-values
### For mathematical calculations, it is much better to use the numpy-package.<br> For this we need to reform the tf_idf matrix into a numpy matrix. We call it D

In [149]:
total_vocab = [x for x in DF]
total_vocab_size = len(DF)

print()
D = np.zeros((N, total_vocab_size))
for tpl in tf_idf:  # tpl is a tuple (tpl[0]: document number tpl[1] term)
    try:
        ind = total_vocab.index(tpl[1])
        D[tpl[0]][ind] = tf_idf[tpl]
    except:
        print("passed")
        pass
print(tpl)


(5800, 'process')


# generating a vector of tokens 
##  for example a query vector
### This vector can be "cosined" with all the document vectors,<br> to get the similarities, and rank by them.

In [150]:
import math


def gen_vector(tokens):
    # We generate a vector of tfidf values the vocabulary from the keys of the DF dictionary
    total_vocab = [x for x in DF]
    print(total_vocab)
    Q = np.zeros((len(total_vocab)))

    counter = Counter(tokens)
    words_count = len(tokens)

    query_weights = {}

    for token in np.unique(tokens):
        """
        tf = (
            counter[token] / words_count
        )  # The local weight: frequency of the token in the vector

        # not all query vectors are represented in the vocabulary
        if token in DF:
            df = DF[token]  # DF is the global weight of the term
        else:
            df = 0
        idf = math.log2((N + 1) / (df + 1))  # log
        """
        tf = counter[token]  # /words_count
        logtf = 1 + np.log2(tf)  # log
        if token in DF:
            df = DF[token]
        else:
            df = 0
        idf = np.log2(1 + (N / df))  # log

        try:
            ind = total_vocab.index(token)
            Q[ind] = tf * idf
        except:
            pass
    return Q

In [154]:
outtable_cos = []


def cosine_similarity(query, D=D):
    # Create an array of cosine values
    print("Cosine Similarity")
    preprocessed_query = preprocess(query)
    # tokens = word_tokenize(str(preprocessed_query))
    tokens = preprocessed_query
    print("\nQuery:", query)
    print("")
    print(tokens)
    # print("D=", type(D))

    d_cosines = []

    query_vector = gen_vector(tokens)
    for q in query_vector:
        print(q)
    # We go through all vectors in the TD (tfidf) matrix D
    for d in D:
        cs = cosine_sim(query_vector, d)
        if np.isnan(cs):
            cs = np.float_(-10e3)
        d_cosines.append(cs)

    # argsort() returns the indexes that would sort the array.
    ## sorts by the cosines, but returns the indexes (document numbers, the first 10.)
  
    out = np.array(d_cosines).argsort()[-10:][::-1]
    outtable_cos.append(["Query: ", "'"+query+"'", ""])
    outtable_cos.append(["doc_nr", "doc", "score"])

    for d in out:
        outtable_cos.append([d, corpus[d], d_cosines[d]])

In [155]:
# cosine_similarity("Without the drive of Rebeccah's insistence, Kate lost her momentum. She stood next a slatted oak bench, canisters still clutched, surveying")
cosine_similarity(query)

Cosine Similarity
before: Operating system
after:  oper system

Query: Operating system

['oper', 'system']
['complex', 'perfect', 'zero', 'knowledg', 'lotteri', 'stride', 'schedul', 'flexibil', 'proport', 'share', 'resourc', 'manag', 'investig', 'document', 'world', 'wide', 'web', 'perform', 'multiv', 'distribut', 'two', 'phase', 'lock', 'concurr', 'control', 'mechan', 'databa', 'benchmark', 'anomali', 'base', 'detect', 'system', 'causal', 'relationship', 'comput', 'search', 'holi', 'grail', 'research', 'problem', 'data', 'wareh', 'main', 'memori', 'overview', 'valuat', 'trust', 'open', 'network', 'reput', 'approach', 'choo', 'reliabl', 'peer', 'design', 'computer—th', 'six', 'thousand', 'hundr', 'fail', 'stop', 'faulti', 'program', 'empir', 'studi', 'competitv', 'spin', 'multiprocessor', 'bug', 'deviant', 'behavior', 'gener', 'infer', 'error', 'code', 'structur', 'parallel', 'algorithm', 'step', 'toward', 'cach', 'resid', 'transact', 'process', 'risc', 'group', 'logic', 'vs', 'physic

0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0


query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.09596087243655117
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [6.70859126 0.         0.         ... 0.         0.         0.        ]
cs 0.09122187017797162
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.17931171402894036
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
c

cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [6.70859126 0.         0.         ... 0.         0.         0.        ]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.

cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.06186985138574436
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.04890443828818333
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.059029755477478484
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0

query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.20536575387445885
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.04722298871014148
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.07952022151387007
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.11665662978140309
query_vector [0. 0. 0. ...

cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0722755181942937
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.11095699909094718
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ...

d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.05886889523161555
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.06559487930935787
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.08060843130707171
query_vec

cs 0.08665978864095795
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.3828221570680903
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0.        0.        8.1149817 ... 0.        0.        0.       ]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs nan
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.09279522837111893
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.055278590501210124
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0

  This is separate from the ipykernel package so we can avoid doing imports until



cs 0.09414471227257101
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.06952828138891362
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.09385485476380519
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.06024414359419056
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.13269996582967944
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
qu

query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 

cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0525977162883074
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0770327697665162
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.45784657659172295
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]


query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.05719053042396762
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.05729923346456658
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0.

query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 

d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0

d [6.70859126 0.         0.         ... 0.         0.         0.        ]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [6.70859126 0.         0.         ... 0.         0.         0.        ]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0

query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.09146515273937741
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.45261346456493784
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.05164340457396479
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.07428486826902342
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ...

query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.10345586627726695
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
quer

query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [6.70859126 0.         0.         ... 0.         0.         0.        ]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0.

d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.3914676407703433
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.07931390947351526
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.054422954555162224
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.35107048209973857
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]


query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.07901610267845191
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
quer

query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.07018231294724432
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.080459

cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.07885977010561532
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.07360122658654412
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ..

query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.08969897972593224
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.062116942436501385
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0

query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.05738754820308103
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.06051912248756048
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0.

cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.053131963662228186
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.06170655007271547
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.07900323124414019
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0

query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.04410458992475792
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.07662675666126112
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.08324136801036512
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.05980120965651426
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.10399334307457368
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vect

d [0. 0. 0. ... 0. 0. 0.]
cs 0.08267847120633642
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.07790816534839105
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ..

query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 

d [0. 0. 0. ... 0. 0. 0.]
cs 0.05316875422482601
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.06392666558499568
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.062098467201689586
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_ve

query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.32913461936671257
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.09507879286866525
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.07495667518989506
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0.

query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.060573333052081006
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.53355

query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 

d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.272349143141926
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.05604252193999883
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.05288055944292481
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.12599788633800194
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs

query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.37038004958195936
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.06305367875542216
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0. 0.]
cs 0.0
query_vector [0. 0. 0. ... 0. 0. 0.]
d [0. 0. 0. ... 0. 0.

In [156]:
from IPython.display import HTML, display
import tabulate


display(HTML(tabulate.tabulate(outtable_simple, tablefmt="html")))
display(HTML(tabulate.tabulate(outtable_cos, tablefmt="html")))

0,1,2
Query:,'Operating system',
doc_nr,doc,score
4161,"('papers/1098638.txt', 'The Cambridge CAP computer and its operating system (Operating and programming systems series)')",15.256365260645481
1315,"('papers/1268686.txt', 'Extending the operating system at the user level: the Ufo global file system')",10.21750655318922
1330,"('papers/168629.txt', 'The impact of operating system structure on memory system performance')",10.21750655318922
1516,"('papers/121911.txt', 'LOCUS operating system, a transparent system')",10.21750655318922
4976,"('papers/325090.txt', 'Pros and cons of operating system transactions for data base systems')",10.21750655318922
72,"('papers/822430.txt', 'Operating System Directions for the Next Millennium')",7.6281826303227405
84,"('papers/562353.txt', 'Operating System Concepts, 4th Ed.')",7.6281826303227405
110,"('papers/808449.txt', 'On attaining reliable software for a secure operating system')",7.6281826303227405


0,1,2
Query:,'Operating system',
doc_nr,doc,score
5740,"('papers/6074.txt', 'Distributed operating systems')",0.865963145630514
3252,"('papers/184674.txt', 'Distributed operating systems')",0.865963145630514
3989,"('papers/549055.txt', 'Distributed Operating Systems and Algorithms')",0.7159835959134231
2025,"('papers/504478.txt', 'Server operating systems')",0.6910173342475195
4050,"('papers/574894.txt', 'Operating Systems Theory')",0.6572814338161644
3685,"('papers/906270.txt', 'Correctness in operating systems')",0.6129987513476209
5734,"('papers/356680.txt', 'Fault Tolerant Operating Systems')",0.6108244460923834
5398,"('papers/21853.txt', 'Operating systems: design and implementation')",0.6098756273100501
